Let’s say I took 7 of each STA, MAT, and CSC. Let’s write a function to generate a dataset according to the model.
gen_grades <- function(){
mu_alpha = 80
sigma_alpha = 5
sigma_grade = 8
alpha_STA <- rnorm(1, mu_alpha, sigma_alpha)
alpha_MAT <- rnorm(1, mu_alpha, sigma_alpha)
alpha_CSC <- rnorm(1, mu_alpha, sigma_alpha)
grades <- data.frame(
subj = c(rep("STA", 7), rep("MAT", 7), rep("CSC", 7)),
grades = c(rnorm(7, alpha_STA, sigma_grade), rnorm(7, alpha_MAT, sigma_grade), rnorm(7, alpha_CSC, sigma_grade)))
return(grades)
}
Now, let’s augment it in order to find the differences between the estimates and the actual values.
gen_sserr <- function(){
mu_alpha = 80
sigma_alpha = 5
sigma_grade = 8
alpha_STA <- rnorm(1, mu_alpha, sigma_alpha)
alpha_MAT <- rnorm(1, mu_alpha, sigma_alpha)
alpha_CSC <- rnorm(1, mu_alpha, sigma_alpha)
true_coef <- c(alpha_STA, alpha_MAT, alpha_CSC)
grades <- data.frame(
subj = c(rep("STA", 7), rep("MAT", 7), rep("CSC", 7)),
grades = c(rnorm(7, alpha_STA, sigma_grade), rnorm(7, alpha_MAT, sigma_grade), rnorm(7, alpha_CSC, sigma_grade)))
complete.pool <- mean(grades$grades)
ssq_complete <- sum( (true_coef-complete.pool)^2)
no.pool.fit.coef <- lm(grades~0+subj, data=grades)$coefficients
no.pool <- c(no.pool.fit.coef["subjSTA"],
no.pool.fit.coef["subjMAT"],
no.pool.fit.coef["subjCSC"]
)
ssq_no <- sum((true_coef-no.pool)^2)
part.fit <- lmer(grades~(1|subj), data=grades)
part.pool.fit.coef <- ranef(part.fit)$subj
part.pool <- c(part.pool.fit.coef["STA", "(Intercept)"],
part.pool.fit.coef["MAT", "(Intercept)"],
part.pool.fit.coef["CSC", "(Intercept)"]) + fixef(part.fit)
ssq_partial <- sum( (true_coef-part.pool)^2)
return(c(ssq_no, ssq_complete, ssq_partial))
}
rowMeans(replicate(50, gen_sserr()))
## [1] 26.60372 63.90123 24.72783