Problem 6b

Let’s say I took 7 of each STA, MAT, and CSC. Let’s write a function to generate a dataset according to the model.

gen_grades <- function(){
  mu_alpha = 80
  sigma_alpha = 5
  sigma_grade = 8
  
  alpha_STA <- rnorm(1, mu_alpha, sigma_alpha)
  alpha_MAT <- rnorm(1, mu_alpha, sigma_alpha)
  alpha_CSC <- rnorm(1, mu_alpha, sigma_alpha)

  grades <- data.frame(
    subj = c(rep("STA", 7), rep("MAT", 7), rep("CSC", 7)),
    grades = c(rnorm(7, alpha_STA, sigma_grade),  rnorm(7, alpha_MAT, sigma_grade),  rnorm(7, alpha_CSC, sigma_grade)))
  
  return(grades)
}

Now, let’s augment it in order to find the differences between the estimates and the actual values.

gen_sserr <- function(){
  mu_alpha = 80
  sigma_alpha = 5
  sigma_grade = 8
  
  alpha_STA <- rnorm(1, mu_alpha, sigma_alpha)
  alpha_MAT <- rnorm(1, mu_alpha, sigma_alpha)
  alpha_CSC <- rnorm(1, mu_alpha, sigma_alpha)

  true_coef <- c(alpha_STA, alpha_MAT, alpha_CSC)
  
  grades <- data.frame(
    subj = c(rep("STA", 7), rep("MAT", 7), rep("CSC", 7)),
    grades = c(rnorm(7, alpha_STA, sigma_grade),  rnorm(7, alpha_MAT, sigma_grade),  rnorm(7, alpha_CSC, sigma_grade)))
  
  
  complete.pool <- mean(grades$grades)
  ssq_complete <- sum( (true_coef-complete.pool)^2)
  
  no.pool.fit.coef <- lm(grades~0+subj, data=grades)$coefficients
  no.pool <- c(no.pool.fit.coef["subjSTA"], 
               no.pool.fit.coef["subjMAT"],
               no.pool.fit.coef["subjCSC"]
               )
  ssq_no <- sum((true_coef-no.pool)^2)
  
  part.fit <- lmer(grades~(1|subj), data=grades)
  part.pool.fit.coef <- ranef(part.fit)$subj
  part.pool <- c(part.pool.fit.coef["STA", "(Intercept)"],
                part.pool.fit.coef["MAT", "(Intercept)"],
                part.pool.fit.coef["CSC", "(Intercept)"])  + fixef(part.fit)
  
  ssq_partial <- sum( (true_coef-part.pool)^2)
  
  return(c(ssq_no, ssq_complete, ssq_partial))
}

rowMeans(replicate(50, gen_sserr()))
## [1] 26.60372 63.90123 24.72783