# These functions find maximum likelihood estimates for the model # # a_i ~ N(mu,sigma_a^2), b_j ~ N(mu,sigma_b^2) # # where the a_i and b_j are independent observations. # # The estimates are found by Newton-Raphson iteration, the method of # scoring, and the non-linear Gauss-Seidel method (aka alternating # maximization). # # Sufficient statistics for the data are the sums of the a_i and the b_i, # and the sums of their squares. These are used to reduce computation # time when there are many observations. # # All the functions require a starting value for mu. Suggestions are # the mean of the a_i, the mean of the b_i, and the overall mean. Starting # values for sigma_a and sigma_b to go with the starting value for mu are # chosen automatically (as the maximum likelhood values for that mu). # # The sigma_a^2 and sigma_b^2 parameters are transformed to the parameters # gamma.a = log(sigma_a^2) and gamma.b = log(sigma_b^2). It is in terms # of these parameters that the covariance matrix for the estimate is # produced. # # Convergence is diagnosed with respect to an absolute tolerance of 1e-12. # A maximum of 100 iterations is performed in any case. # The sufficient statistics are kept in a list with the following fields: # # n.a Number of a observations # n.b Number of b observations # sum.a Sum of the a observations # sum.b Sum of the b observations # sum.a2 Sum of the squares of the a observations # sum.b2 Sum of the squares of the b observations # # Note: This choice of sufficient statistics could cause problems if # the mean is far from zero, due to loss of precision in sum.a2 and sum.b2. # # The parameters are sometimes kept as a vector with three elements, and # sometimes in separate variables, as follows: # # mu Mean for both a and b observations # gamma.a Log of the variance for the a observations # gamma.b Log of the variance for the b observations # COMPUTE THE SUFFICIENT STATISTICS FROM THE OBSERVATIONS. sufficient.statistics <- function(a,b) { ss <- list() ss$n.a <- length(a) ss$n.b <- length(b) ss$sum.a <- sum(a) ss$sum.b <- sum(b) ss$sum.a2 <- sum(a^2) ss$sum.b2 <- sum(b^2) ss } # CALCULATE THE LOG LIKELIHOOD, FROM THE SUFFICIENT STATISTICS. log.likelihood <- function(ss,mu,gamma.a,gamma.b) { - ( (ss$n.a/2)*(log(2*pi)+gamma.a) + (ss$sum.a2 - 2*ss$sum.a*mu + ss$n.a*mu^2) / (2*exp(gamma.a)) + (ss$n.b/2)*(log(2*pi)+gamma.b) + (ss$sum.b2 - 2*ss$sum.b*mu + ss$n.b*mu^2) / (2*exp(gamma.b)) ) } # CALCULATE THE GRADIENT OF THE LOG LIKELIHOOD, FROM THE SUFFICIENT STATISTICS. gradient <- function(ss,mu,gamma.a,gamma.b) { c( (ss$sum.a-ss$n.a*mu)/exp(gamma.a) + (ss$sum.b-ss$n.b*mu)/exp(gamma.b), -ss$n.a/2 + exp(-gamma.a)*(ss$sum.a2 - 2*ss$sum.a*mu + ss$n.a*mu^2)/2, -ss$n.b/2 + exp(-gamma.b)*(ss$sum.b2 - 2*ss$sum.b*mu + ss$n.b*mu^2)/2 ) } # CALCULATE THE HESSIAN OF THE LOG LIKELIHOOD, FROM THE SUFFICIENT STATISTICS. hessian <- function(ss,mu,gamma.a,gamma.b) { H <- matrix(0,3,3) H[1,1] <- - ss$n.a*exp(-gamma.a) - ss$n.b*exp(-gamma.b) H[2,2] <- - exp(-gamma.a)*(ss$sum.a2 - 2*ss$sum.a*mu + ss$n.a*mu^2)/2 H[3,3] <- - exp(-gamma.b)*(ss$sum.b2 - 2*ss$sum.b*mu + ss$n.b*mu^2)/2 H[1,2] <- H[2,1] <- - exp(-gamma.a)*(ss$sum.a-ss$n.a*mu) H[1,3] <- H[3,1] <- - exp(-gamma.b)*(ss$sum.b-ss$n.b*mu) H } # FIND MLE BY THE GAUSS-SEIDEL METHOD. The result is a list of values for # mu, gamma.a, and gamma.a, along with the log likelihood, the covariance # matrix for the estimates (found from the observed information), and the # number of iterations required for convergence. mle.gauss.seidel <- function (a,b,mu,debug=F) { ss <- sufficient.statistics(a,b) gamma.a <- log ((ss$sum.a2 - 2*ss$sum.a*mu + ss$n.a*mu^2) / ss$n.a) gamma.b <- log ((ss$sum.b2 - 2*ss$sum.b*mu + ss$n.b*mu^2) / ss$n.b) converged <- F n <- 0 while (!converged && n<100) { if (debug) cat(n,mu,gamma.a,gamma.b,"\n") old <- c(mu,gamma.a,gamma.b) mu <- (ss$sum.a/exp(gamma.a) + ss$sum.b/exp(gamma.b)) / (ss$n.a/exp(gamma.a) + ss$n.b/exp(gamma.b)) gamma.a <- log ((ss$sum.a2 - 2*ss$sum.a*mu + ss$n.a*mu^2) / ss$n.a) gamma.b <- log ((ss$sum.b2 - 2*ss$sum.b*mu + ss$n.b*mu^2) / ss$n.b) new <- c(mu,gamma.a,gamma.b) n <- n + 1 converged <- all (abs(old-new) < 1e-12) } if (!converged) cat("WARNING: Convergence not observed\n") list (mu=mu, gamma.a=gamma.a, gamma.b=gamma.b, log.likelihood = log.likelihood(ss,mu,gamma.a,gamma.b), cov = solve(-hessian(ss,mu,gamma.a,gamma.b)), n.iterations=n) } # FIND MLE BY THE NEWTON-RAPHSON METHOD. The result is a list of values for # mu, gamma.a, and gamma.a, along with the log likelihood, the covariance # matrix for the estimates (found from the observed information), and the # number of iterations required for convergence. mle.newton.raphson <- function (a,b,mu,debug=F) { ss <- sufficient.statistics(a,b) gamma.a <- log ((ss$sum.a2 - 2*ss$sum.a*mu + ss$n.a*mu^2) / ss$n.a) gamma.b <- log ((ss$sum.b2 - 2*ss$sum.b*mu + ss$n.b*mu^2) / ss$n.b) converged <- F n <- 0 while (!converged && n<100) { if (debug) cat(n,mu,gamma.a,gamma.b,"\n") old <- c(mu,gamma.a,gamma.b) g <- gradient(ss,mu,gamma.a,gamma.b) H <- hessian(ss,mu,gamma.a,gamma.b) new <- old - solve(H,g) mu <- new[1] gamma.a <- new[2] gamma.b <- new[3] n <- n + 1 converged <- all (abs(old-new) < 1e-12) } if (!converged) cat("WARNING: Convergence not observed\n") list (mu=mu, gamma.a=gamma.a, gamma.b=gamma.b, log.likelihood = log.likelihood(ss,mu,gamma.a,gamma.b), cov = solve(-hessian(ss,mu,gamma.a,gamma.b)), n.iterations=n) } # FIND MLE BY THE METHOD OF SCORING. The result is a list of values for # mu, gamma.a, and gamma.a, along with the log likelihood, the covariance # matrix for the estimates (found from the expected information), and the # number of iterations required for convergence. mle.method.of.scoring <- function (a,b,mu,debug=F) { ss <- sufficient.statistics(a,b) gamma.a <- log ((ss$sum.a2 - 2*ss$sum.a*mu + ss$n.a*mu^2) / ss$n.a) gamma.b <- log ((ss$sum.b2 - 2*ss$sum.b*mu + ss$n.b*mu^2) / ss$n.b) converged <- F n <- 0 while (!converged && n<100) { if (debug) cat(n,mu,gamma.a,gamma.b,"\n") old <- c(mu,gamma.a,gamma.b) omu <- mu mu <- (ss$sum.a/exp(gamma.a) + ss$sum.b/exp(gamma.b)) / (ss$n.a/exp(gamma.a) + ss$n.b/exp(gamma.b)) gamma.a <- gamma.a - 1 + exp(-gamma.a)*(ss$sum.a2 - 2*ss$sum.a*omu + ss$n.a*omu^2)/ss$n.a gamma.b <- gamma.b - 1 + exp(-gamma.b)*(ss$sum.b2 - 2*ss$sum.b*omu + ss$n.b*omu^2)/ss$n.b new <- c(mu,gamma.a,gamma.b) n <- n + 1 converged <- all (abs(old-new) < 1e-12) } if (!converged) cat("WARNING: Convergence not observed\n") list (mu=mu, gamma.a=gamma.a, gamma.b=gamma.b, log.likelihood = log.likelihood(ss,mu,gamma.a,gamma.b), cov = diag (c(1 / (ss$n.a/exp(gamma.a) + ss$n.b/exp(gamma.b)), 2/ss$n.a, 2/ss$n.b)), n.iterations=n) }