% 260s20Assignment.tex Likelihood Ratio Tests \documentclass[12pt]{article} %\usepackage{amsbsy} % for \boldsymbol and \pmb %\usepackage{graphicx} % To include pdf files! \usepackage{amsmath} \usepackage{amsbsy} \usepackage{amsfonts} \usepackage[colorlinks=true, pdfstartview=FitV, linkcolor=blue, citecolor=blue, urlcolor=blue]{hyperref} % For links \usepackage{comment} %\usepackage{fullpage} \oddsidemargin=0in % Good for US Letter paper \evensidemargin=0in \textwidth=6.3in \topmargin=-1in \headheight=0.2in \headsep=0.5in \textheight=9.4in %\pagestyle{empty} % No page numbers \begin{document} %\enlargethispage*{1000 pt} \begin{center} {\Large \textbf{STA 260s20 Assignment Eight: Hypothesis testing Part 2 -- Likelihood Ratio Tests}}\footnote{Copyright information is at the end of the last page.} %\vspace{1 mm} \end{center} \noindent The following homework problems are not to be handed in. They are preparation for the final exam. \textbf{Please try each question before looking at the solution}. Use the formula sheet. \begin{enumerate} \item For random sampling from two or more normal distributions with the same variance, the formula sheet has the $t$-test of $H_0:\mu_1=\mu_2$ and the $F$-test of $H_0: \mu_1 = \cdots \mu_k$. Show that if $k=2$, $F=T^2$. \item Let $X_1, \ldots, X_{n_1} \stackrel{i.i.d.}{\sim}$ Normal$(\mu_1,\sigma^2)$ and $Y_1, \ldots, Y_{n_2} \stackrel{i.i.d.}{\sim}$ Normal$(\mu_2,\sigma^2)$ with all the $X_i$ independent of all the $Y_i$. Show that the usual two-sample $t-$test of $H_0: \mu_1=\mu_2$ versus $H_1: \mu_1 \neq \mu_2$ is an exact likelihood ratio test. \item % I'm pretty sure it should be 2 and not 6 to be a density; Gamma(3) = 2!, not 3! Let $X_1, \ldots, X_n$ be a random sample from a distribution with density $f(x|\theta) = \frac{1}{2\theta^3} x^2 e^{-x/\theta} \, I(x>0)$, where the parameter $\theta>0$. \begin{enumerate} \item What is the distribution of the MLE $\widehat{\Theta}$? Show your work; use moment-generating functions. 
\item Find an exact size $\alpha$ chi-squared likelihood ratio test of $H_0: \theta \leq\theta_0$ versus $H_1: \theta > \theta_0$. Do not forget to show that the test is size $\alpha$. \end{enumerate} \item Let $X_1, \ldots, X_n$ be a random sample from a geometric distribution with parameter $\theta$. We seek to test $H_0: \theta = \frac{1}{2}$ versus $H_0: \theta \neq \frac{1}{2}$. A sample of size $n=50$ yields $\overline{x}=1.56$ and $s^2=4.17$. \begin{comment} > n = 50; theta=0.4 > x = rgeom(n,theta); xbar = mean(x); xbar [1] 1.56 > Gsq = 2*n*(xbar*log(xbar) - (1+xbar)*(log(1+xbar)-log(2))); Gsq [1] 6.174808 > var(x) [1] 4.169796 \end{comment} \begin{enumerate} \item Write a formula for the large-sample likelihood ratio test statistic. Simplify! \item Calculate $G^2_n$. What is the critical value at $\alpha = 0.05$? \item Do you reject $H_0$? Answer Yes or No. \item What do you conclude? Choose one: $\theta<\frac{1}{2}$ ~~~ $\theta = \frac{1}{2}$ ~~~ $\theta>\frac{1}{2}$ \end{enumerate} \item Let $X_1, \ldots, X_n$ be a random sample from a normal distribution with unknown expected value and unknown variance. \begin{enumerate} \item The null hypothesis is that the distribution is standard normal. Obtain and simplify a formula for the large-sample likelihood ratio test statistic $G^2_n$. \item A sample of size $n=200$ yields $\bar{x}_n = 0.062$ and $\widehat{\sigma}^2_n = 1.353$. \begin{comment} %set.seed(4444) n = 200; mu = 0; sigmasq = 1.25; sigma = sqrt(sigmasq) x = rnorm(n,mu,sigma) xbar = mean(x); sigsqhat = (n-1)/n * var(x) c(xbar,sigsqhat) Gsq = n * (xbar^2 + sigsqhat - log(sigsqhat) - 1); Gsq Gsq = n * (0.062^2 + 1.353 - log(1.353) - 1); Gsq \end{comment} \begin{enumerate} \item Calculate $G^2_n$. % 10.904 \item What are the degrees of freedom? \item What is the critical value at significance level $\alpha = 0.05$? % 5.99 \item Do you reject $H_0$? Answer Yes or No. \item Is it possible to draw a directional conclusion here? 
\end{enumerate} \end{enumerate} \pagebreak \item Dead pixels are a big problem in manufacturing computer and cell phone screens. The physics of the manufacturing process dictates that dead pixels happen according to a spatial Poisson process, so that the numbers of dead pixels in cell phone screens are independent Poisson random variables with parameter $\lambda$, the expected number of dead pixels. Naturally, $\lambda$ depends on details of how the screens are manufactured. In an effort to reduce the expected number of dead pixels, six assembly lines were set up, each with a different version of the manufacturing process. A random sample of 50 phones was taken from each assembly line and sent to the lab for testing. Mysteriously, three phones from one assembly line disappeared in transit, and 15 phones from another assembly line disappeared. Sample sizes and sample mean numbers of dead pixels appear in the table below. \begin{verbatim} Manufacturing Process 1 2 3 4 5 6 ----------------------------------------- ybar 10.68 9.87234 9.56 8.52 10.48571 9.98 n 50 47 50 50 35 50 ----------------------------------------- \end{verbatim} \begin{enumerate} \item What is the parameter space $\Omega$? \item We want to know whether the expected number of dead pixels is different for the six manufacturing processes. What is the null hypothesis, in symbols? Use Greek letters. \item What is the alternative hypothesis? \item What is $\Omega_0$? \item What is $\Omega_1$? \item What is $\widehat{\theta}$? The answer is numerical. Note that it is a point in the parameter space. \item What is $\widehat{\theta}_0$? The answer is numerical. Like $\widehat{\theta}$, it is a point in the parameter space. \item Give a formula for $G^2$. Keep simplifying! \item Calculate $G^2$ for the data above. \item What are the degrees of freedom? \item What is the critical value at $\alpha = 0.05$? \item Do you reject $H_0$? Answer Yes or No. \item Is a directional conclusion possible here? 
Answer Yes or No. (In practice we would follow up with tests comparing all $\binom{7}{2}$ pairs of means.) \end{enumerate} \end{enumerate} % End of all the questions %\vspace{90mm} \vspace{3mm} \hrule %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \vspace{3mm} \noindent This assignment was prepared by \href{http://www.utstat.toronto.edu/~brunner}{Jerry Brunner}, Department of Mathematical and Computational Sciences, University of Toronto. It is licensed under a \href{http://creativecommons.org/licenses/by-sa/3.0/deed.en_US} {Creative Commons Attribution - ShareAlike 3.0 Unported License}. Use any part of it as you like and share the result freely. The \LaTeX~source code is available from the course website: \begin{center} \href{http://www.utstat.toronto.edu/~brunner/oldclass/260s20} {\small\texttt{http://www.utstat.toronto.edu/$^\sim$brunner/oldclass/260s20}} \end{center} \end{document} # Simulate Poisson distributed dead pixels. rm(list=ls()); set.seed(9999) n = c(50,47,50,50,35,50); p = length(n); N = sum(n) lambda = c(10,10,10,9,10,10) ybar = numeric(p) for(j in 1:p) ybar[j] = mean(rpois(n[j],lambda[j])) rbind(ybar,n) # Likelihood ratio test YBAR = sum(n*ybar)/N; YBAR G2 = 2 * ( sum(n*ybar*log(ybar)) - N*YBAR*log(YBAR) ); G2 pval = 1 - pchisq(G2,p-1); pval # G^2 = 14.70682, p = 0.01169133 # Wald test LL = rbind(c(1,-1, 0, 0, 0, 0), c(0, 1,-1, 0, 0, 0), c(0, 0, 1,-1, 0, 0), c(0, 0, 0, 1,-1, 0), c(0, 0, 0, 0, 1,-1) ) Vhat = diag(ybar/n) source("http://www.utstat.utoronto.ca/~brunner/Rfunctions/Wtest.txt") Wtest(LL,ybar,Vhat) # W df p-value # 15.255657486 5.000000000 0.009324025 # Bonferroni multiple comparisons testmatrix = diag(1,6) # Start with a 6x6 identity matrix. 
# Bonferroni multiple comparisons: for every pair (i, j) of the p = 6
# assembly-line means, do a 1-df Wald test of H0: lambda_i = lambda_j.
# Fills testmatrix with the chi-squared statistics in the upper triangle
# and the Bonferroni-corrected p-values in the lower triangle.
for(i in 1:(p-1)) {
  for(j in (i+1):p) {
    # Contrast (row) vector selecting mean i minus mean j
    LL = rbind(c(0,0,0,0,0,0))
    LL[i]=1; LL[j]=-1
    print(LL) # Just to check
    # Wtest returns (statistic, df, p-value) -- see the printed header
    # "W df p-value" from the earlier six-group Wald test.
    W = Wtest(L=LL,Tn=ybar,Vn=Vhat)
    # Statistic W[1] above the diagonal; p-value W[3] multiplied by the
    # number of pairwise tests, p*(p-1)/2 = 15, and capped at 1
    # (Bonferroni correction), below the diagonal.
    testmatrix[i,j] = W[1]; testmatrix[j,i]=min(W[3]*p*(p-1)/2,1)
    # Putting corrected p-values in lower diagonal
  } # Next j
} # Next i
# Test statistics (chisq with 1 df) are in the upper triangle,
# Bonferroni-corrected p-values in lower
round(testmatrix,4)