% 260s20Assignment.tex     Frequentist estimation
\documentclass[12pt]{article} 
%\usepackage{amsbsy} % for \boldsymbol and \pmb 
%\usepackage{graphicx} % To include pdf files!
\usepackage{amsmath}
\usepackage{amsbsy}
\usepackage{amsfonts}
\usepackage[colorlinks=true, pdfstartview=FitV, linkcolor=blue, citecolor=blue, urlcolor=blue]{hyperref} % For links
\usepackage{comment}
%\usepackage{fullpage}
\oddsidemargin=0in                  % Good for US Letter paper
\evensidemargin=0in
\textwidth=6.3in
\topmargin=-1in
\headheight=0.2in
\headsep=0.5in
\textheight=9.4in

%\pagestyle{empty} % No page numbers

\begin{document}
%\enlargethispage*{1000 pt} 

\begin{center}   
{\Large \textbf{STA 260s20 Assignment Five: Method of Moments, Least Squares and Maximum Likelihood}}\footnote{Copyright information is at the end of the last page.}
%\vspace{1 mm}
\end{center}

\noindent
These homework problems are not to be handed in.  They are preparation for Quiz 5 (Week of Feb.~24) and Term Test 2. \textbf{Please try each question before looking at the solution}.

In preparation for this assignment, you can actually look at the text, starting with Chapter 6. Read pages 297-302. You can skip Section 6.1.1 (bottom of page 302 to page 308. Later in the course, we will use a different and more traditional definition of a sufficient statistic. Read Section 6.2 (bottom of page 308 to page 317).
 
I have noticed that a major obstacle for many students when doing maximum likelihood calculations is a set of basic mathematical operations they actually know. But the mechanics are rusty, or the notation used in Statistics is troublesome. So, with sincere apologies to those who don't need this, here are some basic rules. 
\begin{itemize}
     \item The distributive law: $a(b+c)=ab+ac$. You may see this in a form like
            \begin{displaymath}
            \theta \sum_{i=1}^n x_i = \sum_{i=1}^n \theta x_i
            \end{displaymath}
     \item Power of a product is the product of powers: $(ab)^c = a^c \, b^c$. You may see this in a form like
            \begin{displaymath}
            \left(\prod_{i=1}^n x_i\right)^\alpha =  \prod_{i=1}^n x_i^\alpha
            \end{displaymath}
     \item Multiplication is addition of exponents: $a^b a^c = a^{b+c}$. You may see this in a form like
            \begin{displaymath}
            \prod_{i=1}^n \theta e^{-\theta x_i} = 
            \theta^n \exp(-\theta \sum_{i=1}^n x_i)
            \end{displaymath}
     \item Powering is multiplication of exponents: $(a^b)^c = a^{bc}$. You may see this in a form like
            \begin{displaymath}
            (e^{\mu t + \frac{1}{2}\sigma^2 t^2})^n = 
            e^{n\mu t + \frac{1}{2}n\sigma^2 t^2}
            \end{displaymath}
     \item Log of a product is sum of logs: $\ln(ab) = \ln(a)+\ln(b)$. You may see this in a form like
            \begin{displaymath}
            \ln \prod_{i=1}^n x_i = \sum_{i=1}^n \ln x_i
            \end{displaymath}
     \item Log of a power is the exponent times the log: $\ln(a^b)=b\,\ln(a)$. You may see this in a form like
            \begin{displaymath}
            \ln(\theta^n) = n \ln \theta
            \end{displaymath}
     \item The log is the inverse of the exponential function: $\ln(e^a) = a$. You may see this in a form like
            \begin{displaymath}
            \ln\left(  \theta^n \exp(-\theta \sum_{i=1}^n x_i) \right) =
            n \ln \theta - \theta \sum_{i=1}^n x_i
            \end{displaymath}
\end{itemize}

\pagebreak


%\vspace{5mm}
\begin{enumerate} 


%%%%%%%%% Max Likelihood by hand - Review (See openSEM text for exposition) %%%%%%%%%%%%%%

\item Choose the correct answer.
    \begin{enumerate}
    \item $\prod_{i=1}^n e^{x_i}=$
        \begin{enumerate}
            \item $\exp(\prod_{i=1}^n x_i)$
            \item $e^{nx_i}$
            \item $\exp(\sum_{i=1}^n x_i)$  %
        \end{enumerate}

    \item $\prod_{i=1}^n \lambda e^{-\lambda x_i}=$
        \begin{enumerate}
            \item $\lambda e^{-\lambda^n x_i}$
            \item $\lambda^n e^{-\lambda n x_i}$
            \item $\lambda^n \exp(-\lambda \sum_{i=1}^n x_i)$  %
            \item $\lambda^n \exp(-n\lambda \sum_{i=1}^n x_i)$
            \item $\lambda^n \exp(-\lambda^n \sum_{i=1}^n x_i)$
        \end{enumerate}

    \item $\prod_{i=1}^n a_i^b=$
        \begin{enumerate}
            \item $n a_i^b$
            \item $a_i^{nb}$
            \item $(\prod_{i=1}^n a_i)^b$  %
        \end{enumerate}

    \item $\prod_{i=1}^n a^{b_i}=$
        \begin{enumerate}
            \item $n a^{b_i}$
            \item $a^{n b_i}$
            \item $\sum_{i=1}^n a^{b_i}$
            \item {\Large$a^{\prod_{i=1}^n b_i}$} 
            \item {\Large$a^{\sum_{i=1}^n b_i}$}  %
        \end{enumerate}

    \item $\left( e^{\lambda(e^t-1)} \right)^n = $
        \begin{enumerate}
            \item $n e^{\lambda(e^t-1)}$
            \item $e^{n\lambda(e^t-1)}$  %
            \item $e^{\lambda(e^{nt}-1)}$
            \item $e^{n\lambda(e^{t}-n)}$
        \end{enumerate}

    \item $\left(\prod_{i=1}^n e^{-\lambda x_i}\right)^2=$
        \begin{enumerate}
            \item $e^{-2n\lambda x_i}$
            \item $e^{-2\lambda \sum_{i=1}^n x_i}$  %
            \item $2e^{-\lambda \sum_{i=1}^n x_i}$
        \end{enumerate}
    \end{enumerate}

\pagebreak

    \item True, or False?
        \begin{enumerate}
            \item $\sum_{i=1}^n \frac{1}{x_i} =  \frac{1}{\sum_{i=1}^n x_i}$  %  F
            \item $\prod_{i=1}^n \frac{1}{x_i} =  \frac{1}{\prod_{i=1}^n x_i}$  %  T
            \item $\frac{a}{b+c}=\frac{a}{b}+\frac{a}{c}$  %  F
            \item $\ln(a+b) = \ln(a) + \ln(b)$  %  F
            \item $e^{a+b} = e^a + e^b$  %  F
            \item $e^{a+b} = e^a  e^b$  %  T
            \item $e^{ab} = e^a  e^b$  %  F
            \item $\prod_{i=1}^n (x_i+y_i) = \prod_{i=1}^n x_i +  \prod_{i=1}^n y_i$  %  F
            \item $\ln (\prod_{i=1}^n a_i^b) = b \sum_{i=1}^n \ln(a_i)$  %  T
            \item $\sum_{i=1}^n \prod_{j=1}^n a_j = n \prod_{j=1}^n a_j$  %  T
            \item $\sum_{i=1}^n \prod_{j=1}^n a_i = \sum_{i=1}^n a_i^n$  %  T
            \item $\sum_{i=1}^n \prod_{j=1}^n a_{i,j} = 
                   \prod_{j=1}^n \sum_{i=1}^n a_{i,j}$  %  F
        \end{enumerate}

    \item Simplify as much as possible.
        \begin{enumerate}
            \item  $\ln \prod_{i=1}^n \theta^{x_i} (1-\theta)^{1-{x_i}}$
            \item  $\ln \prod_{i=1}^n \binom{m}{{x_i}} \theta^{x_i} (1-\theta)^{m-x_i}$
            \item  $\ln \prod_{i=1}^n \frac{e^{-\lambda}\lambda^{x_i}}{x_i!}$
            \item  $\ln \prod_{i=1}^n \theta (1-\theta)^{x_i-1}$
            \item  $\ln \prod_{i=1}^n \frac{1}{\theta} e^{-x_i/\theta}$
            \item  $\ln \prod_{i=1}^n \frac{1}{\beta^\alpha \Gamma(\alpha)} 
                    e^{-x_i/\beta} x_i^{\alpha - 1}$
            \item  $\ln \prod_{i=1}^n \frac{1}{2^{\nu/2}\Gamma(\nu/2)}
                    e^{-x_i/2} x_i^{\nu/2 - 1}$
            \item  $\ln \prod_{i=1}^n \frac{1}{\sigma \sqrt{2\pi}}
                    e^{-\frac{(x_i-\mu)^2}{2 \sigma^2}}$
            \item  $\prod_{i=1}^n \frac{1}{\beta-\alpha} 
                    I(\alpha \leq x_i \leq \beta)$ (Express in terms of the minimum and maximum $y_1$ and $y_n$.)
        \end{enumerate} 

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% End of math review %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\item Let $X_1, \ldots, X_n$ be a random sample (that is, independent and identically distributed) from a Poisson distribution with parameter $\lambda>0$. The sample mean for a sample of $n=49$ is $\overline{x} = 4.2$.
    \begin{enumerate}
        \item Derive a formula for $\widehat{\lambda}$, the maximum likelihood estimate of $\lambda$.
        \item Carry out the second derivative test.
        \item Give a point estimate of $\lambda$. Your answer is a number.
        \item Give a 95\% confidence interval for $\lambda$. The answer is a pair of numbers. My lower confidence limit is 3.63.
    \end{enumerate} % End of Poisson.


\item Do Exercises 6.2.2 and 6.2.3 in the text. % Bernoulli and invariance.

\item 
    \begin{enumerate}
        \item Do Exercise 6.2.5 in the text. Note that $\alpha_0$ is known. % Gamma(alpha_0,lambda)
        \item Suppose $\alpha_0 = 5$. Calculate your estimate for the following data. The answer is a number. Circle your answer. \texttt{Data: 6.51 3.09 2.87 1.35} % 5/mean(c(6.51, 3.09, 2.87, 1.35)) = 1.447
    \end{enumerate}

\item Let $X_1, \ldots, X_n$ be a random sample from a distribution with probability mass function 
 $p(x|\theta)=\theta(1-\theta)^{x-1}I(x=1, 2, \ldots)$, where $0<\theta<1$. % Geometric starting at one.
    \begin{enumerate}
        \item Derive a general expression for the Maximum Likelihood Estimator (MLE). Carry out the second derivative test to make sure you really have a maximum. Circle your final answer.
        \item Use these data to calculate a numerical estimate: \texttt{5,  1,  2,  1,  2,  4,  3, 17,  4,  1,  5,  4,  7, 17,  1,  1,  2,  2,  7, 11}. Answer: 0.2061856   
    \end{enumerate}

\item Let $X_1, \ldots, X_n$ be a random sample from a distribution with density 
$f(x|\theta) = \frac{1}{\theta} e^{-x/\theta}I(x>0)$, where $\theta>0$.
    \begin{enumerate}
        \item Derive a general expression for the Maximum Likelihood Estimator (MLE). Carry out the second derivative test to make sure you really have a maximum. Circle your final answer.
        \item Use these data to calculate a numerical estimate: \texttt{0.28, 1.72, 0.08, 1.22, 1.86, 0.62, 2.44, 2.48, 2.96} Answer: 1.517778  % Exponential, true theta=2, thetahat = xbar 
    \end{enumerate}

% Shifted exponential(theta=6) 
\item Let $X_1, \ldots, X_n$ be a random sample from a distribution with density 
$f(x|\theta) = 2e^{-2(x-\theta)}I(x \geq \theta)$.
    \begin{enumerate}
        \item Derive a general expression for a Method of Moments Estimator (MOM).
        \item Derive a general expression for the Maximum Likelihood Estimator (MLE).
        \item Is the MLE unbiased? Do the calculation and answer Yes or No.
        \item Is the MLE consistent? Do the calculation and answer Yes or No. 
        \item Calculate both your estimates for the following data: \texttt{ 8.51, 6.11, 6.25, 6.13, 8.43, 6.34, 6.49}. % mean(x2) = 6.894286, min(x2) = 6.11
    \end{enumerate}

\item Let $X_1, \ldots, X_n$ be a random sample from Normal$(\mu,\sigma^2)$ distribution, with both parameters unknown. Find the Maximum Likelihood Estimators. Make sure you prove that the likelihood function really has a maximum at that point.

\item Let $X_1, \ldots, X_n$ be a random sample from a Uniform$(L,R)$ distribution. Find the Maximum Likelihood Estimators of $L$ and $R$.

\item Do Exercise 6.2.11. Delete the final words ``in terms of the likelihood function."

\item Do Exercises 6.2.12,            % N(mu_0,sigma^2) 
                  6.2.14              % Evaluate at local maxima
and               6.2.16 in the text. % Not P(\theta=2)

\pagebreak %%%%%%%%%%%%%%%%%%%%%%%% Least Squares %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

{\small
\item Let $X_1, \ldots, X_n$ be independent random variables, with $X_i \sim$ Binomial$(m_i,\theta)$. The unknown parameter is $\theta$, while $m_1, \ldots, m_n$ are fixed, observable constants. They are all integers greater than or equal to one.
    \begin{enumerate}
        \item Derive the least-squares estimator of $\theta$. Show your work. The final answer is a formula.
        \item Is the least-squares estimator unbiased? Answer Yes or No and show your work.
        \item Derive the Maximum Likelihood Estimator of $\theta$. Show your work; don't forget the second derivative test. The final answer is a formula.
        \item Is the Maximum Likelihood Estimator unbiased? Answer Yes or No and show your work.
        \item Show that the Maximum Likelihood Estimator is consistent. Use the fact that $m_i \geq 1$ and squeeze.
        \item Calculate both estimates for the following data: \\
\begin{tabular}{c | rrrrrrrrrr}
$m$ &  10  & 13  &  5  & 18  & 17  &  6 &  14  &  5 &  10  &  14  \\ \hline  
$x$ &   4  &  7  &  0  & 14  &  6  &  4 &   5  &  4 &   3  &   4  \\
\end{tabular}
    \end{enumerate}
} % End size


\item \label{simplereg} For $i=1, \ldots, n$, let $Y_i = \beta x_i + E_i$, where
    \begin{itemize}
        \item[] $x_1, \ldots, x_n$ are fixed, observable constants like drug doses.
        \item[] $E_1, \ldots, E_n$ are independent random variables with expected value zero and variance $\sigma^2$.
        \item[] $\beta$ and $\sigma^2$ are unknown constants (parameters).
    \end{itemize}

    \begin{enumerate}
        \item What is $E(Y_i)$?
        \item What is $Var(Y_i)$?
        \item Derive a formula for the least squares estimate of $\beta$.
        \item Is this least squares estimate unbiased? Answer Yes or No and show your work.
        \item Assuming that $\lim_{n \rightarrow\infty} \frac{1}{\sum_{i=1}^n x_i^2} = 0$, show that $\widehat{\beta}_n$ is consistent.
        \item \label{regdata} Calculate the least squares estimate of $\beta$ for the following data:
\begin{tabular}{l | ccccc}
$x$ &  1  &  6  &  3  &  7  &  2  \\ \hline
$y$ &  6  & 37  & 12  & 52  &  4  \\
\end{tabular}
        \item Now assume that $E_i \sim$ Normal$(0,\sigma^2)$; this is almost universal in statistics courses, software and applications. 
                \begin{enumerate}
                    \item What is the distribution of $Y_i$? You should be able to just write down the answer.
                    \item Find the MLEs of $\beta$ and $\sigma^2$. Show your work.
                    \item Using the fact that linear combinations of independent normals are normal, what is the distribution of $\widehat{\beta}$, including the parameters?
                    \item Assume that $\sigma^2 = \sigma^2_0$ is fixed and known. Derive a $(1-\alpha)100\%$ confidence interval for $\beta$. (In reality, $\sigma^2$ is never known and you would use the $t$ distribution, but that's a longer story.)
                    \item Assuming $\sigma^2=4$, calculate your confidence interval for the data of Question~(\ref{regdata}). The answer is two numbers, a lower confidence limit and an upper confidence limit. 
                \end{enumerate}
    \end{enumerate}

\pagebreak %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\item Consider the model of Question \ref{simplereg}, except that now there is an unknown intrcept:  $Y_i = \beta_0 + \beta_1 x_i + E_i$. The unknown parameters are $\beta_0$, $\beta_1$ and $\sigma^2$.  
    \begin{enumerate}
        \item Obtain the least-squares estimates of  $\beta_0$ and $\beta_1$. Show your work. Your answer is two formulas.
        \item Suppose $E_i \sim N(0,\sigma^2)$. Show that the MLEs of  $\beta_0$ and $\beta_1$ are the same as the least-squares estimates. 
    \end{enumerate}




\end{enumerate} % End of all the questions

% \vspace{90mm}

\vspace{3mm}
\hrule %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\vspace{3mm}

\noindent
This assignment was prepared by  \href{http://www.utstat.toronto.edu/~brunner}{Jerry Brunner},
Department of Mathematical and Computational Sciences, University of Toronto. It is licensed under a 
\href{http://creativecommons.org/licenses/by-sa/3.0/deed.en_US}
     {Creative Commons Attribution - ShareAlike 3.0 Unported License}. Use any part of it as you like and share the result freely. The \LaTeX~source code is available from the course website: 
     
\begin{center}
\href{http://www.utstat.toronto.edu/~brunner/oldclass/260s20} {\small\texttt{http://www.utstat.toronto.edu/$^\sim$brunner/oldclass/260s20}}
\end{center}

\end{document}

\begin{verbatim}
# LS
x = rpois(5,10); sort(x)
x = x-5
m = x^2
eps = rpois(5,2)
y = m+eps
cbind(x,m,y); plot(x,y)
rbind(x,y)
xsq = x^2; xy = x*y
cbind(x,y,xsq,xy)
    x  y xsq  xy
[1,] 1  6   1   6
[2,] 6 37  36 222
[3,] 3 12   9  36
[4,] 7 52  49 364
[5,] 2  4   4   8
lm(y ~ 0 + x)
Coefficients:
    x  
6.424  

# More detail

> x = c(1  ,  6  ,  3  ,  7  ,  2)
> y = c(6  , 37  , 12  , 52  ,  4)
> xsq = x^2
> xy = x*y
> d = cbind(x,xsq,y,xy)
> d
     x xsq  y  xy
[1,] 1   1  6   6
[2,] 6  36 37 222
[3,] 3   9 12  36
[4,] 7  49 52 364
[5,] 2   4  4   8
> apply(d,2,sum)
  x xsq   y  xy 
 19  99 111 636 
> 636/99
[1] 6.424242

\end{verbatim}
\begin{verbatim}
# Binomial!
set.seed(999)
urn = 4:20; n = 10; theta = 0.5
m = sample(urn,n,replace=TRUE); m
x = rbinom(n,m,theta)
rbind(m,x)


msq = m^2
mx = m*x
dat = cbind(m,x,msq,mx)
apply(dat,2,sum)

> dat
       m  x msq  mx
 [1,] 10  4 100  40
 [2,] 13  7 169  91
 [3,]  5  0  25   0
 [4,] 18 14 324 252
 [5,] 17  6 289 102
 [6,]  6  4  36  24
 [7,] 14  5 196  70
 [8,]  5  4  25  20
 [9,] 10  3 100  30
[10,] 14  4 196  56
> help(apply)
> apply(dat,2,sum)
   m    x  msq   mx 
 112   51 1460  685 

\end{verbatim}