% \documentclass[serif]{beamer} % Serif for Computer Modern math font.
\documentclass[serif, handout]{beamer} % Handout mode to ignore pause statements
\hypersetup{colorlinks,linkcolor=,urlcolor=red}
\usefonttheme{serif} % Looks like Computer Modern for non-math text -- nice!
\setbeamertemplate{navigation symbols}{} % Suppress navigation symbols
% \usetheme{Berlin} % Displays sections on top
\usetheme{Frankfurt} % Displays section titles on top: Fairly thin but still swallows some material at bottom of crowded slides
%\usetheme{Berkeley}
\usepackage[english]{babel}
\usepackage{amsmath} % for binom
% \usepackage{graphicx} % To include pdf files!
% \definecolor{links}{HTML}{2A1B81}
% \definecolor{links}{red}
\setbeamertemplate{footline}[frame number]
\mode<presentation>

\title{Double Measurement Regression Part One: A small example\footnote{See last slide for copyright information.}}
\subtitle{STA 2101 Fall 2019}
\date{} % To suppress date

\begin{document}

\begin{frame}
\titlepage
\end{frame}

% \begin{frame}
% \frametitle{Overview}
% \tableofcontents
% \end{frame}

\begin{frame}
\frametitle{Seeking identifiability}
%\framesubtitle{}
We have seen that in simple regression, the parameters of a model with measurement error are not identifiable.
\pause
\begin{eqnarray*}
Y_i &=& \beta_0 + \beta_1 X_i + \epsilon_i \\
W_i &=& \nu + X_i + e_i,
\end{eqnarray*}
\pause
\vspace{5mm}
\begin{itemize}
\item For example, $X$ might be the number of acres planted and $Y$ might be crop yield. \pause
\item Plan the statistical analysis in advance. \pause
\item Take two independent measurements of the explanatory variable. \pause
\item Say, farmer's report and satellite photograph.
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{Double measurement}
\framesubtitle{Of the explanatory variable}
\begin{center}
\includegraphics[width=3in]{DoublePath1}
\end{center}
\end{frame}

\begin{frame}
\frametitle{Model}
%\framesubtitle{Could have written this down based on the path diagram }
Independently for $i=1, \ldots, n$, let
\begin{eqnarray*}
W_{i,1} &=& \nu_1 + X_i + e_{i,1} \\
W_{i,2} &=& \nu_2 + X_i + e_{i,2} \\
Y_{i~~} &=& \beta_0 + \beta_1 X_i + \epsilon_i ,
\end{eqnarray*}
where \pause
\begin{itemize}
\item $X_i$ is normally distributed with mean $\mu_x$ and variance $\phi>0$ \pause
\item $\epsilon_i$ is normally distributed with mean zero and variance $\psi>0$ \pause
\item $e_{i,1}$ is normally distributed with mean zero and variance $\omega_1>0$ \pause
\item $e_{i,2}$ is normally distributed with mean zero and variance $\omega_2>0$ \pause
\item $X_i, e_{i,1}, e_{i,2}$ and $\epsilon_i$ are all independent.
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{Does this model pass the test of the Parameter Count Rule?}
%\framesubtitle{}
\begin{eqnarray*}
W_{i,1} &=& \nu_1 + X_i + e_{i,1} \\
W_{i,2} &=& \nu_2 + X_i + e_{i,2} \\
Y_{i~~} &=& \beta_0 + \beta_1 X_i + \epsilon_i ,
\end{eqnarray*}
\pause
\begin{itemize}
\item[] $\boldsymbol{\theta} = (\nu_1, \nu_2, \beta_0, \mu_x, \beta_1, \phi, \psi, \omega_1, \omega_2)$: 9 parameters. \pause
\item Three expected values, three variances and three covariances: 9 moments. \pause
\item Yes. There are nine moment structure equations in nine unknown parameters. Identifiability is possible, but not guaranteed.
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{What is the distribution of the sample data?}
\framesubtitle{Calculate the moments as a function of the model parameters}
\pause
The model implies that the triples $\mathbf{D}_i = (W_{i,1},W_{i,2},Y_i)^\top$ are independent multivariate normal with
\pause
\begin{displaymath}
E(\mathbf{D}_i) = E\left( \begin{array}{c} W_{i,1} \\ W_{i,2} \\ Y_i \end{array} \right)
 = \left( \begin{array}{c} \mu_1 \\ \mu_2 \\ \mu_3 \end{array} \right)
 = \left( \begin{array}{c} \mu_x+\nu_1 \\ \mu_x+\nu_2 \\ \beta_0 + \beta_1\mu_x \end{array} \right),
\end{displaymath}
\pause
and variance-covariance matrix $cov(\mathbf{D}_i) = \boldsymbol{\Sigma} = $
\begin{displaymath}
\left( \begin{array}{c c c }
\sigma_{11} & \sigma_{12} & \sigma_{13} \\
            & \sigma_{22} & \sigma_{23} \\
            &             & \sigma_{33} \\
\end{array} \right) = \pause
\left( \begin{array}{c c c}
\phi+\omega_1 & \phi          & \beta_1 \phi \\
              & \phi+\omega_2 & \beta_1 \phi \\
              &               & \beta_1^2 \phi + \psi
\end{array} \right).
\end{displaymath}
\end{frame}

\begin{frame}
\frametitle{Are the parameters \emph{in the covariance matrix} identifiable?}
\framesubtitle{Six equations in five unknowns}
\begin{displaymath}
\left( \begin{array}{c c c }
\sigma_{11} & \sigma_{12} & \sigma_{13} \\
            & \sigma_{22} & {\color{blue} \sigma_{23} } \\
            &             & \sigma_{33} \\
\end{array} \right) =
\left( \begin{array}{c c c}
\phi+\omega_1 & \phi          & \beta_1 \phi \\
              & \phi+\omega_2 & {\color{blue} \beta_1 \phi } \\
              &               & \beta_1^2 \phi + \psi
\end{array} \right).
\end{displaymath}
\pause
\begin{eqnarray*}
\phi & = & \sigma_{12} \\ \pause
\omega_1 & = & \sigma_{11} - \sigma_{12} \\ \pause
\omega_2 & = & \sigma_{22} - \sigma_{12} \\ \pause
\beta_1 & = & \frac{\sigma_{13}}{\sigma_{12}} \\ \pause
\psi & = & \sigma_{33} - \beta_1^2 \phi = \sigma_{33} - \frac{\sigma_{13}^2}{\sigma_{12}}
\end{eqnarray*}
Yes.
\end{frame}

\begin{frame}
\frametitle{What about the expected values?} \pause
%\framesubtitle{}
Model equations again:
{\scriptsize
\begin{eqnarray*}
W_{i,1} &=& \nu_1 + X_i + e_{i,1} \\
W_{i,2} &=& \nu_2 + X_i + e_{i,2} \\
Y_{i~~} &=& \beta_0 + \beta_1 X_i + \epsilon_i ,
\end{eqnarray*}
\pause
} % End size
Expected values:
\begin{eqnarray*}
\mu_1 & = & \nu_1+\mu_x \\
\mu_2 & = & \nu_2+\mu_x \\
\mu_3 & = & \beta_0 + \beta_1\mu_x \\ \pause
\end{eqnarray*}
{\small
Four parameters appear only in the expected values: $\nu_1,\nu_2,\mu_x,\beta_0$. \pause
\begin{itemize}
\item Three equations in four unknowns, even with $\beta_1$ identified from the covariance matrix. \pause
\item The Parameter Count Rule applies. \pause
\item But we don't need it because these are linear equations. \pause
\item Re-parameterize.
\end{itemize}
} % End size
\end{frame}

\begin{frame}
\frametitle{Re-parameterize}
\framesubtitle{$\mu_1 = \nu_1+\mu_x$ ~~~ $\mu_2 = \nu_2+\mu_x $ ~~~ $\mu_3 = \beta_0 + \beta_1\mu_x$}
\pause
{\footnotesize
\begin{itemize}
\item Absorb $\nu_1, \nu_2, \mu_x, \beta_0$ into $\boldsymbol{\mu}$. \pause
\item The parameter was $\boldsymbol{\theta} = (\nu_1, \nu_2, \beta_0, \mu_x, ~ \beta_1, \phi, \psi, \omega_1, \omega_2)$.
\item Now it's $\boldsymbol{\theta} = (\mu_1, \mu_2, \mu_3, ~ \beta_1, \phi, \psi, \omega_1, \omega_2)$. \pause
\item The dimension of the parameter space is now one less. \pause
\item We haven't lost much.
\item Especially because the model was already re-parameterized. \pause
\item Of course there is measurement error in $Y$.
Recall \pause
\end{itemize}
\begin{eqnarray*}
Y &=& \beta_0 + \beta_1 X + \epsilon \\
V &=& \nu_0 + Y + e \\ \pause
  &=& \nu_0 + (\beta_0 + \beta_1 X + \epsilon) + e \\ \pause
  &=& (\nu_0 + \beta_0) + \beta_1 X + (\epsilon + e) \\ \pause
  &=& \beta_0^\prime + \beta_1 X + \epsilon^\prime
\end{eqnarray*}
} % End size
\end{frame}

\begin{frame}
\frametitle{Re-parameterization}
%\framesubtitle{}
\begin{itemize}
\item Re-parameterization makes maximum likelihood possible. \pause
\item Otherwise the maximum is not unique and it's a mess. \pause
\item Estimate $\boldsymbol{\mu}$ with $\overline{\mathbf{D}}$ and it simply disappears \pause from
{\footnotesize
\begin{displaymath}
L(\boldsymbol{\mu,\Sigma}) = |\boldsymbol{\Sigma}|^{-n/2} (2\pi)^{-np/2}
\exp -\frac{n}{2}\left\{ tr(\boldsymbol{\widehat{\Sigma}\Sigma}^{-1})
+ (\overline{\mathbf{D}}-\boldsymbol{\mu})^\top \boldsymbol{\Sigma}^{-1} (\overline{\mathbf{D}}-\boldsymbol{\mu}) \right\}
\end{displaymath}
\pause
} % End size
\item This step is so common it becomes silent. \pause
\item Model equations are often written without intercepts. \pause
\item It's more compact, and we don't have to look at parameters we can't estimate anyway.
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{Back to the covariance structure equations}
%\framesubtitle{Six equations in five unknowns}
\begin{displaymath}
\left( \begin{array}{c c c }
\sigma_{11} & \sigma_{12} & \sigma_{13} \\
            & \sigma_{22} & \sigma_{23} \\
            &             & \sigma_{33} \\
\end{array} \right) =
\left( \begin{array}{c c c}
\phi+\omega_1 & \phi          & \beta_1 \phi \\
              & \phi+\omega_2 & \beta_1 \phi \\
              &               & \beta_1^2 \phi + \psi
\end{array} \right).
\end{displaymath}
\pause
\begin{itemize}
\item Notice that the model dictates $\sigma_{13}=\sigma_{23}$. \pause
\item There are two ways to solve for $\beta_1$: \\ \pause
$\beta_1=\frac{\sigma_{13}}{\sigma_{12}}$ and $\beta_1=\frac{\sigma_{23}}{\sigma_{12}}$. \pause
\item Does this mean the solution for $\beta_1$ is not ``unique''? \pause
\item No; everything is okay. Because $\sigma_{13}=\sigma_{23}$, the two solutions are actually the same. \pause
\item If a parameter can be recovered from the moments in any way at all, it is identifiable.
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{Testing goodness of fit} \pause
%\framesubtitle{Six equations in five unknowns}
\begin{displaymath}
\left( \begin{array}{c c c }
\sigma_{11} & \sigma_{12} & \sigma_{13} \\
            & \sigma_{22} & \sigma_{23} \\
            &             & \sigma_{33} \\
\end{array} \right) =
\left( \begin{array}{c c c}
\phi+\omega_1 & \phi          & \beta_1 \phi \\
              & \phi+\omega_2 & \beta_1 \phi \\
              &               & \beta_1^2 \phi + \psi
\end{array} \right).
\end{displaymath}
\pause
\begin{itemize}
\item $\sigma_{13}=\sigma_{23}$ is a \emph{model-induced constraint} upon $\boldsymbol{\Sigma}$. \pause
\item It's a testable null hypothesis. \pause
\item If rejected, the model is called into question. \pause
\item Likelihood ratio test comparing this model to a completely unrestricted multivariate normal model: \pause
{\footnotesize
\begin{displaymath}
G^2 = -2\log\frac{ L\left(\overline{\mathbf{D}},\boldsymbol{\Sigma}(\widehat{\boldsymbol{\theta}})\right) }
{L(\overline{\mathbf{D}},\widehat{\boldsymbol{\Sigma}})}
\end{displaymath}
\pause
} % End size
\item Widely used even if the data are not normal.
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{The Reproduced Covariance Matrix}
%\framesubtitle{}
\begin{itemize}
\item $\boldsymbol{\Sigma}(\widehat{\boldsymbol{\theta}})$ is called the \emph{reproduced covariance matrix}.
\pause
\item It is the covariance matrix of the observable data, written as a function of the model parameters and evaluated at the MLE. \pause
\begin{displaymath}
\boldsymbol{\Sigma}(\widehat{\boldsymbol{\theta}}) =
\left( \begin{array}{c c c}
\widehat{\phi}+\widehat{\omega}_1 & \widehat{\phi}                    & \widehat{\beta}_1 \widehat{\phi} \\
                                  & \widehat{\phi}+\widehat{\omega}_2 & \widehat{\beta}_1 \widehat{\phi} \\
                                  &                                   & \widehat{\beta}_1^2 \widehat{\phi} + \widehat{\psi}
\end{array} \right)
\end{displaymath}
\pause
\item The reproduced covariance matrix obeys all model-induced constraints, while $\widehat{\boldsymbol{\Sigma}}$ does not. \pause
\item But if the model is right they should be close. \pause
\item This is a way to think about the likelihood ratio test for goodness of fit.
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{General pattern for testing goodness of fit} \pause
% \framesubtitle{}
\begin{itemize}
\item Suppose there are $k$ moment structure equations in $p$ parameters, and all the parameters are identifiable. \pause
\item If $p < k$, the model imposes $k-p$ equality constraints on the moments. \pause
\item The likelihood ratio test for goodness of fit has $k-p$ degrees of freedom. \pause
\item Here, $k=9$ and $p=8$ after re-parameterization, so the test has one degree of freedom, corresponding to the constraint $\sigma_{13}=\sigma_{23}$.
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{The model also implies inequality constraints}
%\framesubtitle{}
\begin{eqnarray*}
\phi & = & \sigma_{12} > 0\\ \pause
\omega_1 & = & \sigma_{11} - \sigma_{12} > 0\\ \pause
\omega_2 & = & \sigma_{22} - \sigma_{12} > 0\\ \pause
\psi & = & \sigma_{33} - \frac{\sigma_{13}^2}{\sigma_{12}} > 0
\end{eqnarray*}
\end{frame}

\begin{frame}
\frametitle{Inequality constraints} \pause
%\framesubtitle{}
\begin{itemize}
\item Inequality constraints arise because variances are positive. \pause
\item Or more generally, covariance matrices are positive definite. \pause
\item Could inequality constraints be violated in numerical maximum likelihood? \pause
\item Definitely. \pause
\item But only a little by sampling error if the model is correct. \pause
\item So maybe it's not so dumb to test hypotheses like $H_0: \omega_1=0$. \pause
\item Since the model says $\omega_1 = \sigma_{11} - \sigma_{12}$, \pause and $\sigma_{11} - \sigma_{12} > 0$ might not be true.
\end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Copyright Information}

This slide show was prepared by \href{http://www.utstat.toronto.edu/~brunner}{Jerry Brunner}, Department of Statistical Sciences, University of Toronto. It is licensed under a \href{http://creativecommons.org/licenses/by-sa/3.0/deed.en_US}{Creative Commons Attribution - ShareAlike 3.0 Unported License}. Use any part of it as you like and share the result freely. The \LaTeX~source code is available from the course website:
\href{http://www.utstat.toronto.edu/~brunner/oldclass/2101f19}{\small\texttt{http://www.utstat.toronto.edu/$^\sim$brunner/oldclass/2101f19}}

\end{frame}

\end{document}

# Simulate some data for a simple example
set.seed(9999)
n = 150; beta1 = 1; phi=1; psi = 9; omega1=1; omega2=1
X = rnorm(n,10,sqrt(phi)); epsilon = rnorm(n,0,sqrt(psi))
e1 = rnorm(n,0,sqrt(omega1)); e2 = rnorm(n,0,sqrt(omega2))
Y = 3 + beta1*X + epsilon
W1 = X + e1; W2 = X + e2
datta = round(cbind(W1,W2,Y),2)
cor(datta)
summary(lm(Y~X))
summary(lm(Y~W1+W2))

########### Nicely ambiguous traditional output ###########

> cor(datta)
          W1        W2         Y
W1 1.0000000 0.5748331 0.1714324
W2 0.5748331 1.0000000 0.1791539
Y  0.1714324 0.1791539 1.0000000

> summary(lm(Y~X))

Call:
lm(formula = Y ~ X)

Residuals:
    Min      1Q  Median      3Q     Max
-6.8778 -2.0571 -0.0718  2.1200  7.4284

Coefficients:
            Estimate Std. Error t value Pr(>|t|)
(Intercept)   3.8122     2.5348   1.504 0.134723
X             0.9288     0.2521   3.684 0.000322 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 3.096 on 148 degrees of freedom
Multiple R-squared:  0.08398,	Adjusted R-squared:  0.07779
F-statistic: 13.57 on 1 and 148 DF,  p-value: 0.0003218

> summary(lm(Y~W1+W2))

Call:
lm(formula = Y ~ W1 + W2)

Residuals:
    Min      1Q  Median      3Q     Max
-7.6711 -2.3889 -0.1303  2.3442  7.6879

Coefficients:
            Estimate Std. Error t value Pr(>|t|)
(Intercept)   7.9647     2.1148   3.766 0.000239 ***
W1            0.2369     0.2282   1.038 0.300870
W2            0.2799     0.2300   1.217 0.225615
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 3.182 on 147 degrees of freedom
Multiple R-squared:  0.03918,	Adjusted R-squared:  0.02611
F-statistic: 2.997 on 2 and 147 DF,  p-value: 0.053

>
> W = W1+W2
> summary(lm(Y~W))

Call:
lm(formula = Y ~ W)

Residuals:
    Min      1Q  Median      3Q     Max
-7.6787 -2.3707 -0.1431  2.3242  7.6763

Coefficients:
            Estimate Std. Error t value Pr(>|t|)
(Intercept)   7.9721     2.1066   3.784 0.000223 ***
W             0.2583     0.1052   2.454 0.015280 *
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 3.171 on 148 degrees of freedom
Multiple R-squared:  0.0391,	Adjusted R-squared:  0.03261
F-statistic: 6.023 on 1 and 148 DF,  p-value: 0.01528
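
########### Added sketch: solve the covariance structure equations ###########
# The following is not part of the original file.  It is a minimal base-R
# sketch of the identifiability argument from the slides: estimate Sigma,
# then recover the parameters from the covariance structure equations.
# It continues the simulation above (datta, n); names like Shat and phi.hat
# are ours.

Shat = var(datta) * (n-1)/n            # MLE of Sigma (divisor n, not n-1)
phi.hat    = Shat[1,2]                 # phi    = sigma12
omega1.hat = Shat[1,1] - Shat[1,2]     # omega1 = sigma11 - sigma12
omega2.hat = Shat[2,2] - Shat[1,2]     # omega2 = sigma22 - sigma12
beta1.hat  = Shat[1,3] / Shat[1,2]     # beta1  = sigma13/sigma12 (sigma23/sigma12 also works)
psi.hat    = Shat[3,3] - Shat[1,3]^2 / Shat[1,2]   # psi = sigma33 - sigma13^2/sigma12
round(c(phi=phi.hat, omega1=omega1.hat, omega2=omega2.hat,
        beta1=beta1.hat, psi=psi.hat), 3)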
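
########### Added sketch: numerical MLE and likelihood ratio test of fit ###########
# Also not part of the original file: one way to compute the MLE under the
# re-parameterized model and the G-squared goodness-of-fit statistic in base R.
# The statistic tests the single model-induced constraint sigma13 = sigma23,
# so it has one degree of freedom.  Function and object names are ours.

p = 3                                   # Dimension of D_i = (W1, W2, Y)
Shat = var(datta) * (n-1)/n             # Unrestricted MLE of Sigma
# Minus log likelihood with mu profiled out, dropping the additive constant
mloglike = function(theta) {
  beta1 = theta[1]; phi = theta[2]; psi = theta[3]
  omega1 = theta[4]; omega2 = theta[5]
  Sig = rbind(c(phi+omega1, phi,        beta1*phi),
              c(phi,        phi+omega2, beta1*phi),
              c(beta1*phi,  beta1*phi,  beta1^2*phi + psi))
  if (min(eigen(Sig, symmetric=TRUE, only.values=TRUE)$values) <= 0)
    return(1e100)                       # Guard: Sigma(theta) must be positive definite
  (n/2) * ( log(det(Sig)) + sum(diag(Shat %*% solve(Sig))) )
}
# Start at the method-of-moments solution from the sketch above
start = c(Shat[1,3]/Shat[1,2], Shat[1,2], Shat[3,3] - Shat[1,3]^2/Shat[1,2],
          Shat[1,1] - Shat[1,2], Shat[2,2] - Shat[1,2])
fit = optim(start, mloglike, method="BFGS")
round(fit$par, 3)                       # (beta1, phi, psi, omega1, omega2) at the MLE
# G-squared: -2 log likelihood ratio against the unrestricted multivariate normal model
G2 = 2*fit$value - n*( log(det(Shat)) + p )
c(G2 = G2, df = 1, pvalue = 1 - pchisq(G2, 1))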
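
########### Added sketch: the same model with the lavaan package ###########
# Again not part of the original file.  If the lavaan package is installed,
# the double measurement model can be fit with standard structural equation
# modelling software; the chi-squared test of model fit reported by lavaan
# plays the role of G-squared (1 df here).  The model string and object
# names below are ours.

# install.packages("lavaan")            # if needed
library(lavaan)
dmodel = 'X =~ 1*W1 + 1*W2              # latent X, both loadings fixed at 1
          Y ~ X'                        # regression of Y on the latent X
dfit = sem(dmodel, data = as.data.frame(datta))
summary(dfit)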