% Based on 431f17TRy1.tex
%
% Beamer slides: "Measurement Error in the Response Variable" (STA 2101 Fall 2019).
% Outline: a regression model in which only the response is measured with
% error, non-identifiability of the full parameter vector, consistency of the
% naive slope estimator, and the re-parameterization that explains it.
%
% Source layout: one statement per line. This matters because a `%' comment
% runs to the end of the physical line, so code must never share a line with
% a preceding comment.

% \documentclass[serif]{beamer} % Serif for Computer Modern math font.
\documentclass[serif, handout]{beamer} % Handout mode to ignore pause statements
\hypersetup{colorlinks,linkcolor=,urlcolor=red}
\usefonttheme{serif} % Looks like Computer Modern for non-math text -- nice!
\setbeamertemplate{navigation symbols}{} % Suppress navigation symbols
% \usetheme{Berlin} % Displays sections on top
\usetheme{Frankfurt} % Displays section titles on top: Fairly thin but still swallows some material at bottom of crowded slides
%\usetheme{Berkeley}
\usepackage[english]{babel}
\usepackage{amsmath} % for binom
\usepackage{graphpap}
% \usepackage{graphicx} % To include pdf files!
% \definecolor{links}{HTML}{2A1B81}
% \definecolor{links}{red}
\setbeamertemplate{footline}[frame number]
% NOTE(review): the mode specification was missing here (bare \mode); it looks
% like the angle-bracket argument was stripped. Restored as the usual beamer
% idiom -- confirm against the original file.
\mode<presentation>

\title{Measurement Error in the Response Variable\footnote{See last slide for copyright information.}}
\subtitle{STA 2101 Fall 2019}
\date{} % To suppress date

\begin{document}

\begin{frame}
\titlepage
\end{frame}

%\begin{frame}
%\frametitle{Overview}
%\tableofcontents
%\end{frame}

%\section{}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Ignoring measurement error}
%\framesubtitle{}
\begin{itemize}
  \item We have seen that ignoring measurement error in the explanatory variables can lead to disaster.
  \item What about measurement error in the response variable?
\end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Model statement plus a path diagram drawn with the picture environment:
% observed variables X and V are boxed, the latent Y is circled.
\begin{frame}
\frametitle{Example of Measurement Error in $Y$ only}
\framesubtitle{$X$ could be drug dose, $Y$ could be true anxiety, $V$ could be reported anxiety}
\pause
Independently for $i=1, \ldots,n$, let
\begin{eqnarray*}
Y_i &=& \beta_0 + \beta_1 X_i + \epsilon_i \\
V_i &=& \nu + Y_i + e_i,
\end{eqnarray*}
where $Var(X_i)=\sigma^2_x$, $Var(e_i)=\sigma^2_e$, $Var(\epsilon_i)=\sigma^2_\epsilon$, and $X_i, e_i, \epsilon_i$ are all independent.
\pause
\begin{picture}(100,100)(75,0) % Size of picture (does not matter), origin
\put(197,000){$Y$}
\put(202,4){\circle{20}}
\put(157,50){\framebox{$X$}}
% \put(168,25){{\footnotesize $\beta_1$}} % Label the arrow X -> Y
\put(182,30){{\footnotesize $\beta_1$}} % Label the arrow X -> Y
\put(235,50){\framebox{$V$}}
\put(167,42){\vector(1,-1){25}} % X -> Y
\put(212,17){\vector(1,1){25}} % Y -> V
\put(240,95){$e$}
\put(243,90){\vector(0,-1){25}} % e -> V
\put(244,01){$\epsilon$}
\put(242,03){\vector(-1,0){25}} % epsilon -> Y
\end{picture}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Parameters of the true model are not identifiable from the means and covariance matrix}
%\framesubtitle{}
{\small
\begin{eqnarray*}
Y_i &=& \beta_0 + \beta_1 X_i + \epsilon_i \\
V_i &=& \nu + Y_i + e_i,
\end{eqnarray*}
where $Var(X_i)=\sigma^2_x$, $Var(e_i)=\sigma^2_e$, and $Var(\epsilon_i)=\sigma^2_\epsilon$.
\pause
} % End size
\begin{itemize}
  \item Only the $(X_i,V_i)$ pairs are observable.
  \item There are 5 moments. \pause
  \item $\boldsymbol{\theta} = (\beta_0, \beta_1, \mu_x, \sigma^2_x, \sigma^2_\epsilon, \nu, \sigma^2_e)$: 7 parameters
  \item Fails the test of the parameter count rule.
\end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Ignoring measurement error as usual}
\pause
%\framesubtitle{}
True model:
\begin{eqnarray*}
Y_i &=& \beta_0 + \beta_1 X_i + \epsilon_i \\
V_i &=& \nu + Y_i + e_i,
\end{eqnarray*}
Naive model: \pause
\begin{displaymath}
V_i = \beta_0 + \beta_1 X_i + \epsilon_i
\end{displaymath}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Large-sample target of the naive least-squares slope, evaluated under the
% true model. The environment is starred, so no line is numbered and no
% \nonumber is needed.
\begin{frame}
\frametitle{Fit the Naive Model, using $V_i$ as the response variable}
\framesubtitle{$ V_i = \beta_0 + \beta_1 X_i + \epsilon_i$}
\pause
First note that under the \emph{true} model, $Cov(X_i,V_i) = \beta_1 \sigma^2_x$ and $Var(X_i) = \sigma^2_x$.
\pause
\begin{eqnarray*}
\widehat{\beta}_1 &=& \frac{\sum_{i=1}^n(X_i-\overline{X})(V_i-\overline{V})}
                           {\sum_{i=1}^n(X_i-\overline{X})^2} \\ \pause
&=& \frac{\widehat{\sigma}_{x,v}}{\widehat{\sigma}^2_x} \\ \pause
&\stackrel{a.s.}{\rightarrow}& \frac{Cov(X_i,V_i)}{Var(X_i)} \\ \pause
&=& \frac{\beta_1 \sigma^2_x}{\sigma^2_x} \\
&=& \beta_1 .
\end{eqnarray*}
\pause
So $\widehat{\beta}_1$ is consistent, even though the model is mis-specified.
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Why does the naive model work so well?}
\pause
%\framesubtitle{}
\begin{eqnarray*}
V_i &=& \nu + Y_i + e_i \pause \\
    &=& \nu + (\beta_0 + \beta_1 X_i + \epsilon_i) + e_i \pause \\
    &=& (\nu + \beta_0) + \beta_1 X_i + (\epsilon_i + e_i) \pause \\
    &=& \beta_0^\prime + \beta_1 X_i + \epsilon_i^\prime
\end{eqnarray*}
\pause
\begin{itemize}
  \item This is a \emph{re-parameterization}. \pause
  \item Not a one-to-one re-parameterization -- call it a ``collapsing'' re-parameterization. \pause
  \item The pair $(\nu,\beta_0)$ is absorbed into $\beta_0^\prime$. \pause
  \item $Var(\epsilon_i + e_i)= \sigma^2_\epsilon + \sigma^2_e$ is absorbed into a single unknown variance that will probably be called $\sigma^2$. \pause
  \item $\nu$ and $\beta_0$ will never be knowable separately, and also $\sigma^2_\epsilon$ and $\sigma^2_e$ will never be knowable separately. \pause
  \item It's okay. All we care about is $\beta_1$ anyway.
\end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{This is very common}
%\framesubtitle{}
\begin{itemize}
  \item In many models, it will appear that the response variable is being measured without error. \pause
  \item Of course there really is measurement error in $Y_i$, but it has been absorbed into the error term.\pause
  \item So any model without measurement error in the response variable should be viewed as a re-parameterized version of a more realistic model. \pause
  \item The measurement error should be independent of $X$, or there is real trouble.
\end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Copyright Information}
This slide show was prepared by
\href{http://www.utstat.toronto.edu/~brunner}{Jerry Brunner},
Department of Statistical Sciences, University of Toronto.
It is licensed under a
\href{http://creativecommons.org/licenses/by-sa/3.0/deed.en_US}
{Creative Commons Attribution - ShareAlike 3.0 Unported License}.
Use any part of it as you like and share the result freely.
The \LaTeX~source code is available from the course website:
\href{http://www.utstat.toronto.edu/~brunner/oldclass/2101f19}
{\small\texttt{http://www.utstat.toronto.edu/$^\sim$brunner/oldclass/2101f19}}
\end{frame}

\end{document}

% Everything below \end{document} is ignored by LaTeX: blank templates kept
% for copy-and-paste when adding new slides.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{}
%\framesubtitle{}
\begin{itemize}
  \item
  \item
  \item
\end{itemize}
\end{frame}

{\LARGE
\begin{displaymath}
\end{displaymath}
}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%