% Least Squares Target with Centered Random Explanatory Variables
% for Applied Stat I
% Notes and comments are after the end of the document
% \documentclass[serif]{beamer} % Serif for Computer Modern math font.
\documentclass[serif, handout]{beamer} % Handout mode to ignore pause statements
\hypersetup{colorlinks,linkcolor=,urlcolor=red}
\usefonttheme{serif} % Looks like Computer Modern for non-math text -- nice!
\setbeamertemplate{navigation symbols}{} % Suppress navigation symbols
% \usetheme{Berlin} % Displays sections on top
\usetheme{Frankfurt} % Displays section titles on top: Fairly thin but still swallows some material at bottom of crowded slides
%\usetheme{Berkeley}
\usepackage[english]{babel}
\usepackage{amsmath} % for binom
\usepackage{euscript} % for \EuScript
% \usepackage{graphicx} % To include pdf files!
% \definecolor{links}{HTML}{2A1B81}
% \definecolor{links}{red}
\setbeamertemplate{footline}[frame number]
\mode<presentation>
% \mode<presentation>{\setbeamercolor{background canvas}{bg=black!5}} % Comment this out for handout
\title{Large-sample target of least squares regression\footnote{See last slide for copyright information.}}
\subtitle{STA442/2101 Fall 2019}
\date{} % To suppress date

\begin{document}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\titlepage
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Overview}
\tableofcontents
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{The Centered Model}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{The centered model}
\framesubtitle{Explanatory variable values are fixed, for now} \pause
%\framesubtitle{}
\begin{eqnarray*}
y_i & = & \beta_0 + \beta_1 x_{i,1} + \cdots + \beta_k x_{i,k} + \epsilon_i \\ \pause
    & = & \beta_0 + \beta_1 \overline{x}_1 + \cdots + \beta_{k} \overline{x}_k \\
    &   & + \beta_1 (x_{i,1}-\overline{x}_1) + \cdots + \beta_{k} (x_{i,k}-\overline{x}_{k}) + \epsilon_i \\ \pause
    & = & \alpha_0 + \alpha_1 (x_{i,1}-\overline{x}_1) + \cdots + \alpha_{k} (x_{i,k}-\overline{x}_{k}) + \epsilon_i
\end{eqnarray*} \pause
with
\begin{itemize}
\item[] $\alpha_0 = \beta_0 + \beta_1 \overline{x}_1 + \cdots + \beta_{k} \overline{x}_k$. \pause
\item[] $\alpha_j=\beta_j$ for $j = 1, \ldots, k$. \pause
\end{itemize}
This re-parameterization is one-to-one.
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Invariance Principle}
\framesubtitle{MLE of a function is that function of the MLE}
\begin{itemize}
\item Since $\alpha_j=\beta_j$ for $j = 1, \ldots, k$\pause, we have $\widehat{\alpha}_j=\widehat{\beta}_j$ for $j = 1, \ldots, k$. \pause
\item Least-squares estimates are the same as MLEs under normality. \pause
\item So this conclusion applies to the least-squares estimates. \pause
\item When the explanatory variables are centered, the intercept of the least-squares plane changes, but the slopes remain the same.
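\pause
\item For example, applying the invariance principle to $\alpha_0 = \beta_0 + \beta_1 \overline{x}_1 + \cdots + \beta_{k} \overline{x}_k$ gives
\begin{displaymath}
\widehat{\alpha}_0 = \widehat{\beta}_0 + \widehat{\beta}_1 \overline{x}_1 + \cdots + \widehat{\beta}_{k} \overline{x}_k.
\end{displaymath}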
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Estimation}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Least-squares Estimation for the Centered Model}
\framesubtitle{Working toward a useful formula} \pause
{\Large
\begin{displaymath}
y_i = \alpha_0 + \beta_1 (x_{i,1}-\overline{x}_1) + \cdots + \beta_{k} (x_{i,k}-\overline{x}_{k}) + \epsilon_i
\end{displaymath} \pause
} % End size
Estimation: \pause
\begin{itemize}
\item $\widehat{\alpha}_0 = \overline{y}$, regardless of the data. \pause
\item $\widehat{\beta}_j$ values are the same as for the uncentered model. \pause
\item To find the $\widehat{\beta}_j$ (once you have $\widehat{\alpha}_0 = \overline{y}$), \pause minimize
\begin{displaymath}
Q(\boldsymbol{\beta}) = \sum_{i=1}^n \left(y_i - \overline{y} - \beta_1 (x_{i,1}-\overline{x}_1) - \cdots - \beta_{k} (x_{i,k}-\overline{x}_{k})\right)^2
\end{displaymath} \pause
\item This is the same as centering $y$ as well as $x$, and fitting a regression through the origin.
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Estimation of $\boldsymbol{\beta}$} \pause
%\framesubtitle{}
\begin{itemize}
\item Center the explanatory variables \emph{and} the response variable by subtracting off sample means. \pause
\item Fit a regression through the origin. \pause
\item $\widehat{\boldsymbol{\beta}} = (\mathbf{X}^\top \mathbf{X})^{-1} \mathbf{X}^\top \mathbf{y}$ as usual. \pause
\item But now the meaning of the notation is a little different because all the variables are centered. \pause
\item Again, this is the same as $\widehat{\boldsymbol{\beta}}$ for the uncentered model.
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{$\mathbf{X}^\top \mathbf{X}$ for the centered model}
\framesubtitle{$k=3$ example} \pause
$\mathbf{X}^\top \mathbf{X} = $
\vspace{3mm}
\begin{columns} % Use Beamer's columns to use more of the margins!
\column{1.2\textwidth}
{\scriptsize
\begin{displaymath}
\left( \begin{array}{lll}
\sum_{i=1}^n (x_{i1}-\overline{x}_1)^2 &
\sum_{i=1}^n (x_{i1}-\overline{x}_1)(x_{i2}-\overline{x}_2) &
\sum_{i=1}^n (x_{i1}-\overline{x}_1)(x_{i3}-\overline{x}_3) \\
\sum_{i=1}^n (x_{i2}-\overline{x}_2)(x_{i1}-\overline{x}_1) &
\sum_{i=1}^n (x_{i2}-\overline{x}_2)^2 &
\sum_{i=1}^n (x_{i2}-\overline{x}_2)(x_{i3}-\overline{x}_3) \\
\sum_{i=1}^n (x_{i3}-\overline{x}_3)(x_{i1}-\overline{x}_1) &
\sum_{i=1}^n (x_{i3}-\overline{x}_3)(x_{i2}-\overline{x}_2) &
\sum_{i=1}^n (x_{i3}-\overline{x}_3)^2
\end{array} \right)
\end{displaymath} \pause
~~Multiply and divide by $n$ to get \pause
\renewcommand{\arraystretch}{1.5}
\begin{displaymath}
n\left( \begin{array}{lll}
\frac{1}{n} \sum_{i=1}^n (x_{i1}-\overline{x}_1)^2 &
\frac{1}{n} \sum_{i=1}^n (x_{i1}-\overline{x}_1)(x_{i2}-\overline{x}_2) &
\frac{1}{n} \sum_{i=1}^n (x_{i1}-\overline{x}_1)(x_{i3}-\overline{x}_3) \\
\frac{1}{n} \sum_{i=1}^n (x_{i2}-\overline{x}_2)(x_{i1}-\overline{x}_1) &
\frac{1}{n} \sum_{i=1}^n (x_{i2}-\overline{x}_2)^2 &
\frac{1}{n} \sum_{i=1}^n (x_{i2}-\overline{x}_2)(x_{i3}-\overline{x}_3) \\
\frac{1}{n} \sum_{i=1}^n (x_{i3}-\overline{x}_3)(x_{i1}-\overline{x}_1) &
\frac{1}{n} \sum_{i=1}^n (x_{i3}-\overline{x}_3)(x_{i2}-\overline{x}_2) &
\frac{1}{n} \sum_{i=1}^n (x_{i3}-\overline{x}_3)^2
\end{array} \right)
\end{displaymath} \pause
\renewcommand{\arraystretch}{1.0}
} % End size
\end{columns}
{\LARGE
\begin{displaymath}
 = n\widehat{\boldsymbol{\Sigma}}_x
\end{displaymath}
} % End size
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{$\mathbf{X}^\top \mathbf{y}$ for the centered model}
\framesubtitle{Still for the $k=3$ example} \pause
%{\small
\renewcommand{\arraystretch}{1.5}
\begin{eqnarray*}
\mathbf{X}^\top \mathbf{y} & = &
\left( \begin{array}{c}
\sum_{i=1}^n (x_{i1}-\overline{x}_1)(y_i-\overline{y}) \\
\sum_{i=1}^n (x_{i2}-\overline{x}_2)(y_i-\overline{y}) \\
\sum_{i=1}^n (x_{i3}-\overline{x}_3)(y_i-\overline{y})
\end{array} \right) \\ \pause
&&\\
& = & n\left( \begin{array}{c}
\frac{1}{n}\sum_{i=1}^n (x_{i1}-\overline{x}_1)(y_i-\overline{y}) \\
\frac{1}{n}\sum_{i=1}^n (x_{i2}-\overline{x}_2)(y_i-\overline{y}) \\
\frac{1}{n}\sum_{i=1}^n (x_{i3}-\overline{x}_3)(y_i-\overline{y})
\end{array} \right) \\ \pause
&&\\
& = & n\widehat{\boldsymbol{\Sigma}}_{xy}
\end{eqnarray*}
\renewcommand{\arraystretch}{1.0}
%} % End size
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{$\mathbf{X}^\top \mathbf{X} = n\widehat{\boldsymbol{\Sigma}}_x$ and $\mathbf{X}^\top \mathbf{y} = n\widehat{\boldsymbol{\Sigma}}_{xy}$}
\framesubtitle{For the centered model}
{\LARGE
\begin{eqnarray*}
\widehat{\boldsymbol{\beta}} \pause & = & (\mathbf{X}^\top \mathbf{X})^{-1} \mathbf{X}^\top \mathbf{y} \\ \pause
&&\\
& = & (n\widehat{\boldsymbol{\Sigma}}_x)^{-1} n\widehat{\boldsymbol{\Sigma}}_{xy} \\ \pause
&&\\
& = & \frac{1}{n}(\widehat{\boldsymbol{\Sigma}}_x)^{-1} n\widehat{\boldsymbol{\Sigma}}_{xy} \\ \pause
&&\\
& = & \widehat{\boldsymbol{\Sigma}}_x^{-1} \widehat{\boldsymbol{\Sigma}}_{xy} \\
\end{eqnarray*}
} % End size
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Convergence}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{$\widehat{\boldsymbol{\beta}} = \widehat{\boldsymbol{\Sigma}}_x^{-1}
\widehat{\boldsymbol{\Sigma}}_{xy}$} \pause \framesubtitle{Where $\widehat{\boldsymbol{\beta}} = (\widehat{\beta}_1, \ldots, \widehat{\beta}_k)^\top$; the intercept is not included} The formula applies whether the data are centered or not\pause, and whether the explanatory variables are fixed or random. \pause Suppose they are random. \vspace{3mm} \pause \begin{itemize} \item $\widehat{\boldsymbol{\Sigma}}_x \stackrel{a.s.}{\rightarrow} \boldsymbol{\Sigma}_x$ \pause \item $\widehat{\boldsymbol{\Sigma}}_{xy} \stackrel{a.s.}{\rightarrow} \boldsymbol{\Sigma}_{xy}$ \pause \item Taking the inverse is a sequence of continuous operations. \pause \item So by continuous mapping, \pause \end{itemize} \vspace{5mm} {\LARGE \begin{displaymath} \widehat{\boldsymbol{\beta}}_n = \widehat{\boldsymbol{\Sigma}}_x^{-1} \widehat{\boldsymbol{\Sigma}}_{xy} \stackrel{a.s.}{\rightarrow} \pause \boldsymbol{\Sigma}_x^{-1} \boldsymbol{\Sigma}_{xy} \end{displaymath} } % End size \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} \frametitle{We have found $\widehat{\boldsymbol{\beta}}_n \stackrel{a.s.}{\rightarrow} \boldsymbol{\Sigma}_x^{-1} \boldsymbol{\Sigma}_{xy}$} \pause %\framesubtitle{} Is it the right target? \pause There are two cases. \pause \begin{itemize} \item The model is correct. \pause \item The model is incorrect (mis-specified). \end{itemize} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} \frametitle{Correct model (\emph{Uncentered})} \pause %\framesubtitle{} Independently for $i = 1, \ldots, n$, \pause {\LARGE \begin{displaymath} y_i = \beta_0 + \boldsymbol{\beta}^\top \mathbf{x}_i + \epsilon_i \end{displaymath} \pause } % End size where \pause \begin{itemize} \item[] $\beta_0$ (the intercept) is an unknown scalar constant. \pause \item[] $\boldsymbol{\beta}$ is a $k \times 1$ vector of unknown parameters. \pause \item[] $\mathbf{x}_i$ is a $k \times 1$ random vector with expected value $\boldsymbol{\mu}$ and covariance matrix $\boldsymbol{\Sigma}_x$. \pause \item[] $\epsilon_i$ is a scalar random variable with $E(\epsilon_i) = 0$ and $Var(\epsilon_i) = \sigma^2$. \pause \item[] $cov(\mathbf{x}_i,\epsilon_i) = \mathbf{0}$. 
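\pause
\item[] Together, these give $E(y_i) = \beta_0 + \boldsymbol{\beta}^\top \boldsymbol{\mu}$, which is used in the covariance calculation on the next slide.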
\end{itemize} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} \frametitle{Calculate $cov(\mathbf{x}_i,y_i) = \boldsymbol{\Sigma}_{xy}$ for the \emph{uncentered} model} \framesubtitle{$y_i = \beta_0 + \boldsymbol{\beta}^\top \mathbf{x}_i + \epsilon_i$} \pause %{\LARGE \begin{eqnarray*} cov(\mathbf{x}_i,y_i) \pause & = & E\left\{(\mathbf{x}_i-\boldsymbol{\mu}) ({\color{red}y_i}-\beta_0 - \boldsymbol{\beta}^\top \boldsymbol\mu{})^\top\right\} \\ \pause & = & E\left\{(\mathbf{x}_i-\boldsymbol{\mu}) ({\color{red}\beta_0 + \boldsymbol{\beta}^\top \mathbf{x}_i + \epsilon_i} -\beta_0 - \boldsymbol{\beta}^\top \boldsymbol{\mu})^\top\right\} \\ \pause & = & E\left\{(\mathbf{x}_i-\boldsymbol{\mu}) (\boldsymbol{\beta}^\top \mathbf{x}_i - \boldsymbol{\beta}^\top \boldsymbol{\mu} + \epsilon_i)^\top\right\} \\ \pause & = & E\left\{(\mathbf{x}_i-\boldsymbol{\mu}) (\boldsymbol{\beta}^\top (\mathbf{x}_i - \boldsymbol{\mu}) + \epsilon_i)^\top\right\} \\ \pause & = & E\left\{(\mathbf{x}_i-\boldsymbol{\mu}) \left((\mathbf{x}_i - \boldsymbol{\mu})^\top \boldsymbol{\beta} + \epsilon_i^\top\right)\right\} \\ \pause & = & E\left\{(\mathbf{x}_i-\boldsymbol{\mu}) (\mathbf{x}_i - \boldsymbol{\mu})^\top\right\} \boldsymbol{\beta} + E\left\{ (\mathbf{x}_i-\boldsymbol{\mu})(\epsilon_i-0)^\top \right\} \\ \pause & = & \boldsymbol{\Sigma}_x \boldsymbol{\beta} \pause + \mathbf{0} \end{eqnarray*} %} % End size \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} \frametitle{Convergence} %\framesubtitle{} Have $\boldsymbol{\Sigma}_{xy} = \boldsymbol{\Sigma}_x \boldsymbol{\beta}$ for the uncentered model. \pause So whether the variables are centered or not, \pause {\LARGE \begin{eqnarray*} \widehat{\boldsymbol{\beta}}_n & = & \widehat{\boldsymbol{\Sigma}}_x^{-1} \widehat{\boldsymbol{\Sigma}}_{xy}\\ \pause & \stackrel{a.s.}{\rightarrow} & \boldsymbol{\Sigma}_x^{-1} \boldsymbol{\Sigma}_{xy} \\ \pause & = & \boldsymbol{\Sigma}_x^{-1} \boldsymbol{\Sigma}_x \boldsymbol{\beta} \\ \pause & = & \boldsymbol{\beta} \pause \end{eqnarray*} } % End size And $\widehat{\boldsymbol{\beta}}_n$ is strongly consistent for $\boldsymbol{\beta}$. \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \section{Model Mis-specification} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} \frametitle{Model Mis-specification} %\framesubtitle{} What if the model is wrong (mis-specified)? \pause \begin{itemize} \item Think of a particular way in which the regression model might be wrong. \pause \item Call this the ``true model." \pause \item Still have $\widehat{\boldsymbol{\beta}}_n \stackrel{a.s.}{\rightarrow} \boldsymbol{\Sigma}_x^{-1} \boldsymbol{\Sigma}_{xy}$. \pause \item Calculate $\boldsymbol{\Sigma}_x^{-1} \boldsymbol{\Sigma}_{xy}$ assuming the true model. \pause \item This is the large-sample target of $\widehat{\boldsymbol{\beta}}$. \pause \item Is it what you want? 
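\pause
\item The next section carries out this recipe for a true model with measurement error in the explanatory variables.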
\end{itemize} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \section{Measurement Error} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} \frametitle{Measurement Error} \pause %\framesubtitle{} \begin{itemize} \item Snack food consumption \pause \item Exercise \pause \item Income \pause \item Cause of death \pause \item Even amount of drug that reaches animal’s blood stream in an experimental study \pause \item Is there anything that is \emph{not} measured with error? \end{itemize} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} \frametitle{The problem with measurement error} \framesubtitle{$Y_i = \beta_0 + \beta_1 X_{i,1} + \beta_2 X_{i,2} + \epsilon_i $} \pause \begin{itemize} \item Trouble may arise if you take the regression model seriously as a model of how $Y$ is produced from $X$. \pause \item If your objective is pure prediction and \emph{not interpretation}, there is no problem. \pause \item In nature, there are relationships between true variables\pause, and this is what we are interested in. \pause \item Relationships between observable variables result from relationships between true variables\pause, combined with the measurement error process. \pause \item Measurement error does not just weaken the relationships. \end{itemize} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} \frametitle{Measurement error in two explanatory variables} \framesubtitle{An example} \begin{center} \includegraphics[width=3in]{MeReg2Path} \end{center} \pause Want to assess the relationship of $X_2$ to $Y$ controlling for $X_1$ by testing $H_0:\beta_2=0$. \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} \frametitle{Statement of the model} \framesubtitle{Independently for $i=1, \ldots,n$} \pause \begin{eqnarray} Y_i &=& \beta_0 + \beta_1 X_{i,1} + \beta_2 X_{i,2} + \epsilon_i \nonumber \\ \pause W_{i,1} & = & X_{i,1} + e_{i,1} \nonumber \\ \pause W_{i,2} & = & X_{i,2} + e_{i,2}, \pause \nonumber \end{eqnarray} {\footnotesize where \pause \begin{itemize} \item[] $E(X_{i,1})=\mu_1$, $E(X_{i,2})=\mu_2$, \pause $E(\epsilon_i) = E(e_{i,1}) = E(e_{i,2}) = 0$, \pause \item[] $Var(\epsilon_i)=\psi$, $Var(e_{i,1})=\omega_1$, $Var(e_{i,2})=\omega_2$, \pause \item[] The errors $\epsilon_i, e_{i,1}$ and $e_{i,2}$ are all independent, \pause \item[] $X_{i,1}$ and $X_{i,2}$ are independent of $\epsilon_i, e_{i,1}$ and $e_{i,2}$\pause, and \pause \end{itemize} \begin{displaymath} cov\left( \begin{array}{c} X_{i,1} \\ X_{i,2} \end{array} \right) = \pause\left( \begin{array}{c c} \phi_{11} & \phi_{12} \\ \phi_{12} & \phi_{22} \end{array} \right). \end{displaymath} } % End size \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} \frametitle{Reliability} \framesubtitle{As the term is used in psychometrics} \pause \begin{eqnarray*} W_{i,1} & = & X_{i,1} + e_{i,1} \\ \pause W_{i,2} & = & X_{i,2} + e_{i,2}, \pause \end{eqnarray*} %{\footnotesize where \begin{itemize} \item[] $Var(X_{i,1})=\phi_{11}$, $Var(X_{i,2})=\phi_{22}$ \pause \item[] $Var(e_{i,1})=\omega_1$, $Var(e_{i,2})=\omega_2$, \end{itemize} \pause \vspace{3mm} %} % End size \begin{itemize} \item Because $X$ and $e$ are independent, $Var(W) = Var(X)+Var(e) = \phi + \omega$. 
\pause
\item The proportion of the variance in $W$ that comes from the ``true" variable $X$ (and not error) is $\frac{\phi}{\phi+\omega}$. \pause
\item Call it the ``reliability." \pause
\item Reliability of $W_1$ is $\frac{\phi_{11}}{\phi_{11}+\omega_1}$.
\item Reliability of $W_2$ is $\frac{\phi_{22}}{\phi_{22}+\omega_2}$.
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{True Model versus Naive Model}
%\framesubtitle{Independently for $i=1, \ldots,n$}
\pause
%\vspace{1mm}
True model:
\begin{eqnarray}
Y_i &=& \beta_0 + \beta_1 X_{i,1} + \beta_2 X_{i,2} + \epsilon_i \nonumber \\
W_{i,1} & = & X_{i,1} + e_{i,1} \nonumber \\
W_{i,2} & = & X_{i,2} + e_{i,2}, \pause \nonumber
\end{eqnarray}
Naive model: $Y_i = \beta_0 + \beta_1 W_{i,1} + \beta_2 W_{i,2} + \epsilon_i$ \pause
\vspace{3mm}
\begin{itemize}
\item Fit the naive model. \pause
\item See what happens to $\widehat{\boldsymbol{\beta}}$ (especially $\widehat{\beta}_2$) as $n \rightarrow \infty$ \pause when the true model holds.
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{$\widehat{\boldsymbol{\beta}}_n \stackrel{a.s.}{\rightarrow} \boldsymbol{\Sigma}_w^{-1} \boldsymbol{\Sigma}_{wy}$}
\framesubtitle{For the naive model $ Y_i = \beta_0 + \beta_1 W_{i,1} + \beta_2 W_{i,2} + \epsilon_i$}
\pause
Calculation of $\boldsymbol{\Sigma}_w$ and $\boldsymbol{\Sigma}_{wy}$ by hand is not bad. \pause
\begin{displaymath}
\boldsymbol{\Sigma}_w = \left(\begin{array}{cc}
\omega_{1} + \phi_{11} & \phi_{12} \\
\phi_{12} & \omega_{2} + \phi_{22} \\
\end{array}\right)
\hspace{5mm} \pause
\boldsymbol{\Sigma}_{wy} = \left(\begin{array}{c}
\beta_{1} \phi_{11} + \beta_{2} \phi_{12} \\
\beta_{1} \phi_{12} + \beta_{2} \phi_{22}
\end{array}\right)
\end{displaymath}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{After some work}
\framesubtitle{$ Y_i = \beta_0 + \beta_1 W_{i,1} + \beta_2 W_{i,2} + \epsilon_i$}
\begin{displaymath}
\widehat{\boldsymbol{\beta}}_n \stackrel{a.s.}{\rightarrow} \pause
\boldsymbol{\Sigma}_w^{-1} \boldsymbol{\Sigma}_{wy} = \pause
\left(\begin{array}{r}
\frac{\beta_{2} \omega_{2} \phi_{12} + \beta_{1} (\omega_{2} \phi_{11} + \phi_{11} \phi_{22} - \phi_{12}^{2})}
     {(\phi_{11} + \omega_1)(\phi_{22} + \omega_2) - \phi_{12}^{2}} \\
\frac{\beta_{1} \omega_{1} \phi_{12} + \beta_{2} (\omega_{1} \phi_{22} + \phi_{11} \phi_{22} - \phi_{12}^{2})}
     {(\phi_{11} + \omega_1)(\phi_{22} + \omega_2) - \phi_{12}^{2}}
\end{array}\right)
\pause
\neq \left(\begin{array}{r} \beta_1 \\ \beta_2 \end{array}\right)
\end{displaymath} \pause
When $H_0: \beta_2=0$ is true, this reduces to \ldots
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{The Target under $H_0: \beta_2=0$} \pause
%\framesubtitle{Recalling }
\renewcommand{\arraystretch}{2.0}
{\LARGE
\begin{displaymath}
\left(\begin{array}{c} \widehat{\beta}_1 \\ \widehat{\beta}_2 \end{array}\right)
\stackrel{a.s.}{\rightarrow} \pause
\left(\begin{array}{r}
\beta_{1} \left(\frac{\omega_{2} \phi_{11} + \phi_{11} \phi_{22} - \phi_{12}^{2}}
     {(\phi_{11} + \omega_1)(\phi_{22} + \omega_2) - \phi_{12}^{2}} \right) \\
\frac{\beta_{1} \omega_{1} \phi_{12}}
     {(\phi_{11} + \omega_1)(\phi_{22} + \omega_2) - \phi_{12}^{2}}
\end{array}\right)
\end{displaymath} \pause
} % End size
\renewcommand{\arraystretch}{1.0}
Note $\phi_{11} \phi_{22} - \phi_{12}^{2} = |\boldsymbol{\Sigma}_x|$, and $\omega_1 = Var(e_1)$, where $W_1 = X_1+e_1$.
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{When $H_0:\beta_2=0$ is true} \pause
%\framesubtitle{}
{\LARGE
\begin{displaymath}
\widehat{\beta}_2 \stackrel{a.s.}{\rightarrow}
\frac{\beta_{1} \omega_{1} \phi_{12}}
     {(\phi_{11} + \omega_1)(\phi_{22} + \omega_2) - \phi_{12}^{2}}
\end{displaymath} \pause
} % End size
So $\widehat{\beta}_2$ goes to the wrong target unless \pause
\begin{itemize}
\item There is no relationship between $X_1$ and $Y$, or \pause
\item There is no measurement error in $W_1$, or \pause
\item There is no covariance between $X_1$ and $X_2$. \pause
\end{itemize}
Also, the $t$ statistic goes to plus or minus $\infty$, and the $p$-value $\stackrel{a.s.}{\rightarrow} 0$. \pause
Remember, $H_0$ is true.
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{How bad is it for finite sample sizes?}
\framesubtitle{$\widehat{\beta}_2 \stackrel{a.s.}{\rightarrow} \frac{\beta_{1} \omega_{1} \phi_{12}} {(\phi_{11} + \omega_1)(\phi_{22} + \omega_2) - \phi_{12}^{2}}$} \pause
A big simulation study \pause (Brunner and Austin, 2009) \pause with six factors \pause
\begin{itemize}
\item Sample size: $n$ = 50, 100, 250, 500, 1000 \pause
\item $Corr(X_1,X_2)$: $\phi_{12}$ = 0.00, 0.20, 0.40, 0.60, 0.80 \pause
\item Proportion of variance in $Y$ explained by $X_1$: 0.25, 0.50, 0.75 \pause
\item Reliability of $W_1$: 0.50, 0.75, 0.80, 0.90, 0.95 \pause
\item Reliability of $W_2$: 0.50, 0.75, 0.80, 0.90, 0.95 \pause
\item Distribution of latent variables and error terms: Normal, Uniform, $t$, Pareto. \pause
\end{itemize}
There were $5\times 5\times 3\times 5\times5\times 4$ = 7,500 treatment combinations.
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Simulation study procedure}
%\framesubtitle{}
Within each of the $5\times 5\times 3\times 5\times5\times 4$ = 7,500 treatment combinations, \pause
\begin{itemize}
\item 10,000 random data sets were generated \pause
\item For a total of 75 million data sets \pause
\item All generated according to the true model, with $\beta_2=0$. \pause
\item Fit the naive model, test $H_0:\beta_2=0$ at $\alpha= 0.05$. \pause
\item The proportion of times $H_0$ is rejected is a Monte Carlo estimate of the Type I error probability. \pause
\item It should be around 0.05.
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Representative subset of the results} \pause
%\framesubtitle{}
\begin{itemize}
\item All random variables are normally distributed. \pause
\item Both reliabilities equal 0.90. \pause
\item Separate slides for weak, moderate and strong relationship between $X_1$ and $Y$.
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]
\frametitle{$X_1$ explains 25\% of the variance in $Y$} \pause
%\framesubtitle{}
Numbers in the table below are proportions of tests for which $H_0:\beta_2=0$ was rejected in 10,000 simulated data sets.
\pause %{\footnotesize % or scriptsize \begin{verbatim} Correlation Between X1 and X2 N 0.00 0.20 0.40 0.60 0.80 50 0.04760 0.05050 0.06360 0.07150 0.09130 100 0.05040 0.05210 0.08340 0.09400 0.12940 250 0.04670 0.05330 0.14020 0.16240 0.25440 500 0.04680 0.05950 0.23000 0.28920 0.46490 1000 0.05050 0.07340 0.40940 0.50570 0.74310 \end{verbatim} %} % End size \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame}[fragile] \frametitle{$X_1$ explains 50\% of the variance in $Y$} %\framesubtitle{} Numbers in the table below are proportions of tests for which $H_0:\beta_2=0$ was rejected in 10,000 simulated data sets. \pause %{\footnotesize % or scriptsize \begin{verbatim} Correlation Between X1 and X2 N 0.00 0.20 0.40 0.60 0.80 50 0.04600 0.05200 0.09630 0.11060 0.16330 100 0.05350 0.05690 0.14610 0.18570 0.28370 250 0.04830 0.06250 0.30680 0.37310 0.58640 500 0.05150 0.07800 0.53230 0.64880 0.88370 1000 0.04810 0.11850 0.82730 0.90880 0.99070 \end{verbatim} %} % End size \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame}[fragile] \frametitle{$X_1$ explains 75\% of the variance in $Y$} %\framesubtitle{} Numbers in the table below are proportions of tests for which $H_0:\beta_2=0$ was rejected in 10,000 simulated data sets. \pause %{\footnotesize % or scriptsize \begin{verbatim} Correlation Between X1 and X2 N 0.00 0.20 0.40 0.60 0.80 50 0.04850 0.05790 0.17270 0.20890 0.34420 100 0.05410 0.06790 0.31010 0.37850 0.60310 250 0.04790 0.08560 0.64500 0.75230 0.94340 500 0.04450 0.13230 0.91090 0.96350 0.99920 1000 0.05220 0.21790 0.99590 0.99980 1.00000 \end{verbatim} %} % End size \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} \frametitle{Summary} %\framesubtitle{} \begin{itemize} \item Ignoring measurement error in the independent variables can seriously inflate Type I error probabilities. \pause \item The poison combination is measurement error in the variable for which you are ``controlling," and correlation between latent explanatory variables. \pause \item If either is zero, there is no problem. \pause \item Factors affecting severity of the problem are (next slide) \end{itemize} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} \frametitle{Factors affecting severity of the problem} \pause \framesubtitle{Problem of inflated Type I error probability} \begin{itemize} \item As the correlation between $X_1$ and $X_2$ increases, the problem gets worse. \pause \item As the correlation between $X_1$ and $Y$ increases, the problem gets worse. \pause \item As the amount of measurement error in $X_1$ increases, the problem gets worse. \pause \item As the amount of measurement error in $X_2$ increases, the problem gets less severe. \pause \item As the sample size increases, the problem gets worse. \pause \item Distribution of the variables does not matter much. \end{itemize} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} \frametitle{As the sample size increases, the problem gets worse} \pause %\framesubtitle{} For a large enough sample size, no amount of measurement error in the explanatory variables is safe, assuming that the latent explanatory variables are correlated. 
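\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]
\frametitle{Checking one cell by simulation}
\framesubtitle{A minimal sketch, not part of the Brunner and Austin (2009) study}
The Python sketch below (NumPy and SciPy) uses illustrative values, not values from the study: both reliabilities 0.5, $\phi_{11}=\phi_{22}=1$, $\phi_{12}=0.8$, $\beta_1=1$, $\beta_2=0$, and normal variables. It fits the naive model by least squares and estimates the Type I error probability.
{\scriptsize
\begin{verbatim}
import numpy as np
from scipy import stats

rng = np.random.default_rng(2019)

def naive_test_rejects(n, beta1=1.0, phi12=0.8, omega1=1.0, omega2=1.0):
    # True model: Y = beta0 + beta1 X1 + 0 X2 + epsilon, with W_j = X_j + e_j
    X = rng.multivariate_normal([0.0, 0.0], [[1.0, phi12], [phi12, 1.0]], size=n)
    y = 1.0 + beta1 * X[:, 0] + rng.normal(0.0, 1.0, size=n)
    W = X + rng.normal(0.0, [np.sqrt(omega1), np.sqrt(omega2)], size=(n, 2))
    # Naive model: regress Y on W1 and W2 (plus an intercept)
    A = np.column_stack([np.ones(n), W])
    b = np.linalg.lstsq(A, y, rcond=None)[0]
    mse = np.sum((y - A @ b) ** 2) / (n - 3)
    se2 = np.sqrt(mse * np.linalg.inv(A.T @ A)[2, 2])  # standard error of b2
    pval = 2 * stats.t.sf(abs(b[2] / se2), df=n - 3)   # test of H0: beta2 = 0
    return pval < 0.05

print(np.mean([naive_test_rejects(n=500) for _ in range(2000)]))
\end{verbatim}
} % End size
The printed rejection rate is far above 0.05, even though $H_0:\beta_2=0$ is true.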
\end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} \frametitle{Other kinds of regression, other kinds of measurement error} \pause %\framesubtitle{} \begin{itemize} \item Logistic regression \pause \item Proportional hazards regression in survival analysis \pause \item Log-linear models: Test of conditional independence in the presence of classification error \pause \item Median splits \pause \item Even converting $X_1$ to ranks inflates Type I Error probability. \end{itemize} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} \frametitle{If $X_1$ is randomly assigned} \pause %\framesubtitle{} \begin{itemize} \item Then it is independent of $X_2$: Zero correlation. \pause \item So even if an experimentally manipulated variable is measured (implemented) with error, there will be no inflation of Type I error probability. \pause \item If $X_2$ is randomly assigned and $X_1$ is a covariate observed with error (very common)\pause, then again there is no correlation between $X_1$ and $X_2$, and so no inflation of Type I error. \pause \item Measurement error may decrease the precision of experimental studies, but in terms of Type I error it creates no problems. \pause \item For observational studies, the news is not so good. \end{itemize} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} \frametitle{Observational studies} \pause %\framesubtitle{} \begin{itemize} \item Measurement error in the explanatory variables is almost universal. \pause \item Standard statistical methods are almost guaranteed to yield inconsistent estimates. \pause \item Conclusions may be incorrect \pause -- or they may not. \pause With more than 2 explanatory variables, the impact of measurement error depends on the covariances between the $x$ variables, in a complicated way. \pause \item Instrumental variables can help. \pause \item Statistical models that incorporate measurement error are available. \pause \item But problems with identifiability prevent them from being applied to typical data sets. \end{itemize} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} \frametitle{Copyright Information} This slide show was prepared by \href{http://www.utstat.toronto.edu/~brunner}{Jerry Brunner}, Department of Statistics, University of Toronto. It is licensed under a \href{http://creativecommons.org/licenses/by-sa/3.0/deed.en_US} {Creative Commons Attribution - ShareAlike 3.0 Unported License}. Use any part of it as you like and share the result freely. 
The \LaTeX~source code is available from the course website:
\href{http://www.utstat.toronto.edu/~brunner/oldclass/appliedf19}
{\small\texttt{http://www.utstat.toronto.edu/$^\sim$brunner/oldclass/appliedf19}}
\end{frame}

\end{document}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{}
%\framesubtitle{}
\begin{itemize}
\item
\item
\item
\end{itemize}
\end{frame}

{\LARGE
\begin{displaymath}

\end{displaymath}
}

\begin{displaymath}
\left( \begin{array}{ccc}
\sum_{i=1}^n (x_{i1}-\overline{x}_1)^2 &
\sum_{i=1}^n (x_{i1}-\overline{x}_1)(x_{i2}-\overline{x}_2) &
\sum_{i=1}^n (x_{i1}-\overline{x}_1)(x_{i3}-\overline{x}_3) \\
\end{array} \right)
\end{displaymath}