% Instrumental variables as a solution to measurement error % \documentclass[serif]{beamer} % Serif for Computer Modern math font. \documentclass[serif, handout]{beamer} % Handout mode to ignore pause statements \hypersetup{colorlinks,linkcolor=,urlcolor=red} \usefonttheme{serif} % Looks like Computer Modern for non-math text -- nice! \setbeamertemplate{navigation symbols}{} % Suppress navigation symbols % \usetheme{Berlin} % Displays sections on top \usetheme{Frankfurt} % Displays section titles on top: Fairly thin but still swallows some material at bottom of crowded slides %\usetheme{Berkeley} \usepackage[english]{babel} \usepackage{amsmath} % for binom % \usepackage{graphicx} % To include pdf files! % \definecolor{links}{HTML}{2A1B81} % \definecolor{links}{red} \setbeamertemplate{footline}[frame number] \mode<presentation> % \mode<presentation>{\setbeamercolor{background canvas}{bg=black!5}} % Comment this out for handout \title{Instrumental Variables Again\footnote{See last slide for copyright information.}} \subtitle{STA2101 Fall 2019} \date{} % To suppress date \begin{document} \begin{frame} \titlepage \end{frame} \begin{frame} \frametitle{Overview} \tableofcontents \end{frame} \section{Omitted Variables} \begin{frame} \frametitle{Remember the problem of omitted variables} \framesubtitle{Example: $X$ is income, $Y$ is credit card debt.} \pause \begin{itemize} \item Omitted explanatory variables are part of the error term. \item Usually they are correlated with explanatory variables that are in the model. \item This makes the error term correlated with $X$. \begin{center} \includegraphics[width=2.5in]{OmittedAgain} \end{center} \pause \item Parameters are not identifiable. \item Estimation and inference fail. 
\end{itemize} \end{frame} \begin{frame} \frametitle{Instrumental variable method saved the day} \framesubtitle{Philip Wright, 1928} \pause {\footnotesize An instrumental variable (for an explanatory variable) \begin{itemize} \item Is related to the explanatory variable in question. \pause % \item Is unrelated to any other explanatory variable in the model. \pause % Not so fast buddy. \item Is unrelated to any error term in the model. \pause \item Is connected to the response variable only through $X$. \pause \begin{center} \includegraphics[width=3in]{InstruAgain} \end{center} \pause \item Real estate agents: $X$ is income, $Y$ is credit card debt, $Z$ is median home price. \item Interest is in $\beta_2$. \end{itemize} } % End size \end{frame} \begin{frame} \frametitle{Technically everything worked great} \framesubtitle{$X_i = \alpha_1 + \beta_1Z_i +\epsilon_{i1}$ and $Y_i = \alpha_2 + \beta_2X_i +\epsilon_{i2}$} \pause $\boldsymbol{\Sigma} =$ {\footnotesize \renewcommand{\arraystretch}{1.5} \begin{tabular}{|c|ccc|} \hline & $Z$ & $X$ & $Y$ \\ \hline $Z$ & $\sigma^2_z$ & $\beta_1\sigma^2_z$ & $\beta_1\beta_2\sigma^2_z$ \\ $X$ & $\cdot$ & $\beta_1^2\sigma^2_z+\sigma^2_1$ & $\beta_2(\beta_1^2\sigma^2_z+\sigma^2_1)+c$ \\ $Y$ & $\cdot$ & $\cdot$ & $\beta_1^2\beta_2^2\sigma^2_z + \beta_2^2\sigma^2_1 + 2\beta_2c + \sigma^2_2$ \\ \hline \end{tabular} \pause \renewcommand{\arraystretch}{1.0} } % End size % \begin{center} % \includegraphics[width=3in]{InstruVar} % \end{center} \pause \vspace{5mm} \begin{itemize} \item Nine moment structure equations in 9 unknown parameters. \pause \item $ \beta_2 = \frac{\sigma_{13}}{\sigma_{12}}$. \item All the other parameters are identifiable too. \pause % Homework \item But of course there is measurement error. 
\end{itemize} \end{frame} % STA2101f16 continues from here \begin{frame} \frametitle{The model needs improvement} \framesubtitle{$X$ is income, $Y$ is credit card debt, $Z$ is median home price.} \pause Same picture: \begin{center} \includegraphics[width=3in]{InstruAgain} \end{center} \pause \begin{itemize} \item $X=$ Income is measured with error. \pause \item So is $Y=$ Debt. \pause \item There are still unmeasured variables that impact them both. \end{itemize} \end{frame} \section{Including Measurement Error} \begin{frame} \frametitle{An improved Model} \framesubtitle{$X$ is income, $Y$ is credit card debt, $Z$ is median home price.} \begin{columns} \column{0.5\textwidth} \begin{center} \includegraphics[width=2in]{ImprovedInstru1} \end{center} \pause \column{0.5\textwidth} {\footnotesize \begin{itemize} \item Common omitted variables are affecting true $X$ and true $Y$. \pause \item Common omitted variables are affecting measurement of $X$ and measurement of $Y$. \pause \item Factor loadings are realistic:\pause ~Positive but not = 1. \pause \item Six covariance structure equations in 11 unknowns. \pause \item And it's still not realistic enough. \pause \item Housing prices are only estimated. \end{itemize} } % End size \end{columns} \end{frame} \begin{frame} \frametitle{Easier to defend, but impossible to estimate} \framesubtitle{$X$ is income, $Y$ is credit card debt, $Z$ is median home price.} \begin{columns} \column{0.5\textwidth} \begin{center} \includegraphics[width=2in]{ImprovedInstru2} \end{center} \pause \column{0.5\textwidth} Fortunately the instrumental variable only has to be \emph{correlated} with the explanatory variable. 
\end{columns} \end{frame} \begin{frame} \frametitle{Here's the Model}% \pause \framesubtitle{$X$ is reported income, $Y$ is reported credit card debt, $Z$ is estimated median resale home price.} \begin{columns} \column{0.5\textwidth} \begin{center} \includegraphics[width=2in]{ImprovedInstru3} \end{center} \column{0.5\textwidth} {\footnotesize \begin{itemize} \item Fairly realistic. \pause \item Still six covariance structure equations in 11 unknowns (poison). \pause \item Explanatory variable correlated with the error term (poison). \pause \item Correlated measurement errors (poison). \pause \item But we have an instrumental variable. \pause \item Calculate the covariance matrix. \end{itemize} } % End size \end{columns} \end{frame} \begin{frame} \frametitle{Show part of the calculation} \framesubtitle{$Z$ is estimated median resale home price, $Y$ is reported credit card debt} \begin{center} \includegraphics[width=1.5in]{ImprovedInstru3} \end{center} {\footnotesize \begin{eqnarray*} Cov(Z,Y) & = & \pause Cov(Z, \lambda_2 T_y + e_2) \\ \pause & = & Cov(Z, \lambda_2(\beta T_x+\epsilon) + e_2) \\ \pause & = & Cov(Z, \lambda_2\beta T_x+ \lambda_2\epsilon + e_2) \\ \pause & = & \lambda_2\beta Cov(Z,T_x) + \lambda_2Cov(Z,\epsilon) + Cov(Z,e_2)\\ \pause & = & \lambda_2 \beta \phi_{12} + 0 + 0 % \pause \end{eqnarray*} % Definitely $\phi_{12}>0$ and $\lambda_2>0$. 
} % End size \end{frame} \begin{frame} \frametitle{Covariance matrix of the observable data} \framesubtitle{$Z$ is estimated median resale home price, $X$ is reported income, $Y$ is reported credit card debt} \pause \begin{displaymath} cov\left( \begin{array}{c} Z \\ X \\ Y \\ \end{array} \right) = \left(\begin{array}{ccc} \phi_{11} & \lambda_{1} \phi_{12} & \beta \lambda_{2} \phi_{12} \\ \cdot & \lambda_{1}^{2} \phi_{22} + \omega_{11} & \beta \lambda_{1} \lambda_{2} \phi_{22} + c \lambda_{1} \lambda_{2} + \omega_{12} \\ \cdot & \cdot & \beta^{2} \lambda_{2}^{2} \phi_{22} + 2 \, \beta c \lambda_{2}^{2} + \lambda_{2}^{2} \psi + \omega_{22} \end{array}\right) \end{displaymath} \begin{columns} \column{0.5\textwidth} \begin{center} \includegraphics[width=1.5in]{ImprovedInstru3} \end{center} \pause \column{0.5\textwidth} {\footnotesize \begin{itemize} \item $\beta$ is not identifiable. \pause \item But $\phi_{12}>0$ and $\lambda_2>0$. \pause \item So the sign of $\beta$ is identifiable from $\sigma_{13}$. \pause \item $H_0: \beta=0$ is testable. \pause \item It's possible to answer the basic question of the study. \end{itemize} } % End size \end{columns} \end{frame} \begin{frame} \frametitle{It's a miracle} % \framesubtitle{} \begin{itemize} \item Instrumental variables can help with measurement error and omitted variables at the same time. \pause \item If there is measurement error, regression coefficients of interest are not identifiable and cannot be estimated consistently, but their signs can. \pause \item Often, that's all you really want to know. \pause \item Matrix version is available. \pause \item The usual rule in Econometrics is (at least) one instrumental variable for each explanatory variable. % \pause % \item The $p \times p$ matrix of covariances between $\mathbf{X}$ and $\mathbf{Z}$ must have an inverse. 
% HW \end{itemize} \end{frame} \begin{frame} \frametitle{Independence of the instrumental variable and error terms is critical.} \begin{columns} \column{0.5\textwidth} \begin{center} \includegraphics[width=2in]{ImprovedInstru3} \end{center} \pause \column{0.5\textwidth} \begin{itemize} \item Instrumental variables need to come from another world. \pause % \item Instrumental variables are related to $\mathbf{X}$ for reasons that are \emph{separate} from why $\mathbf{X}$ is related to $\mathbf{Y}$. \pause \item For example, does academic ability contribute to higher salary? \pause \begin{itemize} \item Study adults who were adopted as children. \pause \item $X$ is academic ability. \pause \item $Y$ is salary at age 40. \pause \item $W$ is measured IQ at 40. \pause \item $Z$ is birth mother's IQ score. % \pause (there are studies like this). \end{itemize} \end{itemize} \end{columns} \end{frame} \begin{frame} \frametitle{It's a partial solution} \pause % \framesubtitle{} \begin{itemize} \item Good instrumental variables are not easy to find. \pause \item They will not be in a data set casually collected for other purposes. \pause \item Advance planning is needed. \pause \item The ultimate instrumental variable is randomly assigned. \end{itemize} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} \frametitle{Copyright Information} This slide show was prepared by \href{http://www.utstat.toronto.edu/~brunner}{Jerry Brunner}, Department of Statistics, University of Toronto. It is licensed under a \href{http://creativecommons.org/licenses/by-sa/3.0/deed.en_US} {Creative Commons Attribution - ShareAlike 3.0 Unported License}. Use any part of it as you like and share the result freely. 
The \LaTeX~source code is available from the course website: \href{http://www.utstat.toronto.edu/~brunner/oldclass/2101f19} {\footnotesize \texttt{http://www.utstat.toronto.edu/$^\sim$brunner/oldclass/2101f19}} \end{frame} \end{document} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame} \frametitle{Example} \end{frame} \begin{frame} \frametitle{} % \framesubtitle{} \begin{itemize} \item \item \item \end{itemize} \end{frame} {\LARGE \begin{displaymath} \end{displaymath} } % End Size %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% sem = 'http://www.utstat.toronto.edu/~brunner/openSEM/sage/sem.sage' load(sem) # In EsqVar, eta = beta eta + gamma xi, with cov(xi) = Phi B = ZeroMatrix(3,3); B[2,1] = var('lambda2'); B # beta matrix G = ZeroMatrix(3,5) # gamma matrix G[0,1] = var('lambda1'); G[0,3] = 1 G[1,1] = var('beta'); G[1,2] = 1 G[2,4] = 1 G P = ZeroMatrix(5,5) # Phi = cov(xi) P[0:2,0:2] = SymmetricMatrix(2,'phi') # 2x2 in upper left: Ends of ranges not included P[3:5,3:5] = SymmetricMatrix(2,'omega') # Even though 5 is out of bounds. P[2,2] = var('psi'); P[1,2] = var('c'); P[2,1] = P[1,2] show(P) pickout = 4,1,3 # Indices of observable variables Z,X,Y in order eta, xi Sigma = EqsVar(B,G,P,pickout); Sigma print(latex(Sigma)) \left(\begin{array}{rrr} \phi_{11} & \lambda_{1} \phi_{12} & \beta \lambda_{2} \phi_{12} \\ \lambda_{1} \phi_{12} & \lambda_{1}^{2} \phi_{22} + \omega_{11} & \beta \lambda_{1} \lambda_{2} \phi_{22} + c \lambda_{1} \lambda_{2} + \omega_{12} \\ \beta \lambda_{2} \phi_{12} & \beta \lambda_{1} \lambda_{2} \phi_{22} + c \lambda_{1} \lambda_{2} + \omega_{12} & \beta^{2} \lambda_{2}^{2} \phi_{22} + 2 \, \beta c \lambda_{2}^{2} + \lambda_{2}^{2} \psi + \omega_{22} \end{array}\right)