% \documentclass[serif]{beamer} % Serif for Computer Modern math font.
\documentclass[serif, handout]{beamer} % Handout mode to ignore pause statements
\hypersetup{colorlinks,linkcolor=,urlcolor=red}
\usefonttheme{serif} % Looks like Computer Modern for non-math text -- nice!
\setbeamertemplate{navigation symbols}{} % Suppress navigation symbols
\usetheme{Berlin} % Displays sections on top
\usepackage[english]{babel}
\usepackage{comment} % To comment out sections with \begin{comment} and \end{comment}
% \definecolor{links}{HTML}{2A1B81}
% \definecolor{links}{red}
\setbeamertemplate{footline}[frame number]
\mode<presentation>
% \mode<handout>{\setbeamercolor{background canvas}{bg=black!5}}
\title{A bit of regression: Quick and very applied\footnote{See last slide for copyright information.}}
\subtitle{STA2101 Fall 2019}
\date{} % To suppress date

\begin{document}

\begin{frame}
\titlepage
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Fixed Effects Linear Regression}
%\framesubtitle{}

{\LARGE
\begin{displaymath}
\mathbf{y} = \mathbf{X} \boldsymbol{\beta} + \boldsymbol{\epsilon}
\end{displaymath}
} % End size

\begin{itemize}
\item $\mathbf{X}$ is an $n \times p$ matrix of known constants.
\item $\boldsymbol{\beta}$ is a $p \times 1$ vector of unknown constants.
\item $\boldsymbol{\epsilon} \sim N(\mathbf{0},\sigma^2 \mathbf{I}_n)$, where $\sigma^2 > 0$ is an unknown constant.
\item[] \pause
\item $\widehat{\boldsymbol{\beta}} = (\mathbf{X}^\top \mathbf{X})^{-1} \mathbf{X}^\top \mathbf{y}$
\item $\widehat{\mathbf{y}} = \mathbf{X}\widehat{\boldsymbol{\beta}}$
\item $\mathbf{e} = (\mathbf{y}-\widehat{\mathbf{y}})$
\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Comparing scalar and matrix form}
%\framesubtitle{}

Scalar form is $y_i = \beta_0 + \beta_1 x_{i,1} + \cdots + \beta_{p-1}x_{i,p-1} + \epsilon_i$. \pause

\begin{equation*}
\begin{array}{cccccc} % 6 columns
\mathbf{y} & = & \mathbf{X} & \boldsymbol{\beta} & + & \boldsymbol{\epsilon} \\ \pause
&&&&& \\ % Another space
\left( \begin{array}{c} y_1 \\ y_2 \\ y_3 \\ \vdots \\ y_n \end{array} \right)
&=&
\left(\begin{array}{cccc}
1      & 14.2   & \cdots & 1 \\
1      & 11.9   & \cdots & 0 \\
1      & ~3.7   & \cdots & 0 \\
\vdots & \vdots & \vdots & \vdots \\
1      & ~6.2   & \cdots & 1
\end{array}\right)
&
\left( \begin{array}{c} \beta_0 \\ \beta_1 \\ \vdots \\ \beta_{p-1} \end{array} \right)
&+&
\left( \begin{array}{c} \epsilon_1 \\ \epsilon_2 \\ \epsilon_3 \\ \vdots \\ \epsilon_n \end{array} \right)
\end{array}
\end{equation*}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
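% A minimal R sketch of the matrix formulas above, checked against lm() on
% simulated data. The sample size, variable names and parameter values here
% are invented purely for illustration.
\begin{comment}
set.seed(9999)
n <- 50
x1 <- rnorm(n, 10, 3)                       # a quantitative explanatory variable
x2 <- rbinom(n, 1, 0.5)                     # a binary explanatory variable
X <- cbind(1, x1, x2)                       # n x p matrix of known constants
beta <- c(2, 1, -3); sigma <- 4
y <- drop(X %*% beta) + rnorm(n, 0, sigma)
betahat <- solve(t(X) %*% X) %*% t(X) %*% y # (X'X)^{-1} X'y
yhat <- drop(X %*% betahat)                 # fitted values
e <- y - yhat                               # residuals
cbind(betahat, coef(lm(y ~ x1 + x2)))       # same estimates both ways
\end{comment}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%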
\begin{frame}
\frametitle{Vocabulary}
%\framesubtitle{}
\begin{itemize}
\item Explanatory variables are $x$
\item Response variable is $y$.
\end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{``Control" means hold constant}
%\framesubtitle{}
\begin{itemize}
\item Regression model with four explanatory variables.
\item Hold $x_1$, $x_2$ and $x_4$ constant at some fixed values. \pause
\begin{eqnarray*}
E(Y|\boldsymbol{X}=\boldsymbol{x}) & = & \beta_0 + \beta_1x_1 + \beta_2x_2 + \beta_3x_3 + \beta_4x_4 \\ \pause
& = & (\beta_0 + \beta_1x_1 + \beta_2x_2 + \beta_4x_4) + \beta_3x_3 \\ \pause
\end{eqnarray*}
\item The equation of a straight line with slope $\beta_3$. \pause
\item Values of $x_1$, $x_2$ and $x_4$ affect only the intercept. \pause
\item So $\beta_3$ is the rate at which $E(Y|\mathbf{x})$ changes as a function of $x_3$ with all other variables held constant at fixed levels. \pause
\item \emph{According to the model}.
\end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{More vocabulary}
\framesubtitle{$E(Y|\boldsymbol{X}=\boldsymbol{x}) = (\beta_0 + \beta_1x_1 + \beta_2x_2 + \beta_4x_4) + \beta_3x_3$}
\pause
\begin{itemize}
\item If $\beta_3>0$, describe the relationship between $x_3$ and (expected) $y$ as ``positive," controlling for the other variables. If $\beta_3<0$, negative. \pause
\item Useful ways of saying ``controlling for" or ``holding constant" include \pause
\begin{itemize}
\item Allowing for
\item Correcting for
\item Taking into account
\end{itemize}
\end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Partitioning Sums of Squares}
%\framesubtitle{}

{\large
\renewcommand{\arraystretch}{2.0}
\begin{displaymath}
\begin{array}{ccccc}
SST & = & SSR & + & SSE \\
\sum_{i=1}^n(Y_i-\overline{Y})^2 & = & \sum_{i=1}^n(\widehat{Y}_i-\overline{Y})^2 & + & \sum_{i=1}^n(Y_i-\widehat{Y}_i)^2
\end{array}
\end{displaymath}
\renewcommand{\arraystretch}{1.0}
} % End size

\pause
\vspace{5mm}

{\LARGE
\begin{displaymath}
R^2 = \frac{SSR}{SST}
\end{displaymath}
} % End size

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
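% A quick R check of the sum-of-squares partition and of R^2 = SSR/SST;
% the simulated data and variable names are invented for illustration.
\begin{comment}
set.seed(9999)
n <- 50
x1 <- rnorm(n); x2 <- rnorm(n)
y <- 1 + 2*x1 - x2 + rnorm(n, 0, 3)
fit <- lm(y ~ x1 + x2)
yhat <- fitted(fit); ybar <- mean(y)
SST <- sum((y - ybar)^2)
SSR <- sum((yhat - ybar)^2)
SSE <- sum((y - yhat)^2)
c(SST = SST, SSR.plus.SSE = SSR + SSE)   # the two should agree
c(SSR/SST, summary(fit)$r.squared)       # two ways to get R^2
\end{comment}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%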
\begin{frame}
\frametitle{Categorical Explanatory Variables}
\pause
\framesubtitle{Unordered categories}
\begin{itemize}
\item $X=1$ means Drug, $X=0$ means Placebo. \pause
\item Population mean is $E(Y|X=x) = \beta_0 + \beta_1 x$. \pause
\item For patients getting the drug, mean response is $E(Y|X=1) = \beta_0 + \beta_1$. \pause
\item For patients getting the placebo, mean response is $E(Y|X=0) = \beta_0$. \pause
\item And $\beta_1$ is the difference between means, the average treatment effect.
\end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% I cut out a correlation-causation bit here. See 2018. It goes better later, with SEMs.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{More than Two Categories}
\pause
Suppose a study has 3 treatment conditions. For example, Group 1 gets Drug 1, Group 2 gets Drug 2, and Group 3 gets a placebo, so that the explanatory variable is Group (taking values 1, 2, 3) and there is some response variable $Y$ (maybe response to the drug again).
\pause
\vspace{10mm}

Why is $E[Y|X=x] = \beta_0 + \beta_1x$ (with $x$ = Group) a silly model?

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Indicator Dummy Variables}
\framesubtitle{With intercept}
\pause
\begin{itemize}
\item $x_1 = 1$ if Drug A, zero otherwise
\item $x_2 = 1$ if Drug B, zero otherwise \pause
\item $E[Y|\boldsymbol{X}=\boldsymbol{x}] = \beta_0 + \beta_1x_1 + \beta_2 x_2$. \pause
\item Fill in the table. \pause
\end{itemize}

{\begin{center}
\begin{tabular}{|c|c|c|l|} \hline
Drug    & $x_1$ & $x_2$ & $E(Y|\mathbf{x}) = \beta_0 + \beta_1x_1 + \beta_2 x_2$ \\ \hline
$A$     &       &       & $\mu_1$ = \\ \hline
$B$     &       &       & $\mu_2$ = \\ \hline
Placebo &       &       & $\mu_3$ = \\ \hline
\end{tabular}
\end{center}}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Answer}
\begin{itemize}
\item $x_1 = 1$ if Drug A, zero otherwise
\item $x_2 = 1$ if Drug B, zero otherwise
\item $E[Y|\boldsymbol{X}=\boldsymbol{x}] = \beta_0 + \beta_1x_1 + \beta_2 x_2$.
\end{itemize}

{\begin{center}
\begin{tabular}{|c|c|c|l|} \hline
Drug    & $x_1$ & $x_2$ & $E(Y|\mathbf{x}) = \beta_0 + \beta_1x_1 + \beta_2 x_2$ \\ \hline
$A$     & 1     & 0     & $\mu_1$ = $\beta_0 + \beta_1$ \\ \hline
$B$     & 0     & 1     & $\mu_2$ = $\beta_0 + \beta_2$ \\ \hline
Placebo & 0     & 0     & $\mu_3$ = $\beta_0$ \\ \hline
\end{tabular}
\end{center}}

\pause
Regression coefficients are contrasts with the category that has no indicator -- the \emph{reference category}.

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Indicator dummy variable coding with intercept}
%\framesubtitle{}
\begin{itemize}
\item With an intercept in the model, need $p-1$ indicators to represent a categorical explanatory variable with $p$ categories. \pause
\item If you use $p$ dummy variables \emph{and} an intercept, the indicators add up to the intercept column, so the columns of $\mathbf{X}$ are linearly dependent: trouble. \pause
\item Regression coefficients are differences from the category that has no indicator. \pause
\item Call this the \emph{reference category}.
\end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Comment this slide out.
\begin{comment}
\begin{frame}
\frametitle{$x_1 = 1$ if Drug A, zero o.w., $x_2 = 1$ if Drug B, zero o.w.}
\pause
%\framesubtitle{3-d Scatterplot}
Recall $\sum_{i=1}^n (y_i-m)^2$ is minimized at $m = \overline{y}$ \pause
\begin{center}
\includegraphics[width=3in]{ABCscatter}
\end{center}
\end{frame}
\end{comment}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{What null hypotheses would you test?}

{\begin{center}
\begin{tabular}{|c|c|c|l|} \hline
Drug    & $x_1$ & $x_2$ & $E(Y|\mathbf{x}) = \beta_0 + \beta_1x_1 + \beta_2 x_2$ \\ \hline
$A$     & 1     & 0     & $\mu_1$ = $\beta_0 + \beta_1$ \\ \hline
$B$     & 0     & 1     & $\mu_2$ = $\beta_0 + \beta_2$ \\ \hline
Placebo & 0     & 0     & $\mu_3$ = $\beta_0$ \\ \hline
\end{tabular}
\end{center}}

\pause
\begin{itemize}
\item Is the effect of Drug $A$ different from the placebo? \pause $H_0: \beta_1=0$ \pause
\item Is Drug $A$ better than the placebo? \pause $H_0: \beta_1=0$ \pause
\item Did Drug $B$ work? \pause $H_0: \beta_2=0$ \pause
\item Did experimental treatment have an effect? \pause $H_0: \beta_1=\beta_2=0$ \pause
\item Is there a difference between the effects of Drug $A$ and Drug $B$? \pause $H_0: \beta_1=\beta_2$
\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
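% A small R sketch of indicator dummy coding with Placebo as the reference
% category; the group sizes and parameter values are invented for illustration.
\begin{comment}
set.seed(9999)
drug <- factor(rep(c("A", "B", "Placebo"), each = 20))
y <- 10 + 3*(drug == "A") + 5*(drug == "B") + rnorm(60, 0, 2)
x1 <- as.numeric(drug == "A")        # indicator for Drug A
x2 <- as.numeric(drug == "B")        # indicator for Drug B
coef(lm(y ~ x1 + x2))                # beta0hat, beta1hat, beta2hat as in the table
coef(lm(y ~ relevel(drug, ref = "Placebo")))  # same fit from the factor directly
tapply(y, drug, mean)                # compare: beta0+beta1, beta0+beta2, beta0
\end{comment}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%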
\begin{frame}
\frametitle{Now add a quantitative explanatory variable (covariate)}
\framesubtitle{Covariates often come first in the regression equation}
\pause
\begin{itemize}
\item $x_1 = 1$ if Drug A, zero otherwise
\item $x_2 = 1$ if Drug B, zero otherwise
\item $x_3$ = Age \pause
\item $E[Y|\boldsymbol{X}=\boldsymbol{x}] = \beta_0 + \beta_1x_1 + \beta_2 x_2 + \beta_3 x_3$. \pause
\end{itemize}

{\begin{center}
\begin{tabular}{|c|c|c|l|} \hline
Drug    & $x_1$ & $x_2$ & $E(Y|\mathbf{x}) = \beta_0+\beta_1x_1+\beta_2x_2+\beta_3x_3$ \\ \hline
A       & 1     & 0     & $\mu_1$ = $(\beta_0+\beta_1)+\beta_3x_3$ \\ \hline
B       & 0     & 1     & $\mu_2$ = $(\beta_0+\beta_2)+\beta_3x_3$ \\ \hline
Placebo & 0     & 0     & $\mu_3$ = ~~~~~$\beta_0$~~~~~$+\beta_3x_3$ \\ \hline
\end{tabular}
\end{center}}

\pause
Parallel regression lines.

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{More comments}
%\framesubtitle{}

{\begin{center}
\begin{tabular}{|c|c|c|l|} \hline
Drug    & $x_1$ & $x_2$ & $E(Y|\mathbf{x}) = \beta_0+\beta_1x_1+\beta_2x_2+\beta_3x_3$ \\ \hline
A       & 1     & 0     & $\mu_1$ = $(\beta_0+\beta_1)+\beta_3x_3$ \\ \hline
B       & 0     & 1     & $\mu_2$ = $(\beta_0+\beta_2)+\beta_3x_3$ \\ \hline
Placebo & 0     & 0     & $\mu_3$ = ~~~~~$\beta_0$~~~~~$+\beta_3x_3$ \\ \hline
\end{tabular}
\end{center}}

\pause
\begin{itemize}
\item If more than one covariate, parallel regression planes. \pause
\item Non-parallel (interaction) is testable. \pause
\item The ``controlling" interpretation holds. \pause
\item In an experimental study, quantitative covariates are usually just observed. \pause
\item Could age be related to drug if there is random assignment to drug? \pause
\item Good covariates reduce MSE, making tests of the categorical variables more sensitive.
\end{itemize}

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Hypothesis Testing}
\framesubtitle{Standard tests when errors are normal}
\pause
\begin{itemize}
\item Overall $F$-test for all the explanatory variables at once: \pause $H_0: \beta_1 = \beta_2 = \cdots = \beta_{p-1} = 0$ \pause
\item $t$-tests for each regression coefficient: Controlling for all the others, does that explanatory variable matter? \pause $H_0: \beta_j=0$ \pause
\item Test a collection of explanatory variables controlling for another collection: \pause $H_0: \beta_2 = \beta_3 = \beta_5 = 0$ \pause
\item Example: Controlling for mother's education and father's education, are (any of) total family income, assessed value of home and total market value of all vehicles owned by the family related to High School GPA? \pause
\item Most general: Testing whether sets of linear combinations of regression coefficients differ from specified constants: \pause $H_0: \mathbf{L}\boldsymbol{\beta} = \mathbf{h}$.
\end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Full versus Restricted Model} % Changing vocabulary: Reduced -> restricted
\framesubtitle{Restricted by $H_0$}
\pause
\begin{itemize}
\item You have 2 sets of variables, $A$ and $B$. Want to test $B$ controlling for $A$. \pause
\item Fit a model with both $A$ and $B$: Call it the \emph{Full Model}, or the \emph{Unrestricted Model}. \pause
\item Fit a model with just $A$: Call it the \emph{Restricted Model}. \\ \pause
$R^2_F \geq R^2_R$. \pause
\item The $F$-test is a likelihood ratio test (exact).
\end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
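% A minimal R illustration of the full versus restricted F test; the variable
% names, sample size and the choice of sets A and B are invented for illustration.
\begin{comment}
set.seed(9999)
n <- 100
x1 <- rnorm(n); x2 <- rnorm(n); x3 <- rnorm(n); x4 <- rnorm(n)
y <- 1 + 2*x1 - x2 + 0.5*x3 + rnorm(n)
full <- lm(y ~ x1 + x2 + x3 + x4)    # set A = (x1, x2), set B = (x3, x4)
restricted <- lm(y ~ x1 + x2)        # restricted by H0: beta3 = beta4 = 0
anova(restricted, full)              # F test for set B controlling for set A
# The same F statistic from the R^2 formula on the next slide:
R2F <- summary(full)$r.squared; R2R <- summary(restricted)$r.squared
r <- 2; p <- 5
((R2F - R2R)/r) / ((1 - R2F)/(n - p))
\end{comment}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%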
\begin{frame}
\frametitle{When you add the $r$ additional explanatory variables in set $B$, $R^2$ can only go up}
\pause
%\framesubtitle{}
By how much? This is the basis of the $F$ test. \pause

{\LARGE
\begin{eqnarray*}
F & = & \frac{(R^2_F-R^2_R)/r}{(1-R^2_F)/(n-p)} \\ \pause
&&\\
& = & \frac{(SSR_F-SSR_R)/r}{MSE_F} \pause ~ \stackrel{H_0}{\sim} ~ F(r,n-p)
\end{eqnarray*}
} % End size

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Strength of Relationship: Change in $R^2$ is not enough}
\pause
%\framesubtitle{}

{\LARGE
\begin{eqnarray*}
F & = & \frac{(R^2_F-R^2_R)/r}{(1-R^2_F)/(n-p)} \\ \pause
&& \\
& = & \left( \frac{n-p}{r} \right) \left( \frac{a}{1-a} \right)
\end{eqnarray*}
\pause
} % End size
where
{\LARGE
\begin{displaymath}
a = \frac{R^2_F - R^2_R}{1-R^2_R} \pause = \frac{rF}{n-p+rF}
\end{displaymath}
} % End size

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{General Linear Test of $H_0: \mathbf{L}\boldsymbol{\beta} = \mathbf{h}$}
\framesubtitle{$\mathbf{L}$ is $r \times p$, rows linearly independent}
\pause

{\LARGE
\begin{eqnarray*}
F &=& \frac{(\mathbf{L}\widehat{\boldsymbol{\beta}}-\mathbf{h})^\top
(\mathbf{L}(\mathbf{X}^\top \mathbf{X})^{-1}\mathbf{L}^\top)^{-1}
(\mathbf{L}\widehat{\boldsymbol{\beta}}-\mathbf{h})}{r \, MSE_F} \\
&&\\
& \stackrel{H_0}{\sim} & F(r,n-p)
\end{eqnarray*}
\pause
} % End size

Equal to the full versus restricted formula.

\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Copyright Information}

This slide show was prepared by \href{http://www.utstat.toronto.edu/~brunner}{Jerry Brunner}, Department of Statistics, University of Toronto. It is licensed under a
\href{http://creativecommons.org/licenses/by-sa/3.0/deed.en_US}{Creative Commons Attribution - ShareAlike 3.0 Unported License}. Use any part of it as you like and share the result freely. The \LaTeX~source code is available from the course website:
\href{http://www.utstat.toronto.edu/~brunner/oldclass/2101f19}{\footnotesize \texttt{http://www.utstat.toronto.edu/$^\sim$brunner/oldclass/2101f19}}

\end{frame}

\end{document}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{}
%\framesubtitle{}
\begin{itemize}
\item
\item
\item
\end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

# For scatterplot slides
HS_GPA <- round(rnorm(100,80,7)); sort(HS_GPA)
HS_GPA[HS_GPA>100] <- 100
Univ_GPA <- round(5 + .9 * HS_GPA + rnorm(100,0,5)); sort(Univ_GPA)
cbind(HS_GPA,Univ_GPA)
b <- coefficients(lm(Univ_GPA~HS_GPA)); b; b[1]
x1 <- 60; x2 <- 97
y1 <- b[1] + b[2] * x1
y2 <- b[1] + b[2] * x2
plot(HS_GPA,Univ_GPA)
lines(c(x1,x2),c(y1,y2))

# 3-d
x1 = c(0,0,1,1); x2 = c(0,1,0,1)
plot(x1,x2,pch=' ',xlab=expression(x[1]),ylab=expression(x[2]))
text(1,0,'A'); text(0,1,'B'); text(0,0,'C')
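# A sketch of the general linear test of H0: L beta = h, computed directly from
# the matrix formula on the slides. The data, L and h below are invented for
# illustration. (If the car package is installed, car::linearHypothesis() gives
# the same test.)
set.seed(9999)
n <- 100
x1 <- rnorm(n); x2 <- rnorm(n); x3 <- rnorm(n)
y <- 1 + 2*x1 + 2*x2 + rnorm(n)
fit <- lm(y ~ x1 + x2 + x3)
X <- model.matrix(fit); betahat <- coef(fit)
p <- ncol(X); MSE <- sum(residuals(fit)^2)/(n - p)
L <- rbind(c(0, 1, -1, 0),   # row 1 of H0: beta1 = beta2
           c(0, 0,  0, 1))   # row 2 of H0: beta3 = 0
h <- c(0, 0); r <- nrow(L)
d <- L %*% betahat - h
Fstat <- drop(t(d) %*% solve(L %*% solve(t(X) %*% X) %*% t(L)) %*% d) / (r * MSE)
c(F = Fstat, p.value = pf(Fstat, r, n - p, lower.tail = FALSE))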