\documentclass[11pt]{article}
%\usepackage{amsbsy} % for \boldsymbol and \pmb
%\usepackage{graphicx} % To include pdf files!
\usepackage{amsmath}
\usepackage{amsbsy}
\usepackage{amsfonts}
\usepackage[colorlinks=true, pdfstartview=FitV, linkcolor=blue,
            citecolor=blue, urlcolor=blue]{hyperref} % For links
\usepackage{comment} % To comment out sections with \begin{comment} and \end{comment}
\oddsidemargin=-.25in % Good for US Letter paper
\evensidemargin=0in
\textwidth=6.3in
\topmargin=-0.5in
\headheight=0.1in
\headsep=0.1in
\textheight=9.4in
%\pagestyle{empty} % No page numbers

\begin{document}
%\enlargethispage*{1000 pt}
\begin{center}
{\Large \textbf{~~~~~STA2101 Formulas}}\\ % Version 2
\vspace{1 mm}
\end{center}

% Spectral decomposition, linear independence.
% MGFs
% Random vectors
% Linear model
% Distribution facts, incl x2 addup?
% Test stats and CIs

\noindent
\renewcommand{\arraystretch}{2.0}
\begin{tabular}{lll}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Linear Algebra %%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\parbox{7 cm}{Columns of $\mathbf{A}$ \emph{linearly dependent} means there is
a vector $\mathbf{v} \neq \mathbf{0}$ with $\mathbf{Av} = \mathbf{0}$.} & ~~~~~ &
\parbox{7 cm}{Columns of $\mathbf{A}$ \emph{linearly independent} means that
$\mathbf{Av} = \mathbf{0}$ implies $\mathbf{v} = \mathbf{0}$.} \\
\multicolumn{3}{l}{$\mathbf{A}$ \emph{positive definite} means
$\mathbf{v}^\top \mathbf{Av} > 0$ for all vectors $\mathbf{v} \neq \mathbf{0}$.} \\
$\boldsymbol{\Sigma} = \mathbf{P} \boldsymbol{\Lambda}\mathbf{P}^\top$ & ~~~~~ &
$\boldsymbol{\Sigma}^{-1} = \mathbf{P} \boldsymbol{\Lambda}^{-1} \mathbf{P}^\top$ \\
$\boldsymbol{\Sigma}^{1/2} = \mathbf{P} \boldsymbol{\Lambda}^{1/2} \mathbf{P}^\top$ & ~~~~~ &
$\boldsymbol{\Sigma}^{-1/2} = \mathbf{P} \boldsymbol{\Lambda}^{-1/2} \mathbf{P}^\top$ \\
%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Large sample %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\multicolumn{3}{l}{If $\lim_{n \rightarrow \infty} E(T_n) = \theta$ and
$\lim_{n \rightarrow \infty} Var(T_n) = 0$, then $T_n \stackrel{p}{\rightarrow} \theta$.} \\
\multicolumn{3}{l}{If $\sqrt{n}(T_n-\mu) \stackrel{d}{\rightarrow} T \sim N(0,\sigma^2)$, then
$\sqrt{n}\left(g(T_n)-g(\mu)\right) \stackrel{d}{\rightarrow} g^\prime(\mu)T
\sim N(0,g^\prime(\mu)^2\sigma^2)$.} \\
If $\mathbf{T}_n \stackrel{d}{\rightarrow} \mathbf{T}$ and
$\mathbf{Y}_n \stackrel{p}{\rightarrow} \mathbf{c}$, then
$\left( \begin{array}{c} \mathbf{T}_n \\ \mathbf{Y}_n \end{array} \right)
\stackrel{d}{\rightarrow}
\left( \begin{array}{c} \mathbf{T} \\ \mathbf{c} \end{array} \right)$ & ~~~~~ &
$\sqrt{n}(\overline{\mathbf{x}}_n-\boldsymbol{\mu}) \stackrel{d}{\rightarrow}
\mathbf{x} \sim N(\mathbf{0},\boldsymbol{\Sigma})$ \\
\multicolumn{3}{l}{Let $g: \mathbb{R}^d \rightarrow \mathbb{R}^k$ be such that the elements of
$\dot{g}(\mathbf{x}) = \left[ \frac{\partial g_i}{\partial x_j} \right]_{k \times d}$
are continuous in a neighbourhood of $\boldsymbol{\theta}$.
If $\sqrt{n}(\mathbf{T}_n-\boldsymbol{\theta}) \stackrel{d}{\rightarrow} \mathbf{T}$, then
$\sqrt{n}\left(g(\mathbf{T}_n)-g(\boldsymbol{\theta})\right) \stackrel{d}{\rightarrow}
\dot{g}(\boldsymbol{\theta}) \mathbf{T}$.} \\
%%%%%%%%%%%%%%%%%%%%%%%%%%% Random vectors %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
$cov(\mathbf{w}) = E\left\{(\mathbf{w}-\boldsymbol{\mu}_w)(\mathbf{w}-\boldsymbol{\mu}_w)^\top\right\}$ & ~~~~~ &
$cov(\mathbf{w,t}) = E\left\{ (\mathbf{w}-\boldsymbol{\mu}_w)
(\mathbf{t}-\boldsymbol{\mu}_t)^\top\right\}$ \\
$cov(\mathbf{w}) = E\{\mathbf{ww}^\top\} - \boldsymbol{\mu}_w\boldsymbol{\mu}_w^\top$ & ~~~~~ &
$cov(\mathbf{Aw}) = \mathbf{A}cov(\mathbf{w}) \mathbf{A}^\top$ \\
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% MVN %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
If $\mathbf{w} \sim N_p(\boldsymbol{\mu},\boldsymbol{\Sigma} )$, then
$\mathbf{Aw}+\mathbf{c} \sim N_r(\mathbf{A}\boldsymbol{\mu} + \mathbf{c},
\mathbf{A}\boldsymbol{\Sigma}\mathbf{A}^\top )$ & ~~~~~ &
and $(\mathbf{w}-\boldsymbol{\mu})^\top \boldsymbol{\Sigma}^{-1}(\mathbf{w}-\boldsymbol{\mu})
\sim \chi^2 (p)$ \\
\multicolumn{3}{l}{$L(\boldsymbol{\mu,\Sigma}) = |\boldsymbol{\Sigma}|^{-n/2} (2\pi)^{-np/2}
\exp -\frac{n}{2}\left\{ tr(\boldsymbol{\widehat{\Sigma}\Sigma}^{-1}) +
(\overline{\mathbf{y}}-\boldsymbol{\mu})^\top \boldsymbol{\Sigma}^{-1}
(\overline{\mathbf{y}}-\boldsymbol{\mu}) \right\}$, where
$\boldsymbol{\widehat{\Sigma}} = \frac{1}{n}\sum_{i=1}^n (\mathbf{y}_i-\overline{\mathbf{y}})
(\mathbf{y}_i-\overline{\mathbf{y}})^\top $} \\
%%%%%%%%%%%%%%%%%%%%%%%%%%% Univariate MGF %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
$M_y(t) = E(e^{yt})$ & ~~~~~ & $M_{ay}(t) = M_y(at)$ \\
$M_{y+a}(t) = e^{at}M_y(t)$ & ~~~~~ &
$M_{_{\sum_{i=1}^n y_i}}(t) = \prod_{i=1}^n M_{y_i}(t)$ for independent $y_1, \ldots, y_n$ \\
$y \sim N(\mu,\sigma^2)$ means $M_{_y}(t) = e^{\mu t + \frac{1}{2}\sigma^2t^2}$ & ~~~~~ &
$y \sim \chi^2(\nu)$ means $M_{_y}(t) = (1-2t)^{-\nu/2}$ \\
\multicolumn{3}{l}{If $W=W_1+W_2$ with $W_1$ and $W_2$ independent,
$W\sim\chi^2(\nu_1+\nu_2)$ and $W_2\sim\chi^2(\nu_2)$, then $W_1\sim\chi^2(\nu_1)$.} \\
%%%%%%%%%%%%%%%%%%%%%%%%%% Simple regression %%%%%%%%%%%%%%%%%%%%%%%%%%%%
$y_i = \beta_0 + \beta_1 x_i + \epsilon_i$ & ~~~~~ &
$\widehat{\beta}_0 = \overline{y} - \widehat{\beta}_1\overline{x}$ \\
$\widehat{\beta}_1 = \frac{\sum_{i=1}^n(x_i-\overline{x})(y_i-\overline{y})}
{\sum_{i=1}^n(x_i-\overline{x})^2} =
\frac{\sum_{i=1}^n x_iy_i - n \, \overline{x} \, \overline{y}}
{\sum_{i=1}^n x_i^2 - n\overline{x}^2}$ & ~~~~~ &
$r = \frac{\sum_{i=1}^n (x_i-\overline{x})(y_i-\overline{y})}
{\sqrt{\sum_{i=1}^n (x_i-\overline{x})^2} \sqrt{\sum_{i=1}^n (y_i-\overline{y})^2}}$ \\
\end{tabular}
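
\vspace{2mm}
\noindent
As a quick numerical illustration of the spectral decomposition formulas above,
$\boldsymbol{\Sigma}^{1/2}$ can be computed in R with \texttt{eigen};
the $2 \times 2$ matrix here is an arbitrary example:
\begin{verbatim}
> Sigma = rbind(c(2,1),
+               c(1,2))
> eig = eigen(Sigma)
> P = eig$vectors                 # Orthogonal: P %*% t(P) = I
> Lambda = diag(eig$values)       # Diagonal matrix of eigenvalues
> P %*% Lambda %*% t(P)           # Recovers Sigma
> SqrtSigma = P %*% sqrt(Lambda) %*% t(P)
> SqrtSigma %*% SqrtSigma         # Equals Sigma, so this is Sigma^(1/2)
\end{verbatim}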
\pagebreak

\noindent
\begin{tabular}{lll}
%%%%%%%%%%%%%%%%%%%%%%%%% Multiple Regression %%%%%%%%%%%%%%%%%%%%%%%%%%%
$y_i = \beta_0 + \beta_1 x_{i,1} + \cdots + \beta_{p-1} x_{i,p-1} + \epsilon_i$ & ~~~~~ &
$\epsilon_1, \ldots, \epsilon_n$ independent $N(0,\sigma^2)$ \\
$\mathbf{y} = \mathbf{X} \boldsymbol{\beta} + \boldsymbol{\epsilon}$ & ~~~~~ &
$\boldsymbol{\epsilon} \sim N_n(\mathbf{0},\sigma^2\mathbf{I}_n)$ \\
$\widehat{\boldsymbol{\beta}} = (\mathbf{X}^\top \mathbf{X})^{-1} \mathbf{X}^\top \mathbf{y}
\sim N_p\left(\boldsymbol{\beta}, \sigma^2 (\mathbf{X}^\top \mathbf{X})^{-1}\right)$ & ~~~~~ &
$\widehat{\mathbf{y}} = \mathbf{X}\widehat{\boldsymbol{\beta}} = \mathbf{Hy}$, where
$\mathbf{H} = \mathbf{X}(\mathbf{X}^\top \mathbf{X})^{-1} \mathbf{X}^\top $ \\
$\mathbf{e} = \mathbf{y} - \widehat{\mathbf{y}} = (\mathbf{I}-\mathbf{H})\mathbf{y}$, ~~
$\mathbf{X}^\top\mathbf{e} = \mathbf{0}$ & ~~~~~ &
$\widehat{\boldsymbol{\beta}}$ and $\mathbf{e}$ are independent under normality. \\
$\sum_{i=1}^n(y_i-\overline{y})^2 = \sum_{i=1}^n(y_i-\widehat{y}_i)^2 +
\sum_{i=1}^n(\widehat{y}_i-\overline{y})^2$ & ~~~~~ &
$SST=SSE+SSR$ and $R^2 = \frac{SSR}{SST}$ \\
$\frac{SSE}{\sigma^2} = \frac{\mathbf{e}^\top \mathbf{e}}{\sigma^2} \sim \chi^2(n-p)$ & ~~~~~ &
$MSE = \frac{SSE}{n-p}$ \\
$T = \frac{Z}{\sqrt{W/\nu}} \sim t(\nu)$ for independent $Z \sim N(0,1)$, $W \sim \chi^2(\nu)$ & ~~~~~ &
$F = \frac{W_1/\nu_1}{W_2/\nu_2} \sim F(\nu_1,\nu_2)$ for independent $W_j \sim \chi^2(\nu_j)$ \\
\multicolumn{3}{l}{Under $H_0:\mathbf{L}\boldsymbol{\beta}=\mathbf{h}$,
$F^* = \frac{(\mathbf{L}\widehat{\boldsymbol{\beta}}-\mathbf{h})^\top
(\mathbf{L}(\mathbf{X}^\top \mathbf{X})^{-1}\mathbf{L}^\top)^{-1}
(\mathbf{L}\widehat{\boldsymbol{\beta}}-\mathbf{h})}{r \, MSE} =
\frac{SSR_F-SSR_R}{r \, MSE_F} \sim F(r,n-p)$, where $\mathbf{L}$ is $r \times p$
with linearly independent rows.} \\
%%%%%%%%%%%%%%%%%%%%%%%%% Large sample tests %%%%%%%%%%%%%%%%%%%%%%%%%%%%
$G^2 = -2 \log \left( \frac{\max_{\theta \in \Theta_0} L(\theta)}
{\max_{\theta \in \Theta} L(\theta)} \right) =
-2 \log \left( \frac{L(\widehat{\theta}_0)}{L(\widehat{\theta})} \right)$ & ~~~~~ &
$W_n = (\mathbf{L}\widehat{\boldsymbol{\theta}}_n-\mathbf{h})^\top
\left(\mathbf{L} \widehat{\mathbf{V}}_n \mathbf{L}^\top\right)^{-1}
(\mathbf{L}\widehat{\boldsymbol{\theta}}_n-\mathbf{h})$ \\
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Bayes %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
$\pi(\theta|x) = \frac{f(x|\theta)\pi(\theta)}{\int f(x|t) \pi(t) \, dt}$ & ~~~~~ &
$E\left(L(d(x),\theta) \,|\, X=x\right) = \int L\left(d(x),\theta\right) \pi(\theta|x) \, d\theta$ \\
\end{tabular}
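
\vspace{2mm}
\noindent
For example, with simulated data (arbitrary parameter values),
$\widehat{\boldsymbol{\beta}}$ and $F^*$ can be computed directly in R and
compared with \texttt{lm}:
\begin{verbatim}
> set.seed(9999)
> n = 50; x1 = rnorm(n); x2 = rnorm(n)
> y = 1 + 2*x1 + rnorm(n)                    # True beta2 = 0
> X = cbind(1, x1, x2); p = 3
> betahat = solve(t(X) %*% X) %*% t(X) %*% y # Matches coef(lm(y ~ x1 + x2))
> e = y - X %*% betahat
> MSE = sum(e^2)/(n - p)
> L = rbind(c(0, 0, 1)); h = 0; r = 1        # H0: beta2 = 0
> Fstar = t(L %*% betahat - h) %*%
+   solve(L %*% solve(t(X) %*% X) %*% t(L)) %*%
+   (L %*% betahat - h) / (r * MSE)
> Fstar        # Equals the square of the t statistic for x2
\end{verbatim}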
\vspace{2mm}

\begin{tabular}{|l|l|c|c|}
\hline
\textbf{Distribution} & \multicolumn{1}{c|}{\textbf{Density or pmf}} & % Dumb trick to center
\textbf{Mean} & \textbf{Variance} \\ \hline
Exponential & $f(x) = \theta e^{-\theta x}$ for $x>0$ & $1/\theta$ & $1/\theta^2$ \\ \hline
Gamma & $f(x) = \frac{\beta^\alpha}{\Gamma(\alpha)} e^{-\beta x} \, x^{\alpha-1}$
for $x>0$ & $\alpha/\beta$ & $\alpha/\beta^2$ \\ \hline
Normal & $f(x) = \frac{\tau^{1/2}}{\sqrt{2\pi}}e^{-\frac{\tau}{2}(x-\mu)^2}$ &
$\mu$ & $1/\tau$ \\ \hline
Beta & $f(x) = \frac{\Gamma(\alpha+\beta)}{\Gamma(\alpha)\Gamma(\beta)} \,
x^{\alpha-1} (1-x)^{\beta-1}$ for $0<x<1$ & $\frac{\alpha}{\alpha+\beta}$ &
$\frac{\alpha\beta}{(\alpha+\beta)^2(\alpha+\beta+1)}$ \\ \hline
\end{tabular}
\renewcommand{\arraystretch}{1.0}

\vspace{35mm}

\noindent
\begin{center}\begin{tabular}{l}
\hspace{6.5in} \\ \hline
\end{tabular}\end{center}
This formula sheet was prepared by
\href{http://www.utstat.toronto.edu/~brunner}{Jerry Brunner}, Department of
Statistics, University of Toronto. It is licensed under a
\href{http://creativecommons.org/licenses/by-sa/3.0/deed.en_US}
{Creative Commons Attribution - ShareAlike 3.0 Unported License}.
Use any part of it as you like and share the result freely.
The \LaTeX~source code is available from the course website:
\href{http://www.utstat.toronto.edu/~brunner/oldclass/2101f19}
{\texttt{http://www.utstat.toronto.edu/$^\sim$brunner/oldclass/2101f19}}

\end{document}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{verbatim}
> # Chi-squared critical values
> df = 1:12
> Critical_Value = qchisq(0.95,df)
> cbind(df,Critical_Value)
      df Critical_Value
 [1,]  1       3.841459
 [2,]  2       5.991465
 [3,]  3       7.814728
 [4,]  4       9.487729
 [5,]  5      11.070498
 [6,]  6      12.591587
 [7,]  7      14.067140
 [8,]  8      15.507313
 [9,]  9      16.918978
[10,] 10      18.307038
[11,] 11      19.675138
[12,] 12      21.026070
\end{verbatim}
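
% Numerical check of the Beta row in the distribution table
% (alpha = 2, beta = 3 are arbitrary example values):
\begin{verbatim}
> alpha = 2; beta = 3
> alpha/(alpha+beta)                                  # Mean: 0.4
> alpha*beta/((alpha+beta)^2*(alpha+beta+1))          # Variance: 0.04
> integrate(function(x) x*dbeta(x,alpha,beta), 0, 1)  # Mean again: 0.4
\end{verbatim}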
%%%%%%%%%%%%%%%%%%%%%%%%% Logistic regression %%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%% Poisson regression %%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%% Multinomial logit models %%%%%%%%%%%%%%%%%%%%%%
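
% Monte Carlo check that (w - mu)' Sigma^{-1} (w - mu) ~ chi-squared(p),
% simulating w = Sigma^{1/2} z + mu with z standard normal (example values):
\begin{verbatim}
> set.seed(9999)
> mu = c(1,2); Sigma = rbind(c(2,1),c(1,2)); p = 2
> eig = eigen(Sigma)
> SqrtSigma = eig$vectors %*% sqrt(diag(eig$values)) %*% t(eig$vectors)
> sim = numeric(10000)
> for(j in 1:10000) {
+   w = SqrtSigma %*% rnorm(p) + mu
+   sim[j] = t(w - mu) %*% solve(Sigma) %*% (w - mu)
+ }
> mean(sim > qchisq(0.95, p))   # Should be close to 0.05
\end{verbatim}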