\documentclass[11pt]{article}
%\usepackage{amsbsy} % for \boldsymbol and \pmb
%\usepackage{graphicx} % To include pdf files!
\usepackage{amsmath}
\usepackage{amsbsy}
\usepackage{amsfonts}
\usepackage[colorlinks=true, pdfstartview=FitV, linkcolor=blue,
            citecolor=blue, urlcolor=blue]{hyperref} % For links
\usepackage{comment} % To comment out sections with \begin{comment} and \end{comment}
\oddsidemargin=-.25in % Good for US Letter paper
\evensidemargin=0in
\textwidth=6.3in
\topmargin=-0.5in
\headheight=0.1in
\headsep=0.1in
\textheight=9.4in
%\pagestyle{empty} % No page numbers

\begin{document}
%\enlargethispage*{1000 pt}
\begin{center}
{\Large \textbf{~~~~~STA2101 Formulas}}\\ % Version 2
\vspace{1 mm}
\end{center}

% Spectral decomposition, linear independence.
% MGFs
% Random vectors
% Linear model
% Distribution facts, incl x2 addup?
% Test stats and CIs

\noindent
\renewcommand{\arraystretch}{2.0}
\begin{tabular}{lll}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Linear Algebra %%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\parbox{7 cm}{Columns of $\mathbf{A}$ \emph{linearly dependent} means there is
a vector $\mathbf{v} \neq \mathbf{0}$ with $\mathbf{Av} = \mathbf{0}$.} & ~~~~~ &
\parbox{7 cm}{Columns of $\mathbf{A}$ \emph{linearly independent} means that
$\mathbf{Av} = \mathbf{0}$ implies $\mathbf{v} = \mathbf{0}$.} \\
\multicolumn{3}{l}{$\mathbf{A}$ \emph{positive definite} means
$\mathbf{v}^\top \mathbf{Av} > 0$ for all vectors $\mathbf{v} \neq \mathbf{0}$.} \\
$\boldsymbol{\Sigma} = \mathbf{P} \boldsymbol{\Lambda}\mathbf{P}^\top$ & ~~~~~ &
$\boldsymbol{\Sigma}^{-1} = \mathbf{P} \boldsymbol{\Lambda}^{-1} \mathbf{P}^\top$ \\
$\boldsymbol{\Sigma}^{1/2} = \mathbf{P} \boldsymbol{\Lambda}^{1/2} \mathbf{P}^\top$ & ~~~~~ &
$\boldsymbol{\Sigma}^{-1/2} = \mathbf{P} \boldsymbol{\Lambda}^{-1/2} \mathbf{P}^\top$ \\
%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Large sample %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\multicolumn{3}{l}{If $\lim_{n \rightarrow \infty} E(T_n) = \theta$ and
$\lim_{n \rightarrow \infty} Var(T_n) = 0$, then $T_n \stackrel{p}{\rightarrow} \theta$.} \\
\multicolumn{3}{l}{If $\sqrt{n}(T_n-\mu) \stackrel{d}{\rightarrow} T \sim N(0,\sigma^2)$, then
$\sqrt{n}\left(g(T_n)-g(\mu)\right) \stackrel{d}{\rightarrow} g^\prime(\mu)T
\sim N(0,g^\prime(\mu)^2\sigma^2)$.} \\
If $\mathbf{T}_n \stackrel{d}{\rightarrow} \mathbf{T}$ and
$\mathbf{Y}_n \stackrel{p}{\rightarrow} \mathbf{c}$, then
$\left( \begin{array}{c} \mathbf{T}_n \\ \mathbf{Y}_n \end{array} \right)
\stackrel{d}{\rightarrow}
\left( \begin{array}{c} \mathbf{T} \\ \mathbf{c} \end{array} \right)$ & ~~~~~ &
$\sqrt{n}(\overline{\mathbf{x}}_n-\boldsymbol{\mu}) \stackrel{d}{\rightarrow}
\mathbf{x} \sim N(\mathbf{0},\boldsymbol{\Sigma})$ \\
\multicolumn{3}{l}{Let $g: \mathbb{R}^d \rightarrow \mathbb{R}^k$ be such that the elements of
$\dot{g}(\mathbf{x}) = \left[ \frac{\partial g_i}{\partial x_j} \right]_{k \times d}$
are continuous in a neighbourhood of $\boldsymbol{\theta}$.
If $\sqrt{n}(\mathbf{T}_n-\boldsymbol{\theta}) \stackrel{d}{\rightarrow} \mathbf{T}$, then
$\sqrt{n}\left(g(\mathbf{T}_n)-g(\boldsymbol{\theta})\right) \stackrel{d}{\rightarrow}
\dot{g}(\boldsymbol{\theta}) \mathbf{T}$.} \\
%%%%%%%%%%%%%%%%%%%%%%%%%%% Random vectors %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
$cov(\mathbf{w}) = E\left\{(\mathbf{w}-\boldsymbol{\mu}_w)(\mathbf{w}-\boldsymbol{\mu}_w)^\top\right\}$ & ~~~~~ &
$cov(\mathbf{w,t}) = E\left\{ (\mathbf{w}-\boldsymbol{\mu}_w)
(\mathbf{t}-\boldsymbol{\mu}_t)^\top\right\}$ \\
$cov(\mathbf{w}) = E\{\mathbf{ww}^\top\} - \boldsymbol{\mu}_w\boldsymbol{\mu}_w^\top$ & ~~~~~ &
$cov(\mathbf{Aw}) = \mathbf{A}cov(\mathbf{w}) \mathbf{A}^\top$ \\
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% MVN %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
If $\mathbf{w} \sim N_p(\boldsymbol{\mu},\boldsymbol{\Sigma} )$, then
$\mathbf{Aw}+\mathbf{c} \sim N_r(\mathbf{A}\boldsymbol{\mu} + \mathbf{c},
\mathbf{A}\boldsymbol{\Sigma}\mathbf{A}^\top )$ & ~~~~~ &
and $(\mathbf{w}-\boldsymbol{\mu})^\top \boldsymbol{\Sigma}^{-1}(\mathbf{w}-\boldsymbol{\mu})
\sim \chi^2 (p)$ \\
\multicolumn{3}{l}{$L(\boldsymbol{\mu,\Sigma}) = |\boldsymbol{\Sigma}|^{-n/2} (2\pi)^{-np/2}
\exp -\frac{n}{2}\left\{ tr(\boldsymbol{\widehat{\Sigma}\Sigma}^{-1}) +
(\overline{\mathbf{y}}-\boldsymbol{\mu})^\top \boldsymbol{\Sigma}^{-1}
(\overline{\mathbf{y}}-\boldsymbol{\mu}) \right\}$, where
$\boldsymbol{\widehat{\Sigma}} = \frac{1}{n}\sum_{i=1}^n (\mathbf{y}_i-\overline{\mathbf{y}})
(\mathbf{y}_i-\overline{\mathbf{y}})^\top $} \\
%%%%%%%%%%%%%%%%%%%%%%%%%%% Univariate MGF %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
$M_y(t) = E(e^{yt})$ & ~~~~~ & $M_{ay}(t) = M_y(at)$ \\
$M_{y+a}(t) = e^{at}M_y(t)$ & ~~~~~ &
$M_{_{\sum_{i=1}^n y_i}}(t) = \prod_{i=1}^n M_{y_i}(t)$ for independent $y_1, \ldots, y_n$ \\
$y \sim N(\mu,\sigma^2)$ means $M_{_y}(t) = e^{\mu t + \frac{1}{2}\sigma^2t^2}$ & ~~~~~ &
$y \sim \chi^2(\nu)$ means $M_{_y}(t) = (1-2t)^{-\nu/2}$ \\
\multicolumn{3}{l}{If $W=W_1+W_2$ with $W_1$ and $W_2$ independent,
$W\sim\chi^2(\nu_1+\nu_2)$ and $W_2\sim\chi^2(\nu_2)$, then $W_1\sim\chi^2(\nu_1)$.} \\
%%%%%%%%%%%%%%%%%%%%%%%%%% Simple regression %%%%%%%%%%%%%%%%%%%%%%%%%%%%
$y_i = \beta_0 + \beta_1 x_i + \epsilon_i$ & ~~~~~ &
$\widehat{\beta}_0 = \overline{y} - \widehat{\beta}_1\overline{x}$ \\
$\widehat{\beta}_1 = \frac{\sum_{i=1}^n(x_i-\overline{x})(y_i-\overline{y})}
{\sum_{i=1}^n(x_i-\overline{x})^2} =
\frac{\sum_{i=1}^n x_iy_i - n \, \overline{x} \, \overline{y}}
{\sum_{i=1}^n x_i^2 - n\overline{x}^2}$ & ~~~~~ &
$r = \frac{\sum_{i=1}^n (x_i-\overline{x})(y_i-\overline{y})}
{\sqrt{\sum_{i=1}^n (x_i-\overline{x})^2} \sqrt{\sum_{i=1}^n (y_i-\overline{y})^2}}$ \\
\end{tabular}
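
\vspace{2mm}
\noindent
As a quick numerical illustration of the spectral decomposition formulas above,
$\boldsymbol{\Sigma}^{1/2}$ can be computed in R with \texttt{eigen};
the $2 \times 2$ matrix here is an arbitrary example:
\begin{verbatim}
> Sigma = rbind(c(2,1),
+               c(1,2))
> eig = eigen(Sigma)
> P = eig$vectors                 # Orthogonal: P %*% t(P) = I
> Lambda = diag(eig$values)       # Diagonal matrix of eigenvalues
> P %*% Lambda %*% t(P)           # Recovers Sigma
> SqrtSigma = P %*% sqrt(Lambda) %*% t(P)
> SqrtSigma %*% SqrtSigma         # Equals Sigma, so this is Sigma^(1/2)
\end{verbatim}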
\pagebreak

\noindent
\begin{tabular}{lll}
%%%%%%%%%%%%%%%%%%%%%%%%% Multiple Regression %%%%%%%%%%%%%%%%%%%%%%%%%%%
$y_i = \beta_0 + \beta_1 x_{i,1} + \cdots + \beta_{p-1} x_{i,p-1} + \epsilon_i$ & ~~~~~ &
$\epsilon_1, \ldots, \epsilon_n$ independent $N(0,\sigma^2)$ \\
$\mathbf{y} = \mathbf{X} \boldsymbol{\beta} + \boldsymbol{\epsilon}$ & ~~~~~ &
$\boldsymbol{\epsilon} \sim N_n(\mathbf{0},\sigma^2\mathbf{I}_n)$ \\
$\widehat{\boldsymbol{\beta}} = (\mathbf{X}^\top \mathbf{X})^{-1} \mathbf{X}^\top \mathbf{y}
\sim N_p\left(\boldsymbol{\beta}, \sigma^2 (\mathbf{X}^\top \mathbf{X})^{-1}\right)$ & ~~~~~ &
$\widehat{\mathbf{y}} = \mathbf{X}\widehat{\boldsymbol{\beta}} = \mathbf{Hy}$, where
$\mathbf{H} = \mathbf{X}(\mathbf{X}^\top \mathbf{X})^{-1} \mathbf{X}^\top $ \\
$\mathbf{e} = \mathbf{y} - \widehat{\mathbf{y}} = (\mathbf{I}-\mathbf{H})\mathbf{y}$, ~~
$\mathbf{X}^\top\mathbf{e} = \mathbf{0}$ & ~~~~~ &
$\widehat{\boldsymbol{\beta}}$ and $\mathbf{e}$ are independent under normality. \\
$\sum_{i=1}^n(y_i-\overline{y})^2 = \sum_{i=1}^n(y_i-\widehat{y}_i)^2 +
\sum_{i=1}^n(\widehat{y}_i-\overline{y})^2$ & ~~~~~ &
$SST=SSE+SSR$ and $R^2 = \frac{SSR}{SST}$ \\
$\frac{SSE}{\sigma^2} = \frac{\mathbf{e}^\top \mathbf{e}}{\sigma^2} \sim \chi^2(n-p)$ & ~~~~~ &
$MSE = \frac{SSE}{n-p}$ \\
$T = \frac{Z}{\sqrt{W/\nu}} \sim t(\nu)$ for independent $Z \sim N(0,1)$, $W \sim \chi^2(\nu)$ & ~~~~~ &
$F = \frac{W_1/\nu_1}{W_2/\nu_2} \sim F(\nu_1,\nu_2)$ for independent $W_j \sim \chi^2(\nu_j)$ \\
\multicolumn{3}{l}{Under $H_0:\mathbf{L}\boldsymbol{\beta}=\mathbf{h}$,
$F^* = \frac{(\mathbf{L}\widehat{\boldsymbol{\beta}}-\mathbf{h})^\top
(\mathbf{L}(\mathbf{X}^\top \mathbf{X})^{-1}\mathbf{L}^\top)^{-1}
(\mathbf{L}\widehat{\boldsymbol{\beta}}-\mathbf{h})}{r \, MSE} =
\frac{SSR_F-SSR_R}{r \, MSE_F} \sim F(r,n-p)$, where $\mathbf{L}$ is $r \times p$
with linearly independent rows.} \\
%%%%%%%%%%%%%%%%%%%%%%%%% Large sample tests %%%%%%%%%%%%%%%%%%%%%%%%%%%%
$G^2 = -2 \log \left( \frac{\max_{\theta \in \Theta_0} L(\theta)}
{\max_{\theta \in \Theta} L(\theta)} \right) =
-2 \log \left( \frac{L(\widehat{\theta}_0)}{L(\widehat{\theta})} \right)$ & ~~~~~ &
$W_n = (\mathbf{L}\widehat{\boldsymbol{\theta}}_n-\mathbf{h})^\top
\left(\mathbf{L} \widehat{\mathbf{V}}_n \mathbf{L}^\top\right)^{-1}
(\mathbf{L}\widehat{\boldsymbol{\theta}}_n-\mathbf{h})$ \\
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Bayes %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
$\pi(\theta|x) = \frac{f(x|\theta)\pi(\theta)}{\int f(x|t) \pi(t) \, dt}$ & ~~~~~ &
$E\left(L(d(x),\theta) \,|\, X=x\right) = \int L\left(d(x),\theta\right) \pi(\theta|x) \, d\theta$ \\
\end{tabular}
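
\vspace{2mm}
\noindent
For example, with simulated data (arbitrary parameter values),
$\widehat{\boldsymbol{\beta}}$ and $F^*$ can be computed directly in R and
compared with \texttt{lm}:
\begin{verbatim}
> set.seed(9999)
> n = 50; x1 = rnorm(n); x2 = rnorm(n)
> y = 1 + 2*x1 + rnorm(n)                    # True beta2 = 0
> X = cbind(1, x1, x2); p = 3
> betahat = solve(t(X) %*% X) %*% t(X) %*% y # Matches coef(lm(y ~ x1 + x2))
> e = y - X %*% betahat
> MSE = sum(e^2)/(n - p)
> L = rbind(c(0, 0, 1)); h = 0; r = 1        # H0: beta2 = 0
> Fstar = t(L %*% betahat - h) %*%
+   solve(L %*% solve(t(X) %*% X) %*% t(L)) %*%
+   (L %*% betahat - h) / (r * MSE)
> Fstar        # Equals the square of the t statistic for x2
\end{verbatim}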
\vspace{2mm}

\begin{tabular}{|l|l|c|c|}
\hline
\textbf{Distribution} & \multicolumn{1}{c|}{\textbf{Density or pmf}} & % Dumb trick to center
\textbf{Mean} & \textbf{Variance} \\ \hline
Exponential & $f(x) = \theta e^{-\theta x}$ for $x>0$ & $1/\theta$ & $1/\theta^2$ \\ \hline
Gamma & $f(x) = \frac{\beta^\alpha}{\Gamma(\alpha)} e^{-\beta x} \, x^{\alpha-1}$
for $x>0$ & $\alpha/\beta$ & $\alpha/\beta^2$ \\ \hline
Normal & $f(x) = \frac{\tau^{1/2}}{\sqrt{2\pi}}e^{-\frac{\tau}{2}(x-\mu)^2}$ &
$\mu$ & $1/\tau$ \\ \hline
Beta & $f(x) = \frac{\Gamma(\alpha+\beta)}{\Gamma(\alpha)\Gamma(\beta)} \,
x^{\alpha-1} (1-x)^{\beta-1}$ for $0<x<1$ & $\frac{\alpha}{\alpha+\beta}$ &
$\frac{\alpha\beta}{(\alpha+\beta)^2(\alpha+\beta+1)}$ \\ \hline
\end{tabular}
\renewcommand{\arraystretch}{1.0}

\vspace{35mm}

\noindent
\begin{center}\begin{tabular}{l}
\hspace{6.5in} \\ \hline
\end{tabular}\end{center}
This formula sheet was prepared by
\href{http://www.utstat.toronto.edu/~brunner}{Jerry Brunner}, Department of
Statistics, University of Toronto. It is licensed under a
\href{http://creativecommons.org/licenses/by-sa/3.0/deed.en_US}
{Creative Commons Attribution - ShareAlike 3.0 Unported License}.
Use any part of it as you like and share the result freely.
The \LaTeX~source code is available from the course website:
\href{http://www.utstat.toronto.edu/~brunner/oldclass/2101f19}
{\texttt{http://www.utstat.toronto.edu/$^\sim$brunner/oldclass/2101f19}}

\end{document}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{verbatim}
> # Chi-squared critical values
> df = 1:12
> Critical_Value = qchisq(0.95,df)
> cbind(df,Critical_Value)
      df Critical_Value
 [1,]  1       3.841459
 [2,]  2       5.991465
 [3,]  3       7.814728
 [4,]  4       9.487729
 [5,]  5      11.070498
 [6,]  6      12.591587
 [7,]  7      14.067140
 [8,]  8      15.507313
 [9,]  9      16.918978
[10,] 10      18.307038
[11,] 11      19.675138
[12,] 12      21.026070
\end{verbatim}
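
% Numerical check of the Beta row in the distribution table
% (alpha = 2, beta = 3 are arbitrary example values):
\begin{verbatim}
> alpha = 2; beta = 3
> alpha/(alpha+beta)                                  # Mean: 0.4
> alpha*beta/((alpha+beta)^2*(alpha+beta+1))          # Variance: 0.04
> integrate(function(x) x*dbeta(x,alpha,beta), 0, 1)  # Mean again: 0.4
\end{verbatim}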
%%%%%%%%%%%%%%%%%%%%%%%%% Logistic regression %%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%% Poisson regression %%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%% Multinomial logit models %%%%%%%%%%%%%%%%%%%%%%
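
% Monte Carlo check that (w - mu)' Sigma^{-1} (w - mu) ~ chi-squared(p),
% simulating w = Sigma^{1/2} z + mu with z standard normal (example values):
\begin{verbatim}
> set.seed(9999)
> mu = c(1,2); Sigma = rbind(c(2,1),c(1,2)); p = 2
> eig = eigen(Sigma)
> SqrtSigma = eig$vectors %*% sqrt(diag(eig$values)) %*% t(eig$vectors)
> sim = numeric(10000)
> for(j in 1:10000) {
+   w = SqrtSigma %*% rnorm(p) + mu
+   sim[j] = t(w - mu) %*% solve(Sigma) %*% (w - mu)
+ }
> mean(sim > qchisq(0.95, p))   # Should be close to 0.05
\end{verbatim}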