\documentclass[11pt]{article}
\usepackage{graphicx} % To include pdf files!
\usepackage{amsmath}
\usepackage{amsbsy}   % for \boldsymbol and \pmb
\usepackage{amsfonts}
\usepackage{euscript} % for \EuScript
\usepackage[colorlinks=true, pdfstartview=FitV, linkcolor=blue, citecolor=blue, urlcolor=blue]{hyperref} % For links
\oddsidemargin=-.25in % Good for US Letter paper
\evensidemargin=0in
\textwidth=6.3in
\topmargin=-0.7in
\headheight=0.1in
\headsep=0.1in
\textheight=9.4in
\pagestyle{empty} % No page numbers

\begin{document}

\begin{center}
{\Large \textbf{\emph{Revised} STA 256 Formulas}}
\end{center}

\noindent
\renewcommand{\arraystretch}{1.5}
\begin{tabular}{llll}
%%%%%%%%%%%%%%%%%%%% Math %%%%%%%%%%%%%%%%%%%%
$ \displaystyle \sum_{k=j}^{\infty} a^k = \frac{a^j}{1-a}$ for $|a|<1$ &
$ \displaystyle \sum_{k=0}^{\infty} \frac{x^k}{k!} = e^x$ & ~~~~~ &
$\displaystyle (a+b)^n = \sum_{k=0}^n \binom{n}{k} a^k b^{n-k}$ \\
\multicolumn{2}{l}{l'H\^{o}pital's rule: $ \lim_{x \rightarrow c} \frac{g(x)}{h(x)} = \lim_{x \rightarrow c} \frac{g^\prime(x)}{h^\prime(x)}$ if the limit is of the form $\frac{0}{0}$ or $\frac{\infty}{\infty}$} & & \\
$\displaystyle \lim_{n \rightarrow \infty}\left(1 + \frac{x}{n}\right)^n = e^x$ &
$\Gamma(\alpha) = \int_0^\infty e^{-t} t^{\alpha-1} \, dt$ & ~~~~~ &
$\Gamma(\alpha+1) = \alpha \, \Gamma(\alpha)$ \\
%%%%%%%%%%%%%%%%%%%% Sets %%%%%%%%%%%%%%%%%%%%
Distributive Laws of Sets: &
$A \cap \left(\cup_{j=1}^\infty B_j\right) = \cup_{j=1}^\infty (A \cap B_j)$ & ~~~~~ &
$A \cup \left(\cap_{j=1}^\infty B_j\right) = \cap_{j=1}^\infty (A \cup B_j)$ \\
De Morgan Laws: &
$(\cap_{j=1}^\infty A_j)^c = \cup_{j=1}^\infty A_j^c$ & ~~~~~ &
$(\cup_{j=1}^\infty A_j)^c = \cap_{j=1}^\infty A_j^c$ \\
\end{tabular}
\renewcommand{\arraystretch}{1.0}

%%%%%%%%%%%%%%%%%%%% Foundations %%%%%%%%%%%%%%%%%%%%
\vspace{2mm}
\noindent \hspace{3mm}Axioms: % Use hspace to line up with tabular -- oh well.
(1) $P(\Omega)=1$ \hspace{5mm}
(2) $P(A) \geq 0$ for any $A \subseteq \Omega$ \hspace{12mm}
(3) If $A_1, A_2, \ldots$ are disjoint, $P\left( \cup_{k=1}^\infty A_k \right) = \sum_{k=1}^\infty P(A_k)$

\noindent \hspace{3mm}Properties:
\textbf{A}. $P(A^c) = 1-P(A)$ \hspace{4mm}
\textbf{B}. If $A \subseteq B$ then $P(A) \leq P(B)$ \hspace{4mm}
\textbf{C}. $P(A \cup B) = P(A)+P(B)-P(A\cap B)$

\noindent
\renewcommand{\arraystretch}{1.75}
\begin{tabular}{lll}
%%%%%%%%%%%%%%%%%%%% Counting %%%%%%%%%%%%%%%%%%%%
$_nP_r = \frac{n!}{(n-r)!} $ & $\binom{n}{r} = \frac{n!}{r! \, (n-r)!} $ &
$\binom{n}{n_1~\cdots~n_k}=\frac{n!}{n_1!~\cdots~n_k!}$ \\
%%%%%%%%%% Conditional probability and independence %%%%%%%%%%
$P(A|B) \stackrel{def}{=} \frac{P(A\cap B)}{P(B)}$ & $P(A\cap B) = P(A|B)P(B)$ &
$P(A) = \sum_{k=1}^\infty P(A|B_k)P(B_k)$ \\
\multicolumn{2}{l}{$P(B_j|A) = \frac{P(A|B_j)P(B_j)}{\sum_{k=1}^\infty P(A|B_k)P(B_k)}$ \quad ($B_1, B_2, \ldots$ a partition of $\Omega$)} &
$P(B|A) = \frac{P(A|B)P(B)}{P(A|B)P(B) + P(A|B^c)P(B^c)}$ \\
\multicolumn{2}{l}{$A$ and $B$ independent means $P(A \cap B)=P(A)P(B)$.} & \\
\end{tabular}
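%%%%% Worked example (numbers are hypothetical, chosen only for illustration) %%%%%
\vspace{1mm}
\noindent \hspace{3mm}\emph{Example} (two-case Bayes formula, with hypothetical numbers):
if $P(B)=0.01$, $P(A|B)=0.98$ and $P(A|B^c)=0.05$, then
$P(B|A) = \frac{(0.98)(0.01)}{(0.98)(0.01)+(0.05)(0.99)} = \frac{0.0098}{0.0593} \approx 0.165$.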
%%%%%%%%%%%%%%%%%%%% Common Distributions %%%%%%%%%%%%%%%%%%%%
\vspace{2mm}
\noindent
\begin{tabular}{|l|l|l|} \hline
\textbf{Distribution} & \textbf{Density or probability mass function} & \textbf{MGF} $M(t)$ \\ \hline
Bernoulli ($p$) & $p(x) = p^x(1-p)^{1-x}$ for $x=0,1$ & $pe^t + 1-p$ \\ \hline
Binomial ($n,p$) & $p(k) = \binom{n}{k}p^k(1-p)^{n-k}$ for $k = 0, 1, \ldots, n$ & $(pe^t + 1-p)^n$ \\ \hline
Geometric ($p$) & $p(k) = (1-p)^{k-1}\,p$ for $k = 1, 2, \ldots$ & $p(e^{-t}+p-1)^{-1}$ \\ \hline
Negative Binomial ($r,p$) & $p(k) = \binom{k-1}{r-1} p^r \, (1-p)^{k-r}$ for $k = r, r+1, \ldots $ & ~~~~~ \\ \hline
Hypergeometric ($n,r,m$) & $p(k) = \frac{ \binom{r}{k}\binom{n-r}{m-k} } {\binom{n}{m}}$ for $k = 0, \ldots, r$ & ~~~~~ \\ \hline
Poisson ($\lambda$) & $p(k) = \frac{e^{-\lambda}\, \lambda^k}{k!}$ for $k = 0, 1, \ldots $ & $e^{\lambda(e^t-1)}$ \\ \hline
Multinomial ($n,p_1, \ldots, p_r$) & $p(n_1, \ldots, n_r) = \binom{n}{n_1 \cdots n_r } p_1^{n_1} \cdots p_r^{n_r}$ & ~~~~~ \\ \hline
Uniform ($a,b$) & $f(x) = \frac{1}{b-a}$ for $a \leq x \leq b$ & $\frac{e^{bt}-e^{at}}{t(b-a)}$ for $t\neq 0$ \\ \hline
Exponential ($\lambda$) & $f(x) = \lambda e^{-\lambda x}$ for $x \geq 0$
% \hspace{5mm} $F(x) = 1-e^{-\lambda x}$ for $x \geq 0$
& $(1-\frac{t}{\lambda})^{-1}$ for $t < \lambda$ \\ \hline
Gamma ($\alpha,\lambda$) & $f(x) = \frac{\lambda^\alpha}{\Gamma(\alpha)} e^{-\lambda x} \, x^{\alpha-1}$ for $x \geq 0$ & $(1-\frac{t}{\lambda})^{-\alpha}$ for $t < \lambda$ \\ \hline
Normal ($\mu,\sigma$) & $f(x) = \frac{1}{\sigma \sqrt{2\pi}}\exp\left\{-\frac{(x-\mu)^2}{2\sigma^2}\right\}$ \hspace{10mm} $\frac{X-\mu}{\sigma} \sim N(0,1)$ & $e^{\mu t+\frac{1}{2}\sigma^2t^2}$ \\ \hline
Chi-squared ($\nu$) & $f(x) = \frac{1}{2^\frac{\nu}{2}\Gamma(\frac{\nu}{2})} e^{-\frac{x}{2}} \, x^{\frac{\nu}{2}-1}$ for $x \geq 0$ & $(1-2t)^{-\frac{\nu}{2}}$ for $t < \frac{1}{2}$ \\ \hline
Beta ($\alpha,\beta$) & $f(x) = \frac{\Gamma(\alpha+\beta)}{\Gamma(\alpha)\Gamma(\beta)} \, x^{\alpha-1} (1-x)^{\beta-1}$ for $0 \leq x \leq 1$ & ~~~~~ \\ \hline
\end{tabular}
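%%%%% Worked example (shows how the MGF column is used; the MGF facts are on the next page) %%%%%
\vspace{1mm}
\noindent \emph{Example}: for the Poisson($\lambda$), $M^\prime(t) = \lambda e^t \, e^{\lambda(e^t-1)}$, so that $E(X) = M^\prime(0) = \lambda$ (using $M^{(k)}(0)=E(X^k)$ from the next page).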
\pagebreak
\enlargethispage*{1000 pt}
\begin{tabular}{lcl}
%%%%%%%%%%%%%%%%%%%% Joint Distributions %%%%%%%%%%%%%%%%%%%%
$F_x(x) \stackrel{def}{=} P(X \leq x)$ & ~ & $F_{xy}(x,y) \stackrel{def}{=} P(X \leq x, Y \leq y)$ \\
$ F_{xy}(x,y) = \int_{-\infty}^x \, \int_{-\infty}^y f_{xy}(s,t) \, dt \, ds$ & ~ & $f_{xy}(x,y) = \frac{\partial^2}{\partial x \partial y} F_{xy}(x,y)$ \\
$F_x(x) = \lim_{y \rightarrow \infty} F_{xy}(x,y)$ & ~ & $f_x(x) = \int_{-\infty}^\infty f_{xy}(x,y) \, dy$ \\
$p_x(x) \stackrel{def}{=} P(X=x)$ & ~ & $p_x(x) = \sum_y p_{xy}(x,y)$ \\
Independence: $ F_{xy}(x,y) = F_x(x)F_y(y)$ & $\Leftrightarrow$ & $p_{xy}(x,y) = p_x(x) \, p_y(y)$ or $f_{xy}(x,y) = f_x(x) \, f_y(y)$ \\
$p_{y|x}(y|x) \stackrel{def}{=} \frac{p_{xy}(x,y)}{p_x(x)}$ & ~ & $f_{x|y}(x|y) \stackrel{def}{=} \frac{f_{xy}(x,y)}{f_y(y)}$ \\
\multicolumn{3}{l}{Convolution formulas: if $X$ and $Y$ are independent random variables and $Z = X + Y$,} \\
$ p_z(z) = \sum_x p_x(x) p_y(z-x)$ & ~ & $f_z(z) = \int_{-\infty}^\infty f_x(x)f_y(z-x) \, dx$ \\
\multicolumn{3}{l}{Jacobian formula: $Y_1 = g_1(X_1,X_2)$ and $Y_2 = g_2(X_1,X_2)$} \\
\multicolumn{3}{l}{\renewcommand{\arraystretch}{1.5}
$ f_{y_1y_2}(y_1,y_2) = f_{x_1x_2}(\, x_1(y_1,y_2), x_2(y_1,y_2) \,) \cdot \mathrm{abs}
\left| \begin{array}{cc}
\frac{\partial x_1}{\partial y_1} & \frac{\partial x_1}{\partial y_2} \\
\frac{\partial x_2}{\partial y_1} & \frac{\partial x_2}{\partial y_2}
\end{array}\right|
= f_{x_1x_2}(\, x_1(y_1,y_2), x_2(y_1,y_2) \,)
\left|\frac{\partial x_1}{\partial y_1} \frac{\partial x_2}{\partial y_2} - \frac{\partial x_1}{\partial y_2} \frac{\partial x_2}{\partial y_1}\right|$ } \\
$dx \, dy = r \, dr \, d\theta$ & ~ & \\
%%%%%%%%%% Expected value, variance and covariance %%%%%%%%%%
$E(X) \stackrel{def}{=} \sum_x x \, p_x(x)$ or $\int_{-\infty}^\infty x \, f_x(x) \, dx$ & ~ & $E(g(X)) = \sum_x g(x) \, p_x(x)$ or $\int_{-\infty}^\infty g(x) \, f_x(x) \, dx$ \\
$E\left(\sum_{i=1}^n a_iX_i \right) = \sum_{i=1}^n a_iE(X_i)$ & ~ & $E(X) = E(E[X|Y])$ \\
$Var(X) \stackrel{def}{=} E\left( (X-\mu)^2 \right)$ & ~ & $Var(X) = E(X^2)-[E(X)]^2$ \\
$Var(a+bX) = b^2Var(X)$ & ~ & $Var(aX+bY) = a^2Var(X)+b^2Var(Y)+2abCov(X,Y)$ \\
$Cov(X,Y) \stackrel{def}{=} E[(X-\mu_x)(Y-\mu_y)]$ & ~ & $Cov(X,Y) = E(XY) - E(X)E(Y)$ \\
$Cov(a+bX,c+dY) = bd \, Cov(X,Y)$ & ~ & $Cov(X,aY+bZ) = a \, Cov(X,Y) + b \, Cov(X,Z)$ \\
\multicolumn{3}{l}{$Var\left(\sum_{i=1}^n a_iX_i \right) = \sum_{i=1}^n a_i^2Var(X_i) \, + \, \sum\sum_{i \neq j} a_ia_j Cov(X_i,X_j)$ } \\
Markov's inequality & ~ & Chebyshev's inequality \\
If $P(Y \geq 0)=1$, then $P(Y \geq t) \leq E(Y)/t$ for $t > 0$ & ~ & $P(|X-\mu| \geq k\sigma) \leq \frac{1}{k^2}$ \\
$M(t) \stackrel{def}{=} E(e^{Xt})$ & ~ & $M^{(k)}(0) = E(X^k)$ \\
$M_{aX}(t) = M_X(at)$ & ~ & $M_{\sum X_i}(t) = \prod_{i=1}^n M_{X_i}(t)$ if the $X_i$ are independent \\
%%%%%%%%%%%%%%%%%%%% Limits %%%%%%%%%%%%%%%%%%%%
\multicolumn{3}{l}{Law of Large Numbers: for all $\epsilon>0$, $\lim_{n \rightarrow \infty}P\{|\overline{X}_n-\mu|\geq\epsilon\}=0$.} \\
\multicolumn{3}{l}{Central Limit Theorem: $Z_n = \frac{\sqrt{n}(\overline{X}_n-\mu)}{\sigma}$ converges in distribution to a standard normal.}
\end{tabular}
\renewcommand{\arraystretch}{1.0}
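%%%%% Worked example (numbers are hypothetical, chosen only for illustration) %%%%%
\vspace{1mm}
\noindent \emph{Example} (variance of a linear combination, with hypothetical numbers): if $Var(X)=4$, $Var(Y)=9$ and $Cov(X,Y)=1$, then $Var(2X-Y) = 2^2(4) + (-1)^2(9) + 2(2)(-1)(1) = 21$.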
\begin{center}\begin{tabular}{l} \hspace{6.5in} \\ \hline \end{tabular}\end{center}

This formula sheet was prepared by \href{http://www.utstat.toronto.edu/~brunner}{Jerry Brunner}, Department of Mathematical and Computational Sciences, University of Toronto Mississauga. It is licensed under a \href{http://creativecommons.org/licenses/by-sa/3.0/deed.en_US}{Creative Commons Attribution - ShareAlike 3.0 Unported License}. Use any part of it as you like and share the result freely. The \LaTeX~source code is available from the course website:
\begin{center}
\href{http://www.utstat.toronto.edu/~brunner/oldclass/256f18}{\texttt{http://www.utstat.toronto.edu/$^\sim$brunner/oldclass/256f18}}
\end{center}

\pagebreak

\begin{center} \textbf{Glossary} \end{center}
{ \small
\begin{itemize}
\item[] \textbf{CDF}: Cumulative Distribution Function, $F(x) = P(X \leq x)$.
\item[] \textbf{Central Limit Theorem (CLT)}: The sample mean is approximately normal for large samples; see the formula sheet for details.
\item[] \textbf{Conditional density}: The density of continuous $X$ given that continuous $Y$ equals $y$: $f_{x|y}(x|y)$. Probability is area under this curve. To get probabilities and expected values, integrate.
\item[] \textbf{Conditional probability mass function}: The PMF of discrete $X$ given that discrete $Y$ equals $y$: $p_{x|y}(x|y) = P(X=x|Y=y)$. To get probabilities and expected values, add.
\item[] \textbf{Continuous random variable}: $X$ assumes an uncountably infinite number of values. Probability is area under the curve $f(x)$. Integrate to find probabilities and expected values.
\item[] \textbf{Convergence in Distribution}: A sequence of cumulative distribution functions converges to a target cumulative distribution function at all continuity points of the target.
\item[] \textbf{Convolution}: $X$ and $Y$ are independent and $Z=X+Y$. The convolution formulas are $p_z(z) = \sum_x p_x(x) p_y(z-x)$ and $f_z(z) = \int_{-\infty}^\infty f_x(x)f_y(z-x) \, dx$.
\item[] \textbf{Density}: Probability density function $f(x)$. Probability of a continuous random variable is area under this curve. To get probabilities and expected values, integrate.
\item[] \textbf{Discrete random variable}: $X$ assumes a finite or countably infinite number of values. Add to find probabilities and expected values.
\item[] \textbf{Disjoint}: Mutually exclusive, non-overlapping: $A \cap B = \emptyset$.
\item[] \textbf{Frequency function}: Same as a probability mass function, $p(x)=P(X=x)$. Applies to discrete random variables. To get probabilities and expected values, add.
\item[] \textbf{Joint density}: Applies to continuous random variables: $f_{xy}(x,y)$. Probability is volume under this surface. To get probabilities or expected values, integrate.
\item[] \textbf{Joint probability mass function}: Applies to discrete random variables: $p_{xy}(x,y) = P(X=x,Y=y)$. To get probabilities or expected values, add.
\item[] \textbf{Joint frequency function}: Same as joint probability mass function.
\item[] \textbf{Marginal density}: The density of one of the continuous random variables along the edge (margin), integrating over the other one: $f_x(x) = \int_{-\infty}^\infty f_{xy}(x,y) \, dy$.
\item[] \textbf{Marginal probability mass function}: The PMF of one of the discrete random variables along the edge (margin), adding over the other one: $p_x(x) = \sum_y p_{xy}(x,y)$.
\item[] \textbf{Moment-generating function}: $M(t) = E(e^{Xt})$. Corresponds uniquely to the probability distribution of $X$. To get $E(X^k)$, differentiate $k$ times and set $t=0$.
\item[] \textbf{MGF}: Moment-Generating Function. See above.
\item[] \textbf{PDF}: Probability density function $f(x)$. Probability is area under this curve. To get probabilities and expected values, integrate.
\item[] \textbf{PMF}: Probability mass function $p(x)=P(X=x)$, same as a frequency function. Applies to discrete random variables. To get probabilities and expected values, add.
\item[] \textbf{CBC}: Canadian Broadcasting Corporation
\end{itemize}
\noindent The table on the reverse side is from \emph{Mathematical statistics and data analysis} by Rice.
} % End size

\pagebreak
\includegraphics[width=6in]{Normal-Table}

\end{document}