% \documentclass[serif]{beamer} % Serif for Computer Modern math font.
\documentclass[serif, handout]{beamer} % Handout to ignore pause statements.
\hypersetup{colorlinks,linkcolor=,urlcolor=red}
\usefonttheme{serif} % Looks like Computer Modern for non-math text -- nice!
\setbeamertemplate{navigation symbols}{} % Suppress navigation symbols
% \usetheme{Berlin} % Displays sections on top
\usetheme{Frankfurt} % Displays section titles on top: fairly thin, but still swallows some material at the bottom of crowded slides
% \usetheme{Berkeley}
\usepackage[english]{babel}
\usepackage{amsmath} % for \binom
\usepackage{amsfonts} % for \mathbb{R}, the set of reals
% \usepackage{graphicx} % To include pdf files!
% \definecolor{links}{HTML}{2A1B81}
% \definecolor{links}{red}
\setbeamertemplate{footline}[frame number]
\mode<presentation>

\title{Discrete Random Variables\footnote{
This slide show is an open-source document. See last slide for copyright information.}}
\subtitle{STA 256: Fall 2018}
\date{} % To suppress date

\begin{document}

\begin{frame}
\titlepage
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Random Variables}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Random Variable: The idea}
\pause
%\framesubtitle{}
The idea of a random variable is a \emph{measurement} conducted on the elements of the sample space. \pause
\begin{itemize}
\item $\Omega$ could be the set of Canadian households, all equally likely to be sampled. \pause
$X(\omega)$ is the number of people in household $\omega$. \pause
\item Toss a coin with $P(\mbox{Head})=p$, three times. \pause
$\Omega = \{HHH, HHT, HTH, HTT, THH, THT, TTH, TTT\}$. \pause
$X(\omega)$ is the number of Heads for outcome $\omega$. \pause
\item $X(\omega)$ could be one if person $\omega$ is employed, and zero if $\omega$ is unemployed.
\end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Formal definition of a random variable}
\pause
%\framesubtitle{}
{\Large
A random variable is a function from $\Omega$ to the set of real numbers. \pause
} % End size
\vspace{5mm}
\begin{itemize}
\item This is consistent with the idea of measurement. \pause
\item It takes an element $\omega$, and assigns a numerical value to it. \pause
\item This is why we were writing $X(\omega)$. \pause
\item Often, a random variable is denoted simply by $X$\pause,
\item but it's really the function $X(\omega)$.
\end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Probability statements about a random variable}
\pause
%\framesubtitle{}
The probability that $X(\omega)$ will take on various numerical values is \emph{determined} by the probability measure on the subsets of $\Omega$. \pause
\vspace{4mm}
{\LARGE
\begin{displaymath}
P(X=2) \pause = \pause P\{\omega \in \Omega: X(\omega) = 2\}
\end{displaymath} \pause
\begin{displaymath}
P(X=x) = P\{\omega \in \Omega: X(\omega) = x\}
\end{displaymath}
} % End size
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Example}
%\framesubtitle{}
Toss a fair coin twice. \pause
\begin{itemize}
\item $P\{HH\} = P\{HT\} = P\{TH\} = P\{TT\} = \frac{1}{4}$. \pause
\item Let $X$ equal the number of heads. \pause
\item $P(X=0) = P\{TT\} = \frac{1}{4}$. \pause
\item $P(X=1) = P\{HT, TH\} = \frac{1}{2}$. \pause
\item $P(X=2) = P\{HH\} = \frac{1}{4}$.
\end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
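% The next frame is a small added illustration: it tabulates the function
% X(omega) for the two-toss example above, to emphasize that a random
% variable really is a function on Omega. All values follow directly
% from the preceding Example frame; nothing new is assumed.
\begin{frame}
\frametitle{The function $X(\omega)$, written out}
\framesubtitle{Two tosses of a fair coin; $X$ is the number of heads}
\begin{center}
\begin{tabular}{ccc}
$\omega$ & $P\{\omega\}$ & $X(\omega)$ \\
\hline
$HH$ & $\frac{1}{4}$ & $2$ \\
$HT$ & $\frac{1}{4}$ & $1$ \\
$TH$ & $\frac{1}{4}$ & $1$ \\
$TT$ & $\frac{1}{4}$ & $0$
\end{tabular}
\end{center}
\vspace{3mm}
Collecting the outcomes with $X(\omega)=1$ gives
$P(X=1) = P\{HT, TH\} = \frac{1}{4} + \frac{1}{4} = \frac{1}{2}$.
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%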
\begin{frame}
\frametitle{Probability Mass Function}
\framesubtitle{Also called the \textbf{frequency function}}
Suppose the random variable $X$ takes on the values $x_1, x_2, \ldots$ with non-zero probability. \pause
The \emph{probability mass function} \pause (frequency function) \pause of $X$ is written \pause
{\LARGE
\begin{displaymath}
p(x_i) = P(X=x_i)
\end{displaymath} \pause
} % End size
\vspace{5mm}
Note that $\sum_i p(x_i) = 1$. \pause
\vspace{2mm}
For the example with two tosses of a fair coin, $p(0) = \frac{1}{4}$, $p(1)=\frac{1}{2}$ and $p(2)=\frac{1}{4}$. \pause
Also, $p(14)=0$.
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Cumulative Distribution Function}
\pause
%\framesubtitle{}
The \emph{cumulative distribution function} of a random variable $X$ is defined by \pause
{\LARGE
\begin{displaymath}
F(x) = P(X \leq x)
\end{displaymath} \pause
} % End size
\begin{itemize}
\item Note that $X$ is the random variable, and $x$ is a particular numerical value. \pause
\item You will frequently see things like $P(X=x)$. \pause There is a critical difference between capital $X$ and little $x$. \pause
\item $F(x)$ is defined for all real $x$. \pause
\item $F(x)$ is non-decreasing\pause, because if $x_1 < x_2$, then
$\{\omega: X(\omega) \leq x_1\} \subseteq \{\omega: X(\omega) \leq x_2\}$. \pause
\item $\lim_{x \rightarrow -\infty} F(x) = 0$ and $\lim_{x \rightarrow \infty} F(x) = 1$.
\end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Cumulative distribution function for the coin toss example}
\framesubtitle{Fig.~2.2 on page 37 is incorrect. CDFs are right continuous.}
\begin{center}
\includegraphics[width=4in]{cdf}
\end{center}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Common Discrete Distributions}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{The Bernoulli Distribution}
\pause
%\framesubtitle{}
\begin{itemize}
\item Simple probability model: Toss a coin with $P(\mbox{Head})=p$, one time. Let $X$ equal the number of heads. \pause
\item Probability mass function (frequency function) of $X$: \pause
\begin{displaymath}
p(x) = \left\{ \begin{array}{ll} % ll means left left
p^x(1-p)^{1-x} & \mbox{for $x = 0$ or $x = 1$} \\
0 & \mbox{Otherwise}
\end{array} \right. % Need that crazy invisible right period!
\end{displaymath} \pause
\item An \emph{indicator random variable} equals one if some event happens, and zero if it does not happen. \pause
\begin{itemize}
\item 1 = Female, 0 = Male \pause
\item 1 = Lived, 0 = Died \pause
\item 1 = Passed, 0 = Failed \pause
\end{itemize}
\item Indicators are usually assumed to have a Bernoulli distribution.
\end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{The Binomial Distribution}
\pause
%\framesubtitle{}
\begin{itemize}
\item Simple probability model: Toss a coin with $P(\mbox{Head})=p$. \pause Toss it $n$ times. Let $X$ equal the number of heads. \pause
\item Probability mass function (frequency function) of $X$: \pause
\begin{displaymath}
p(k) = P(X=k) \pause = \left\{ \begin{array}{ll} % ll means left left
\binom{n}{k}p^k(1-p)^{n-k} & \mbox{for $k = 0, 1, \ldots, n$} \\
~0 & \mbox{Otherwise}
\end{array} \right. % Need that crazy invisible right period!
\end{displaymath} \pause
\item The Bernoulli is a special case of the Binomial, with $n=1$.
\end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
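% A short worked example of the binomial mass function, added for
% concreteness. It uses n = 3 and p = 1/2, so it can be checked against
% the three-toss sample space on the earlier slide; these particular
% numbers are just an illustrative choice.
\begin{frame}
\frametitle{Binomial example: $n=3$ tosses of a fair coin}
%\framesubtitle{}
With $n=3$ and $p=\frac{1}{2}$, the probability of exactly two heads is
\begin{displaymath}
P(X=2) = \binom{3}{2} \left(\frac{1}{2}\right)^2 \left(\frac{1}{2}\right)^{1}
= 3 \cdot \frac{1}{8} = \frac{3}{8}.
\end{displaymath}
\vspace{2mm}
Check it directly: the outcomes with exactly two heads are $HHT$, $HTH$ and $THH$, each with probability $\frac{1}{8}$.
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%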
\begin{frame}
\frametitle{Why does $p(k) = \binom{n}{k}p^k(1-p)^{n-k}$}
\framesubtitle{For the Binomial Distribution?}
\pause
Toss a coin $n$ times with $P(\mbox{Head})=p$, and let $X$ equal the number of heads. Why does $P(X=k) = \binom{n}{k}p^k(1-p)^{n-k}$? \pause
\begin{itemize}
\item The sample space is the set of all strings of $n$ letters composed of H and T. \pause
\item By the Multiplication Principle, there are $2^n$ elements. \pause % Think of a tree.
\pause
\item If two strings have $k$ heads (and $n-k$ tails), \pause they have the same probability. \pause
\item For example, $P\{HHTH\} = P\{THHH\} \pause = p^3(1-p)$ \pause by independence. \pause
\item Count the number of ways that $k$ positions out of $n$ can be chosen to have the symbol H. \pause
\item $n$ choose $k$ is $\binom{n}{k} = \frac{n!}{k!(n-k)!}$. \pause
\item So $P(X=k) = \binom{n}{k}p^k(1-p)^{n-k}$. ~ \pause $\blacksquare$
\end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Geometric Distribution}
\pause
%\framesubtitle{}
\begin{itemize}
\item Simple probability model: Toss a coin with $P(\mbox{Head})=p$ until the first head appears, and then stop. Let $X$ equal the number of times the coin is tossed. \pause
\item Probability mass function (frequency function) of $X$: \pause
\begin{displaymath}
p(k) = P(X=k) = \pause \left\{ \begin{array}{ll} % ll means left left
(1-p)^{k-1}\,p & \mbox{for $k = 1, 2, \ldots$} \\
~0 & \mbox{Otherwise}
\end{array} \right. % Need that crazy invisible right period!
\end{displaymath}
\end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Negative Binomial Distribution}
\pause
%\framesubtitle{}
\begin{itemize}
\item Simple probability model: Toss a coin with $P(\mbox{Head})=p$ until $r$ heads appear, and then stop. Let $X$ equal the number of times the coin is tossed. \pause
\item Probability mass function (frequency function) of $X$: \pause
\begin{displaymath}
p(k) = P(X=k) = \left\{ \begin{array}{ll} % ll means left left
\binom{k-1}{r-1} p^r \, (1-p)^{k-r} & \mbox{for $k = r, r+1, \ldots$} \\
~~~0 & \mbox{Otherwise}
\end{array} \right. % Need that crazy invisible right period!
\end{displaymath} \pause
\item In the first $k-1$ trials there are $r-1$ heads, and then trial $k$ is a head. \pause So the frequency function could be written \pause
\begin{displaymath}
p(k) = \binom{k-1}{r-1} p^{r-1} \, (1-p)^{(k-1)-(r-1)} \pause \, p
\end{displaymath}
\item The Geometric distribution is a special case of the Negative Binomial, with $r=1$.
\end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Hypergeometric Distribution}
\pause
%\framesubtitle{}
\begin{itemize}
\item Simple probability model: A jar contains $n$ marbles, of which $r$ are black and $n-r$ are white. Randomly sample $m$ of them without replacement. Let $X$ denote the number of black marbles in the sample. \pause
\item Probability mass function (frequency function) of $X$: \pause
\begin{displaymath}
p(k) = P(X=k) = \left\{ \begin{array}{ll} % ll means left left
\frac{ \binom{r}{k}\binom{n-r}{m-k} }{\binom{n}{m}} & \mbox{for $k = 0, 1, \ldots, r$} \\
~~~0 & \mbox{Otherwise}
\end{array} \right. % Need that crazy invisible right period!
\end{displaymath} \pause
\item This just summarizes what we have done earlier.
\end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
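% A quick numerical illustration of the hypergeometric formula, added
% for concreteness. The values n = 10, r = 4 and m = 3 are just an
% illustrative choice.
\begin{frame}
\frametitle{Hypergeometric example}
\framesubtitle{$n=10$ marbles, $r=4$ black, sample $m=3$ without replacement}
The probability that the sample contains exactly one black marble is
\begin{displaymath}
P(X=1) = \frac{\binom{4}{1}\binom{6}{2}}{\binom{10}{3}}
= \frac{4 \cdot 15}{120} = \frac{1}{2}.
\end{displaymath}
\vspace{2mm}
Numerator: choose the one black marble, then the two white ones. Denominator: all $\binom{10}{3} = 120$ equally likely samples.
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%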
\begin{frame}
\frametitle{Poisson Distribution}
\pause
%\framesubtitle{}
Useful for count data. For example, \pause
\begin{itemize}
\item Number of raisins in a loaf of raisin bread. \pause
\item Number of alpha particles emitted from a radioactive substance in a given time interval. \pause
\item Number of calls per minute coming in to a customer service line. \pause
\item Bomb craters in London during WWII. \pause
\item Number of rat hairs in a jar of peanut butter. \pause
\item Number of deaths per year from horse kicks in the Prussian army, 1878--1898.
\end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Conditions for the Poisson distribution}
\pause
%\framesubtitle{}
We are usually counting events that happen in an interval, or in a region of time or space (or both). \pause
The following are rough translations of the technical conditions for the number of events to have a Poisson distribution. \pause
\begin{itemize}
\item Independent increments: \pause Occurrences of events in separate intervals (regions) are independent. \pause
\item The probability of observing at least one event in an interval or region is roughly proportional to the size of the interval or region. \pause
\item As the size of the region or interval approaches zero, the probability of more than one event in the region or interval goes to zero. \pause
\end{itemize}
If these conditions are approximately satisfied, the probability distribution of the number of events will be approximately Poisson.
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Poisson Probability Mass Function with parameter $\lambda$}
\framesubtitle{Frequency Function}
\pause
{\LARGE
\begin{displaymath}
p(k) = \left\{ \begin{array}{ll} % ll means left left
\frac{e^{-\lambda}\, \lambda^k}{k!} & \mbox{for $k = 0, 1, \ldots$} \\
~~~0 & \mbox{Otherwise}
\end{array} \right. % Need that crazy invisible right period!
\end{displaymath} \pause
} % End size
\vspace{5mm}
Here the parameter $\lambda > 0$. \pause
\vspace{5mm}
Note $\sum_{k=0}^\infty p(k) \pause
= \sum_{k=0}^\infty \frac{e^{-\lambda}\, \lambda^k}{k!} \pause
= e^{-\lambda}\,\sum_{k=0}^\infty \frac{\lambda^k}{k!} \pause
= e^{-\lambda} e^{\lambda} \pause = 1$.
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
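% A brief worked example of the Poisson mass function, added for
% concreteness. The rate lambda = 3 calls per minute is just an
% illustrative choice, continuing the customer service example above.
\begin{frame}
\frametitle{Poisson example: calls to a customer service line}
%\framesubtitle{}
Suppose calls arrive at an average rate of $\lambda = 3$ per minute. Then the probability of exactly two calls in a given minute is
\begin{displaymath}
p(2) = \frac{e^{-3}\, 3^2}{2!} = \frac{9}{2}\, e^{-3} \approx 0.224,
\end{displaymath}
and the probability of no calls at all is $p(0) = e^{-3} \approx 0.050$.
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%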
\begin{frame}
\frametitle{The Big Three}
%\framesubtitle{}
The most useful discrete distributions in applications are
\begin{itemize}
\item Bernoulli
\item Binomial
\item Poisson
\end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{frame}
\frametitle{Copyright Information}
This slide show was prepared by \href{http://www.utstat.toronto.edu/~brunner}{Jerry Brunner}, Department of Statistical Sciences, University of Toronto. It is licensed under a
\href{http://creativecommons.org/licenses/by-sa/3.0/deed.en_US}
{Creative Commons Attribution - ShareAlike 3.0 Unported License}. Use any part of it as you like and share the result freely. The \LaTeX~source code is available from the course website:
\vspace{5mm}

\href{http://www.utstat.toronto.edu/~brunner/oldclass/256f18}
{\small\texttt{http://www.utstat.toronto.edu/$^\sim$brunner/oldclass/256f18}}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\end{document}