% \documentclass[serif]{beamer} % Serif for Computer Modern math font.
\documentclass[serif, handout]{beamer} % Handout to ignore pause statements.
\hypersetup{colorlinks,linkcolor=,urlcolor=red}
\usefonttheme{serif} % Looks like Computer Modern for non-math text -- nice!
\setbeamertemplate{navigation symbols}{} % Suppress navigation symbols
% \usetheme{Berlin} % Displays sections on top
\usetheme{Frankfurt} % Displays section titles on top: Fairly thin but still swallows some material at bottom of crowded slides
%\usetheme{Berkeley}
\usepackage[english]{babel}
\usepackage{amsmath} % for binom
\usepackage{amsfonts} % for \mathbb{R} The set of reals
\usepackage{mathtools} % For vertical equal sign using \equalto{1+1}{2}
\newcommand{\verteq}{\rotatebox{90}{$\,=$}}
\newcommand{\equalto}[2]{\underset{\scriptstyle\overset{\mkern4mu\verteq}{#2}}{#1}}
% \usepackage{graphicx} % To include pdf files!
% \definecolor{links}{HTML}{2A1B81}
% \definecolor{links}{red}
\setbeamertemplate{footline}[frame number]
\mode<presentation>
\title{Limit Theorems\footnote{
This slide show is an open-source document. See last slide for copyright information.}\\
Sections 4.2 and 4.4}
\subtitle{STA 256: Fall 2019}
\date{} % To suppress date

\begin{document}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\titlepage
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Overview}
\tableofcontents
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Law of Large Numbers}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Infinite Sequence of random variables}
%\framesubtitle{}
{\Large $T_1, T_2, \ldots$ }
\pause
\vspace{5mm}
\begin{itemize}
\item We are interested in what happens to $T_n$ as $n \rightarrow \infty$. \pause
\item Why even think about this? \pause
\item For fun. \pause
\item And because $T_n$ could be a sequence of \emph{statistics}, numbers computed from sample data. \pause
\item For example, $T_n = \overline{X}_n = \frac{1}{n}\sum_{i=1}^nX_i$. \pause
\item $n$ is the sample size. \pause
\item $n \rightarrow \infty$ is an approximation of what happens for large samples. \pause
\item Good things should happen when estimates are based on more information.
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Convergence}
%\framesubtitle{}
\begin{itemize}
\item Convergence of $T_n$ as $n \rightarrow \infty$ is not an ordinary limit, because probability is involved. \pause
\item There are several different types of convergence. \pause
\item We will work with \emph{convergence in probability} and \emph{convergence in distribution}.
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Convergence in Probability to a random variable}
%\framesubtitle{}
Definition: The sequence of random variables $X_1, X_2, \ldots$ is said to converge in probability to the random variable $Y$ if for all $\epsilon > 0$,
$\displaystyle \lim_{n \rightarrow \infty}P\{|X_n-Y|\geq\epsilon\} = 0$, and we write $X_n \stackrel{p}{\rightarrow} Y$.\pause
\begin{columns}
\column{0.4\textwidth}
{\small
\begin{eqnarray*}
|X_n-Y| < \epsilon \pause
& \Leftrightarrow & -\epsilon < X_n-Y < \epsilon \\ \pause
& \Leftrightarrow & Y-\epsilon < X_n < Y+\epsilon \\
\end{eqnarray*} \pause
} % End size
\vspace{30mm} ~
\column{0.6\textwidth}
\includegraphics[width=2.75in]{strip}
\end{columns}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Convergence in Probability to a constant}
\framesubtitle{More immediate applications in statistics: We will focus on this.}
\pause
Definition: The sequence of random variables $T_1, T_2, \ldots$ is said to converge in probability to the constant $c$ if for all $\epsilon > 0$,
{\LARGE
\begin{displaymath}
\lim_{n \rightarrow \infty}P\{|T_n-c|\geq\epsilon\} = 0
\end{displaymath}
} % End size
% Or equivalently, \pause
% \begin{displaymath}
% \lim_{n \rightarrow \infty}P\{|T_n-c|\leq\epsilon\} = 1
% \end{displaymath}
\pause
and we write $T_n \stackrel{p}{\rightarrow} c$. \pause
\begin{eqnarray*}
|T_n-c| < \epsilon \pause
& \Leftrightarrow & -\epsilon < T_n-c < \epsilon \\ \pause
& \Leftrightarrow & c-\epsilon < T_n < c+\epsilon \\ \pause
\end{eqnarray*}
\begin{picture}(10,10)
% Line, direction (1,0), horizontal extent 200, starting point (50,0)
\put(50,0){\line(1,0){200} }
\put(150,5){\line(0,-1){10} }
\put(148,-15){$c$}
\put(100,-2){(} % Left parenthesis
\put(200,-2){)} % Right parenthesis
\put(90,-15){$c-\epsilon$}
\put(190,-15){$c+\epsilon$}
\end{picture}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Example: $T_n \sim U(-\frac{1}{n}, \frac{1}{n})$}
\framesubtitle{Convergence in probability means $\lim_{n \rightarrow \infty}P\{|T_n-c|\geq\epsilon\} = 0$}
\begin{picture}(10,10)(25,-25)
% Line, direction (1,0), horizontal extent 200, starting point (50,0)
\put(50,0){\line(1,0){200} }
\put(150,5){\line(0,-1){10} }
\put(148,-15){$c$}
\put(100,-2){(} % Left parenthesis
\put(200,-2){)} % Right parenthesis
\put(90,-15){$c-\epsilon$}
\put(190,-15){$c+\epsilon$}
\end{picture}
\pause
\begin{itemize}
\item $T_1$ is uniform on $(-1,1)$. \pause Height of the density is $\frac{1}{2}$. \pause
\item $T_2$ is uniform on $(-\frac{1}{2},\frac{1}{2})$. \pause Height of the density is 1. \pause
\item $T_3$ is uniform on $(-\frac{1}{3},\frac{1}{3})$. \pause Height of the density is $\frac{3}{2}$. \pause
\item Eventually, $\frac{1}{n} < \epsilon$ \pause and $P\{|T_n-0|\geq\epsilon\} = 0$\pause, forever. \pause
\item Eventually means for all $n>\frac{1}{\epsilon}$.
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Example: $X_1, \ldots, X_n$ are independent $U(0,\theta)$}
\framesubtitle{Convergence in probability means $\lim_{n \rightarrow \infty}P\{|T_n-c|\geq\epsilon\} = 0$}
\pause
For $0 < x < \theta$, \pause
\begin{itemize}
\item[] $F_{_{X_i}}(x) = \int_0^x \frac{1}{\theta} \, dt \pause = \frac{x}{\theta}$. \pause
\item[] $Y_n = \max_i (X_i)$.
\pause
\item[] $F_{_{Y_n}}(y) = \left(\frac{y}{\theta}\right)^n$ \pause
\end{itemize}
\vspace{2mm}
\begin{picture}(10,10) % (25,-25)
% Line, direction (1,0), horizontal extent 200, starting point (50,0)
\put(50,0){\line(1,0){200} }
\put(150,5){\line(0,-1){10} }
\put(148,-15){$\theta$}
\put(100,-2){(} % Left parenthesis
\put(200,-2){)} % Right parenthesis
\put(90,-15){$\theta-\epsilon$}
\put(190,-15){$\theta+\epsilon$}
\end{picture}
\pause
\vspace{5mm}
Since $P(Y_n \leq \theta) = 1$, \pause
\begin{eqnarray*}
P\{|Y_n-\theta|\geq\epsilon\} & = & F_{_{Y_n}}(\theta-\epsilon) \\ \pause
& = & \left(\frac{\theta-\epsilon}{\theta}\right)^n \\ \pause
& \rightarrow & 0 \mbox{ ~~~because } \frac{\theta-\epsilon}{\theta}<1. \pause
\end{eqnarray*}
So the observed maximum data value converges in probability to $\theta$, the theoretical maximum data value.
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Markov's inequality: Theorem 3.6.1}
\framesubtitle{A stepping stone}
\pause
Let $Y$ be a random variable with $P(Y \geq 0)=1$. \pause
Then for any $a>0$, $E(Y) \geq a \, P(Y \geq a)$. \pause \\
{ \small
\vspace{3mm}
Proof (for continuous random variables): \pause
\begin{eqnarray*}
E(Y) & = & \int_0^\infty y f(y) \, dy \\ \pause
& = & \int_0^a y f(y) \, dy + \int_a^\infty y f(y) \, dy \\ \pause
& \geq & \int_a^\infty {\color{red}y} f(y) \, dy \\ \pause
& \geq & \int_a^\infty {\color{red}a} f(y) \, dy \\ \pause
& = & {\color{red}a} \int_a^\infty f(y) \, dy \\ \pause
& = & a \, P(Y \geq a) ~~~~~ \blacksquare
\end{eqnarray*}
} % End size
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{The Variance Rule}
\framesubtitle{Not in the text, I believe}
{\large
Let $T_1, T_2, \ldots$ be a sequence of random variables, and let $c$ be a constant. If
\begin{itemize}
\item $\displaystyle \lim_{n \rightarrow \infty}E(T_n) = c$ and
\item $\displaystyle \lim_{n \rightarrow \infty}Var(T_n) = 0$
\end{itemize}
then $T_n \stackrel{p}{\rightarrow} c$.
} % End size
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Proof of the Variance Rule}
\framesubtitle{Using Markov's inequality: $E(Y) \geq a \, P(Y \geq a)$}
\pause
{\small
Seek to show $\forall \epsilon > 0$, $\displaystyle \lim_{n \rightarrow \infty}P\{|T_n-c|\geq\epsilon\} = 0$. \pause
Denote $E(T_n)$ by $\mu_n$. \pause
In Markov's inequality, let $Y=(T_n-c)^2$, and $a = \epsilon^2$.
\pause
\begin{eqnarray*}
E[(T_n-c)^2] & \geq & \epsilon^2 P\{ (T_n-c)^2 \geq \epsilon^2 \} \\ \pause
& = & \epsilon^2 P\{ |T_n-c| \geq \epsilon \}, \mbox{ so} \\ \pause
%\end{eqnarray*}
%\begin{eqnarray*}
0 & \leq & P\{ |T_n-c| \geq \epsilon \} \leq \frac{1}{\epsilon^2} E[(T_n-c)^2] \\ \pause
& = & \frac{1}{\epsilon^2} E[(T_n-\mu_n + \mu_n - c)^2] \\ \pause
& = & \frac{1}{\epsilon^2} E[(T_n-\mu_n)^2 +2(T_n-\mu_n)(\mu_n-c) + (\mu_n-c)^2] \\ \pause
& = & \frac{1}{\epsilon^2} \left( E(T_n-\mu_n)^2 + 2(\mu_n-c) { \color{red}E(T_n-\mu_n) } + E(\mu_n-c)^2 \right) \\ \pause
& = & \frac{1}{\epsilon^2} \left( E(T_n-\mu_n)^2 + 2(\mu_n-c) ({ \color{red}E(T_n)}-{\color{red}\mu_n}) + (\mu_n-c)^2 \right) \\ \pause
& = & \frac{1}{\epsilon^2} \left( E(T_n-\mu_n)^2 + 0 + (\mu_n-c)^2 \right)
\end{eqnarray*}
} % End size
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Continuing the proof}
%\framesubtitle{}
{\small
Have
\begin{eqnarray*}
0 &\leq& P\{ |T_n-c| \geq \epsilon \} \\
&\leq& \frac{1}{\epsilon^2} \left( E(T_n-\mu_n)^2 + (\mu_n-c)^2 \right) \\ \pause
&=& \frac{1}{\epsilon^2} \left( Var(T_n) + (\mu_n-c)^2 \right) \pause \mbox{, so that} \\ \pause
{\color{red} 0 } &{\color{red}\leq}& {\color{red} \lim_{n \rightarrow\infty} P\{ |T_n-c| \geq \epsilon \} } \\
&{\color{red}\leq}& \lim_{n \rightarrow\infty} \frac{1}{\epsilon^2} \left( Var(T_n) + (\mu_n-c)^2 \right) \\ \pause
&=& \frac{1}{\epsilon^2} \left( \lim_{n \rightarrow\infty} Var(T_n) + \lim_{n \rightarrow\infty} (\mu_n-c)^2 \right) \\ \pause
&=& \frac{1}{\epsilon^2} \left( \lim_{n \rightarrow\infty} Var(T_n) + \left( \lim_{n \rightarrow\infty}\mu_n - \lim_{n \rightarrow\infty}c \right)^2 \right) \\ \pause
&=& \frac{1}{\epsilon^2} \left( 0 + (c-c)^2 \right) \pause = {\color{red} 0 } \pause
\end{eqnarray*}
Squeeze. \hspace{5mm} $\blacksquare$
} % End size
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{The Law of Large Numbers}
\framesubtitle{That is, the ``Weak" Law of Large Numbers}
\pause
Theorem: Let $X_1, \ldots, X_n$ be independent random variables with expected value $\mu$ and variance $\sigma^2$. \pause
Then the sample mean
{\Large
\begin{displaymath}
\overline{X}_n = \frac{1}{n}\sum_{i=1}^nX_i \stackrel{p}{\rightarrow} \mu.
\end{displaymath} \pause
} % End size
Proof: $E(\overline{X}_n) = \mu$ and $Var(\overline{X}_n) = \frac{\sigma^2}{n}$.\pause
As $n \rightarrow \infty$, $E(\overline{X}_n) \rightarrow \mu$ and $Var(\overline{X}_n)\rightarrow 0$. \pause
So by the Variance Rule, $\overline{X}_n \stackrel{p}{\rightarrow} \mu$.\hspace{5mm} $\blacksquare$
\vspace{5mm} \pause
The implications are huge.
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Probability is long-run relative frequency}
\framesubtitle{Sometimes offered as a \emph{definition} of probability!}
\pause
This follows from the Law of Large Numbers. \pause
Repeat some process over and over a lot of times, and count how many times the event $A$ occurs. \pause
Independently for $i=1, \ldots, n$,
\begin{itemize}
\item Let $X_i(s)=1$ if $s \in A$, and $X_i(s)=0$ if $s \notin A$. \pause
\item So $X_i$ is an \emph{indicator} for the event $A$. \pause
\item $X_i$ is Bernoulli, with $P(X_i=1) = \theta = P(A)$. \pause
\item $E(X_i) = \sum_{x=0}^1 x \, p(x) \pause = 0\cdot(1-\theta) + 1\cdot \theta = \theta$.
\pause
\item $\overline{X}_n$ is the proportion of times the event occurs in $n$ independent trials. \pause
\item The proportion of successes converges in probability to $P(A)$.
% \pause
% \item So while $\overline{X}_n$ is a random quantity with its own probability distribution, \pause
% \item That distribution shrinks to fit in a tiny interval around $P(A)$, no matter how small the interval.
\end{itemize}
\vspace{3mm}
\begin{picture}(10,10)
% Line, direction (1,0), horizontal extent 200, starting point (50,0)
\put(50,0){\line(1,0){200} }
\put(150,5){\line(0,-1){10} }
\put(148,-15){$\theta$}
\put(100,-2){(} % Left parenthesis
\put(200,-2){)} % Right parenthesis
\put(90,-15){$\theta-\epsilon$}
\put(190,-15){$\theta+\epsilon$}
\end{picture}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{More comments}
%\framesubtitle{}
\begin{itemize}
\item Law of Large Numbers is the basis of using \emph{simulation} to estimate probabilities. \pause
\item Have things like $\frac{1}{n}\sum_{i=1}^nX_i^2 \stackrel{p}{\rightarrow} E(X^2)$ \pause
\item In fact, $\frac{1}{n}\sum_{i=1}^ng(X_i) \stackrel{p}{\rightarrow} E[g(X)]$ \pause
\item Convergence in probability also applies to \emph{vectors} of random variables, like $(X_n,Y_n) \stackrel{p}{\rightarrow} (c_1,c_2)$.
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Theorem}
\framesubtitle{Continuous Mapping Theorem for convergence in probability}
\pause
Let $g(x)$ be a function that is continuous at $x=c$.
If $T_n \stackrel{p}{\rightarrow} c$, then $g(T_n) \stackrel{p}{\rightarrow} g(c)$. \pause
\vspace{5mm}
% Examples:
\begin{itemize}
\item A Geometric distribution has expected value $\frac{1-\theta}{\theta}$. \pause
$g(\overline{X}_n) = 1/(1+\overline{X}_n)$ converges in probability to \pause
\begin{eqnarray*}
\frac{1}{1+E(X_i)} \pause & = & \frac{1}{1+\frac{1-\theta}{\theta}} \\
& = & \theta
\end{eqnarray*} \pause
\item A Uniform($0,\theta$) distribution has expected value $\theta/2$. So \\ \pause
$2\overline{X}_n \stackrel{p}{\rightarrow} 2E(X_i) \pause = 2\frac{\theta}{2}=\theta$
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Background }
\pause
\framesubtitle{For the proof of the continuous mapping theorem}
\begin{itemize}
\item $T_n \stackrel{p}{\rightarrow} c$ means that for all $\epsilon > 0$,
\begin{eqnarray*}
& & \lim_{n \rightarrow \infty}P\{|T_n-c|\geq\epsilon\} = 0 \\
& \Leftrightarrow & \lim_{n \rightarrow \infty}P\{|T_n-c|< \epsilon\} = 1
\end{eqnarray*}
\vspace{7mm}
\begin{picture}(10,10)(25,-25)
% Line, direction (1,0), horizontal extent 200, starting point (50,0)
\put(50,0){\line(1,0){200} }
\put(150,5){\line(0,-1){10} }
\put(148,-15){$c$}
\put(100,-2){(} % Left parenthesis
\put(200,-2){)} % Right parenthesis
\put(90,-15){$c-\epsilon$}
\put(190,-15){$c+\epsilon$}
\end{picture}
\pause
% \vspace{5mm}
\item $g(x)$ continuous at $c$ means that for all $\epsilon > 0$, there exists $\delta>0$ such that if $|x-c|<\delta$, then $|g(x)-g(c)| < \epsilon$.
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Proof of the Continuous Mapping Theorem}
\framesubtitle{For convergence in probability}
\begin{columns}
\column{1.1\textwidth} % To use more margin
Have $T_n \stackrel{p}{\rightarrow} c$ and $g(x)$ continuous at $c$.
Seek to show that for all $\epsilon > 0$, \pause
$ \lim_{n \rightarrow \infty}P\{|g(T_n)-g(c)|< \epsilon\} = 1$. \pause
Let $\epsilon > 0$ be given. \pause
$g(x)$ continuous at $c$ means there exists $\delta>0$ such that for $s\in S$, if $|T_n(s)-c|<\delta$, then $|g(T_n(s))-g(c)| < \epsilon$. \pause
That is,
\vspace{3mm}
If $s_0 \in \{s: |T_n(s)-c|<\delta\}$, then $s_0 \in \{s: |g(T_n(s))-g(c)| < \epsilon\}$. \pause
This is the definition of containment\pause:
\begin{eqnarray*}
&& \{s: |T_n(s)-c|<\delta\} \subseteq \{s: |g(T_n(s))-g(c)| < \epsilon\} \\ \pause
& \Rightarrow & P(|T_n-c|<\delta) \leq P(|g(T_n)-g(c)| < \epsilon) \pause \leq 1 \\ \pause
& \Rightarrow & \lim_{n \rightarrow \infty} P(|T_n-c|<\delta) \leq \lim_{n \rightarrow \infty}P(|g(T_n)-g(c)| < \epsilon) \leq 1 \\ \pause
&& \hspace{20mm} \equalto{}{\mbox{1}}
\end{eqnarray*}
\hspace{10mm} Squeeze $\blacksquare$
\end{columns}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Central Limit Theorem}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Convergence in distribution}
\framesubtitle{Another mode of convergence}
\pause
Definition: Let the random variables $X_1, X_2 \ldots$ have cumulative distribution functions $F_{_{X_1}}(x), F_{_{X_2}}(x) \ldots$\pause, and let the random variable $X$ have cumulative distribution function $F_{_X}(x)$. \pause
The (sequence of) random variable(s) $X_n$ is said to \emph{converge in distribution} to $X$ if \pause
{\LARGE
\begin{displaymath}
\lim_{n \rightarrow \infty}F_{_{X_n}}(x) = F_{_X}(x)
\end{displaymath} \pause
\vspace{4mm}
} % End size
at every point where $F_{_X}(x)$ is continuous\pause, and we write $X_n \stackrel{d}{\rightarrow} X$.
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Example: Convergence to a Bernoulli with $p=\frac{1}{2}$}
\framesubtitle{$\lim_{n \rightarrow \infty}F_{_{X_n}}(x) = F_{_X}(x)$ at all continuity points of $F_{_X}(x)$}
\pause
\begin{displaymath}
p_{_{X_n}}(x) = \left\{ \begin{array}{cl} % ll means left left
1/2 & \mbox{for } x=\frac{1}{n} \\
1/2 & \mbox{for } x=1+\frac{1}{n} \\
0 & \mbox{Otherwise}
\end{array} \right.
\end{displaymath}
\vspace{3mm}
\pause
\begin{picture}(10,10)(0,-10)
\put(15,-2){$n=1$}
\put(50,0){\line(1,0){200} }
\put(150,5){\line(0,-1){10} }
\put(100,5){\line(0,-1){10} }
\put(200,5){\line(0,-1){10} }
\put(98,-15){0}
\put(148,-15){1}
\put(198,-15){2}
\put(197.5,-2){$\bullet$}
\put(147.5,-2){$\bullet$}
\end{picture}
\pause
\begin{picture}(10,10)(0,10)
\put(15,-2){$n=2$}
\put(50,0){\line(1,0){200} }
\put(150,5){\line(0,-1){10} }
\put(100,5){\line(0,-1){10} }
\put(200,5){\line(0,-1){10} }
\put(98,-15){0}
\put(148,-15){1}
\put(198,-15){2}
\put(172.5,-2){$\bullet$}
\put(122.5,-2){$\bullet$}
\end{picture}
\pause
\begin{picture}(10,10)(0,30)
\put(15,-2){$n=3$}
\put(50,0){\line(1,0){200} }
\put(150,5){\line(0,-1){10} }
\put(100,5){\line(0,-1){10} }
\put(200,5){\line(0,-1){10} }
\put(98,-15){0}
\put(148,-15){1}
\put(198,-15){2}
\put(164.7,-2){$\bullet$}
\put(114.7,-2){$\bullet$}
\end{picture}
\pause
\vspace{15mm}
\begin{itemize}
\item For $x<0$, $\lim_{n \rightarrow \infty}F_{_{X_n}}(x)=$ \pause $0$ \pause
\item For $0<x<1$, $\lim_{n \rightarrow \infty}F_{_{X_n}}(x)=$ \pause $\frac{1}{2}$ \pause
\item For $x>1$, $\lim_{n \rightarrow \infty}F_{_{X_n}}(x)=$ \pause $1$ \pause
\item What happens at $x=0$ and $x=1$ does not matter.
\end{itemize}
\end{frame}
% A picture of the cdf would be really good.
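% Following up on the note above: a rough R sketch (kept in comments so it does not
% affect the compiled slides) that could draw the suggested cdf picture. It plots the
% step-function cdf of X_n for a few arbitrary values of n on top of the limiting
% Bernoulli(1/2) cdf. Remove the leading percent signs and paste into R to run.
%
% # Limiting cdf: jumps of 1/2 at x = 0 and x = 1
% plot(stepfun(c(0, 1), c(0, 0.5, 1)), verticals = FALSE, lwd = 3,
%      xlim = c(-0.5, 2.5), xlab = "x", ylab = "F(x)",
%      main = "cdf of Xn (dashed) approaching the Bernoulli(1/2) cdf (heavy)")
% for (n in c(1, 2, 5, 20)) {
%   # Xn puts probability 1/2 at 1/n and probability 1/2 at 1 + 1/n
%   plot(stepfun(c(1/n, 1 + 1/n), c(0, 0.5, 1)), verticals = FALSE,
%        add = TRUE, lty = 2)
% }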
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Convergence to a constant}
\pause
%\framesubtitle{}
{\small
Consider a ``degenerate" random variable $X$ with $P(X=c)=1$. \pause
\vspace{2mm}
\begin{picture}(10,10) % (25,-25)
% Line, direction (1,0), horizontal extent 200, starting point (50,0)
\put(50,0){\line(1,0){200} }
\put(150,5){\line(0,-1){10} }
\put(148,-15){$c$}
\put(100,-2){(} % Left parenthesis
\put(200,-2){)} % Right parenthesis
\put(90,-15){$c-\epsilon$}
\put(190,-15){$c+\epsilon$}
\end{picture}
\pause
\vspace{5mm}
Suppose $X_n$ converges in probability to $c$. \pause
\begin{itemize}
\item Then for any $x>c$, $F_{_{X_n}}(x) \rightarrow 1$ for $\epsilon$ small enough. \pause
\item And for any $x<c$, $F_{_{X_n}}(x) \rightarrow 0$ for $\epsilon$ small enough. \pause
\item So $F_{_{X_n}}(x) \rightarrow 1$ for all $x>c$ and $F_{_{X_n}}(x) \rightarrow 0$ for all $x<c$. \pause
\item That is, $X_n \stackrel{d}{\rightarrow} X$.
\end{itemize}
\pause
Now suppose $X_n \stackrel{d}{\rightarrow} X$. \pause
Let $\epsilon>0$ be given. % If necessary make it smaller.
\pause
\begin{eqnarray*}
P\{|X_n-c|<\epsilon\} & = & P\{ c-\epsilon < X_n < c+\epsilon \} \\ \pause
& = & F_{_{X_n}}(c+\epsilon)-F_{_{X_n}}(c-\epsilon) \pause \mbox{ so} \\ \pause
\lim_{n \rightarrow \infty}P\{|X_n-c|<\epsilon\} & = & \lim_{n \rightarrow \infty}F_{_{X_n}}(c+\epsilon) - \lim_{n \rightarrow \infty}F_{_{X_n}}(c-\epsilon) \\ \pause
& = & 1-0 = 1
\end{eqnarray*}
\pause
And $X_n$ converges in probability to $c$.
} % End size of whole slide.
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Comment}
%\framesubtitle{}
\begin{itemize}
\item Convergence in probability might seem redundant, because it's just convergence in distribution to a constant. \pause
\item But that's only true when the convergence is to a constant. \pause
\item Convergence in probability to a non-degenerate random variable \pause implies convergence in distribution. \pause
\item But convergence in distribution does not imply convergence in probability when the convergence is to a non-degenerate variable.
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Big Theorem about convergence in distribution}
\framesubtitle{Theorem 4.4.2 in the text}
\pause
Let the random variables $X_1, X_2 \ldots$ have cumulative distribution functions $F_{_{X_1}}(x), F_{_{X_2}}(x) \ldots$ and moment-generating functions $M_{_{X_1}}(t), M_{_{X_2}}(t) \ldots$. \pause
Let the random variable $X$ have cumulative distribution function $F_{_X}(x)$ and moment-generating function $M_{_X}(t)$. \pause
If
\begin{displaymath}
\lim_{n \rightarrow \infty} M_{_{X_n}}(t) = M_{_X}(t)
\end{displaymath}
for all $t$ in an open interval containing $t=0$, \pause
then $X_n$ converges in distribution to $X$. \pause
\vspace{5mm}
The idea is that convergence of moment-generating functions implies convergence of distribution functions. This makes sense because the correspondence between moment-generating functions and distribution functions is one-to-one.
\end{frame}
% _{_{X_1}} _{_{X_n}} _{_X}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Example: Poisson approximation to the binomial}
\framesubtitle{We did this before with probability mass functions and it was a challenge.}
\pause
Let $X_n$ be a binomial ($n,p_n$) random variable with $p_n=\frac{\lambda}{n}$, so that $n \rightarrow \infty$ and $p_n \rightarrow 0$ in such a way that the value of $n \, p_n=\lambda$ remains fixed.
Find the limiting distribution of $X_n$.
\pause
\vspace{1mm}
Recalling that the MGF of a Poisson is $e^{\lambda(e^t-1)}$ and $\left(1 + \frac{x}{n}\right)^n \rightarrow e^x$, \pause
\begin{eqnarray*}
M_{_{X_n}}(t) & = & (p_n e^t+1-p_n )^n \\ \pause
& = & \left(\frac{\lambda}{n}e^t+1-\frac{\lambda}{n} \right)^n \\ \pause
& = & \left(1+\frac{\lambda(e^t-1)}{n} \right)^n \\ \pause
& \rightarrow & e^{\lambda(e^t-1)} \\ \pause
\end{eqnarray*}
This is the MGF of Poisson($\lambda$), so $X_n$ converges in distribution to a Poisson($\lambda$) random variable.
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{The Central Limit Theorem}
\framesubtitle{Proved using limiting moment-generating functions}
\pause
Let $X_1, \ldots, X_n$ be independent random variables from a distribution with expected value $\mu$ and variance $\sigma^2$. \pause
Then
\begin{displaymath}
Z_n = \frac{\sqrt{n}(\overline{X}_n-\mu)}{\sigma} \stackrel{d}{\rightarrow} Z \sim N(0,1)
\end{displaymath}
\pause
In practice, $Z_n$ is often treated as standard normal for $n>25$\pause, although the $n$ required for an accurate approximation really depends on the distribution.
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Sometimes we say the distribution of the sample mean is approximately normal, or ``asymptotically" normal.}
\pause
%\framesubtitle{}
\begin{itemize}
\item This is justified by the Central Limit Theorem. \pause
\item But it does \emph{not} mean that $\overline{X}_n$ converges in distribution to a normal random variable. \pause
\item The Law of Large Numbers says that $\overline{X}_n$ converges in probability to a constant, $\mu$. \pause
\item So $\overline{X}_n$ converges to $\mu$ in distribution as well. \pause
\item That is, $\overline{X}_n$ converges in distribution to a degenerate random variable with all its probability at $\mu$.
\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Why would we say that for large $n$, the sample mean is approximately $N(\mu,\frac{\sigma^2}{n})$?}
\pause
\vspace{5mm}
Have $Z_n = \frac{\sqrt{n}(\overline{X}_n-\mu)}{\sigma}$ \pause converging to $ Z \sim N(0,1)$. \pause
{\footnotesize
\begin{eqnarray*}
Pr\{\overline{X}_n \leq x\} \pause & = & Pr\left\{ \frac{\sqrt{n}(\overline{X}_n-\mu)}{\sigma} \leq \frac{\sqrt{n}(x-\mu)}{\sigma}\right\} \\ \pause
& = & Pr\left\{ Z_n \leq \frac{\sqrt{n}(x-\mu)}{\sigma}\right\} \pause \approx \Phi\left( \frac{\sqrt{n}(x-\mu)}{\sigma} \right)
\end{eqnarray*}
} \pause
Suppose $Y$ is \emph{exactly} $N(\mu,\frac{\sigma^2}{n})$: \pause
{\footnotesize
\begin{eqnarray*}
Pr\{Y \leq x\} \pause & = & Pr\left\{ \frac{\sqrt{n}(Y-\mu)}{\sigma} \leq \frac{x-\mu}{\sigma/\sqrt{n}}\right\} \\ \pause
& = & Pr\left\{ Z \leq \frac{\sqrt{n}(x-\mu)}{\sigma}\right\} \pause = \Phi\left( \frac{\sqrt{n}(x-\mu)}{\sigma} \right)
\end{eqnarray*}
} % End size
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}
\frametitle{Copyright Information}
This slide show was prepared by \href{http://www.utstat.toronto.edu/~brunner}{Jerry Brunner}, Department of Statistical Sciences, University of Toronto. It is licensed under a
\href{http://creativecommons.org/licenses/by-sa/3.0/deed.en_US}
{Creative Commons Attribution - ShareAlike 3.0 Unported License}.
Use any part of it as you like and share the result freely.
The \LaTeX~source code is available from the course website:
\vspace{5mm}
\href{http://www.utstat.toronto.edu/~brunner/oldclass/256f19}
{\small\texttt{http://www.utstat.toronto.edu/$^\sim$brunner/oldclass/256f19}}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\end{document}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# R code for the convergence in probability plot
rm(list=ls())
epsilon = 1
Xn = seq(from=-10,to=10,by=0.1)
Y = Xn
y1 = Xn+epsilon
y2 = Xn-epsilon
plot(Xn,Y,pch=' ',axes=F,xlab='',ylab='')
lines(Xn,Y,lty=2)
lines(Xn,y1,lty=1); lines(Xn,y2,lty=1)
# Draw my own axes
xx = c(-9,9); yy = c(0,0); lines(xx,yy,lty=1)
xx = c(0,0); yy = c(-9,9); lines(xx,yy,lty=1)
text(10,0,'x'); text(0,10,'y')
text(-0.3,epsilon,expression(paste(epsilon)))
text(-0.5,-epsilon,expression(paste(-epsilon)))

# R code for plots of normal MGFs
tt = seq(from=-1,to=1,by=0.05)
mu = 0; sigsq = 1
zero = exp(mu*tt + 0.5*sigsq*tt^2)
mu = 1; one = exp(mu*tt + 0.5*sigsq*tt^2)
mu = -1; minusone = exp(mu*tt + 0.5*sigsq*tt^2)
x = c(tt,tt,tt); y = c(zero,one,minusone)
plot(x,y,pch=' ',xlab='t',ylab = 'M(t)')
lines(tt,zero,lty=1)
lines(tt,one,lty=2)
lines(tt,minusone,lty=3)
title("Fingerprints of the normal distribution")
# Legend
x1 <- c(-0.4,0) ; y1 <- c(4,4) ; lines(x1,y1,lty=1)
text(0.25,4,expression(paste(mu," = 0, ",sigma^2," = 1")))
x2 <- c(-0.4,0) ; y2 <- c(3.75,3.75) ; lines(x2,y2,lty=2)
text(0.25,3.75,expression(paste(mu," = 1, ",sigma^2," = 1")))
x3 <- c(-0.4,0) ; y3 <- c(3.5,3.5) ; lines(x3,y3,lty=3)
text(0.25,3.5,expression(paste(mu," = -1, ",sigma^2," = 1")))

# R code for plots of chi-squared MGFs
tt = seq(from=-0.25,to=0.25,by=0.005)
nu = 1; one = (1-2*tt)^(-nu/2)
nu = 2; two = (1-2*tt)^(-nu/2)
nu = 3; three = (1-2*tt)^(-nu/2)
x = c(tt,tt,tt); y = c(one,two,three)
plot(x,y,pch=' ',xlab='t',ylab = 'M(t)')
lines(tt,one,lty=1)
lines(tt,two,lty=2)
lines(tt,three,lty=3)
title("Fingerprints of the chi-squared distribution")
# Legend
x1 <- c(-0.2,-0.1) ; y1 <- c(2.5,2.5) ; lines(x1,y1,lty=1)
text(-0.05,2.5,expression(paste(nu," = 1")))
x2 <- c(-0.2,-0.1) ; y2 <- c(2.3,2.3) ; lines(x2,y2,lty=2)
text(-0.05,2.3,expression(paste(nu," = 2")))
x3 <- c(-0.2,-0.1) ; y3 <- c(2.1,2.1) ; lines(x3,y3,lty=3)
text(-0.05,2.1,expression(paste(nu," = 3")))

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% I cut this out because it hurt the continuity of LLN.
\begin{frame} \frametitle{Calculation of $Var(\overline{X}_n)$} \framesubtitle{An aside} \begin{eqnarray*} Var\left( \frac{1}{n}\sum_{i=1}^nX_i \right) \pause & = & \frac{1}{n^2} Var\left(\sum_{i=1}^nX_i \right) \\ \pause & \stackrel{ind}{=} & \frac{1}{n^2} \sum_{i=1}^nVar(X_i) \\ \pause & = & \frac{1}{n^2} \sum_{i=1}^n \sigma^2 \pause = \frac{1}{n^2} n\sigma^2 \pause = \frac{\sigma^2}{n} \end{eqnarray*} \end{frame}
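%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# R code for an optional Monte Carlo check (a rough sketch, not used in the slides):
# simulate many sample means to see Var(Xbar_n) come out close to sigma^2/n, and to
# see the approximate normality promised by the Central Limit Theorem. The choices
# below (exponential data, n = 50, 10,000 simulated means) are arbitrary illustrations.
set.seed(9999)
n = 50
sigsq = 4    # Exponential with mean 2 has variance sigma^2 = 4
xbar = replicate(10000, mean(rexp(n, rate = 1/2)))
var(xbar)    # Should be close to sigsq/n = 0.08
hist(xbar, breaks = 50, freq = FALSE,
     main = "Sample means are approximately normal")
curve(dnorm(x, mean = 2, sd = sqrt(sigsq/n)), add = TRUE, lwd = 2)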