--- title: "Unemployment Time Series" output: html_document --- ```{r setup, echo=FALSE, message=F} knitr::opts_chunk$set(echo = TRUE) setwd("c:/STA303/lectures/lec11/") library(ggplot2) library(arm) ``` ## Displaying the data ```{r} unemployment <- read.table ("unemployment.dat", header=TRUE) ggplot(unemployment, aes(year, unemployed.pct)) + geom_line() + ylab("unemployed (%)") ``` Let's try to fit a linear model: ```{r} unemployment$year1975 <- unemployment$year-1975 fit <- lm(unemployed.pct~year1975, data=unemployment) summary(fit) ``` There is a pretty small upward trend. Let's plot the residuals. ```{r} qplot(unemployment$year, fit$residuals) ``` It's pretty clear that the residuals are *not* independent -- large residuals tend to occur together. ## Fitting the AR(1) model Let's fit the AR(1) model: ```{r} y <- unemployment$unemployed.pct n <- length(y) y.lag <- c(NA, y[1:(n-1)]) lm.lag <- lm(y~y.lag) ``` Do the residuals look normal? ```{r} qplot(unemployment$year[2:length(y)], lm.lag$residuals) ``` Kind of, and it seems like we got rid of most of the "parabola" pattern in the previous plot. One way to see if the model is appropriate is "fake-data simulation": simulate several datasets from the model using the fitted parameters. ```{r} b.hat <- coef (lm.lag) # vector of 2 regression coefs s.hat <- sigma.hat (lm.lag) # residual sd simulate_series <- function(){ n_years <- 58 y1 <- 3.9 y.rep <- rep(0, n_years) y.rep[1] <- y1 for (t in 2:n_years){ prediction <- c (1, y.rep[t-1]) %*% b.hat y.rep[t] <- rnorm (1, prediction, s.hat) } ggdata <- data.frame(year=seq(1947, 2004), unemployment.pct=y.rep) return(y.rep) } n.sims <- 5 y.rep <- replicate(n.sims, simulate_series()) ``` Let's create a bunch of plots of simulations: ```{r} ggdata <- data.frame(year=seq(1947, 2004), unemployed.pct=y.rep[,1]) ggplot(ggdata, aes(year, unemployed.pct)) + geom_line() + ylab("unemployed (%)") ggdata <- data.frame(year=seq(1947, 2004), unemployed.pct=y.rep[,2]) ggplot(ggdata, aes(year, unemployed.pct)) + geom_line() + ylab("unemployed (%)") ggdata <- data.frame(year=seq(1947, 2004), unemployed.pct=y.rep[,3]) ggplot(ggdata, aes(year, unemployed.pct)) + geom_line() + ylab("unemployed (%)") ggdata <- data.frame(year=seq(1947, 2004), unemployed.pct=y.rep[,4]) ggplot(ggdata, aes(year, unemployed.pct)) + geom_line() + ylab("unemployed (%)") ```