---
title: "Unemployment Time Series"
output: html_document
---

```{r setup, echo=FALSE, message=F}
knitr::opts_chunk$set(echo = TRUE)
setwd("c:/STA303/lectures/lec11/")
library(ggplot2)
library(arm)
```

## Displaying the data
```{r}
unemployment <- read.table ("unemployment.dat", header=TRUE)
ggplot(unemployment, aes(year, unemployed.pct)) + geom_line() + ylab("unemployed (%)")
```


Let's try to fit a linear model:

```{r}
unemployment$year1975 <- unemployment$year-1975
fit <- lm(unemployed.pct~year1975, data=unemployment)
summary(fit)
```

There is a pretty small upward trend. Let's plot the residuals.

```{r}
qplot(unemployment$year, fit$residuals)
```

It's pretty clear that the residuals are *not* independent -- large residuals tend to occur together.

## Fitting the AR(1) model

Let's fit the AR(1) model:

```{r}
y <- unemployment$unemployed.pct
n <- length(y)
y.lag <- c(NA, y[1:(n-1)])
lm.lag <- lm(y~y.lag)
```

Do the residuals look normal?

```{r}
qplot(unemployment$year[2:length(y)], lm.lag$residuals)
```

Kind of, and it seems like we got rid of most of the "parabola" pattern in the previous plot.


One way to see if the model is appropriate is "fake-data simulation": simulate several datasets from the model using the fitted parameters.


```{r}
b.hat <- coef (lm.lag)        # vector of 2 regression coefs
s.hat <- sigma.hat (lm.lag)   # residual sd


simulate_series <- function(){
  n_years <- 58
  y1 <- 3.9
  y.rep <- rep(0, n_years)
  y.rep[1] <- y1
  for (t in 2:n_years){
    prediction <- c (1, y.rep[t-1]) %*% b.hat
    y.rep[t] <- rnorm (1, prediction, s.hat)
  }
  ggdata <- data.frame(year=seq(1947, 2004), unemployment.pct=y.rep)
  return(y.rep)
}

n.sims <- 5
y.rep <- replicate(n.sims, simulate_series())

```


Let's create a bunch of plots of simulations:

```{r}
ggdata <- data.frame(year=seq(1947, 2004), unemployed.pct=y.rep[,1])
ggplot(ggdata, aes(year, unemployed.pct)) + geom_line() + ylab("unemployed (%)")

ggdata <- data.frame(year=seq(1947, 2004), unemployed.pct=y.rep[,2])
ggplot(ggdata, aes(year, unemployed.pct)) + geom_line() + ylab("unemployed (%)")

ggdata <- data.frame(year=seq(1947, 2004), unemployed.pct=y.rep[,3])
ggplot(ggdata, aes(year, unemployed.pct)) + geom_line() + ylab("unemployed (%)")

ggdata <- data.frame(year=seq(1947, 2004), unemployed.pct=y.rep[,4])
ggplot(ggdata, aes(year, unemployed.pct)) + geom_line() + ylab("unemployed (%)")


```