# Set the default knitr chunk option so that chunk source code is echoed.
knitr::opts_chunk$set(echo = TRUE)
# Attach add-on packages.
# NOTE(review): require() only warns and returns FALSE when a package is
# missing, and no function from either package is called in the visible part
# of this file -- confirm both are still needed.
require(Sleuth3)
require(tigerstats)

The Donner Party

The Donner Party was a large group of people who were travelling to California in a wagon train. The party was trapped in the Sierra Nevada mountain range for more than three months because of heavy snowfall. Their food supplies were exhausted, and of the 87 members of the party, only 48 survived.

Getting the data:

# Donner Party records embedded as CSV text: a header row followed by 45 data
# rows with the columns AGE, SEX and STATUS.
lines <-
"AGE,SEX,STATUS
  23,MALE,DIED
  40,FEMALE,SURVIVED
  40,MALE,SURVIVED
  30,MALE,DIED
  28,MALE,DIED
  40,MALE,DIED
  45,FEMALE,DIED
  62,MALE,DIED
  65,MALE,DIED
  45,FEMALE,DIED
  25,FEMALE,DIED
  28,MALE,SURVIVED
  28,MALE,DIED
  23,MALE,DIED
  22,FEMALE,SURVIVED
  23,FEMALE,SURVIVED
  28,MALE,SURVIVED
  15,FEMALE,SURVIVED
  47,FEMALE,DIED
  57,MALE,DIED
  20,FEMALE,SURVIVED
  18,MALE,SURVIVED
  25,MALE,DIED
  60,MALE,DIED
  25,MALE,SURVIVED
  20,MALE,SURVIVED
  32,MALE,SURVIVED
  32,FEMALE,SURVIVED
  24,FEMALE,SURVIVED
  30,MALE,SURVIVED
  15,MALE,DIED
  50,FEMALE,DIED
  21,FEMALE,SURVIVED
  25,MALE,DIED
  46,MALE,SURVIVED
  32,FEMALE,SURVIVED
  30,MALE,DIED
  25,MALE,DIED
  25,MALE,DIED
  25,MALE,DIED
  30,MALE,DIED
  35,MALE,DIED
  23,MALE,SURVIVED
  24,MALE,DIED
  25,FEMALE,SURVIVED"

# Parse the embedded CSV into a data frame.
# * stringsAsFactors = TRUE: since R 4.0.0 read.csv() keeps text columns as
#   character by default, but the code further down calls relevel() on SEX
#   (factors only) and uses STATUS as a binomial response, so both columns
#   must be factors.
# * strip.white = TRUE: drops the indentation in front of each data row.
con <- textConnection(lines)
donner <- read.csv(
  con,
  sep = ",",
  strip.white = TRUE,
  stringsAsFactors = TRUE
)
# textConnection() hands back an already-open connection, which read.csv()
# leaves open; close it explicitly so it is not leaked.
close(con)

Let’s fit a logistic regression model:

# Additive logistic regression of STATUS on SEX and AGE (logit link),
# followed by its coefficient summary.
fit <- glm(
  formula = STATUS ~ SEX + AGE,
  data = donner,
  family = binomial(link = "logit")
)
summary(fit)

Here is what you need for the practice problem:

# Make MALE the reference level of SEX, so the fitted SEX coefficient is
# reported as SEXFEMALE. factor() is applied first because relevel() only
# accepts factors, and read.csv() returns character columns by default from
# R 4.0.0 on; on a column that is already a factor it is harmless.
donner$SEX <- relevel(factor(donner$SEX), ref = "MALE")
# Numeric effect-coded copy of SEX: +1 for FEMALE, -1 otherwise (MALE).
donner$SEX_eff <- ifelse(donner$SEX == "FEMALE", 1, -1)

# Additive model again, now with MALE as the reference level of SEX.
fit <- glm(
  formula = STATUS ~ SEX + AGE,
  family = "binomial",
  data = donner
)
summary(fit)
## 
## Call:
## glm(formula = STATUS ~ SEX + AGE, family = "binomial", data = donner)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.7445  -1.0441  -0.3029   0.8877   2.0472  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)  
## (Intercept)  1.63312    1.11018   1.471   0.1413  
## SEXFEMALE    1.59729    0.75547   2.114   0.0345 *
## AGE         -0.07820    0.03728  -2.097   0.0359 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 61.827  on 44  degrees of freedom
## Residual deviance: 51.256  on 42  degrees of freedom
## AIC: 57.256
## 
## Number of Fisher Scoring iterations: 4
# Same additive model, but with the +1 / -1 effect-coded SEX_eff column in
# place of the SEX factor.
fit <- glm(
  formula = STATUS ~ SEX_eff + AGE,
  family = "binomial",
  data = donner
)
summary(fit)
## 
## Call:
## glm(formula = STATUS ~ SEX_eff + AGE, family = "binomial", data = donner)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.7445  -1.0441  -0.3029   0.8877   2.0472  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)  
## (Intercept)  2.43177    1.19802   2.030   0.0424 *
## SEX_eff      0.79865    0.37773   2.114   0.0345 *
## AGE         -0.07820    0.03728  -2.097   0.0359 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 61.827  on 44  degrees of freedom
## Residual deviance: 51.256  on 42  degrees of freedom
## AIC: 57.256
## 
## Number of Fisher Scoring iterations: 4
# Interaction model: SEX * AGE expands to SEX + AGE + SEX:AGE, so the AGE
# slope may differ between the sexes.
fit1 <- glm(
  formula = STATUS ~ SEX * AGE,
  family = "binomial",
  data = donner
)
summary(fit1)
## 
## Call:
## glm(formula = STATUS ~ SEX * AGE, family = "binomial", data = donner)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.2279  -0.9388  -0.5550   0.7794   1.6998  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)  
## (Intercept)    0.31834    1.13103   0.281   0.7784  
## SEXFEMALE      6.92805    3.39887   2.038   0.0415 *
## AGE           -0.03248    0.03527  -0.921   0.3571  
## SEXFEMALE:AGE -0.16160    0.09426  -1.714   0.0865 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 61.827  on 44  degrees of freedom
## Residual deviance: 47.346  on 41  degrees of freedom
## AIC: 55.346
## 
## Number of Fisher Scoring iterations: 5
# Sequential analysis-of-deviance table for fit1, with a likelihood-ratio
# test for each term as it is added.
anova(object = fit1, test = "LRT")
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: STATUS
## 
## Terms added sequentially (first to last)
## 
## 
##         Df Deviance Resid. Df Resid. Dev Pr(>Chi)  
## NULL                       44     61.827           
## SEX      1   4.5403        43     57.286  0.03311 *
## AGE      1   6.0300        42     51.256  0.01406 *
## SEX:AGE  1   3.9099        41     47.346  0.04800 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Interaction model extended with a quadratic AGE term; I() protects the
# arithmetic AGE^2 from formula interpretation.
fit2 <- glm(
  formula = STATUS ~ SEX * AGE + I(AGE^2),
  family = "binomial",
  data = donner
)
summary(fit2)
## 
## Call:
## glm(formula = STATUS ~ SEX * AGE + I(AGE^2), family = "binomial", 
##     data = donner)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.2317  -0.9748  -0.3138   0.6874   1.6492  
## 
## Coefficients:
##                Estimate Std. Error z value Pr(>|z|)  
## (Intercept)   -4.047403   3.951640  -1.024   0.3057  
## SEXFEMALE      7.594162   3.403983   2.231   0.0257 *
## AGE            0.226944   0.228099   0.995   0.3198  
## I(AGE^2)      -0.003398   0.003047  -1.115   0.2648  
## SEXFEMALE:AGE -0.187732   0.098863  -1.899   0.0576 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 61.827  on 44  degrees of freedom
## Residual deviance: 45.830  on 40  degrees of freedom
## AIC: 55.83
## 
## Number of Fisher Scoring iterations: 5
# Sequential analysis-of-deviance table for fit2. No `test` argument is
# passed here, unlike the anova(fit1, test = "LRT") call.
anova(object = fit2)
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: STATUS
## 
## Terms added sequentially (first to last)
## 
## 
##          Df Deviance Resid. Df Resid. Dev
## NULL                        44     61.827
## SEX       1   4.5403        43     57.286
## AGE       1   6.0300        42     51.256
## I(AGE^2)  1   0.8658        41     50.390
## SEX:AGE   1   4.5603        40     45.830
# Fullest model: both the linear and the quadratic AGE terms interact with
# SEX.
fit3 <- glm(
  formula = STATUS ~ SEX * AGE + SEX * I(AGE^2),
  family = "binomial",
  data = donner
)
summary(fit3)
## 
## Call:
## glm(formula = STATUS ~ SEX * AGE + SEX * I(AGE^2), family = "binomial", 
##     data = donner)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.3396  -0.9757  -0.3438   0.5269   1.5901  
## 
## Coefficients:
##                     Estimate Std. Error z value Pr(>|z|)
## (Intercept)        -3.318484   3.940184  -0.842    0.400
## SEXFEMALE           0.265286  10.455222   0.025    0.980
## AGE                 0.183031   0.226632   0.808    0.419
## I(AGE^2)           -0.002803   0.002985  -0.939    0.348
## SEXFEMALE:AGE       0.299877   0.696050   0.431    0.667
## SEXFEMALE:I(AGE^2) -0.007356   0.010689  -0.688    0.491
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 61.827  on 44  degrees of freedom
## Residual deviance: 45.361  on 39  degrees of freedom
## AIC: 57.361
## 
## Number of Fisher Scoring iterations: 5
# Sequential analysis-of-deviance table for fit3. No `test` argument is
# passed here, unlike the anova(fit1, test = "LRT") call.
anova(object = fit3)
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: STATUS
## 
## Terms added sequentially (first to last)
## 
## 
##              Df Deviance Resid. Df Resid. Dev
## NULL                            44     61.827
## SEX           1   4.5403        43     57.286
## AGE           1   6.0300        42     51.256
## I(AGE^2)      1   0.8658        41     50.390
## SEX:AGE       1   4.5603        40     45.830
## SEX:I(AGE^2)  1   0.4689        39     45.361