# Echo the source of every chunk in the knitted document.
knitr::opts_chunk$set(echo = TRUE)
# Attach packages with library(): it stops with an error when a package is
# missing, whereas require() only returns FALSE and lets the script carry on.
library(Sleuth3)
library(tigerstats)
The Donner Party was a large group of people who were travelling to California in a wagon train. The party was trapped in the Sierra Nevada mountain range for more than three months because of heavy snowfall. Their food supplies were exhausted, and of the 87 members of the party, only 48 survived.
Getting the data:
# Inline CSV with one row per person: AGE (years), SEX and survival STATUS.
lines <-
"AGE,SEX,STATUS
23,MALE,DIED
40,FEMALE,SURVIVED
40,MALE,SURVIVED
30,MALE,DIED
28,MALE,DIED
40,MALE,DIED
45,FEMALE,DIED
62,MALE,DIED
65,MALE,DIED
45,FEMALE,DIED
25,FEMALE,DIED
28,MALE,SURVIVED
28,MALE,DIED
23,MALE,DIED
22,FEMALE,SURVIVED
23,FEMALE,SURVIVED
28,MALE,SURVIVED
15,FEMALE,SURVIVED
47,FEMALE,DIED
57,MALE,DIED
20,FEMALE,SURVIVED
18,MALE,SURVIVED
25,MALE,DIED
60,MALE,DIED
25,MALE,SURVIVED
20,MALE,SURVIVED
32,MALE,SURVIVED
32,FEMALE,SURVIVED
24,FEMALE,SURVIVED
30,MALE,SURVIVED
15,MALE,DIED
50,FEMALE,DIED
21,FEMALE,SURVIVED
25,MALE,DIED
46,MALE,SURVIVED
32,FEMALE,SURVIVED
30,MALE,DIED
25,MALE,DIED
25,MALE,DIED
25,MALE,DIED
30,MALE,DIED
35,MALE,DIED
23,MALE,SURVIVED
24,MALE,DIED
25,FEMALE,SURVIVED"
con <- textConnection(lines)
# Read SEX and STATUS as factors. Since R 4.0.0 read.csv() keeps strings as
# character by default, which a binomial glm() rejects as a response and
# relevel() rejects as an argument. Factor levels are sorted alphabetically,
# so STATUS has levels DIED, SURVIVED; a binomial glm() treats the first
# level as failure, i.e. the models describe the probability of SURVIVED.
donner <- read.csv(con, sep = ",", stringsAsFactors = TRUE)
# read.csv() does not close a connection that was already open; release it.
close(con)
Let’s run Logistic Regression:
# Additive logistic regression of STATUS on SEX and AGE, using the binomial
# family with an explicit logit link.
fit <- glm(
  formula = STATUS ~ SEX + AGE,
  data = donner,
  family = binomial(link = "logit")
)
summary(fit)
Here is what you need for the practice problem:
# Make MALE the reference level so the dummy-coded coefficient is reported as
# SEXFEMALE. relevel() only accepts factors, so coerce with factor() first in
# case SEX was read in as a character column.
donner$SEX <- relevel(factor(donner$SEX), ref = "MALE")
# Numeric +1/-1 coding of sex: +1 for FEMALE, -1 otherwise.
donner$SEX_eff <- ifelse(donner$SEX == "FEMALE", 1, -1)
# Additive model using the releveled SEX factor.
fit <- glm(STATUS ~ SEX + AGE, data = donner, family = "binomial")
summary(fit)
##
## Call:
## glm(formula = STATUS ~ SEX + AGE, family = "binomial", data = donner)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.7445 -1.0441 -0.3029 0.8877 2.0472
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.63312 1.11018 1.471 0.1413
## SEXFEMALE 1.59729 0.75547 2.114 0.0345 *
## AGE -0.07820 0.03728 -2.097 0.0359 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 61.827 on 44 degrees of freedom
## Residual deviance: 51.256 on 42 degrees of freedom
## AIC: 57.256
##
## Number of Fisher Scoring iterations: 4
# Same additive model, with the numeric +1/-1 SEX_eff column in place of the
# SEX factor.
fit <- glm(
  formula = STATUS ~ SEX_eff + AGE,
  family = "binomial",
  data = donner
)
summary(fit)
##
## Call:
## glm(formula = STATUS ~ SEX_eff + AGE, family = "binomial", data = donner)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.7445 -1.0441 -0.3029 0.8877 2.0472
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.43177 1.19802 2.030 0.0424 *
## SEX_eff 0.79865 0.37773 2.114 0.0345 *
## AGE -0.07820 0.03728 -2.097 0.0359 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 61.827 on 44 degrees of freedom
## Residual deviance: 51.256 on 42 degrees of freedom
## AIC: 57.256
##
## Number of Fisher Scoring iterations: 4
# SEX * AGE expands to both main effects plus the SEX:AGE interaction.
fit1 <- glm(
  formula = STATUS ~ SEX * AGE,
  family = "binomial",
  data = donner
)
summary(fit1)
##
## Call:
## glm(formula = STATUS ~ SEX * AGE, family = "binomial", data = donner)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.2279 -0.9388 -0.5550 0.7794 1.6998
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.31834 1.13103 0.281 0.7784
## SEXFEMALE 6.92805 3.39887 2.038 0.0415 *
## AGE -0.03248 0.03527 -0.921 0.3571
## SEXFEMALE:AGE -0.16160 0.09426 -1.714 0.0865 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 61.827 on 44 degrees of freedom
## Residual deviance: 47.346 on 41 degrees of freedom
## AIC: 55.346
##
## Number of Fisher Scoring iterations: 5
# Sequential analysis of deviance for fit1 with likelihood-ratio tests.
anova(object = fit1, test = "LRT")
## Analysis of Deviance Table
##
## Model: binomial, link: logit
##
## Response: STATUS
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 44 61.827
## SEX 1 4.5403 43 57.286 0.03311 *
## AGE 1 6.0300 42 51.256 0.01406 *
## SEX:AGE 1 3.9099 41 47.346 0.04800 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Interaction model extended with a quadratic AGE term; I() makes ^ act as
# arithmetic squaring rather than as a formula operator.
fit2 <- glm(
  formula = STATUS ~ SEX * AGE + I(AGE^2),
  family = "binomial",
  data = donner
)
summary(fit2)
##
## Call:
## glm(formula = STATUS ~ SEX * AGE + I(AGE^2), family = "binomial",
## data = donner)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.2317 -0.9748 -0.3138 0.6874 1.6492
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -4.047403 3.951640 -1.024 0.3057
## SEXFEMALE 7.594162 3.403983 2.231 0.0257 *
## AGE 0.226944 0.228099 0.995 0.3198
## I(AGE^2) -0.003398 0.003047 -1.115 0.2648
## SEXFEMALE:AGE -0.187732 0.098863 -1.899 0.0576 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 61.827 on 44 degrees of freedom
## Residual deviance: 45.830 on 40 degrees of freedom
## AIC: 55.83
##
## Number of Fisher Scoring iterations: 5
# Sequential analysis of deviance for fit2 (no test argument supplied).
anova(object = fit2)
## Analysis of Deviance Table
##
## Model: binomial, link: logit
##
## Response: STATUS
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev
## NULL 44 61.827
## SEX 1 4.5403 43 57.286
## AGE 1 6.0300 42 51.256
## I(AGE^2) 1 0.8658 41 50.390
## SEX:AGE 1 4.5603 40 45.830
# Adds a SEX:I(AGE^2) interaction on top of the terms in fit2, so both the
# linear and the quadratic AGE terms may differ by sex.
fit3 <- glm(
  formula = STATUS ~ SEX * AGE + SEX * I(AGE^2),
  family = "binomial",
  data = donner
)
summary(fit3)
##
## Call:
## glm(formula = STATUS ~ SEX * AGE + SEX * I(AGE^2), family = "binomial",
## data = donner)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.3396 -0.9757 -0.3438 0.5269 1.5901
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.318484 3.940184 -0.842 0.400
## SEXFEMALE 0.265286 10.455222 0.025 0.980
## AGE 0.183031 0.226632 0.808 0.419
## I(AGE^2) -0.002803 0.002985 -0.939 0.348
## SEXFEMALE:AGE 0.299877 0.696050 0.431 0.667
## SEXFEMALE:I(AGE^2) -0.007356 0.010689 -0.688 0.491
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 61.827 on 44 degrees of freedom
## Residual deviance: 45.361 on 39 degrees of freedom
## AIC: 57.361
##
## Number of Fisher Scoring iterations: 5
# Sequential analysis of deviance for fit3 (no test argument supplied).
anova(object = fit3)
## Analysis of Deviance Table
##
## Model: binomial, link: logit
##
## Response: STATUS
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev
## NULL 44 61.827
## SEX 1 4.5403 43 57.286
## AGE 1 6.0300 42 51.256
## I(AGE^2) 1 0.8658 41 50.390
## SEX:AGE 1 4.5603 40 45.830
## SEX:I(AGE^2) 1 0.4689 39 45.361