Read in the data
require(Sleuth3)
krunnit <- case2101
Let’s plot the logits of the proportions vs. the logits of the probability of going extinct.
require(gtools)
require(ggplot2)
krunnit$pi_hat <- krunnit$Extinct/krunnit$AtRisk
qplot(Area, logit(pi_hat), data=krunnit)
Doesn’t look great. What about the log(Area) vs. the logits?
qplot(log(Area), logit(pi_hat), data=krunnit)
Better!
Let’s fit the model
fit <- glm(cbind(Extinct, AtRisk-Extinct)~log(Area), family=binomial(), data=krunnit)
summary(fit)
##
## Call:
## glm(formula = cbind(Extinct, AtRisk - Extinct) ~ log(Area), family = binomial(),
## data = krunnit)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.71726 -0.67722 0.09726 0.48365 1.49545
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.19620 0.11845 -10.099 < 2e-16 ***
## log(Area) -0.29710 0.05485 -5.416 6.08e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 45.338 on 17 degrees of freedom
## Residual deviance: 12.062 on 16 degrees of freedom
## AIC: 75.394
##
## Number of Fisher Scoring iterations: 4
Note that we can interpret this as plain old logistic regression:
data <- data.frame(Island=c(), Area=c(), Died=c())
for(i in 1:dim(krunnit)[1]){
new_data <- data.frame(Island=rep(krunnit[i, "Island"], krunnit[i, "AtRisk"]),
Area= rep(krunnit[i, "Area"], krunnit[i, "AtRisk"]),
Died=c(rep(1, krunnit[i, "Extinct"]), rep(0, krunnit[i, "AtRisk"]-krunnit[i, "Extinct"])))
data <- rbind(data, new_data)
}
fit_logistic <- glm(Died~log(Area), family=binomial(), data=data)
summary(fit_logistic)
##
## Call:
## glm(formula = Died ~ log(Area), family = binomial(), data = data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.0105 -0.6778 -0.5450 -0.3523 2.3709
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.19620 0.11845 -10.099 < 2e-16 ***
## log(Area) -0.29710 0.05485 -5.416 6.08e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 578.01 on 631 degrees of freedom
## Residual deviance: 544.74 on 630 degrees of freedom
## AIC: 548.74
##
## Number of Fisher Scoring iterations: 5
(Run the R code to view the new data we created.)
If you double the area, how to the odds change?
new_dat <- data.frame(Island=c("small", "large"), Area=c(100, 200))
predict(fit, new_dat)
## 1 2
## -2.564408 -2.770345
plogis(predict(fit, new_dat))
## 1 2
## 0.07146445 0.05894787
0.059/0.071
## [1] 0.8309859
2^-0.3
## [1] 0.8122524
We can also get the “CI” for the prediction:
2^data.frame(confint(fit))["log(Area)",]
## Waiting for profiling to be done...
## X2.5.. X97.5..
## log(Area) 0.7537959 0.8752257
pi <- sum(krunnit$Extinct)/sum(krunnit$AtRisk)
pi
## [1] 0.1708861
fit0 <- glm(cbind(Extinct, AtRisk-Extinct)~1, family=binomial(), data=krunnit)
summary(fit0)
##
## Call:
## glm(formula = cbind(Extinct, AtRisk - Extinct) ~ 1, family = binomial(),
## data = krunnit)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.1761 -0.9375 0.2539 1.3658 2.6467
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.5794 0.1057 -14.95 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 45.338 on 17 degrees of freedom
## Residual deviance: 45.338 on 17 degrees of freedom
## AIC: 106.67
##
## Number of Fisher Scoring iterations: 4
plogis(-1.5794) #probability of extinction, regardless of islad
## [1] 0.1708805
fit_saturated <- glm(cbind(Extinct, AtRisk-Extinct)~Island, family=binomial(), data=krunnit)
summary(fit)
##
## Call:
## glm(formula = cbind(Extinct, AtRisk - Extinct) ~ log(Area), family = binomial(),
## data = krunnit)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.71726 -0.67722 0.09726 0.48365 1.49545
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.19620 0.11845 -10.099 < 2e-16 ***
## log(Area) -0.29710 0.05485 -5.416 6.08e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 45.338 on 17 degrees of freedom
## Residual deviance: 12.062 on 16 degrees of freedom
## AIC: 75.394
##
## Number of Fisher Scoring iterations: 4
summary(fit_saturated)
##
## Call:
## glm(formula = cbind(Extinct, AtRisk - Extinct) ~ Island, family = binomial(),
## data = krunnit)
##
## Deviance Residuals:
## [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.12026 0.61101 -3.470 0.00052 ***
## IslandIsonkivenletto 0.10536 0.74981 0.141 0.88825
## IslandKraasukka 0.73397 0.82815 0.886 0.37547
## IslandLansiletto 0.64436 0.72588 0.888 0.37471
## IslandLuusiletto 1.33181 0.81501 1.634 0.10224
## IslandMaakrunni -0.94001 0.84988 -1.106 0.26870
## IslandPihlajakari -0.11333 0.86162 -0.132 0.89536
## IslandPohjanletto -0.07696 0.96379 -0.080 0.93635
## IslandRaiska 1.10866 0.73742 1.503 0.13273
## IslandRistikarenletto 2.12026 1.01980 2.079 0.03761 *
## IslandRistikari 0.39750 0.70085 0.567 0.57060
## IslandTasasenletto 0.65393 0.76057 0.860 0.38991
## IslandTiirakari 1.38938 0.69806 1.990 0.04655 *
## IslandToro 1.22645 0.72794 1.685 0.09202 .
## IslandTyni 0.59421 0.78537 0.757 0.44929
## IslandUlkokrunni -0.51879 0.76656 -0.677 0.49855
## IslandVatunginletto 1.98673 0.80074 2.481 0.01310 *
## IslandVatunginnokka 0.98083 0.73371 1.337 0.18129
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 4.5338e+01 on 17 degrees of freedom
## Residual deviance: 3.6415e-14 on 0 degrees of freedom
## AIC: 95.332
##
## Number of Fisher Scoring iterations: 4
anova(fit, fit_saturated, test="LRT")
## Analysis of Deviance Table
##
## Model 1: cbind(Extinct, AtRisk - Extinct) ~ log(Area)
## Model 2: cbind(Extinct, AtRisk - Extinct) ~ Island
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 16 12.062
## 2 0 0.000 16 12.062 0.7397