lines <-
"AGE,SEX,STATUS
23,MALE,DIED
40,FEMALE,SURVIVED
40,MALE,SURVIVED
30,MALE,DIED
28,MALE,DIED
40,MALE,DIED
45,FEMALE,DIED
62,MALE,DIED
65,MALE,DIED
45,FEMALE,DIED
25,FEMALE,DIED
28,MALE,SURVIVED
28,MALE,DIED
23,MALE,DIED
22,FEMALE,SURVIVED
23,FEMALE,SURVIVED
28,MALE,SURVIVED
15,FEMALE,SURVIVED
47,FEMALE,DIED
57,MALE,DIED
20,FEMALE,SURVIVED
18,MALE,SURVIVED
25,MALE,DIED
60,MALE,DIED
25,MALE,SURVIVED
20,MALE,SURVIVED
32,MALE,SURVIVED
32,FEMALE,SURVIVED
24,FEMALE,SURVIVED
30,MALE,SURVIVED
15,MALE,DIED
50,FEMALE,DIED
21,FEMALE,SURVIVED
25,MALE,DIED
46,MALE,SURVIVED
32,FEMALE,SURVIVED
30,MALE,DIED
25,MALE,DIED
25,MALE,DIED
25,MALE,DIED
30,MALE,DIED
35,MALE,DIED
23,MALE,SURVIVED
24,MALE,DIED
25,FEMALE,SURVIVED"
con <- textConnection(lines)
donner <- read.csv(con, sep=",")
require(Sleuth3)
krunnit <- case2101
Make sure you understand what this function does and why. It’s not as important to understand the sapply
stuff below.
ctable_line <- function(cutoff, probs, y){
correct <- mean( (probs >= cutoff) == y)
false_pos <- sum((y==0) * (probs > cutoff))/sum(y==0)
false_neg <- sum((y==1) * (probs < cutoff))/sum(y==1)
return(data.frame(cutoff=cutoff, correct=correct, false_pos=false_pos, false_neg=false_neg))
}
#sample call:
#donner$STATUS<- relevel(donner$STATUS, "DIED")
#fit <- glm(STATUS~SEX+AGE, family=binomial, data=donner)
#ctable_line(cutoff=.6, probs=fit$fitted.values, y=donner$STATUS=="SURVIVED")
Let’s now use this function to obtain the correction classification rate, false negative rate, and false positive rate for the Donner data:
donner$STATUS<- relevel(donner$STATUS, "DIED")
fit <- glm(STATUS~SEX+AGE, family=binomial, data=donner)
cutoffs <- seq(0, 1, .02)
t(sapply(cutoffs, ctable_line, probs= fit$fitted.values, y=donner$STATUS=="SURVIVED"))
## cutoff correct false_pos false_neg
## [1,] 0 0.4444444 1 0
## [2,] 0.02 0.4444444 1 0
## [3,] 0.04 0.4888889 0.92 0
## [4,] 0.06 0.5333333 0.84 0
## [5,] 0.08 0.5333333 0.84 0
## [6,] 0.1 0.5333333 0.84 0
## [7,] 0.12 0.5333333 0.84 0
## [8,] 0.14 0.5111111 0.84 0.05
## [9,] 0.16 0.5111111 0.84 0.05
## [10,] 0.18 0.5111111 0.84 0.05
## [11,] 0.2 0.5111111 0.8 0.1
## [12,] 0.22 0.5111111 0.8 0.1
## [13,] 0.24 0.5111111 0.8 0.1
## [14,] 0.26 0.5333333 0.76 0.1
## [15,] 0.28 0.5333333 0.76 0.1
## [16,] 0.3 0.5111111 0.76 0.15
## [17,] 0.32 0.5111111 0.76 0.15
## [18,] 0.34 0.5777778 0.6 0.2
## [19,] 0.36 0.5777778 0.6 0.2
## [20,] 0.38 0.5777778 0.52 0.3
## [21,] 0.4 0.6 0.48 0.3
## [22,] 0.42 0.6 0.48 0.3
## [23,] 0.44 0.7555556 0.16 0.35
## [24,] 0.46 0.7777778 0.08 0.4
## [25,] 0.48 0.7777778 0.08 0.4
## [26,] 0.5 0.7777778 0.08 0.4
## [27,] 0.52 0.7555556 0.08 0.45
## [28,] 0.54 0.7333333 0.08 0.5
## [29,] 0.56 0.7111111 0.08 0.55
## [30,] 0.58 0.7111111 0.08 0.55
## [31,] 0.6 0.7111111 0.08 0.55
## [32,] 0.62 0.7333333 0.04 0.55
## [33,] 0.64 0.7333333 0.04 0.55
## [34,] 0.66 0.7333333 0.04 0.55
## [35,] 0.68 0.6888889 0.04 0.65
## [36,] 0.7 0.6888889 0.04 0.65
## [37,] 0.72 0.6888889 0.04 0.65
## [38,] 0.74 0.6888889 0.04 0.65
## [39,] 0.76 0.6888889 0.04 0.65
## [40,] 0.78 0.6888889 0.04 0.65
## [41,] 0.8 0.6666667 0 0.75
## [42,] 0.82 0.6222222 0 0.85
## [43,] 0.84 0.6 0 0.9
## [44,] 0.86 0.5777778 0 0.95
## [45,] 0.88 0.5777778 0 0.95
## [46,] 0.9 0.5555556 0 1
## [47,] 0.92 0.5555556 0 1
## [48,] 0.94 0.5555556 0 1
## [49,] 0.96 0.5555556 0 1
## [50,] 0.98 0.5555556 0 1
## [51,] 1 0.5555556 0 1
ctable_line_binomial <- function(cutoff, probs, successes, fails){
correct <- sum(successes*(probs > cutoff) + fails*(probs<=cutoff))/(sum(successes)+sum(fails))
false_pos <- sum(fails * (probs > cutoff))/sum(fails)
false_neg <- sum(successes * (probs <= cutoff))/sum(successes)
return(data.frame(cutoff=cutoff, correct=correct, false_pos=false_pos, false_neg=false_neg))
}
#sample call:
fit <- glm(cbind(Extinct, AtRisk-Extinct)~Area, family=binomial, data=krunnit)
ctable_line_binomial(cutoff=.6, probs=fit$fitted.values, successes=krunnit$Extinct, fails=(krunnit$AtRisk-krunnit$Extinct))
## cutoff correct false_pos false_neg
## 1 0.6 0.8291139 0 1
fit <- glm(cbind(Extinct, AtRisk-Extinct)~Area, family=binomial, data=krunnit)
cutoffs <- seq(0, 1, .02)
t(sapply(cutoffs, ctable_line_binomial, probs= fit$fitted.values, successes=krunnit$Extinct, fails=(krunnit$AtRisk-krunnit$Extinct)))
## cutoff correct false_pos false_neg
## [1,] 0 0.1708861 1 0
## [2,] 0.02 0.1708861 1 0
## [3,] 0.04 0.2737342 0.8664122 0.0462963
## [4,] 0.06 0.2737342 0.8664122 0.0462963
## [5,] 0.08 0.2737342 0.8664122 0.0462963
## [6,] 0.1 0.3702532 0.7442748 0.07407407
## [7,] 0.12 0.3702532 0.7442748 0.07407407
## [8,] 0.14 0.3702532 0.7442748 0.07407407
## [9,] 0.16 0.3702532 0.7442748 0.07407407
## [10,] 0.18 0.443038 0.6374046 0.1666667
## [11,] 0.2 0.5047468 0.5515267 0.2222222
## [12,] 0.22 0.8291139 0 1
## [13,] 0.24 0.8291139 0 1
## [14,] 0.26 0.8291139 0 1
## [15,] 0.28 0.8291139 0 1
## [16,] 0.3 0.8291139 0 1
## [17,] 0.32 0.8291139 0 1
## [18,] 0.34 0.8291139 0 1
## [19,] 0.36 0.8291139 0 1
## [20,] 0.38 0.8291139 0 1
## [21,] 0.4 0.8291139 0 1
## [22,] 0.42 0.8291139 0 1
## [23,] 0.44 0.8291139 0 1
## [24,] 0.46 0.8291139 0 1
## [25,] 0.48 0.8291139 0 1
## [26,] 0.5 0.8291139 0 1
## [27,] 0.52 0.8291139 0 1
## [28,] 0.54 0.8291139 0 1
## [29,] 0.56 0.8291139 0 1
## [30,] 0.58 0.8291139 0 1
## [31,] 0.6 0.8291139 0 1
## [32,] 0.62 0.8291139 0 1
## [33,] 0.64 0.8291139 0 1
## [34,] 0.66 0.8291139 0 1
## [35,] 0.68 0.8291139 0 1
## [36,] 0.7 0.8291139 0 1
## [37,] 0.72 0.8291139 0 1
## [38,] 0.74 0.8291139 0 1
## [39,] 0.76 0.8291139 0 1
## [40,] 0.78 0.8291139 0 1
## [41,] 0.8 0.8291139 0 1
## [42,] 0.82 0.8291139 0 1
## [43,] 0.84 0.8291139 0 1
## [44,] 0.86 0.8291139 0 1
## [45,] 0.88 0.8291139 0 1
## [46,] 0.9 0.8291139 0 1
## [47,] 0.92 0.8291139 0 1
## [48,] 0.94 0.8291139 0 1
## [49,] 0.96 0.8291139 0 1
## [50,] 0.98 0.8291139 0 1
## [51,] 1 0.8291139 0 1