# Load the passenger records, then keep only rows with a recorded class.
titanic <- read.csv("titanic.csv")
titanic <- titanic[which(!is.na(titanic$pclass)), ]
We can run the logistic regression by treating class as an integer:
# pclass is numeric here, so it enters the model as a single slope term.
glm(survived ~ pclass, family = binomial, data = titanic)
##
## Call: glm(formula = survived ~ pclass, family = binomial, data = titanic)
##
## Coefficients:
## (Intercept) pclass
## 1.268 -0.779
##
## Degrees of Freedom: 1308 Total (i.e. Null); 1307 Residual
## Null Deviance: 1741
## Residual Deviance: 1613 AIC: 1617
Or we can treat class as a factor:
# Wrapping pclass in factor() gives each class beyond the first its own term.
glm(survived ~ factor(pclass), family = binomial, data = titanic)
##
## Call: glm(formula = survived ~ factor(pclass), family = binomial, data = titanic)
##
## Coefficients:
## (Intercept) factor(pclass)2 factor(pclass)3
## 0.4861 -0.7696 -1.5567
##
## Degrees of Freedom: 1308 Total (i.e. Null); 1306 Residual
## Null Deviance: 1741
## Residual Deviance: 1613 AIC: 1619
Let’s now convert `pclass` to character and see what happens:
# Re-encode the class column as character strings and preview the first ten.
titanic$pclass <- as.character(titanic$pclass)
titanic$pclass[seq_len(10)]
## [1] "1" "1" "1" "1" "1" "1" "1" "1" "1" "1"
As you can see, the digits are now in quotes, indicating that R won’t treat them as integers anymore.
# Same formula as the first fit, but pclass now holds character strings.
glm(survived ~ pclass, family = binomial, data = titanic)
##
## Call: glm(formula = survived ~ pclass, family = binomial, data = titanic)
##
## Coefficients:
## (Intercept) pclass2 pclass3
## 0.4861 -0.7696 -1.5567
##
## Degrees of Freedom: 1308 Total (i.e. Null); 1306 Residual
## Null Deviance: 1741
## Residual Deviance: 1613 AIC: 1619
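This is the same as converting `pclass` to a factor: R’s model-fitting functions automatically treat a character predictor as a factor, so `glm` builds the same dummy variables and reports the same coefficients.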
Now, let’s switch from “1” to “first class”, “2” to “second class”, etc:
# Translate passenger-class codes ("1", "2", "3") into descriptive labels.
#
# Args:
#   digit: A vector of class codes. Values are coerced with as.character(),
#     so integer and factor codes are matched by their printed value.
#
# Returns:
#   An unnamed character vector the same length as `digit` holding
#   "first class", "second class" or "third class". Codes with no label
#   (including NA) map to NA_character_ rather than NULL, so the result
#   always has a predictable type and length.
digits_to_words <- function(digit) {
  class_labels <- c(
    "1" = "first class",
    "2" = "second class",
    "3" = "third class"
  )
  # Character indexing on a named vector is an exact, vectorized lookup;
  # unknown names yield NA.
  unname(class_labels[as.character(digit)])
}
# Map each class code to its label. vapply() requires every result to be a
# single string, so a malformed mapping raises an error here instead of being
# silently coerced by as.character().
titanic$pclass_words <- vapply(
  titanic$pclass, digits_to_words, character(1),
  USE.NAMES = FALSE
)
# Refit using the descriptive labels as the predictor.
glm(survived ~ pclass_words, family = binomial, data = titanic)
##
## Call: glm(formula = survived ~ pclass_words, family = binomial, data = titanic)
##
## Coefficients:
## (Intercept) pclass_wordssecond class
## 0.4861 -0.7696
## pclass_wordsthird class
## -1.5567
##
## Degrees of Freedom: 1308 Total (i.e. Null); 1306 Residual
## Null Deviance: 1741
## Residual Deviance: 1613 AIC: 1619
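Same thing: only the coefficient labels have changed; the estimates and deviance are identical to the factor model.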
Finally, let’s switch back to integers:
# Translate passenger-class labels back into their numeric rank.
#
# Args:
#   word: A vector of class labels ("first class", "second class",
#     "third class"). Values are coerced with as.character() before matching.
#
# Returns:
#   An unnamed numeric vector the same length as `word` with values 1, 2
#   or 3. Labels that are not recognised (including NA) map to NA_real_
#   rather than NULL, so the result always has a predictable type and length.
words_to_num <- function(word) {
  class_ranks <- c(
    "first class" = 1,
    "second class" = 2,
    "third class" = 3
  )
  # Character indexing on a named vector is an exact, vectorized lookup;
  # unknown names yield NA.
  unname(class_ranks[as.character(word)])
}
# Map each label back to its numeric rank. vapply() requires every result to
# be a single number, so a malformed mapping raises a clear error at this line.
titanic$class_ordinal <- vapply(
  titanic$pclass_words, words_to_num, numeric(1),
  USE.NAMES = FALSE
)
# Refit with the numeric column as the predictor.
glm(survived ~ class_ordinal, family = binomial, data = titanic)
##
## Call: glm(formula = survived ~ class_ordinal, family = binomial, data = titanic)
##
## Coefficients:
## (Intercept) class_ordinal
## 1.268 -0.779
##
## Degrees of Freedom: 1308 Total (i.e. Null); 1307 Residual
## Null Deviance: 1741
## Residual Deviance: 1613 AIC: 1617