# Read the passenger table from disk, then keep only the rows in which the
# passenger class (pclass) is recorded.
titanic <- read.csv(file = "titanic.csv")
titanic <- titanic[!is.na(titanic$pclass), ]

We can run the logistic regression by treating class as an integer:

# Logistic regression of survival on passenger class; pclass enters the model
# as it was read from the file.
glm(formula = survived ~ pclass, family = binomial, data = titanic)
## 
## Call:  glm(formula = survived ~ pclass, family = binomial, data = titanic)
## 
## Coefficients:
## (Intercept)       pclass  
##       1.268       -0.779  
## 
## Degrees of Freedom: 1308 Total (i.e. Null);  1307 Residual
## Null Deviance:       1741 
## Residual Deviance: 1613  AIC: 1617

Or we can treat class as a factor:

# Same regression, but with pclass wrapped in factor() so that each class
# beyond the first gets its own coefficient.
glm(formula = survived ~ factor(pclass), family = binomial, data = titanic)
## 
## Call:  glm(formula = survived ~ factor(pclass), family = binomial, data = titanic)
## 
## Coefficients:
##     (Intercept)  factor(pclass)2  factor(pclass)3  
##          0.4861          -0.7696          -1.5567  
## 
## Degrees of Freedom: 1308 Total (i.e. Null);  1306 Residual
## Null Deviance:       1741 
## Residual Deviance: 1613  AIC: 1619

Let’s now convert pclass to character and see what happens:

# Overwrite pclass with its character representation, then show the first ten
# entries to confirm the change of type.
titanic$pclass <- as.character(titanic$pclass)
titanic$pclass[seq_len(10)]
##  [1] "1" "1" "1" "1" "1" "1" "1" "1" "1" "1"

As you can see, the digits are now in quotes, indicating that R won’t treat them as integers anymore.

# Refit the regression now that pclass holds character strings.
glm(formula = survived ~ pclass, family = binomial, data = titanic)
## 
## Call:  glm(formula = survived ~ pclass, family = binomial, data = titanic)
## 
## Coefficients:
## (Intercept)      pclass2      pclass3  
##      0.4861      -0.7696      -1.5567  
## 
## Degrees of Freedom: 1308 Total (i.e. Null);  1306 Residual
## Null Deviance:       1741 
## Residual Deviance: 1613  AIC: 1619

This gives exactly the same fit as converting pclass to a factor: when a predictor is a character vector, R treats it as categorical.

Now, let’s switch from “1” to “first class”, “2” to “second class”, etc:

# Translate passenger-class codes into descriptive labels.
#
# digit: class code(s) "1", "2" or "3". Numbers are accepted as well because
#        the input is converted to character before the lookup. May be a
#        vector of any length.
# Returns an unnamed character vector the same length as `digit` containing
# "first class", "second class" or "third class", with NA for any code that
# is not recognised.
digits_to_words <- function(digit) {
  class_labels <- c(
    "1" = "first class",
    "2" = "second class",
    "3" = "third class"
  )
  # Indexing by name is vectorised and yields NA for unknown codes. A
  # switch() without a default returns NULL in that case, which
  # sapply()/as.character() would turn into the literal string "NULL".
  unname(class_labels[as.character(digit)])
}

# Translate every class code to its label. vapply() (rather than sapply())
# guarantees an unnamed character vector and stops with an error if any
# lookup does not return exactly one string.
titanic$pclass_words <- vapply(
  titanic$pclass, digits_to_words, character(1), USE.NAMES = FALSE
)
# Refit the logistic regression with the text labels as the predictor.
glm(survived ~ pclass_words, data = titanic, family = binomial)
## 
## Call:  glm(formula = survived ~ pclass_words, family = binomial, data = titanic)
## 
## Coefficients:
##              (Intercept)  pclass_wordssecond class  
##                   0.4861                   -0.7696  
##  pclass_wordsthird class  
##                  -1.5567  
## 
## Degrees of Freedom: 1308 Total (i.e. Null);  1306 Residual
## Null Deviance:       1741 
## Residual Deviance: 1613  AIC: 1619

Same thing.

Finally, let’s switch back to integers:

# Translate passenger-class labels back into numeric class codes.
#
# word: class label(s) "first class", "second class" or "third class"; the
#       input is converted to character before the lookup. May be a vector
#       of any length.
# Returns an unnamed numeric (double) vector the same length as `word`
# containing 1, 2 or 3, with NA for any label that is not recognised.
words_to_num <- function(word) {
  class_codes <- c(
    "first class" = 1,
    "second class" = 2,
    "third class" = 3
  )
  # Indexing by name is vectorised and yields NA for unknown labels. A
  # switch() without a default returns NULL in that case, which
  # sapply()/as.numeric() cannot convert and so fails with an error.
  unname(class_codes[as.character(word)])
}

# Map every label back to its numeric code. vapply() (rather than sapply())
# guarantees an unnamed numeric vector and stops with an error if any lookup
# does not return exactly one number.
titanic$class_ordinal <- vapply(
  titanic$pclass_words, words_to_num, numeric(1), USE.NAMES = FALSE
)
# Refit the logistic regression with the numeric codes as the predictor.
glm(survived ~ class_ordinal, data = titanic, family = binomial)
## 
## Call:  glm(formula = survived ~ class_ordinal, family = binomial, data = titanic)
## 
## Coefficients:
##   (Intercept)  class_ordinal  
##         1.268         -0.779  
## 
## Degrees of Freedom: 1308 Total (i.e. Null);  1307 Residual
## Null Deviance:       1741 
## Residual Deviance: 1613  AIC: 1617