R : Copyright 2004, The R Foundation for Statistical Computing
Version 1.9.1  (2004-06-21), ISBN 3-900051-00-3

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for a HTML browser interface to help.
Type 'q()' to quit R.

[Previously saved workspace restored]

> rm(list=ls())
> # some subsetting features
> a <- 1:6
> b <- c("hi", "lo")
> ab <- c(a,b)
> is.character(ab)
[1] TRUE
> as.numeric(ab)
[1]  1  2  3  4  5  6 NA NA
Warning message: 
NAs introduced by coercion 
> nab <- .Last.value
> nab[is.na(nab)] <- 0
> nab
[1] 1 2 3 4 5 6 0 0
> library(MASS)
> data(hills)
> hills[1:3,]
             dist climb   time
Greenmantle   2.5   650 16.083
Carnethy      6.0  2500 48.350
Craig Dunain  6.0   900 33.650
> dim(hills)
[1] 35  3
> hills[-4:-35,]
             dist climb   time
Greenmantle   2.5   650 16.083
Carnethy      6.0  2500 48.350
Craig Dunain  6.0   900 33.650
> hills[,-2:-3]
 [1]  2.5  6.0  6.0  7.5  8.0  8.0 16.0  6.0  5.0  6.0 28.0  5.0  9.5  6.0  4.5 10.0 14.0  3.0  4.5  5.5  3.0  3.5
[23]  6.0  2.0  3.0  4.0  6.0  5.0  6.5  5.0 10.0  6.0 18.0  4.5 20.0
> is.matrix(.Last.value)
[1] FALSE
> is.matrix(hills[,-2:-3,drop=FALSE])
[1] FALSE
> is.matrix(hills[,-2:-3,,drop=FALSE])
[1] FALSE
> ?drop
> mymat<-matrix(a,nr=2,byrow=T)
> mymat
     [,1] [,2] [,3]
[1,]    1    2    3
[2,]    4    5    6
> mymat[2,]
[1] 4 5 6
> is.matrix(mymat[2,])
[1] FALSE
> mymat[-1,]
[1] 4 5 6
> is.matrix(mymat[-1,])
[1] FALSE
> is.matrix(mymat[-1,drop=FALSE])
[1] FALSE
> ## when one row is dropped the result is a 1x3 matrix which is converted
> ## to a vector no matter what I try
> ## following p.29 of the book more closely:
> ?array
> myarr <- array(c(a,a*a),dim=c(2,3,2))
> myarr
, , 1

     [,1] [,2] [,3]
[1,]    1    3    5
[2,]    2    4    6

, , 2

     [,1] [,2] [,3]
[1,]    1    9   25
[2,]    4   16   36

> myarr[,,1]
     [,1] [,2] [,3]
[1,]    1    3    5
[2,]    2    4    6
> is.matrix(.Last.value)
[1] TRUE
> myarr[,,1,drop=FALSE]
, , 1

     [,1] [,2] [,3]
[1,]    1    3    5
[2,]    2    4    6

> is.matrix(.Last.value)
[1] FALSE
> is.array(myarr[,,1,drop=FALSE])
[1] TRUE
> ## so it works for arrays of dimension > 2, but not for matrices!
> ## linear regression and the X matrix
> hills.lm <- lm(time~dist+climb, data=hills)
> ## another way to do this is lm(hills$time ~ hills$dist + hills$climb)
> ## another way to do this is lm(formula=time~dist+climb, data=hills)
> summary(hills.lm)

Call:
lm(formula = time ~ dist + climb, data = hills)

Residuals:
    Min      1Q  Median      3Q     Max 
-16.215  -7.129  -1.186   2.371  65.121 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept) -8.992039   4.302734  -2.090   0.0447 *  
dist         6.217956   0.601148  10.343 9.86e-12 ***
climb        0.011048   0.002051   5.387 6.45e-06 ***
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 

Residual standard error: 14.68 on 32 degrees of freedom
Multiple R-Squared: 0.9191,	Adjusted R-squared: 0.914 
F-statistic: 181.7 on 2 and 32 DF,  p-value: < 2.2e-16 

> X <- model.matrix(hills.lm)
> ## X is what you would expect, a 35 x 3 matrix with 1s in the first column
> X[1:4,]
             (Intercept) dist climb
Greenmantle            1  2.5   650
Carnethy               1  6.0  2500
Craig Dunain           1  6.0   900
Ben Rha                1  7.5   800
> ## lm has tons of other output, as well as many methods
> 
> ## analysis of variance
> ycat <- c(rnorm(10)+1,rnorm(10)+2,rnorm(10)+3) # a vector of length 30 in 3 groups 
> alphacat <- factor(rep(1,10),rep(2,10),rep(3,10)) # a factor to index the groups
> lmcat <- lm(ycat ~ alphacat)
Error in model.frame(formula, rownames, variables, varnames, extras, extranames,  : 
	variable lengths differ
> length(ycat)
[1] 30
> length(alphacat)
[1] 10
> alphacat <- factor(c(rep(1,10),rep(2,10),rep(3,10))) # a factor to index the groups
> lmcat <- lm(ycat ~ alphacat)
> summary(lmcat)

Call:
lm(formula = ycat ~ alphacat)

Residuals:
    Min      1Q  Median      3Q     Max 
-2.3820 -0.6015 -0.1436  0.4603  1.9113 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)   1.2790     0.2933   4.360 0.000170 ***
alphacat2     0.2549     0.4148   0.614 0.544099    
alphacat3     2.2007     0.4148   5.305 1.34e-05 ***
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 

Residual standard error: 0.9276 on 27 degrees of freedom
Multiple R-Squared: 0.5551,	Adjusted R-squared: 0.5221 
F-statistic: 16.84 on 2 and 27 DF,  p-value: 1.787e-05 

> ## note that we have estimates of mu, alpha2 and alpha3
> mean(ycat)
[1] 2.097566
> ## and that muhat is not ybar
> model.matrix(lmcat)
   (Intercept) alphacat2 alphacat3
1            1         0         0
2            1         0         0
3            1         0         0
4            1         0         0
5            1         0         0
6            1         0         0
7            1         0         0
8            1         0         0
9            1         0         0
10           1         0         0
11           1         1         0
12           1         1         0
13           1         1         0
14           1         1         0
15           1         1         0
16           1         1         0
17           1         1         0
18           1         1         0
19           1         1         0
20           1         1         0
21           1         0         1
22           1         0         1
23           1         0         1
24           1         0         1
25           1         0         1
26           1         0         1
27           1         0         1
28           1         0         1
29           1         0         1
30           1         0         1
attr(,"assign")
[1] 0 1 1
attr(,"contrasts")
attr(,"contrasts")$alphacat
[1] "contr.treatment"

> ## now I will force the summation constraint
> options(contr=c("contr.sum","contr.poly"))
> lmcat2 <- lm(ycat~alphacat)
> summary(lmcat2)

Call:
lm(formula = ycat ~ alphacat)

Residuals:
    Min      1Q  Median      3Q     Max 
-2.3820 -0.6015 -0.1436  0.4603  1.9113 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)   1.2790     0.2933   4.360 0.000170 ***
alphacat2     0.2549     0.4148   0.614 0.544099    
alphacat3     2.2007     0.4148   5.305 1.34e-05 ***
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 

Residual standard error: 0.9276 on 27 degrees of freedom
Multiple R-Squared: 0.5551,	Adjusted R-squared: 0.5221 
F-statistic: 16.84 on 2 and 27 DF,  p-value: 1.787e-05 

> options(contrasts=c("contr.sum","contr.poly"))
> lmcat2 <- lm(ycat~alphacat)
> summary(lmcat2)

Call:
lm(formula = ycat ~ alphacat)

Residuals:
    Min      1Q  Median      3Q     Max 
-2.3820 -0.6015 -0.1436  0.4603  1.9113 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)   2.0976     0.1694  12.386  1.2e-12 ***
alphacat1    -0.8185     0.2395  -3.418  0.00202 ** 
alphacat2    -0.5637     0.2395  -2.353  0.02613 *  
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 

Residual standard error: 0.9276 on 27 degrees of freedom
Multiple R-Squared: 0.5551,	Adjusted R-squared: 0.5221 
F-statistic: 16.84 on 2 and 27 DF,  p-value: 1.787e-05 

> ## Now we have estimates of mu, alpha1 and alpha2 (alpha-hat3 is fixed by the constraint)
> mean(y)
Error in mean(y) : Object "y" not found
> mean(ycat)
[1] 2.097566
> tapply(ycat,alphacat,mean)
       1        2        3 
1.279040 1.533907 3.479750 
> .Last.value - mean(ycat)
         1          2          3 
-0.8185260 -0.5636583  1.3821843 
> save.image(file="/Users/dfraser/Nancy/teaching/410/jan13/jan13.rda")
> q()
>