R : Copyright 2004, The R Foundation for Statistical Computing Version 1.9.1 (2004-06-21), ISBN 3-900051-00-3 R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for a HTML browser interface to help. Type 'q()' to quit R. [Previously saved workspace restored] > rm(list=ls()) > # some subsetting features > a <- 1:6 > b <- c("hi", "lo") > ab <- c(a,b) > is.character(ab) [1] TRUE > as.numeric(ab) [1] 1 2 3 4 5 6 NA NA Warning message: NAs introduced by coercion > nab <- .Last.value > nab[is.na(nab)] <- 0 > nab [1] 1 2 3 4 5 6 0 0 > library(MASS) > data(hills) > hills[1:3,] dist climb time Greenmantle 2.5 650 16.083 Carnethy 6.0 2500 48.350 Craig Dunain 6.0 900 33.650 > dim(hills) [1] 35 3 > hills[-4:-35,] dist climb time Greenmantle 2.5 650 16.083 Carnethy 6.0 2500 48.350 Craig Dunain 6.0 900 33.650 > hills[,-2:-3] [1] 2.5 6.0 6.0 7.5 8.0 8.0 16.0 6.0 5.0 6.0 28.0 5.0 9.5 6.0 4.5 10.0 14.0 3.0 4.5 5.5 3.0 3.5 [23] 6.0 2.0 3.0 4.0 6.0 5.0 6.5 5.0 10.0 6.0 18.0 4.5 20.0 > is.matrix(.Last.value) [1] FALSE > is.matrix(hills[,-2:-3,drop=FALSE]) [1] FALSE > is.matrix(hills[,-2:-3,,drop=FALSE]) [1] FALSE > ?drop > mymat<-matrix(a,nr=2,byrow=T) > mymat [,1] [,2] [,3] [1,] 1 2 3 [2,] 4 5 6 > mymat[2,] [1] 4 5 6 > is.matrix(mymat[2,]) [1] FALSE > mymat[-1,] [1] 4 5 6 > is.matrix(mymat[-1,]) [1] FALSE > is.matrix(mymat[-1,drop=FALSE]) [1] FALSE > ## when one row is dropped the result is a 1x3 matrix which is converted > ## to a vector no matter what I try > ## following p.29 of the book more closely: > ?array > myarr <- array(c(a,a*a),dim=c(2,3,2)) > myarr , , 1 [,1] [,2] [,3] [1,] 1 3 5 [2,] 2 4 6 , , 2 [,1] [,2] [,3] [1,] 1 9 25 [2,] 4 16 36 > myarr[,,1] [,1] [,2] [,3] [1,] 1 3 5 [2,] 2 4 6 > is.matrix(.Last.value) [1] TRUE > myarr[,,1,drop=FALSE] , , 1 [,1] [,2] [,3] [1,] 1 3 5 [2,] 2 4 6 > is.matrix(.Last.value) [1] FALSE > is.array(myarr[,,1,drop=FALSE]) [1] TRUE > ## so it works for arrays of dimension > 2, but not for matrices! > ## linear regression and the X matrix > hills.lm <- lm(time~dist+climb, data=hills) > ## another way to do this is lm(hills$time ~ hills$dist + hills$climb) > ## another way to do this is lm(formula=time~dist+climb, data=hills) > summary(hills.lm) Call: lm(formula = time ~ dist + climb, data = hills) Residuals: Min 1Q Median 3Q Max -16.215 -7.129 -1.186 2.371 65.121 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) -8.992039 4.302734 -2.090 0.0447 * dist 6.217956 0.601148 10.343 9.86e-12 *** climb 0.011048 0.002051 5.387 6.45e-06 *** --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 Residual standard error: 14.68 on 32 degrees of freedom Multiple R-Squared: 0.9191, Adjusted R-squared: 0.914 F-statistic: 181.7 on 2 and 32 DF, p-value: < 2.2e-16 > X <- model.matrix(hills.lm) > ## X is what you would expect, a 35 x 3 matrix with 1s in the first column > X[1:4,] (Intercept) dist climb Greenmantle 1 2.5 650 Carnethy 1 6.0 2500 Craig Dunain 1 6.0 900 Ben Rha 1 7.5 800 > ## lm has tons of other output, as well as many methods > > ## analysis of variance > ycat <- c(rnorm(10)+1,rnorm(10)+2,rnorm(10)+3) # a vector of length 30 in 3 groups > alphacat <- factor(rep(1,10),rep(2,10),rep(3,10)) # a factor to index the groups > lmcat <- lm(ycat ~ alphacat) Error in model.frame(formula, rownames, variables, varnames, extras, extranames, : variable lengths differ > length(ycat) [1] 30 > length(alphacat) [1] 10 > alphacat <- factor(c(rep(1,10),rep(2,10),rep(3,10))) # a factor to index the groups > lmcat <- lm(ycat ~ alphacat) > summary(lmcat) Call: lm(formula = ycat ~ alphacat) Residuals: Min 1Q Median 3Q Max -2.3820 -0.6015 -0.1436 0.4603 1.9113 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 1.2790 0.2933 4.360 0.000170 *** alphacat2 0.2549 0.4148 0.614 0.544099 alphacat3 2.2007 0.4148 5.305 1.34e-05 *** --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 Residual standard error: 0.9276 on 27 degrees of freedom Multiple R-Squared: 0.5551, Adjusted R-squared: 0.5221 F-statistic: 16.84 on 2 and 27 DF, p-value: 1.787e-05 > ## note that we have estimates of mu, alpha2 and alpha3 > mean(ycat) [1] 2.097566 > ## and that muhat is not ybar > model.matrix(lmcat) (Intercept) alphacat2 alphacat3 1 1 0 0 2 1 0 0 3 1 0 0 4 1 0 0 5 1 0 0 6 1 0 0 7 1 0 0 8 1 0 0 9 1 0 0 10 1 0 0 11 1 1 0 12 1 1 0 13 1 1 0 14 1 1 0 15 1 1 0 16 1 1 0 17 1 1 0 18 1 1 0 19 1 1 0 20 1 1 0 21 1 0 1 22 1 0 1 23 1 0 1 24 1 0 1 25 1 0 1 26 1 0 1 27 1 0 1 28 1 0 1 29 1 0 1 30 1 0 1 attr(,"assign") [1] 0 1 1 attr(,"contrasts") attr(,"contrasts")$alphacat [1] "contr.treatment" > ## now I will force the summation constraint > options(contr=c("contr.sum","contr.poly")) > lmcat2 <- lm(ycat~alphacat) > summary(lmcat2) Call: lm(formula = ycat ~ alphacat) Residuals: Min 1Q Median 3Q Max -2.3820 -0.6015 -0.1436 0.4603 1.9113 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 1.2790 0.2933 4.360 0.000170 *** alphacat2 0.2549 0.4148 0.614 0.544099 alphacat3 2.2007 0.4148 5.305 1.34e-05 *** --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 Residual standard error: 0.9276 on 27 degrees of freedom Multiple R-Squared: 0.5551, Adjusted R-squared: 0.5221 F-statistic: 16.84 on 2 and 27 DF, p-value: 1.787e-05 > options(contrasts=c("contr.sum","contr.poly")) > lmcat2 <- lm(ycat~alphacat) > summary(lmcat2) Call: lm(formula = ycat ~ alphacat) Residuals: Min 1Q Median 3Q Max -2.3820 -0.6015 -0.1436 0.4603 1.9113 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 2.0976 0.1694 12.386 1.2e-12 *** alphacat1 -0.8185 0.2395 -3.418 0.00202 ** alphacat2 -0.5637 0.2395 -2.353 0.02613 * --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 Residual standard error: 0.9276 on 27 degrees of freedom Multiple R-Squared: 0.5551, Adjusted R-squared: 0.5221 F-statistic: 16.84 on 2 and 27 DF, p-value: 1.787e-05 > ## Now we have estimates of mu, alpha1 and alpha2 (alpha-hat3 is fixed by the constraint) > mean(y) Error in mean(y) : Object "y" not found > mean(ycat) [1] 2.097566 > tapply(ycat,alphacat,mean) 1 2 3 1.279040 1.533907 3.479750 > .Last.value - mean(ycat) 1 2 3 -0.8185260 -0.5636583 1.3821843 > save.image(file="/Users/dfraser/Nancy/teaching/410/jan13/jan13.rda") > q() >