## You can often find the code for built in functions by typing the name ## without any arguments, but this doesn't always work > hist function (x, ...) UseMethod("hist") ## this does seem to work > hist.default function (x, breaks = "Sturges", freq = NULL, probability = !freq, include.lowest = TRUE, right = TRUE, density = NULL, angle = 45, col = NULL, border = NULL, main = paste("Histogram of", xname), xlim = range(breaks), ylim = NULL, xlab = xname, ylab, axes = TRUE, plot = TRUE, labels = FALSE, nclass = NULL, ...) { if (!is.numeric(x)) stop("`x' must be numeric") xname <- deparse(substitute(x)) n <- length(x <- x[!is.na(x)]) use.br <- !missing(breaks) if (use.br) { if (!missing(nclass)) warning("`nclass' not used when `breaks' specified") } else if (!is.null(nclass) && length(nclass) == 1) breaks <- nclass use.br <- use.br && (nB <- length(breaks)) > 1 if (use.br) breaks <- sort(breaks) else { if (!include.lowest) { include.lowest <- TRUE warning("include.lowest ignored as `breaks' is not a vector") } if (is.character(breaks)) { breaks <- match.arg(tolower(breaks), c("sturges", "fd", "freedman-diaconis", "scott")) breaks <- switch(breaks, sturges = nclass.Sturges(x), "freedman-diaconis" = , fd = nclass.FD(x), scott = nclass.scott(x), stop("Unknown breaks algorithm")) } else if (is.function(breaks)) { breaks <- breaks(x) } if (!is.numeric(breaks) || is.na(breaks) || breaks < 2) stop("invalid number of breaks") breaks <- pretty(range(x), n = breaks, min.n = 1) nB <- length(breaks) if (nB <= 1) stop(paste("hist.default: pretty() error, breaks=", format(breaks))) } h <- diff(breaks) equidist <- !use.br || diff(range(h)) < 1e-07 * mean(h) if (!use.br && any(h <= 0)) stop("not strictly increasing `breaks'.") if (is.null(freq)) { freq <- if (!missing(probability)) !as.logical(probability) else equidist } else if (!missing(probability) && any(probability == freq)) stop("`probability' is an alias for `!freq', however they differ.") diddle <- 1e-07 * median(diff(breaks)) fuzz <- if (right) c(if (include.lowest) -diddle else diddle, rep.int(diddle, length(breaks) - 1)) else c(rep.int(-diddle, length(breaks) - 1), if (include.lowest) diddle else -diddle) fuzzybreaks <- breaks + fuzz h <- diff(fuzzybreaks) storage.mode(x) <- "double" storage.mode(fuzzybreaks) <- "double" counts <- .C("bincount", x, as.integer(n), fuzzybreaks, as.integer(nB), counts = integer(nB - 1), right = as.logical(right), include = as.logical(include.lowest), naok = FALSE, NAOK = FALSE, DUP = FALSE, PACKAGE = "base")$counts if (any(counts < 0)) stop("negative `counts'. Internal Error in C-code for \"bincount\"") if (sum(counts) < n) stop("some `x' not counted; maybe `breaks' do not span range of `x'") dens <- counts/(n * h) mids <- 0.5 * (breaks[-1] + breaks[-nB]) r <- structure(list(breaks = breaks, counts = counts, intensities = dens, density = dens, mids = mids, xname = xname, equidist = equidist), class = "histogram") if (plot) { plot(r, freq = freq, col = col, border = border, angle = angle, density = density, main = main, xlim = xlim, ylim = ylim, xlab = xlab, ylab = ylab, axes = axes, labels = labels, ...) invisible(r) } else r } ## Some simple examples from MASS using character strings > paste(c("X","Y"),1:4) [1] "X 1" "Y 2" "X 3" "Y 4" > paste(c("X","Y"),1:4, sep="") [1] "X1" "Y2" "X3" "Y4" > paste(c("X","Y"),1:4, sep="",collapse="+") [1] "X1+Y2+X3+Y4" > substring("My name is Nancy", 12, 100) [1] "Nancy" > substring("My name is Nancy", 1,5) [1] "My na" > grep("na$",state.name) [1] 3 14 18 26 33 40 > regexpr("na$",state.name) [1] -1 -1 6 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 6 -1 -1 -1 8 -1 -1 -1 -1 -1 -1 -1 6 -1 -1 -1 -1 -1 [32] -1 13 -1 -1 -1 -1 -1 -1 13 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 attr(,"match.length") [1] -1 -1 2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 2 -1 -1 -1 2 -1 -1 -1 -1 -1 -1 -1 2 -1 -1 -1 -1 -1 [32] -1 2 -1 -1 -1 -1 -1 -1 2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ## regexpr shows where the match occurs (e.g. in letter 6 of Arizona), ## and how many characters matched (here is is always 2) > d <- date() > d [1] "Fri Jan 28 13:32:56 2005" > cat("Today's date is:", substring(d,1,10), substring(d,25,28), "\n") # "\n" is for line feed Today's date is: Fri Jan 28 > cat("Today's date is:", substring(d,1,10), substring(d,21,25), "\n") Today's date is: Fri Jan 28 2005 > cat("Today's date is:", substring(d,1,10), substring(d,21,25)) Today's date is: Fri Jan 28 2005> a <- c (4, 7, 8) > a [1] 4 7 8 > if(a < 10) cat("yes\n") yes Warning message: the condition has length > 1 and only the first element will be used in: if (a < 10) cat("yes\n") > a<10 [1] TRUE TRUE TRUE > if (all(a<10)) cat("yes\n") yes ## R seems to behave differently than is claimed in the book > powers.of.pi <- pi^(-1:10) > powers.of.pi [1] 3.183099e-01 1.000000e+00 3.141593e+00 9.869604e+00 3.100628e+01 9.740909e+01 3.060197e+02 [8] 9.613892e+02 3.020293e+03 9.488531e+03 2.980910e+04 9.364805e+04 > cat(powers.of.pi) 0.3183099 1 3.141593 9.869604 31.00628 97.4091 306.0197 961.3892 3020.293 9488.531 29809.1 93648.05> cat(powers.of.pi, "\n") 0.3183099 1 3.141593 9.869604 31.00628 97.4091 306.0197 961.3892 3020.293 9488.531 29809.1 93648.05 > cat(powers.of.pi, "\n") 0.3183099 1 3.141593 9.869604 31.00628 97.4091 306.0197 961.3892 3020.293 9488.531 29809.1 93648.05 > format(powers.of.pi) [1] "3.183099e-01" "1.000000e+00" "3.141593e+00" "9.869604e+00" "3.100628e+01" "9.740909e+01" [7] "3.060197e+02" "9.613892e+02" "3.020293e+03" "9.488531e+03" "2.980910e+04" "9.364805e+04" > cat(format(powers.of.pi), "\n", sep=" ") 3.183099e-01 1.000000e+00 3.141593e+00 9.869604e+00 3.100628e+01 9.740909e+01 3.060197e+02 9.613892e+02 3.020293e+03 9.488531e+03 2.980910e+04 9.364805e+04 ## Here's a simple example (very simple) of a function > stirling <- function(n){ + sqrt(2*pi)*n^(n+.5)*exp(-n)} > stirling(12) [1] 475687486 > factorial(12) [1] 479001600 > stirling(1:12) [1] 9.221370e-01 1.919004e+00 5.836210e+00 2.350618e+01 1.180192e+02 7.100782e+02 4.980396e+03 [8] 3.990240e+04 3.595369e+05 3.598696e+06 3.961563e+07 4.756875e+08 > stirling("a") Error in n + 0.5 : non-numeric argument to binary operator > stirling(matrix(1:12,3,4)) [,1] [,2] [,3] [,4] [1,] 0.922137 23.50618 4980.396 3598696 [2,] 1.919004 118.01917 39902.395 39615625 [3,] 5.836210 710.07818 359536.873 475687486 ## some functions to compute roots of quadratic ## equations, taken from Radford Neal's web site > quadratic function (a,b,c) { d <- b^2 - 4*a*c c ((-b+sqrt(d))/(2*a), (-b-sqrt(d))/(2*a)) } > quadratic(1,0,-2) [1] 1.414214 -1.414214 > quadratic(1,0,2) [1] NaN NaN Warning messages: 1: NaNs produced in: sqrt(d) 2: NaNs produced in: sqrt(d) > quadratic(1,0,2) ## A better one, note use of if > quadratic2 function (a,b,c) { d <- b^2 - 4*a*c if (d<0) stop("Discriminant is negative") c ((-b+sqrt(d))/(2*a), (-b-sqrt(d))/(2*a)) } > quadratic2(1,0,2) Error in quadratic2(1, 0, 2) : Discriminant is negative ## and one that can handle imaginary > quadratic3 function (a,b,c) { d <- b^2 - 4*a*c c ((-b+sqrt(d+0i))/(2*a), (-b-sqrt(d+0i))/(2*a)) } > quadratic3(1,0,2) [1] 0+1.414214i 0-1.414214i ## Note that the 'd' inside the quadratic functions is hidden > d [1] "Fri Jan 28 13:32:56 2005" ## But if our function needs a variable that doesn't get explicitly ## passed or created within the function it will look in the workspace > a [1] 4 7 8 > quad4 <- function(b,c){ + d <- b^2 -4 * a * c + c((-b+sqrt(d+0i))/(2*a),(-b-sqrt(d+0i))/(2*a)) + } > a [1] 4 7 8 > quad4(0,2) [1] 0+0.7071068i 0+0.5345225i 0+0.5000000i 0-0.7071068i 0-0.5345225i 0-0.5000000i > a<-1 > quad4(0,2) [1] 0+1.414214i 0-1.414214i ## a function to compute the t-statistic > twosam function(y1, y2){ n1 <- length(y1) n2 <- length(y2) yb1 <- mean(y1) yb2 <- mean(y2) s1 <- var(y1) s2 <- var(y2) s <- ((n1-1)*s1+(n2-1)*s2)/(n1+n2-2) tst <- (yb1-yb2)/sqrt(s*((1/n1)+(1/n2))) tst } > x <- rnorm(10) > y <- rnorm(12, 1, 5) > twosam(x,y) [1] 0.1818949 > y <- rnorm(12, 1, 1) > twosam(x,y) [1] -1.627642 > y <- rnorm(12, 1, .1) > twosam(x,y) [1] -4.603017 > ?rnorm > rnorm(10, 1, 5) [1] 1.2190319 -4.3445181 -2.0460187 -11.0546433 10.0058498 -7.1116460 -2.2943813 [8] 3.5509858 -1.3841516 -0.6895883 > rnorm(10, sd=5, mean= 1) [1] -0.79458948 0.36657028 0.07824783 -3.85781706 1.03175870 1.88523517 6.40020338 [8] -2.03755779 -0.88326503 -4.45951335 ## the t-test code as amended by the class ## > ttest <- function(y1, y2, test="two-sided", alpha=0.05){ + n1 <- length(y1); n2 <- length(y2) + ndf <- n1+n2-2 + s2 <- ((n1 - 1) * var(y1) + (n2 -1) * var(y2))/ndf + tstat <- (mean(y1) - mean(y2) )/sqrt(s2 * (1/n1 + 1/n2)) + tail.area <- switch(test, + "two-sided"=2*(1-pt(abs(tstat),ndf)), + "lower" = pt(tstat,ndf), + "upper" = 1-pt(tstat,ndf) + ) + list(tstat=tstat, df=ndf, reject=if(!is.null(tail.area)) tail.area < alpha, + tail.area=tail.area) + } > ttest(x,y) $tstat [1] -4.603017 $df [1] 20 $reject [1] TRUE $tail.area [1] 0.0001721337 >