## You can often find the code for built in functions by typing the name
## without any arguments, but this doesn't always work

> hist
function (x, ...) 
UseMethod("hist")
<environment: namespace:graphics>

## this does seem to work

> hist.default
function (x, breaks = "Sturges", freq = NULL, probability = !freq, 
    include.lowest = TRUE, right = TRUE, density = NULL, angle = 45, 
    col = NULL, border = NULL, main = paste("Histogram of", xname), 
    xlim = range(breaks), ylim = NULL, xlab = xname, ylab, axes = TRUE, 
    plot = TRUE, labels = FALSE, nclass = NULL, ...) 
{
    if (!is.numeric(x)) 
        stop("`x' must be numeric")
    xname <- deparse(substitute(x))
    n <- length(x <- x[!is.na(x)])
    use.br <- !missing(breaks)
    if (use.br) {
        if (!missing(nclass)) 
            warning("`nclass' not used when `breaks' specified")
    }
    else if (!is.null(nclass) && length(nclass) == 1) 
        breaks <- nclass
    use.br <- use.br && (nB <- length(breaks)) > 1
    if (use.br) 
        breaks <- sort(breaks)
    else {
        if (!include.lowest) {
            include.lowest <- TRUE
            warning("include.lowest ignored as `breaks' is not a vector")
        }
        if (is.character(breaks)) {
            breaks <- match.arg(tolower(breaks), c("sturges", 
                "fd", "freedman-diaconis", "scott"))
            breaks <- switch(breaks, sturges = nclass.Sturges(x), 
                "freedman-diaconis" = , fd = nclass.FD(x), scott = nclass.scott(x), 
                stop("Unknown breaks algorithm"))
        }
        else if (is.function(breaks)) {
            breaks <- breaks(x)
        }
        if (!is.numeric(breaks) || is.na(breaks) || breaks < 
            2) 
            stop("invalid number of breaks")
        breaks <- pretty(range(x), n = breaks, min.n = 1)
        nB <- length(breaks)
        if (nB <= 1) 
            stop(paste("hist.default: pretty() error, breaks=", 
                format(breaks)))
    }
    h <- diff(breaks)
    equidist <- !use.br || diff(range(h)) < 1e-07 * mean(h)
    if (!use.br && any(h <= 0)) 
        stop("not strictly increasing `breaks'.")
    if (is.null(freq)) {
        freq <- if (!missing(probability)) 
            !as.logical(probability)
        else equidist
    }
    else if (!missing(probability) && any(probability == freq)) 
        stop("`probability' is an alias for `!freq', however they differ.")
    diddle <- 1e-07 * median(diff(breaks))
    fuzz <- if (right) 
        c(if (include.lowest) -diddle else diddle, rep.int(diddle, 
            length(breaks) - 1))
    else c(rep.int(-diddle, length(breaks) - 1), if (include.lowest) diddle else -diddle)
    fuzzybreaks <- breaks + fuzz
    h <- diff(fuzzybreaks)
    storage.mode(x) <- "double"
    storage.mode(fuzzybreaks) <- "double"
    counts <- .C("bincount", x, as.integer(n), fuzzybreaks, as.integer(nB), 
        counts = integer(nB - 1), right = as.logical(right), 
        include = as.logical(include.lowest), naok = FALSE, NAOK = FALSE, 
        DUP = FALSE, PACKAGE = "base")$counts
    if (any(counts < 0)) 
        stop("negative `counts'. Internal Error in C-code for \"bincount\"")
    if (sum(counts) < n) 
        stop("some `x' not counted; maybe `breaks' do not span range of `x'")
    dens <- counts/(n * h)
    mids <- 0.5 * (breaks[-1] + breaks[-nB])
    r <- structure(list(breaks = breaks, counts = counts, intensities = dens, 
        density = dens, mids = mids, xname = xname, equidist = equidist), 
        class = "histogram")
    if (plot) {
        plot(r, freq = freq, col = col, border = border, angle = angle, 
            density = density, main = main, xlim = xlim, ylim = ylim, 
            xlab = xlab, ylab = ylab, axes = axes, labels = labels, 
            ...)
        invisible(r)
    }
    else r
}
<environment: namespace:graphics>

## Some simple examples from MASS using character strings

> paste(c("X","Y"),1:4)
[1] "X 1" "Y 2" "X 3" "Y 4"
> paste(c("X","Y"),1:4, sep="")
[1] "X1" "Y2" "X3" "Y4"
> paste(c("X","Y"),1:4, sep="",collapse="+")
[1] "X1+Y2+X3+Y4"
> substring("My name is Nancy", 12, 100)
[1] "Nancy"
> substring("My name is Nancy", 1,5)
[1] "My na"
> grep("na$",state.name)
[1]  3 14 18 26 33 40
> regexpr("na$",state.name)
 [1] -1 -1  6 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1  6 -1 -1 -1  8 -1 -1 -1 -1 -1 -1 -1  6 -1 -1 -1 -1 -1
[32] -1 13 -1 -1 -1 -1 -1 -1 13 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
attr(,"match.length")
 [1] -1 -1  2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1  2 -1 -1 -1  2 -1 -1 -1 -1 -1 -1 -1  2 -1 -1 -1 -1 -1
[32] -1  2 -1 -1 -1 -1 -1 -1  2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1

## regexpr shows where the match occurs (e.g. in letter 6 of Arizona),
## and how many characters matched (here is is always 2)

> d <- date()
> d
[1] "Fri Jan 28 13:32:56 2005"
> cat("Today's date is:", substring(d,1,10), substring(d,25,28), "\n")

# "\n" is for line feed

Today's date is: Fri Jan 28  
> cat("Today's date is:", substring(d,1,10), substring(d,21,25), "\n")
Today's date is: Fri Jan 28 2005 
> cat("Today's date is:", substring(d,1,10), substring(d,21,25))
Today's date is: Fri Jan 28 2005> a <- c (4, 7, 8)

> a
[1] 4 7 8
> if(a < 10) cat("yes\n")
yes
Warning message: 
the condition has length > 1 and only the first element will be used in: if (a < 10) cat("yes\n") 
> a<10
[1] TRUE TRUE TRUE
>  if (all(a<10)) cat("yes\n")
yes

## R seems to behave differently than is claimed in the book
> powers.of.pi <- pi^(-1:10)
> powers.of.pi
 [1] 3.183099e-01 1.000000e+00 3.141593e+00 9.869604e+00 3.100628e+01 9.740909e+01 3.060197e+02
 [8] 9.613892e+02 3.020293e+03 9.488531e+03 2.980910e+04 9.364805e+04
> cat(powers.of.pi) 
0.3183099 1 3.141593 9.869604 31.00628 97.4091 306.0197 961.3892 3020.293 9488.531 29809.1 93648.05> cat(powers.of.pi, "\n")
0.3183099 1 3.141593 9.869604 31.00628 97.4091 306.0197 961.3892 3020.293 9488.531 29809.1 93648.05 
> cat(powers.of.pi, "\n")
0.3183099 1 3.141593 9.869604 31.00628 97.4091 306.0197 961.3892 3020.293 9488.531 29809.1 93648.05 
> format(powers.of.pi)
 [1] "3.183099e-01" "1.000000e+00" "3.141593e+00" "9.869604e+00" "3.100628e+01" "9.740909e+01"
 [7] "3.060197e+02" "9.613892e+02" "3.020293e+03" "9.488531e+03" "2.980910e+04" "9.364805e+04"
> cat(format(powers.of.pi), "\n", sep=" ")
3.183099e-01 1.000000e+00 3.141593e+00 9.869604e+00 3.100628e+01 9.740909e+01 3.060197e+02 9.613892e+02 3.020293e+03 9.488531e+03 2.980910e+04 9.364805e+04 

## Here's a simple example (very simple) of a function

> stirling <- function(n){
+ sqrt(2*pi)*n^(n+.5)*exp(-n)}

> stirling(12)
[1] 475687486
> factorial(12)
[1] 479001600
> stirling(1:12)
 [1] 9.221370e-01 1.919004e+00 5.836210e+00 2.350618e+01 1.180192e+02 7.100782e+02 4.980396e+03
 [8] 3.990240e+04 3.595369e+05 3.598696e+06 3.961563e+07 4.756875e+08
> stirling("a")
Error in n + 0.5 : non-numeric argument to binary operator
> stirling(matrix(1:12,3,4))
         [,1]      [,2]       [,3]      [,4]
[1,] 0.922137  23.50618   4980.396   3598696
[2,] 1.919004 118.01917  39902.395  39615625
[3,] 5.836210 710.07818 359536.873 475687486

##  some  functions  to  compute  roots  of  quadratic
##   equations, taken  from  Radford  Neal's web  site

> quadratic
function (a,b,c) { 
d <- b^2 - 4*a*c
  c ((-b+sqrt(d))/(2*a), (-b-sqrt(d))/(2*a))
}

> quadratic(1,0,-2)
[1]  1.414214 -1.414214

> quadratic(1,0,2)
[1] NaN NaN
Warning messages: 
1: NaNs produced in: sqrt(d) 
2: NaNs produced in: sqrt(d) > quadratic(1,0,2)

## A better one, note use of if

> quadratic2
function (a,b,c) { 
d <- b^2 - 4*a*c 
  if (d<0) stop("Discriminant is negative")
c ((-b+sqrt(d))/(2*a), (-b-sqrt(d))/(2*a))
}
> quadratic2(1,0,2)
Error in quadratic2(1, 0, 2) : Discriminant is negative

## and one that can handle imaginary

> quadratic3
function (a,b,c) { 
d <- b^2 - 4*a*c 
  c ((-b+sqrt(d+0i))/(2*a), (-b-sqrt(d+0i))/(2*a)) 
}
> quadratic3(1,0,2)
[1] 0+1.414214i 0-1.414214i

## Note that the 'd' inside the quadratic functions is hidden

> d
[1] "Fri Jan 28 13:32:56 2005"

## But if our function needs a variable that doesn't get explicitly
## passed or created within the function it will look in the workspace

> a
[1] 4 7 8
> quad4 <- function(b,c){
+  d <- b^2 -4 * a * c
+ c((-b+sqrt(d+0i))/(2*a),(-b-sqrt(d+0i))/(2*a))
+ }
> a
[1] 4 7 8
> quad4(0,2)
[1] 0+0.7071068i 0+0.5345225i 0+0.5000000i 0-0.7071068i 0-0.5345225i 0-0.5000000i
> a<-1
> quad4(0,2)
[1] 0+1.414214i 0-1.414214i

##  a function  to  compute  the  t-statistic

> twosam
function(y1, y2){
n1 <- length(y1)
n2 <- length(y2)
yb1 <- mean(y1)
yb2 <- mean(y2)
s1 <- var(y1)
s2 <- var(y2)
s <- ((n1-1)*s1+(n2-1)*s2)/(n1+n2-2)
tst <- (yb1-yb2)/sqrt(s*((1/n1)+(1/n2)))
tst
}

> x <- rnorm(10)
> y <- rnorm(12, 1, 5)

> twosam(x,y)
[1] 0.1818949
> y <- rnorm(12, 1, 1)
> twosam(x,y)
[1] -1.627642
> y <- rnorm(12, 1, .1)
> twosam(x,y)
[1] -4.603017
> ?rnorm
> rnorm(10, 1, 5)
 [1]   1.2190319  -4.3445181  -2.0460187 -11.0546433  10.0058498  -7.1116460  -2.2943813
 [8]   3.5509858  -1.3841516  -0.6895883
> rnorm(10, sd=5, mean= 1)
 [1] -0.79458948  0.36657028  0.07824783 -3.85781706  1.03175870  1.88523517  6.40020338
 [8] -2.03755779 -0.88326503 -4.45951335

##  the  t-test  code  as  amended  by  the  class
##

> ttest <- function(y1, y2, test="two-sided", alpha=0.05){
+ n1 <- length(y1); n2 <- length(y2)
+ ndf <- n1+n2-2
+ s2 <- ((n1 - 1) * var(y1) + (n2 -1) * var(y2))/ndf
+ tstat  <- (mean(y1) - mean(y2) )/sqrt(s2 * (1/n1 + 1/n2))
+ tail.area <- switch(test,
+ "two-sided"=2*(1-pt(abs(tstat),ndf)),
+ "lower" = pt(tstat,ndf),
+ "upper" = 1-pt(tstat,ndf)
+ )
+ list(tstat=tstat, df=ndf, reject=if(!is.null(tail.area)) tail.area < alpha,
+ 	tail.area=tail.area)
+ }
> ttest(x,y)
$tstat
[1] -4.603017

$df
[1] 20

$reject
[1] TRUE

$tail.area
[1] 0.0001721337

>