SAS Examples
options linesize=79 pagesize=100; title 'Grades from STA 302: Fall, 1990'; proc format; /* Used to label values of the categorical variables */ value sexfmt 0 = 'Male' 1 = 'Female'; value ethfmt 1 = 'Chinese' 2 = 'European' 3 = 'Other' ; data erindale; infile 'class90.dat'; input sex ethnic quiz1-quiz8 comp1-comp9 midterm final; /* Drop lowest score for quiz & computer */ quizave = ( sum(of quiz1-quiz8) - min(of quiz1-quiz8) ) / 7; compave = ( sum(of comp1-comp9) - min(of comp1-comp9) ) / 8; label ethnic = 'Apparent ethnic background (ancestry)' quizave = 'Quiz Average (drop lowest)' compave = 'Computer Average (drop lowest)'; mark = .3*quizave*10 + .1*compave*10 + .3*midterm + .3*final; label mark = 'Final Mark'; format sex sexfmt.; /* Associates sex & ethnic */ format ethnic ethfmt.; /* with formats defined above */ proc freq; tables sex ethnic; proc means n mean std; var quiz1 -- mark; /* single dash only works with numbered lists, like quiz1-quiz8 */ proc freq; tables sex*ethnic / chisq; proc corr; var final midterm quizave compave; proc ttest; class sex; var mark;
/* samp1.sas: read, basic stats on agsrs.dat */ options linesize = 79; title 'Basic descriptive stats on agsrs.dat'; data srs; infile '/student/jbrunner/public/322s00/AGSRS.DAT' delimiter = ','; input COUNTY $ STATE $ ACRES92 ACRES87 ACRES82 FARMS92 FARMS87 FARMS82 LARGEF92 LARGEF87 LARGEF82 SMALLF92 SMALLF87 SMALLF82; if acres92 = -99 then acres92 = .; /* Assign missing value code */ if acres87 = -99 then acres87 = .; if acres82 = -99 then acres82 = .; label ACRES92 = '# farm acres, 1992' ACRES87 = '# farm acres, 1987' ACRES82 = '# farm acres, 1982' FARMS92 = '# of farms, 1992' FARMS87 = '# of farms, 1987' FARMS82 = '# of farms, 1982' LARGEF92 = '# farms with 1000 acres +, 1992' LARGEF87 = '# farms with 1000 acres +, 1987' LARGEF82 = '# farms with 1000 acres +, 1982' SMALLF92 = '# farms with 9 acres or fewer, 1992' SMALLF87 = '# farms with 9 acres or fewer, 1987' SMALLF82 = '# farms with 9 acres or fewer, 1982'; proc means; var Acres92 -- smallf82;
/************************************************************* * senic2.sas: Read data & do basic descriptives * *************************************************************/ title 'Basic Stats for SENIC data'; options linesize=79; proc format; /* value labels used in data step below */ value yesnofmt 1 = 'yes' 2 = 'no' ; value regfmt 1 = 'northeast' 2 = 'north central' 3 = 'south' 4 = 'west' ; value acatfmt 1 = '53 & under' 2 = 'over 53'; data senic1; infile '/student/jbrunner/sta301f99/senic.dat' missover ; /* missover causes blanks to be missing, even at the end of a line */ input #1 id 1-5 stay 7-11 age 13-16 infrisk 18-20 culratio 22-25 xratio 27-31 nbeds 33-35 medschl 37 region 39 census 41-43 nurses 45-47 service 49-52 ; /* If there were a 2nd line of data, we'd continue with #2 for line 2, etc. */ label id = 'hospital identification number' stay = 'av length of hospital stay, in days' age = 'average patient age' infrisk = 'prob of acquiring infection in hospital' culratio = '# cultures / # no hosp acq infect' xratio = '# x-rays / # no signs of pneumonia' nbeds = 'average # beds during study period' medschl = 'medical school affiliation' region = 'region of country (usa)' census = 'aver # patients in hospital per day' nurses = 'aver # nurses during study period' service = '% of 35 potential facil. & services' ; /* Associating variables with their value labels */ format medschl yesnofmt.; format region regfmt.; /***** recodes, computes & ifs *****/ if 053 then agecat=2; label agecat = 'av patient age category'; format agecat acatfmt.; /* compute ad hoc index of hospital quality */ quality=(2*service+nurses+nbeds+10*culratio +10*xratio-2*stay)/medschl; if (region eq 3) then quality=quality-100; label quality = "Jerry's bogus hospital quality index"; /* Describe quantitative variables */ PROC UNIVARIATE PLOT NORMAL ; VAR STAY -- NBEDS CENSUS NURSES SERVICE; /* SINGLE DASH ONLY WORKS WITH NUMBERED LISTS, LIKE ITEM1-ITEM50 */ /* Describe categorical variables */ PROC FREQ; TABLES MEDSCHL REGION AGECAT; PROC CHART; VBAR REGION MEDSCHL AGECAT /DISCRETE ;
/* bankmusic.sas */ proc format; value musicfmt 1 = 'None' 2 = 'Elevator' 3 = 'Rap' 4 = 'Choice'; value jobfmt 1 = 'Teller' 2 = 'Customer'; data people; /* Observations are people in this data set*/ infile 'music.dat'; input respno branch status music satis; label respno = 'Respondent ID Number' branch = 'Bank Branch Number' status = 'Teller or Customer' music = 'Music type' satis = 'Satisfaction rating'; format music musicfmt.; format status jobfmt.; if status=1 then telsat = satis; /* Will be missing for customers */ if status=2 then cussat = satis; /* Will be missing for tellers */ proc freq; tables = branch * music / norow nocol nopercent; /* Error check: What should this table look like? See Ch. 3 for details on proc freq */ proc sort data=people; by branch; proc means data=people noprint nway; class branch; id music; var cussat telsat; /* Means will be based on non-missing observations */ output out=banks mean = Cust_Sat Wrkr_Sat n = ncust nwrkr; data branches; set banks; /* Bring in banks data set created above*/ label Cust_Sat = 'Mean Customer Satisfaction' Wrkr_Sat = 'Mean Worker Satisfaction' ncust = 'Number of Customers Responding' nwrkr = 'Number of Workers Responding'; keep branch music Cust_Sat Wrkr_Sat ncust nwrkr; /* Discards the other variables - not really needed */ proc sort data = branches; by music; proc means data = branches; class music; var Cust_Sat Wrkr_Sat;
/* hungrymice.sas */ options linesize=79 pagesize=500 noovp formdlim=' '; title 'Calorie restriction and Longevity in Mice'; /* Read data directly from Excel spreadsheet */ proc import datafile="StarvingMice.XLS" out=mouse1 dbms=xls; getnames=yes; /* Input data file is StarvingMice.XLS Ouput data set is called mouse1 dbms=xls The input file is an Excel spreadsheet. Necessary to read an Excel spreadsheet directly under unix/linux Works in PC environment too except for Excel 4.0 spreadsheets The xlsx file type is not supported as of SAS Version 9.2 If there are multiple sheets, use sheet="sheet1" or something. getnames=yes Use column names as variable names */ /* proc print; */ proc freq; tables diet; proc means; class diet; var lifetime; data mouse2; set mouse1; /* Now mouse2 is just mouse1, and we can transform the data. */ diet0 = diet; if diet = 'NP' then diet = 'Feed at Will'; else if diet = 'lopro' then diet = 'N/R50 lopro'; label lifetime = 'Life Length in Months'; /* By default, procedures use the most recent SAS data set. If you don't want this, use the data= option. */ proc freq; title2 'Check Re-labeling of Diet'; tables diet * diet0 / norow nocol nopercent; proc glm; 'Main analysis'; class diet; model lifetime=diet / clparm; /* clparm gives CIs for contrasts down in the estimate statements. */ means diet; /* Estimate (like Contrast) uses alphabetical order: Be careful! Feed at Will | N/N85 | N/R40 | N/R50 | N/R50 lopro | R/R50 Positive values of contrast mean longer life with less food. */ estimate 'Feed at Will vs. N/N85' diet -1 1 0 0 0 0; estimate 'N/N85 vs. N/R50' diet 0 -1 0 1 0 0; estimate 'N/R50 vs. R/R50' diet 0 0 0 -1 0 1; estimate 'N/R40 vs. N/R50' diet 0 0 1 -1 0 0; estimate 'N/R50 vs. N/R50 lopro' diet 0 0 0 -1 1 0;
options linesize=79; title 'Creating Mood data set'; data path; array h{20} husb1-husb20; array w{20} wife1-wife20; do n = 1 to 300; relig=rantbl(0,.2,.4,.4); mar=ranpoi(0,5); h{1}=3+relig; w{1}=4+relig; do day=2 to 20; erhusb=ranbin(0,4,.5) - 2; erwife=ranbin(0,2,.5) - 1; w{day} = relig + mar*w{day-1}/(day-1) + mar*h{day-1}/10 + erwife; w{day} =round(w{day}); h{day} = relig + mar*w{day}/10 + h{day-1}/(day-1) + erhusb; h{day} = round (h{day}); end; /* Next DAY */ output; end; /* next N */ proc print; var n relig mar husb1-husb20 wife1-wife20
/******* senicdef.sas just reads data ***********/ filename rawdat 'senic.raw'; /* in senic.raw, missing=blank */ proc format; /* value labels used in data step below */ value yesnofmt 1 = 'yes' 2 = 'no' ; value regfmt 1 = 'northeast' 2 = 'north central' 3 = 'south' 4 = 'west' ; value acatfmt 1 = '53 & under' 2 = 'over 53'; data better; infile rawdat missover ; /* missover causes blanks to be missing */ input #1 id 1-5 stay 7-11 age 13-16 infrisk 18-20 culratio 22-25 xratio 27-31 nbeds 33-35 medschl 37 region 39 census 41-43 nurses 45-47 service 49-52 ; label id = 'hospital identification number'; label stay = 'av length of hospital stay, in days'; label age = 'average patient age'; label infrisk = 'prob of acquiring infection in hospital'; label culratio = '# cultures / # no hosp acq infect'; label xratio = '# x-rays / # no signs of pneumonia'; label nbeds = 'average # beds during study period'; label medschl = 'medical school affiliation'; label region = 'region of country (usa)'; label census = 'aver # patients in hospital per day' ; label nurses = 'aver # nurses during study period'; label service = '% of 35 potential facil. & services' ; /* associating variables with their value labels */ format medschl yesnofmt.; format region regfmt.; /***** recodes, computes & ifs *****/ if 053 then agecat=2; label agecat = 'av patient age category'; format agecat acatfmt.; /* compute ad hoc index of hospital quality */ quality=(2*service+nurses+nbeds+10*culratio +10*xratio-2*stay)/medschl; if (region eq 3) then quality=quality-100; label quality = 'jerry''s bogus hospital quality index';
/* senic3.sas */ %include 'senicdef.sas'; /* senicdef.sas reads data, etc. */ TITLE2 'Elementary Statistical Tests'; PROC FREQ; TITLE3 'Use PROC FREQ to do crosstabs'; TABLES REGION*MEDSCHL / NOCOL NOPERCENT EXPECTED CHISQ; PROC TTEST; title3 'Independent t-test with PROC TTEST'; CLASS MEDSCHL; VAR INFRISK AGE ; PROC GLM; title3 'One-way ANOVA and followups with PROC GLM'; CLASS REGION; MODEL INFRISK=REGION; MEANS REGION/ TUKEY SCHEFFE; PROC PLOT; title3 'Scatterplots with PROC CORR'; PLOT INFRISK * NURSES INFRISK * NURSES = MEDSCHL; PROC CORR; title3 'Correlation matrix with PROC CORR'; VAR STAY -- NBEDS CENSUS NURSES SERVICE; PROC REG; title3 'Simple regression with PROC REG'; MODEL INFRISK=NURSES;
/********************* senicreg96a.sas **********************/ OPTIONS LINESIZE=79; TITLE 'SENIC DATA: SAS Regression Examples'; %include 'senicdef.sas'; /* SENICDEF.SAS reads data, etc. */ /* First the indicator dummy vars. These definitions could (should) be in senicdef.sas */ if region = 1 then r1=1; else r1=0; if region = 2 then r2=1; else r2=0; if region = 3 then r3=1; else r3=0; if medschl = 2 then mschool = 0; else mschool = medschl; /* mschool is an indicator for medical school = yes */ proc reg; model infrisk=stay age nbeds census nurses service r1-r3 mschool; output out=resdata predicted=prerisk residual=resrisk; proc univariate plot; var resrisk; proc sort; /* Track down any suspicious residuals */ by resrisk; proc print; var id resrisk infrisk prerisk; proc plot; plot resrisk * (prerisk stay age nbeds census nurses service region medschl xratio culratio) ;
/********************** cars98a.sas **************************/ options linesize=79 pagesize=35; title 'STA 320F98 SAS Lesson 2: Prediction, Diagnosis & Testing'; proc format; /* Used to label values of the categorical variables */ value carfmt 1 = 'US' 2 = 'Japanese' 3 = 'Other' ; data auto; infile 'cars.dat'; input country mpg weight length; len2 = length**2; weight2 = weight**2; lenxwt = length*weight; /* Indicator dummy vars */ if country = 1 then c1=1; else c1=0; if country = 3 then c2=1; else c2=0; /* Cell means dummy vars */ if country = 1 then cm1=1; else cm1=0; if country = 2 then cm2=1; else cm2=0; if country = 3 then cm3=1; else cm3=0; /* Effect coding dummy vars */ if country = 1 then ec1=1; else if country = 3 then ec1 = -1; else ec1=0; if country = 2 then ec2=1; else if country = 3 then ec2 = -1; else ec2=0; mileage = mpg; if _n_ = 71 then mileage = .; label country = 'Country of Origin' mpg = 'Miles per Gallon'; format country carfmt.; proc reg; model mpg = weight length c1 c2; dvar: test c1 = 0, c2 = 0; proc reg; model mpg = weight length cm1-cm3 / noint; /* No intercept */ cellmean: test cm1=cm2=cm3; proc reg; model mpg = weight length ec1 ec2; effcode: test ec1=ec2=0; proc sort; by country; proc univariate plot; var mpg; by country; /* side-by-side boxplots */ proc means; var mpg; class country; proc means; var mpg length weight; proc reg; model mpg = weight length c1 c2 / i ss1 clm cli r influence partial; /* i prints (X'X)-inverse ss1 prints sequential sums of squares clm prints confidence interval for E(Yh) cli prints prediction interval for new observation r prints residual analysis influence prints influence statistics partial prints partial regression plots */ proc glm; class country; model mpg = country ; means country; means country / alpha=.05 bon tukey scheffe; /* alpha=.05 is default */; estimate 'JAPvsUSO' country 1 -.5 -.5;
options linesize=79; title 'Assignment 8 SAS check'; %include 'furndef.sas'; proc reg ; model consume = age area height; areaht: test area=0 , height=0;
options linesize=79; title 'Multiple regression with categorical IV''s'; title2 ' Region controlling for hospital size'; %include 'senicdef.sas'; /* senicdef.sas reads data, etc. */ r1=(region=1); /* or you could say: if (region=1) then r1=1; else r1=0; */ r2=(region=2); r3=(region=3); proc reg; model infrisk = r1 r2 r3 nbeds census; USregion: test r1=r2=r3=0; proc glm; class region; model infrisk= nbeds census region;
/* ch6a.sas */ options linesize=79 pagesize=500; title 'Dwaine Studios Example from Chapter 6 (Section 6.9)'; data portrait; infile 'dwaine.dat'; input kids income sales; proc reg simple; /* "simple" prints simple descriptive statistics */ model sales = kids income / I; /* "I" prints XPrimeX-Inverse */ output out=resdata predicted=presale residual=resale; /* Creates new SAS data set with Y-hat and e as additional variables*/ proc print; /* To see the new data set */ proc iml; tcrit95 = tinv(.975,18); print tcrit95;
/* ch6b.sas */ options linesize=79 pagesize=500; title 'Dwaine Studios Example from Chapter 6 (Section 6.9)'; title2 'Use proc glm''s ESTIMATE'; data portrait; infile 'dwaine.dat'; input kids income sales; proc glm; model sales=kids income / inverse; estimate 'Xh p249' intercept 1 kids 65.4 income 17.6;
/******************** senicglm96a.sas *************************/ options linesize=79; title 'Senic data: SAS glm = Dummy Variable Regression'; %include 'senicdef.sas'; /* senicdef.sas reads data, etc. */ /* reg1-reg3 & ms1 are effect coded dummy vars */ ms1 = medschl; if medschl = 2 then ms1 = -1; if region = 1 then reg1 = 1; else if region=4 then reg1 = -1; else reg1 = 0; if region = 2 then reg2 = 1; else if region=4 then reg2 = -1; else reg2 = 0; if region = 3 then reg3 = 1; else if region=4 then reg3 = -1; else reg3 = 0; mr1 = ms1 * reg1; mr2 = ms1 * reg2 ; mr3 = ms1 * reg3; /* First a nice two-factor ANOVA */ proc reg; model infrisk = reg1-reg3 ms1 mr1-mr3; regtest: test reg1=reg2=reg3=0; mstest: test ms1=0; m_by_r: test mr1=mr2=mr3=0; proc glm; class region medschl; model infrisk = region|medschl; /* or, region medschl region*medschl */ means region|medschl; /* Now control for all the other independent variables */ proc reg; model infrisk=stay age nbeds census nurses service culratio xratio reg1-reg3 ms1 mr1-mr3; regtest: test reg1=reg2=reg3=0; mstest: test ms1=0; m_by_r: test mr1=mr2=mr3=0; proc glm; class region medschl; model infrisk=stay age nbeds census nurses service culratio xratio region|medschl; lsmeans region|medschl; /* Least squares means are Y-hat for each combo of categorical IV values, with all continuous vars held constant at their sample mean values -- exactly what you want, and easy. */
/******************** senicglm96b.sas *************************/ options linesize=79; title 'Senic data: SAS glm special tests (contrasts) & residuals'; %include 'senicdef.sas'; /* senicdef.sas reads data, etc. */ proc glm; class region medschl; model infrisk=stay age nbeds census nurses service region medschl culratio xratio; contrast 'Xratio & Culratio' culratio 1, xratio 1; contrast 'Region' region 1 -1 0 0, region 0 1 -1 0, region 0 0 1 -1; output out=resdata predicted=prerisk residual=resrisk; /* Now plot residuals as before. */
/************* castle1.sas ********************* Two-way ANOVA SAS example with dummy var coding: See Section 19.4; data on P. 818 ***************************************************/ options linesize = 79; proc format; /* value labels used in data step below */ value htfmt 1 = 'Bottom' 2 = 'Middle' 3 = 'Top'; value widfmt 1 = 'Regular' 2 = 'Wide'; data bake; infile 'castle.dat'; input height width sales; label height = 'Display Height' width = 'Display Width' sales = 'Sales in cases'; format height htfmt.; format width widfmt.; /* Asssociate variables with print formats */ /* Effect dummy variable coding */ if width = 1 then w1 = 1; else w1 = -1; if height = 1 then h1 = 1; else if height = 3 then h1 = -1; else h1 = 0; if height = 2 then h2 = 1; else if height = 3 then h2 = -1; else h2 = 0; h1w1 = h1*w1 ; h2w1 = h2*w1; /* Interactions */ /* Cell means coding: indicators are named after their parameters. */ if height = 1 and width = 1 then mu11 = 1 ; else mu11 = 0; if height = 1 and width = 2 then mu12 = 1 ; else mu12 = 0; if height = 2 and width = 1 then mu21 = 1 ; else mu21 = 0; if height = 2 and width = 2 then mu22 = 1 ; else mu22 = 0; if height = 3 and width = 1 then mu31 = 1 ; else mu31 = 0; if height = 3 and width = 2 then mu32 = 1 ; else mu32 = 0; /* Make combination variable */ hwcombo = 10*height + width; proc freq; tables height * width / nopercent norow nocol; tables hwcombo * (height width) / nopercent norow nocol; proc glm; /* glm does the dummy vars for us */ class height width; model sales = height width height*width; means height width height*width; means height / alpha=.05 tukey bon scheffe cldiff; /* Why didn't I bother to do it for shelf width? */ proc reg; /* Now do it with dummy var regression using effect coding */ model sales = h1 h2 w1 h1w1 h2w1; height: test h1=h2=0; width: test w1 = 0; h_by_w: test h1w1 = h2w1 = 0; proc reg; /* Now with cell means coding */ model sales = mu11 -- mu32 / noint ; height: test mu11+mu12=mu21+mu22=mu31+mu32; width: test mu11+mu21+mu31=mu12+mu22+mu32; h_by_w: test mu11-mu12 = mu21-mu22 = mu31-mu32; proc glm; /* Combination variables are good for comparing cell means */ class hwcombo; model sales=hwcombo; means hwcombo / tukey; /* Could have used cldiff option */
/********************* west93.sas ************************ ************* Mean length for westar only ************* *************************************************************/ %include 'gh93read.sas'; options pagesize = 500; title2 'Analysis of mean lesion length for Westar only'; if cultivar = 1; /* select westar */ /* proc univariate normal plot; var meanlng; */ proc tabulate; class clone replic; var meanlng; table (clone all)*(mean std n),(replic all) * meanlng; proc glm; class clone replic; model meanlng = replic|clone; title3 'Clone by Replic Interaction: Does clone diff depend on replic?'; proc anova; class clone; model meanlng = clone; means clone / scheffe; title3 'All Three Replications Pooled'; proc sort; by replic; proc anova; class clone; model meanlng = clone; means clone / scheffe; by replic; title3 'Test the Three Replications Separately';
/************* train.sas ************************ Nested Example from NKNW Ch. 28 (Table 28.1) **************************************************/ options linesize = 79; proc format; /* value labels used in data step below */ value cityfmt 1 = 'Atlanta' 2 = 'Chicago' 3 = 'San Francisco'; data school; infile 'training.dat'; input city teacher learning; format city cityfmt.; proc glm; class city teacher; model learning = city teacher(city) ; means city teacher(city);
/************* bread.sas ************************ Subsampling Example from NKNW Ch. 28 (Table 28.9) **************************************************/ options linesize = 79; proc format; /* value labels used in data step below */ value tempfmt 1 = 'Low' 2 = 'Medium' 3 = 'High'; data school; infile 'bread.dat'; input temp batch crusty; label crusty = 'Crustiness of bread'; format temp tempfmt.; proc glm; class temp batch; model crusty = temp batch(temp); random batch(temp) / test; means temp batch(temp); proc glm; /* Pretending both effects random */ class temp batch; model crusty = temp batch(temp); random temp batch(temp) / test;
options linesize=79; title 'Multivariate Regression'; title2 'Test the relationship of house type to energy'; title3 'consumption, holding chim area and chimney type constant'; %include 'furndef.sas'; /* DUMMY VARIABLES FOR SHAPE AND HOUSE */ if shape = 1 then s1=1; else s1=0; if shape = 2 then s2=1; else s2=0; if house = 1 then h1=1; else h1=0; if house = 2 then h2=1; else h2=0; if house = 3 then h3=1; else h3=0; if house = 4 then h4=1; else h4=0; /*** First with PROC REG See SAS Statistics Guide, pp. 665-666 & 683-686 ***/ proc reg; model dampin dampout=area s1--h4; house: mtest h1=0,h2=0,h3=0,h4=0; /* Multivariate */ uhouse: test h1=0,h2=0,h3=0,h4=0; /* Univariate */ /*** Now check with PROC GLM See SAS Statistics Guide, p. 445 ***/ proc glm; class shape house; model dampin dampout = area shape house; manova H=house / short;
/******************** senicmv96a.sas *************************/ options linesize=79; title 'Senic data: SAS glm & reg multivariate intro'; %include 'senicdef.sas'; /* senicdef.sas reads data, etc. Includes reg1-reg3, ms1 & mr1-mr3 */ /* First a nice two-factor MANOVA on infrisk & stay */ proc glm; class region medschl; model infrisk stay = region|medschl; manova h = _all_; /* Now do it with proc reg. Syntax is the same, except list more than one dependent variable, and say "mtest" instead of "test." */ proc reg; model infrisk stay = reg1-reg3 ms1 mr1-mr3; regtest: mtest reg1=reg2=reg3=0; mstest: mtest ms1=0; m_by_r: mtest mr1=mr2=mr3=0;
%include 'tuberead.sas'; options pagesize=500; Title2 'Test for Differences Between MCG Strains'; Title3 'And Also for Departure From Linear Growth'; proc glm data=mould; class mcg; model length1-length11 = mcg / nouni; repeated time polynomial / summary;
%include 'tuberead.sas'; options pagesize=500; Title2 'Test Linearity of growth'; Title3 'Ho: Growth is linear starting with day 1'; proc glm data=mould; class mcg; model length1-length11 = mcg / nouni; contrast 'Depart frm Linearity' /* specifying L for Ho: LBM = 0 Actually, L is just picking out the intercept term, but this name makes the output look nicer */ intercept 1; manova m = length1 - 2*length2 + length3, /* iff lng1-lng2=lng2-lng3 */ length2 - 2*length3 + length4, /* iff lng2-lng3=lng3-lng4 */ length3 - 2*length4 + length5, /* iff lng3-lng4=lng4-lng5 */ length4 - 2*length5 + length6, /* iff lng4-lng5=lng5-lng6 */ length5 - 2*length6 + length7, /* iff lng5-lng6=lng6-lng7 */ length6 - 2*length7 + length8, /* iff lng6-lng7=lng7-lng8 */ length7 - 2*length8 + length9, /* iff lng7-lng8=lng8-lng9 */ length8 - 2*length9 + length10, /* iff lng8-lng9=lng9-lng10 */ length9 - 2*length10 + length11 /* iff lng9-lng10=lng10-lng11 */ mnames = Linearity /short; /****** Test Linearity starting with day 2 *******/ proc glm data=mould; class mcg; model length1-length11 = mcg / nouni; contrast 'Depart frm Linearity' /* specifying L for Ho: LBM = 0 Actually, L is just picking out the intercept term, but this name makes the output look nicer */ intercept 1; manova m = length2 - 2*length3 + length4, /* iff lng2-lng3=lng3-lng4 */ length3 - 2*length4 + length5, /* iff lng3-lng4=lng4-lng5 */ length4 - 2*length5 + length6, /* iff lng4-lng5=lng5-lng6 */ length5 - 2*length6 + length7, /* iff lng5-lng6=lng6-lng7 */ length6 - 2*length7 + length8, /* iff lng6-lng7=lng7-lng8 */ length7 - 2*length8 + length9, /* iff lng7-lng8=lng8-lng9 */ length8 - 2*length9 + length10, /* iff lng8-lng9=lng9-lng10 */ length9 - 2*length10 + length11 /* iff lng9-lng10=lng10-lng11 */ mnames = Linearity /short;
/********************* tuberep.sas **********************/ /* Not necessarily the best analysis -- just an example */ /*******************************************************/ %include 'tuberead2.sas'; title2 'Doubly multivariate repeated measures'; proc means n mean stddev; title3 'take a look at the means'; class mcg; var length4-length7 pmscl4-pmscl7; proc glm; title3 'Main Effect for MCG on length AND number of sclerotia'; class mcg; model length4-length7 pmscl4-pmscl7 = mcg; manova H = mcg M = length4 + length5 + length6 + length7, pmscl4 + pmscl5 + pmscl6 + pmscl7 mnames = avelen avescl / summary; /* The summary option requests univariate results on the transformed variables. */ proc glm; title3 'Main Effect of Time and Time*mcg interaction for both DVs'; class mcg; model length4-length7 pmscl4-pmscl7 = mcg; manova H = intercept mcg M = length5-length4, length6-length5, length7-length6, pmscl5-pmscl4, pmscl6-pmscl5, pmscl7-pmscl6 mnames = lgrow45 lgrow56 lgrow67 sgrow45 sgrow56 sgrow67 / summary; proc glm; title3 'Main Effect of Time and Time*mcg, interaction, just for length'; class mcg; model length4-length7 = mcg; manova H = intercept mcg M = length5-length4, length6-length5, length7-length6 mnames = lgrow45 lgrow56 lgrow67 / summary; proc glm; title3 'Main Effect of Time and Time*mcg interaction, just sclerotia'; class mcg; model pmscl4-pmscl7 = mcg; manova H = intercept mcg M = pmscl5-pmscl4, pmscl6-pmscl5, pmscl7-pmscl6 mnames = sgrow45 sgrow56 sgrow67 / summary; /* When you compose your own transformations for a within cases analysis, 1. Main effects of between cases factors correspond to main effects on sums or averages of the dependent variables. 2. Main effects of within-cases factors are represented by the intercept, for a collection of difference variables. 3. Between by within interactions are represented by main effects of the between factors on collections of difference variables. In this last run of proc glm, we just verify that the "repeated" syntax gives us the same test statistics as the job above, just for the length variables. */ proc glm; title3 'Check using REPEATED syntax, just for length'; class mcg; model length4-length7 = mcg; repeated time profile / short summary;
/**************** noise96a.sas ***********************/ options linesize=79 pagesize=250; title 'Repeated measures on Noise data: Multivariate approach'; proc format; value sexfmt 0 = 'Male' 1 = 'Female' ; data loud; infile 'noise.dat'; /* Multivariate data read */ input ident interest sex age noise1 time1 discrim1 ident2 inter2 sex2 age2 noise2 time2 discrim2 ident3 inter3 sex3 age3 noise3 time3 discrim3 ident4 inter4 sex4 age4 noise4 time4 discrim4 ident5 inter5 sex5 age5 noise5 time5 discrim5 ; format sex sex2-sex5 sexfmt.; /* noise1 = 1, ... noise5 = 5. time1 = time noise 1 presented etc. ident, interest, sex & age are identical on each line */ label interest = 'Interest in topic (politics)'; proc glm; class age sex; model discrim1-discrim5 = age|sex; repeated noise profile/ short summary;
/**************** noise96b.sas ***********************/ options linesize=79 pagesize=250; title 'Repeated measures on Noise data: Univariate approach'; proc format; value sexfmt 0 = 'Male' 1 = 'Female' ; data loud; infile 'noise.dat'; /* Univariate data read */ input ident interest sex age noise time discrim ; format sex sexfmt.; label interest = 'Interest in topic (politics)' time = 'Order of presenting noise level'; proc glm; class age sex noise ident; model discrim = ident(age*sex) age|sex|noise; random ident(age*sex) / test;
/**************** noise96c.sas ***********************/ options linesize=79 pagesize=250; title 'Repeated measures on Noise data: Cov Struct Approach'; proc format; value sexfmt 0 = 'Male' 1 = 'Female' ; data loud; infile 'noise.dat'; /* Univariate data read */ input ident interest sex age noise time discrim ; format sex sexfmt.; label interest = 'Interest in topic (politics)' time = 'Order of presenting noise level'; proc mixed method = ml; class age sex noise; model discrim = age|sex|noise; repeated / type = un subject = ident r; lsmeans age noise; proc mixed method = ml; class age sex noise; model discrim = age|sex|noise; repeated / type = cs subject = ident r;
/*********************** cartoonread.sas ******************/ options linesize = 79; title 'Cartoon Data'; proc format; /* value labels used in data step below */ value cfmt 0 = 'Black & White' 1 = 'Colour'; value efmt 0 = 'Pre-professional' 1 = 'Professional' 2 = 'Student'; value lfmt 1 = 'Hospital A ' 2 = 'Hospital B' 3 = 'Hospital C' 4 = 'Penn State'; data disney; infile 'cartoon.dat'; input id colour educ location otis cartoon1 real1 cartoon2 real2; /* This shows how a file with repeated measures on the same data line can be split into a file with one record per observation */ do time = 1 to 2; do type = 'Cartoon','Real'; if (time=1 and type='Cartoon') then memory=cartoon1; else if (time=1 and type='Real') then memory=real1; else if (time=2 and type='Cartoon') then memory=cartoon2; else memory=real2; output; /* Write a case */ end; end; label colour = 'Colour versus Black & white' educ = 'Education' location = 'Where did respondent come from?' otis = 'Otis Mental Ability Test' cartoon1 = 'Cartoon test score at time 1' real1 = 'Realistic test score at time 1' cartoon2 = 'Cartoon test score at time 2' real2 = 'Realistic test score at time 2' memory = 'Recall of training materials' type = 'Training materials: Cartoon versus realistic '; format colour cfmt.; format educ efmt.; format location lfmt.; drop cartoon1--real2; /* proc print; var id time type memory; */
/*********************** cartoon2.sas ******************/ /* Slight variations: Should be better than one */ /*******************************************************/ %include 'cartoonread.sas'; title2 'Proc Mixed repeated measures on Cartoon Data'; proc mixed; class colour educ time type; model memory = otis colour|educ|time|type / ddfm = satterth; repeated / type=un subject=id r=1,2; lsmeans time type; /* Look at what's significant */ proc plot; plot memory*otis;
title 'Plant death as a function of Plant & MCG (3D log-linear)'; /* Uses cell counts as input */ filename dead 'death.dat'; data compost; infile dead; input died $ plant $ mcg count percent; if plant ne 'GP159'; if count=0 then count = 1e-20; proc catmod; weight count; model died*plant*mcg=_response_ / ml nodesign noiter noparm noresponse; loglin died plant mcg plant*mcg died*plant died*mcg;
options linesize=79 pagesize=40; title 'Coronary Heart Disease Example from Chapter 1'; filename rawdata 'chd.dat'; data heart; infile rawdata; input id agegrp age chd; label chd = 'Coronary Heart Disease (1=yes, 0=no)'; proc catmod; direct age; model chd=age / ml nodesign noresponse noprofile; proc plot; plot chd*age; proc freq; tables agegrp*chd/ nopercent nocol; proc logistic; model chd = age;
options linesize=72; /****************************************************************** Example Plot of Normal Distribution *******************************************************************/ title1 f=centx 'The Normal Distribution'; title2 f=complex 'with ' f=greek 'm' f=complex ' = 0 and varying ' f=greek 's'; /* GOPTIONS DEVICE=tek4014 HPOS=0 VPOS=0; */ filename grafout 'norm.ps'; goptions device = AppleLW /* Another Postscript device is PS */ gsfmode = Append NoPrompt NoDisplay gsfName = grafout Handshake = XonXoff NoBorder rotate ; /* To get landscape orientation - default is portrait*/ DATA SIDE; /* THIS IS TO LABEL THE LINES. IT IS AN ANNOTATE DATA SET */ LENGTH FUNCTION $ 8. TEXT $ 20.; FUNCTION='MOVE'; X=48; Y=25; OUTPUT; FUNCTION='DRAW'; X=55; Y=25; LINE=1; OUTPUT; FUNCTION='LABEL'; X=57; Y=25.25; POSITION='6'; style = 'greek' ; TEXT='s = 1/2';OUTPUT; FUNCTION='MOVE'; X=48; Y=23; OUTPUT; FUNCTION='DRAW'; X=55; Y=23; LINE=2; OUTPUT; FUNCTION='LABEL'; X=57; Y=23.25; POSITION='6'; TEXT='s = 1';OUTPUT; FUNCTION='MOVE'; X=48; Y=21; OUTPUT; FUNCTION='DRAW'; X=55; Y=21; LINE=8; OUTPUT; FUNCTION='LABEL'; X=57; Y=21.25; POSITION='6'; TEXT='s = 2';OUTPUT; FUNCTION='MOVE'; X=48; Y=19; OUTPUT; FUNCTION='DRAW'; X=55; Y=19; LINE=4; OUTPUT; FUNCTION='LABEL'; X=57; Y=19.25; POSITION='6'; TEXT='s = 5';OUTPUT; data normal; do x=-10 to 10 by .1; pi = 3.14159; sigma1 = .5; sigma2 = 1; sigma3 = 2; sigma4 = 5; density = 1/(sigma1*sqrt(2*pi)) * exp(-.5*(x/sigma1)**2); density2 = 1/(sigma2*sqrt(2*pi)) * exp(-.5*(x/sigma2)**2); density3 = 1/(sigma3*sqrt(2*pi)) * exp(-.5*(x/sigma3)**2); density4 = 1/(sigma4*sqrt(2*pi)) * exp(-.5*(x/sigma4)**2); output; end; LABEL DENSITY=' '; axis1 label=('Density'); symbol1 v=none i=join l=1; symbol2 v=none i=join l=2; symbol3 v=none i=join l=8; symbol4 v=none i=join l=4; proc gplot; plot density*x = 1 density2*x = 2 density3*x = 3 density4*x = 4 /overlay vaxis=axis1 annotate=side;
options linesize=72; GOPTIONS DEVICE=tek4014 HPOS=0 VPOS=0; data bivar; do x=-3 to 3 by .1; do y=-3 to 3 by .1; density = exp(-.5*x**2 - y**2)/(2*3.14159*sqrt(2)); output; end; end; LABEL DENSITY='Density'; proc g3d ; plot x*y=density / xticknum=5 yticknum=5;
Here is some material that may be included and documented later.
/************ twinpca96a.sas ********************/ OPTIONS LINESIZE=79; TITLE2 'Principal Components Analysis'; %include 'twinread.sas'; proc factor simple corr score rotate=varimax; var progmat reason verbal /* mental */ headlng headbrd headcir bizyg height weight; /* physical */ /* Now save first 2 components */ proc factor rotate=varimax nfactors = 2 out = facout; var progmat reason verbal headlng headbrd headcir bizyg height weight; proc univariate normal plot; var factor1 factor2; /************ twincancor96a.sas ********************/ options linesize=79 pagesize = 200;; title2 'Canonical Correlation Analysis of twin data'; %include 'twinread.sas'; proc cancorr vname = 'Mental Variables' wname = 'Physical Variables' out = keepem; /* New data set includes canonical variates */ var progmat reason verbal; with headlng headbrd headcir bizyg height weight; proc univariate; var v1-v3 w1-w3; /************ twincancor96b.sas ********************/ options linesize=79 pagesize = 200;; title2 'Canonical Correlation Analysis of twin data'; %include 'twinread.sas'; proc cancorr vname = 'Mental Variables' wname = 'Physical Variables'; var progmat reason verbal; with headlng headbrd headcir bizyg height weight; partial sex ident; /************************** sampvar1.sas **************************/ /* Finds n needed for significance, for a given effect size */ /*******************************************************************/ options linesize = 79 pagesize = 200; data explvar; /* Can replace alpha, s, p, and a below. */ alpha = 0.05; /* Significance level. */ s = 6; /* Numerator df = # IVs being tested. */ p = 26; /* There are p beta parameters. */ a = .10 ; /* Proportion of remaining variation after */ /* controlling for all other variables. */ /* Initializing ... */ pval = 1; n = p+1; do until (pval <= alpha); F = (n-p)/s * a/(1-a); df2 = n-p; pval = 1-probf(F,s,df2); n = n+1 ; end; /* When finished, n is one too many */ n = n-1; F = (n-p)/s * a/(1-a); df2 = n-p; pval = 1-probf(F,s,df2); put ' *********************************************************'; put ' '; put ' For a multiple regression model with ' p 'betas, '; put ' testing ' s ' variables controlling for the others,'; put ' a sample size of ' n 'is needed for significance at the'; put ' alpha = ' alpha 'level, when the effect explains a = ' a ; put ' of the remaining variation after allowing for all other '; put ' variables in the model. '; put ' F = ' F ',df = (' s ',' df2 '), p = ' pval; put ' '; put ' *********************************************************'; /************************** sampvar2.sas ********************/ /* Finds "a" needed for significance, given sample size n */ /*************************************************************/ options linesize = 79 pagesize = 200; data explvar; /* Replace alpha, s, p, and a below. */ alpha = 0.05; /* Significance level. */ s = 6; /* Numerator df = # IVs being tested. */ p = 26; /* There are p beta parameters. */ n = 120 ; /* Sample size */ /* Initializing ... */ pval = 1; a = 0; df2 = n-p; do until (pval <= alpha); F = (n-p)/s * a/(1-a); pval = 1-probf(F,s,df2); a = a + .001 ; end; /* When finished, a is .001 too much */ a = a-.001; F = (n-p)/s * a/(1-a); pval = 1-probf(F,s,df2); put ' ******************************************************'; put ' '; put ' For a multiple regression model with ' p 'betas, '; put ' testing ' s ' variables at significance level '; put ' alpha = ' alpha ' controlling for the other variables,'; put ' and a sample size of ' n', the variables need to explain'; put ' a = ' a ' of the remaining variation to be significant.'; put ' F = ' F ', df = (' s ',' df2 '), p = ' pval; put ' '; put ' *******************************************************'; /***************** fpower.sas *********************/ options linesize = 79 pagesize = 200; data fpower; /* Replace alpha, s, p, and wantpow below */ alpha = 0.05; /* Significance level */ s = 6; /* Numerator df = # IVs being tested */ p = 26; /* There are p beta parameters */ a = .10 ; /* Effect size */ wantpow = .80; /* Find n to yield this power. */ power = 0; n = p+1; oneminus = 1-alpha; /* Initializing ... */ do until (power >= wantpow); ncp = (n-p)*a/(1-a); df2 = n-p; power = 1-probf(finv(oneminus,s,df2),s,df2,ncp); n = n+1 ; end; n = n-1; put ' *********************************************************'; put ' '; put ' For a multiple regression model with ' p 'betas, '; put ' testing ' s 'independent variables using alpha = ' alpha ','; put ' a sample size of ' n 'is needed'; put ' in order to have probability ' wantpow 'of rejecting H0'; put ' for an effect of size a = ' a ; put ' '; put ' *********************************************************'; /***************** pow.sas *********************/ options linesize = 79 pagesize = 200; data power; /* Replace alpha, s, & p below */ alpha = 0.05; /* Significance level */ s = 6; /* Numerator df = # IVs being tested */ p = 26; /* There are p beta parameters */ n = 144 ; /* Sample size */ F = 2.185; /* Observed value of F statistic */ ncp = s * F; df2 = n-p; oneminus = 1 - alpha; power = 1-probf(finv(oneminus,s,df2),s,df2,ncp); put ' *********************************************************'; put ' '; put ' For a multiple regression model with ' p 'betas, '; put ' testing ' s 'independent variables using alpha = ' alpha ','; put ' and a sample size of ' n ', observed power = ' power; put ' for F = ' F; put ' '; put ' *********************************************************';
/*************************** iml1.sas ************************************* Illustrate some matrix calculations from Chapter 2 of Johnson and Wichern * **************************************************************************/ options linesize=79 pagesize=500; proc iml; print "Example 2.1"; x = {1, 3, 2}; /* Rows are separated by commas */ y = {-2, 1, -1}; print X y; z = 3#x; z = x+y; print x y z; cosine = x`*y / sqrt(x`*x # y`*y); print cosine; print "Example 2.2"; /* First check for linear independence as the book does, using the definition (A c = 0) */ A = {1 1 1, 2 0 -2, 1 -1 1}; b = {0, 0, 0}; c = solve(A,b); /* Error message if linearly dependent */ print c; /* The problem with solve is that the matrix A must be square. This means it's useless, for example, if you want to check that the columns of a data matrix are linearly independent. Another way is to reduce A` to row echelon form, and if there is a row of zeros then the rows were linearly dependent */ A = {1 1 1, 2 0 -2, 1 -1 1, 7 9 2}; reducd = echelon(A`); print reducd; print "Example 2.3"; reset print; /* Print each matrix as it is created */ A = {3 -1 2, 1 5 4}; c = a`; b = {1 -2 -3 , 2 5 1}; C = 4#A; C = A+B; B = {-2, 7,9}; C = {2 0, 1 -1}; D=A*B; D = C*A; print "Example 2.8"; A = {3 2, 4 1}; B = inv(A); print "Example 2.9"; A = {1 -5, -5 1}; print "Eigenvalues will be in lambda, eigenvectors in E"; call eigen(lambda,E,A); print "Calculating a determinant"; det_A = det(A);
/*************************** iml2.sas ************************************* Read in data, do a multiple regression with proc iml. Compare proc reg. * **************************************************************************/ options linesize=79 pagesize=500; data auto; infile 'cars.dat'; input country mpg weight length; label country = 'Country of Origin' mpg = 'Miles per Gallon'; if country = 2 then jcar = 1; /* Indicator for Japanese */ else jcar=0; misss = mpg + weight + length + jcar; if misss = . then delete; /* Throw out the case if misss is missing */ proc iml; use auto; read all var {weight length jcar} into X; read all var {mpg} into y; n = nrow(y); one = j(n,1,1); /* Creates matrix (nrows,ncols,contents) */ X = one || X; /* Column of ones for the intercept */ b = inv(X`*X)*X`*y ; print b;
/*************************** sweat1.sas *********************************** * Johnson and Wichern's Ex. 5.2, Page 229 * **************************************************************************/ options linesize=79 pagesize=500; data sweat; infile 'sweat.dat'; input rate sodium potassm; proc iml; use sweat; read all var {rate sodium potassm} into X; n = nrow(X); p = ncol(X); one = j(n,1,1); /* Creates matrix (nrows,ncols,contents) */ ident = I(n); /* n by n identity matrix */ mu0 = {4,50,10}; xbar = 1/n # X`*one; S = 1/(n-1) # X`*(ident - (1/n)#one*one`)*X; print xbar; print S; T2 = n # (xbar-mu0)`*inv(S)*(xbar-mu0); print "T-squared = " T2; F = (n-p)/(n#p-p) # T2 ; pval = 1-probf(F,p,n-p); /* probchi is good too */ print "F = " F; print "p = " pval;
/* mvr1.sas: Example 7.8, Pages 413-414 */ options linesize=79 pagesize=500; proc iml; print "Example 7.8, Pages 413-414"; Y = {1 -1, 4 -1, 3 2, 8 3, 9 2}; Z = {1 0, 1 1, 1 2, 1 3, 1 4}; n = nrow(Z); Bhat = inv(Z`*Z)*Z`*Y ; print Bhat; E = Y`*Y - Bhat`*Z`*Z*Bhat; print E; sighat = (1/n) # E ; Z1 = {1, 1, 1, 1, 1}; Bhat1 = inv(Z1`*Z1)*Z1`*Y ; sighat1 = (1/n) # (Y`*Y - Bhat1`*Z1`*Z1*Bhat1); lambda = det(sighat)/det(sighat1); print "Wilks' Lambda = " lambda; data ex78; infile 'ex7.8.dat'; input x y1 y2; proc reg; model y1 y2 = x; mtest x;
/* mvr2.sas: Example 6.8, Pages 323-326 */ title "Multivariate Regression: One-way Manova"; title2 "Example 6.8 of Johnson and Wichern"; options linesize=79 pagesize=500; data ex68; infile 'ex6.8.dat'; input sample y1 y2; if sample=1 then s1=1; else s1=0; if sample=2 then s2=1; else s2=0; proc print; proc reg; model y1 y2 = s1 s2; samptest: mtest s1=0, s2=0; proc glm; class sample; /* Makes dummy variables for you */ model y1 y2 = sample; manova h = _all_;
/**************** noisereg1.sas ***********************/ options linesize=79 pagesize=250; title 'Repeated measures on Noise data: Multivariate approach with proc reg'; proc format; value sexfmt 0 = 'Male' 1 = 'Female' ; data loud; infile 'noise.dat'; /* Multivariate data read */ input ident interest sex age noise1 time1 discrim1 ident2 inter2 sex2 age2 noise2 time2 discrim2 ident3 inter3 sex3 age3 noise3 time3 discrim3 ident4 inter4 sex4 age4 noise4 time4 discrim4 ident5 inter5 sex5 age5 noise5 time5 discrim5 ; format sex sex2-sex5 sexfmt.; /* noise1 = 1, ... noise5 = 5. time1 = time noise 1 presented etc. ident, interest, sex & age are identical on each line */ label interest = 'Interest in topic (politics)'; interest = interest - 2.4316667; /* Centering at mean won't affect any tests */ avedisc = mean(of discrim1-discrim5); /* Dummy variables with effect coding */ s = 2*sex -1 ; if age=1 then a1 = 1; else if age=3 then a1 = -1; else a1 = 0; if age=2 then a2 = 1; else if age=3 then a2 = -1; else a2 = 0; /* Interaction terms*/ sa1 = s*a1; sa2 = s*a2; proc means; class sex age; var avedisc; proc means; class sex age; var discrim1-discrim5; proc reg; model discrim1-discrim5 = interest s a1 a2 sa1 sa2; sextest: mtest s=0, discrim1+discrim2+discrim3+discrim4+discrim5; agetest: mtest a1=0,a2=0, discrim1+discrim2+discrim3+discrim4+discrim5; agebysex: mtest sa1=sa2=0, discrim1+discrim2+discrim3+discrim4+discrim5; noisetst: mtest intercept=0, discrim1-discrim2, discrim2-discrim3, discrim3-discrim4, discrim4-discrim5; sexnoise: mtest s=0, discrim1-discrim2, discrim2-discrim3, discrim3-discrim4, discrim4-discrim5; agenoise: mtest a1=a2=0, discrim1-discrim2, discrim2-discrim3, discrim3-discrim4, discrim4-discrim5; sanoise: mtest sa1=sa2=0, discrim1-discrim2, discrim2-discrim3, discrim3-discrim4, discrim4-discrim5; /* proc glm will do the grunt work for you. It makes your dummy variables and composes the L and M matrices. */ proc glm; class age sex; model discrim1-discrim5 = interest age|sex; repeated noise profile/ short summary;
/* salmon.sas */ title 'Univariate and Multivariate Scheffe on Salmon data'; title2 'STA2201s04 Assignment 8 Check'; options linesize=79 noovp formdlim='_'; proc format; value sexfmt 1 = 'Female' 2 = 'Male' ; value cfmt 1 = 'Alaskan' 2 = 'Canadian'; data fish; infile 'salmon.dat'; input country sex fresh marine; growth = fresh+marine; combo = 10*sex+country; if combo = 11 then FA=1 ; else FA=0; if combo = 12 then FC=1 ; else FC=0; if combo = 21 then MA=1 ; else MA=0; if combo = 22 then MC=1 ; else MC=0; label fresh = 'Freshwater growth' marine = 'Marine growth' growth = 'Total growth (fresh+marine)'; format country cfmt.; format sex sexfmt.; proc freq; tables country*sex / norow nocol nopercent; /* Univariate */ proc iml; title3 'Table of critical values for all possible Scheffe tests'; numdf = 3; /* Numerator degrees of freedom for initial test */ dendf =96; /* Denominator degrees of freedom for initial test */ alpha = 0.05; critval = finv(1-alpha,numdf,dendf); zero = {0 0}; S_table = repeat(zero,numdf,1); /* Make empty matrix */ /* Label the columns */ namz = {"Number of Contrasts in followup test" " Scheffe Critical Value"}; mattrib S_table colname=namz; do i = 1 to numdf; s_table(|i,1|) = i; s_table(|i,2|) = numdf/i * critval; end; reset noname; /* Makes output look nicer in this case */ print "Initial test has" numdf " and " dendf "degrees of freedom." "Using significance level alpha = " alpha; print s_table; proc glm; class country sex; model growth=country|sex; means country|sex; proc glm; class combo; model growth=combo; means combo / scheffe; /* Multivariate */ /* Critical value of Wilks' Lambda for testing H0: L Beta M = 0 Use in all Scheffe followups */ proc iml; /************ Replace p, q, v and maybe alpha below. *************/ p = 2; /* Rank of (E+H): LE number columns of M */ q = 3; /* Rank of L(X-p-X-inv)L': Usually number of rows in L */ v = 96; /* Error degrees of freedom: get from summary table */ alpha = 0.05; /**/ /*****************************************************************/ print p q v alpha; r = v - (p-q+1)/2; u = (p*q-2)/4; p2 = p**2 ; q2 = q**2; if p2+q2>5 then t = sqrt((p2*q2-4)/(p2+q2-5)) ; else t=1; numdf = p*q; denomdf = r*t-2*u; oneminus = 1-alpha; fcrit = finv(oneminus,numdf,denomdf); print "Critical value of F, numerator and denominator DF"; print fcrit numdf denomdf; Lcrit = (r*t-2*u)/(p*q*fcrit+r*t-2*u); print "Critical value of Wilks' Lambda" Lcrit; /* Just for checking input */ m = (abs(p-q)-1)/2; s = min(p,q); n = (v-p-1)/2; print "Just for checking the correctness of (p,q,v) input"; print s m n; proc reg; model fresh marine = FA FC MA MC / noint; anydiff: mtest FA=FC=MA=MC; /* Overall Test Significant */ country: mtest FA+MA=FC+MC; /* Sig */ cfresh: mtest FA+MA=FC+MC, fresh; cmarine: mtest FA+MA=FC+MC, marine; cgrowth: mtest FA+MA=FC+MC, fresh+marine; gender: mtest FA+FC=MA+MC; inter: mtest FA-FC=MA-MC; /* Pairwise MV*/ FAvsFC: mtest FA=FC; /* Sig */ FAvsMA: mtest FA=MA; FAvsMC: mtest FA=MC; /* Sig */ FCvsMA: mtest FC=MA; /* Sig */ FCvsMC: mtest FC=MC; MAvsMC: mtest MA=MC; /* Sig */ /* proc glm; class country sex; model fresh marine = country|sex; manova h = _all_; proc reg; model fresh marine = FA FC MA MC / noint; anydiff: test FA=FC=MA=MC; country: test FA+MA=FC+MC;
/* bweight5.sas: Explore ODS */ %include 'bweightread.sas'; title2 'Likelihood ratio test with less output'; /* Starting with SAS Version 9, every table produced by SAS has a name, and the Output Delivery System (ODS) allows one to specify what is printed. The most convenient way to find out the names in a unix/linux environment is with ods trace. */ data chopped; set bigbaby; if _n_ < 30 then r2=.; ods trace on / listing; /* listing option writes trace on list file, rather than log file (default) */ proc logistic; title3 'Forced stepwise, chopped data'; model low (event=last) = age lwt smoke ptl ht ui ftv r2 r3 / selection=forward start=7 slentry=1.00 covb; run; /* Need run statement with ods trace */ ods trace off; ods select Logistic.NObs Logistic.Step0.GlobalTests Logistic.Step2.GlobalTests; proc logistic; title3 'Reduced Output: Still must use a calculator'; model low (event=last) = age lwt smoke ptl ht ui ftv r2 r3 / selection=forward start=7 slentry=1.00; /* Comments: 1. It is safer to use Logistic.Step0.GlobalTests etc. rather than Logistic.Step0.FitStatistics etc., because the global tests show the df. This lets you verify that you're doing the right test. 2. The names are very systematic, so by counting the number of steps required to go from the full to the reduced model, you can use ods select with confidence. There is no need to do ods trace every time. 3. It is a great relief to be able to look at only PART of the SAS output. 4. This example barely scratches the surface of what you can do with the ods system. For example, you can get SAS to write the list file in html format suitable for posting as a Web page, or it can write selected tables to SAS data sets for further processing within the current SAS job, particularly with proc iml.
This handout is licensed under a Creative Commons Attribution-ShareAlike 3.0 Unported License.