/* Enclosed is an SPlus like program, a SAS-Stat program and an IML program that Estimate White Blood Cell changes for Stage1 and Stage2 male and female HIV patients(made up data). I decided to use the regression approach for the analysis of variance, because it is easier to code in the three languages. All programs produce the same results. I used the GLM module in Splus(R) due to my lack of knowledge of Splus(R) matrix algebra syntax. No claims are made as to which package is superior. That is up to the user. Note that SPlus uses 'objects' which can be used and reused in many ways. However SAS-Stat with ODS creates many tables, every option, listing or 'out' dataset is a table in ODS. SPlus also allows user to build 'routines' much the way IML allows users to create 'modules'. I could turn the IML regression code into a module in minutes. The Splus(R) output is enclosed at the end of this post. The SAS output can be produced by running enclosed code. Obviously, there is overlap in functionality. It would be interesting to see which package can produce the best publication quality graphics, i.e. postscript output. CGM is generally not considered 'publication' quality, and has caused problems for other companies. * Splus(R) program; wbcchg <- c(-0.3,0.3,-0.2,-0.1,0.0,-0.4,-0.5,-0.2,0.9,-0.5,0.6,0.6,-0.1,1.0,-0.9,1.0,0.2,-1.0,-0.0,0.3,0.2,-0.7, 0.9,-0.4,1.5,-1.3,0.8,1.3,0.0,0.5,1.9,0.0,-0.4,0.9,-0.2,1.1,0.3,1.0,-1.1,1.0,-0.6,-1.6,-0.3,-0.2,2.4,0.2,-0.6,-3.0) stage1 <- c(1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0) stage2 <- c(0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0) male <- c(1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0) print(d.AD <- data.frame(stage1, stage2, male, wbcchg)) glm.D93 <- glm(wbcchg ~ stage1 + stage2 + male) glm(glm.D93) summary(glm.D93) ================================================================================================== * Sas Stat program; Data Hiv_WbcChg ( Label = "White Blood Cell changes for Stage1 and Stage2 Aids for Males and Females" ); Input stage1 stage2 male hmochg @@; Datalines; 1 0 1 -0.3 0 1 0 0.3 0 0 1 -0.2 1 0 0 -0.1 0 1 1 0.0 0 0 0 -0.4 1 0 1 -0.5 0 1 0 -0.2 0 0 1 0.9 1 0 0 -0.5 0 1 1 0.6 0 0 0 0.6 1 0 1 -0.1 0 1 0 1.0 0 0 1 -0.9 1 0 0 1.0 0 1 1 0.2 0 0 0 -1.0 1 0 1 0.0 0 1 0 0.3 0 0 1 0.2 1 0 0 -0.7 0 1 1 0.9 0 0 0 -0.4 1 0 1 1.5 0 1 0 -1.3 0 0 1 0.8 1 0 0 1.3 0 1 1 0.0 0 0 0 0.5 1 0 1 1.9 0 1 0 0.0 0 0 1 -0.4 1 0 0 0.9 0 1 1 -0.2 0 0 0 1.1 1 0 1 0.3 0 1 0 1.0 0 0 1 -1.1 1 0 0 1.0 0 1 1 -0.6 0 0 0 -1.6 1 0 1 -0.3 0 1 0 -0.2 0 0 1 2.4 1 0 0 0.2 0 1 1 -0.6 0 0 0 -3.0 ; run; Proc Reg Data=Hiv_wbcChg; Model HmoChg = Male Stage1 Stage2 / p ; output out=resid residual=r ; Run; proc means data=resid min max median p25 p75; var residual; run; ================================================================================================== * IML Program; /* first column of X matrix is for intercept */ Proc IML; X={1 1 0 1,1 0 1 0,1 0 0 1,1 1 0 0,1 0 1 1,1 0 0 0,1 1 0 1,1 0 1 0,1 0 0 1,1 1 0 0, 1 0 1 1,1 0 0 0,1 1 0 1,1 0 1 0,1 0 0 1,1 1 0 0,1 0 1 1,1 0 0 0,1 1 0 1,1 0 1 0, 1 0 0 1,1 1 0 0,1 0 1 1,1 0 0 0,1 1 0 1,1 0 1 0,1 0 0 1,1 1 0 0,1 0 1 1,1 0 0 0,1 1 0 1,1 0 1 0, 1 0 0 1,1 1 0 0,1 0 1 1,1 0 0 0,1 1 0 1,1 0 1 0,1 0 0 1, 1 1 0 0,1 0 1 1,1 0 0 0,1 1 0 1,1 0 1 0,1 0 0 1,1 1 0 0,1 0 1 1,1 0 0 0 }; Y={ -0.3,0.3,-0.2,-0.1,0.0,-0.4,-0.5,-0.2,0.9, -0.5,0.6,0.6,-0.1,1.0,-0.9,1.0,0.2,-1.0, 0.0,0.3,0.2,-0.7,0.9,-0.4,1.5,-1.3,0.8, 1.3,0.0,0.5,1.9,0.0,-0.4, 0.9,-0.2,1.1,0.3,1.0,-1.1,1.0,-0.6, -1.6,-0.3,-0.2,2.4,0.2,-0.6,-3.0}; nobs = nrow(x); /* sample size */ dfe = nrow(x) - ncol(x); /* error df */ xpxi = inv(t(x)*x); /* inverse of X X */ beta = xpxi*(t(x)*y); /* parameter estimate */ yhat = x*beta; /* predicted values */ res = y-yhat; /* regression residuals */ sse = ssq(res); /* SSE */ cssy = ssq(y-sum(y)/nobs); rsquare=(cssy-sse)/cssy; mse = sse/dfe; /* MSE */ stde = sqrt(vecdiag(xpxi)*mse); /* std err on estimates */ tval = beta/stde; /* parameter t-tests */ pval = 1-probf(tval#tval,1,dfe); /* p-values */ minres= min(res); maxres= max(res); medres=median(res); b=res; res[rank(res),]=b; /* Sort the residuals */ p25res=res[12,1]; /* 48 residuals 12-25th percentile */ p75res=res[36,1]; print minres maxres medres p25res p75res; print nobs dfe sse mse; print , "Parameter Estimates" ,, beta stde tval pval ; quit; run; * SPlus like output; > hmochg <- c(-0.3,0.3,-0.2,-0.1,0.0,-0.4,-0.5,-0.2,0.9,-0.5,0.6,0.6,-0.1,1.0,-0.9,1.0,0.2,-1.0,-0.0,0.3,0.2,-0.7, + 0.9,-0.4,1.5,-1.3,0.8,1.3,0.0,0.5,1.9,0.0,-0.4,0.9,-0.2,1.1,0.3,1.0,-1.1,1.0,-0.6,-1.6,-0.3,-0.2,2.4,0.2,-0.6,-3.0) > stage1 <- c(1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0) > stage2 <- c(0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0) > male <- c(1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0) > print(d.AD <- data.frame(stage1, stage2, male, hmochg)) > glm.D93 <- glm(hmochg ~ stage1 + stage2 + male) > glm(glm.D93) Call: glm(formula = glm.D93) Coefficients: (Intercept) stage1 stage2 male -0.2542 0.5063 0.2313 0.1958 Degrees of Freedom: 47 Total (i.e. Null); 44 Residual Null Deviance: 41.38 Residual Deviance: 38.87 AIC: 136.1 > summary(glm.D93) Call: glm(formula = hmochg ~ stage1 + stage2 + male) Deviance Residuals: Min 1Q Median 3Q Max -2.7458 -0.7464 -0.1458 0.7479 2.4583 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) -0.2542 0.2713 -0.937 0.354 stage1 0.5063 0.3323 1.523 0.135 stage2 0.2313 0.3323 0.696 0.490 male 0.1958 0.2713 0.722 0.474 (Dispersion parameter for gaussian family taken to be 0.8833902) Null deviance: 41.385 on 47 degrees of freedom Residual deviance: 38.869 on 44 degrees of freedom AIC: 136.09 Number of Fisher Scoring iterations: 2