######################################## # Day 1 of S-Plus for Statistical Data # Analysis and Graphics ######################################## library(Hmisc,T) mean(1:10) (1:10)+.5 x <- (1:10)+.5 y <- seq(1,39,by=2) x y 1 + sqrt(9) rep(2.3, 10) y y < 17 plot(rnorm(20)) 37+38.40+17.25 log(1:5) 5>1 || 1>3 y < 17 search() ?cleanup.import hospital <- cleanup.import(hospital) sink('/tmp/contents.txt') contents(hospital) sink() con <- contents(hospital) class(con) print.contents.data.frame page(con, multi=T) page(describe(hospital),multi=T) datadensity(hospital,group=hospital$sex, col=c(1,4)) data.restore('/tmp/pbc.sdd') # opposite of data.dump(..., oldStyle=T) pbc <- cleanup.import(pbc) page(pbc) datadensity(pbc) page(contents(pbc),multi=T) label(pbc$chol) attr(pbc$chol, 'label') page(describe(pbc),multi=T) d <- describe(pbc) page(d) d$age d[c('age','chol')] # 2-variable subset of d hist.data.frame(pbc) ecdf(pbc) ecdf(pbc, group=pbc$drug, col=1:3) # Note: the above command bombed but worked later in the day! show.col() nac <- naclus(pbc) plot(na) naplot(na) na.pattern(pbc) library(rpart) ?rpart f <- rpart(is.na(trig) ~ drug + age + sex, data=pbc) plot(f);text(f) table(pbc$drug) f$cptable summary(f) search() objects(5) args(objects) ?objects objects('data') objects.summary(where=7) search() attach(pbc) search()[1:4] find(pbc) find(age) detach('pbc') # or detach(2) search()[1:5] x1 <- 1:4 sex <- rep(c('male','female'),2) sex subset(x1, sex=='male') x1[sex=='male'] d <- data.frame(x1=x1, x2=(1:4)/10, x3=11:14, sex=sex) d subset(d, sex=='male') subset(d, sex=='male' & x2>.2) subset(d, x1>1, select=-x1) subset(d, select=c(x1,sex)) subset(d, x2<.3, select=x2:sex) subset(d, x2<.3, select=-(x3:sex)) # attach(subset(d, sex=='male' & ..., x1:x3)) subset.data.frame contents(pbc) hospital2 <- upData(hospital, antibiotic=factor(antibiotic,1:2,c('yes','no')), bculture=factor(bculture,1:2,c('yes','no')), service=factor(service,1:2,c('med','surg')), sex=factor(sex,1:2,c('male','female'))) describe(hospital2) # Should have defined labels at same run of # upData hospital2 <- upData(hospital2, labels=c(duration='Duration of Hospital Stay')) hospital <- hospital2 rm(hospital2) # once OK ############################ # 2nd Day ############################ library(Hmisc,T) x <- c(1,2,NA,4) impute(x) impute(x,median) impute(x,'random') impute(x,'random') x <- impute(x) is.imputed(x) attributes(x) n <- 50 reps <- 400 meds <- single(reps) set.seed(171) for(i in 1:reps) { x <- exp(rnorm(n)) meds[i] <- median(x) } var(meds) h <- 1:20 median(h) B <- 500 meds <- single(B) set.seed(113) for(i in 1:B) meds[i]<-median(sample(h,20,T)) quantile(meds, c(.05,.95)) # 90% C.L.s b <- bootstrap(h, median, B=B) limits.emp(b) args(binconf) binconf(3,10,method='all') bpower(.2, odds.ratio=2, n=200) bpower.sim(.2,odds.ratio=2,n=200) args(bpower.sim) bsamsize(.2, plogis(qlogis(.2)+log(2)),power=.569) ?popower ?bpower n <- seq(10, 1000, by=10) OR <- seq(.2,.9,by=.1) pow <- lapply(OR, function(or,n)list(x=n,y=bpower(p1=.1,odds.ratio=or,n=n)), n=n) names(pow) <- format(OR) labcurve(pow, pl=T, xlab='n', ylab='Power') ?spower sc <- Weibull2(c(1,3), c(.95,.7)) sc # Inverse cumulative distribution for case where all subjects are followed # at least a years and then between a and b years the density rises # as (time - a) ^ d is a + (b-a) * u ^ (1/(d+1)) rcens <- function(n) 1 + (5-1) * (runif(n) ^ .5) # To check this, type hist(rcens(10000), nclass=50) # Put it all together f <- Quantile2(sc, hratio=function(x)ifelse(x<=.75, 1, .75), dropin=function(x)ifelse(x<=.5, 0, .15*(x-.5)/(5-.5)), dropout=function(x).3*x/5) par(mfrow=c(2,2)) plot(f, 'all', label.curves=list(keys='lines')) rcontrol <- function(n) f(n, 'control') rinterv <- function(n) f(n, 'intervention') set.seed(211) spower(rcontrol, rinterv, rcens, nc=350, ni=350, test=logrank, nsim=300) names(pbc) s <- spearman2(bili ~ albumin + stage + drug, data=pbc) s page(s) class(s) plot(s) # invokes plot.spearman2.formula plot.default(s) summary(pbc) page(contents(pbc),multi=T) s <- summary(bili ~ drug + sex + age + chol, data=pbc) class(s) args(print.summary.formula.response) options(digits=3) s ?options g <- function(y) c(Mean=mean(y),Median=median(y)) s <- summary(bili ~ drug + sex + age + chol, data=pbc, fun=g) page(s) par(mfrow=c(1,1)) plot(s, which=1:2, pch=1:2) s <- summary(bili ~ drug + sex + age + chol, data=pbc, fun=function(y)c(smean.sd(y),smedian.hilow(y))) page(s) s <- summary(bili ~ drug+sex, method='cross',data=pbc) page(s) s <- summary(drug ~ bili + sex + age + chol, method='reverse', data=pbc) options(width=100) page(s) plot(s) s <- summary(bili ~ drug, fun=median, data=pbc) attributes(s) plot(s) s attr(s,'funlab') # To fully control labeling: s <- summary(bili ~ drug, data=pbc, fun=function(y)c(' Mean\nBilirubin'=mean(y))) s plot(s) title(sub='Circle: mean Bilirubin',adj=1) ?summary.formula show.pch() g <- function(y)c(Mean=mean(y),Median=median(y)) s <- summary(wbc ~ age + sex + service,data=hospital, fun=g) s plot(s,which=1:2,pch=c(1,4)) title(sub='Circle: mean; X: median',adj=1) args(plot.summary.formula.response) rm(plot.summary.formula.response) fix(dotchart2) objects(1) rm(sex) attach(hospital) # summarize is for cross-classification ONLY s <- summarize(temp, llist(Age=cut2(age,g=4), Sex=sex), g) s # Note: columns labeled "temp" (mean) and "Median" # 4(c) summarize(temp, llist(Age=cut2(age,g=4), sex), smedian.hilow) summarize(temp, llist(Age=cut2(age,g=4),sex), quantile, probs=c(.5,.025,.975)) # 3(f) summary(cbind(wbc,temp) ~ age+sex,data=hospital) ###################################### # 3rd Day ###################################### library(Hmisc,T) ?event.chart show.col() ?par cdcaids <- data.frame( age=c(4,2,1,1,2,2,2,4,2,1,1,3,2,1,3,2,1,2,4,2,2,1,4,2,4,1,4,2,1,1,3,3,1,3), infedate=c( 7274,7727,7949,8037,7765,8096,8186,7520,8522,8609,8524,8213,8455,8739, 8034,8646,8886,8549,8068,8682,8612,9007,8461,8888,8096,9192,9107,9001, 9344,9155,8800,8519,9282,8673), diagdate=c( 8100,8158,8251,8343,8463,8489,8554,8644,8713,8733,8854,8855,8863,8983, 9035,9037,9132,9164,9186,9221,9224,9252,9274,9404,9405,9433,9434,9470, 9470,9472,9489,9500,9585,9649), diffdate=c( 826,431,302,306,698,393,368,1124,191,124,330,642,408,244,1001,391,246, 615,1118,539,612,245,813,516,1309,241,327,469,126,317,689,981,303,976), dethdate=c( 8434,8304,NA,8414,8715,NA,8667,9142,8731,8750,8963,9120,9005,9028,9445, 9180,9189,9406,9711,9453,9465,9289,9640,9608,10010,9488,9523,9633,9667, 9547,9755,NA,9686,10084), censdate=c( NA,NA,8321,NA,NA,8519,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA, NA,NA,NA,NA,NA,NA,NA,NA,NA,10095,NA,NA)) cdcaids <- upData(cdcaids, labels=c(age ='Age, y', infedate='Date of blood transfusion', diagdate='Date of AIDS diagnosis', diffdate='Incubation period (days from HIV to AIDS)', dethdate='Fictitious date of death', censdate='Fictitious censoring date')) # Note that the style options listed with these # examples are best suited for output to a postscript file (i.e., using # the postscript function with horizontal=T) as opposed to a graphical # window (e.g., motif). # To produce simple calendar event chart (with internal legend): # postscript('example1.ps', horizontal=T) event.chart(cdcaids, subset.c=c('infedate','diagdate','dethdate','censdate'), x.lab = 'observation dates', y.lab='patients (sorted by AIDS diagnosis date)', titl='AIDS data calendar event chart 1', point.pch=c(1,2,15,0), point.cex=c(1,1,0.8,0.8), legend.plot=T, legend.location='i', legend.cex=1.0, legend.point.text=c('transfusion','AIDS diagnosis','death','censored'), legend.point.at = list(c(7210, 8100), c(35, 27)), legend.bty='o') attach(pbc) page(contents(pbc),multi=T) plot(age, sgot) scat1d(age) scat1d(sgot, side=4) args(scat1d) hist(sgot, nclass=15, prob=T, xlim=c(0,500)) lines(density(sgot,na.rm=T), lwd=6, col=4) histSpike(sgot) plot(age, sgot) histSpike(sgot, side=4, add=T) ?histSpike ecdf(sgot, q=(1:3)/4) abline(v=mean(sgot,na.rm=T), col=4) ecdf(sgot, group=drug, col=1:3) ecdf(sgot, group=drug, col=1:2, label.curves=list(keys=1:2)) ecdf(sgot, group=drug, col=1:2, label.curves=list(keys='lines')) ?ecdf ecdf(pbc, group=drug, col=1:3, label.curves=F) set.seed(13) x <- rnorm(1000) g <- sample(1:6, 1000, replace=T) x[g==1][1:20] <- rnorm(20)+3 # contaminate 20 x's for group 1 # default trellis box plot bwplot(g ~ x) # box-percentile plot with data density (rug plot) bwplot(g ~ x, panel=panel.bpplot, probs=seq(.01,.49,by=.01), datadensity=T) # add ,scat1d.opts=list(tfrac=1) to make all tick marks the same size # when a group has > 125 observations # small dot for means, show only .05,.125,.25,.375,.625,.75,.875,.95 quantiles bwplot(g ~ x, panel=panel.bpplot, cex=.3) # suppress means and reference lines for lower and upper quartiles bwplot(g ~ x, panel=panel.bpplot, probs=c(.025,.1,.25), means=F, qref=F) # continuous plot up until quartiles ("Tootsie Roll plot") bwplot(g ~ x, panel=panel.bpplot, probs=seq(.01,.25,by=.01)) # start at quartiles then make it continuous ("coffin plot") bwplot(g ~ x, panel=panel.bpplot, probs=seq(.25,.49,by=.01)) # same as previous but add a spike to give 0.95 interval bwplot(g ~ x, panel=panel.bpplot, probs=c(.025,seq(.25,.49,by=.01))) # decile plot with reference lines at outer quintiles and median bwplot(g ~ x, panel=panel.bpplot, probs=c(.1,.2,.3,.4), qref=c(.5,.2,.8)) # default plot with tick marks showing all observations outside the outer # box (.05 and .95 quantiles), with very small ticks bwplot(g ~ x, panel=panel.bpplot, nout=.05, scat1d.opts=list(frac=.01)) # show 5 smallest and 5 largest observations bwplot(g ~ x, panel=panel.bpplot, nout=5) # Use a scat1d option (preserve=T) to ensure that the right peak extends # to the same position as the extreme scat1d bwplot(~x , panel=panel.bpplot, probs=seq(.00,.5,by=.001), datadensity=T, scat1d.opt=list(preserve=T)) bwplot(cut2(age,g=10) ~ sgot, panel=panel.bpplot) locator() ?symbols select <- c("Atlanta", "Atlantic City", "Bismarck", "Boise", "Dallas", "Denver", "Lincoln", "Los Angeles", "Miami", "Milwaukee", "New York", "Seattle") city.x <- city.x # need local copies for S+ 6 city.y <- city.y city.name <- city.name names(city.x) <- city.name names(city.y) <- city.name names(city.name) <- city.name pop <- c(426, 60, 28, 34, 904, 494, 129, 2967, 347, 741, 7072, 557) usa() # plot map of US symbols(city.x[select], city.y[select], circles = sqrt(pop), add = T) size <- 1 text(city.x[select], city.y[select], city.name[select], cex = size) murder <- state.x77[,"Murder"] west <- state.center$x < -95 therm <- cbind(.4, 1, murder[west]/max(murder[west])) usa(xlim = c(94, 135), ylim = c(25, 52)) symbols(state.center$x[west], state.center$y[west], thermometers = therm, inches = .5, add = T) title(main = "Murder Rates in the Western U.S.") x <- 1:10 y <- runif(10) z <- runif(10) symbols(x, y, circles=z, inches=.2) plot(state.x91[,'Income'], state.x91[,'Murder']) identify(state.x91[,c('Income','Murder')], labels=dimnames(state.x91)[[1]]) brush(state.x91) x <- 1:10 y <- x^2 ys <- seq(0,100,by=10) setps(test, trellis=T) # or setpdf xyplot(sqrt(y) ~ x, type='l', ylab='y', ylim=sqrt(c(0,100)), scales=list(y=list( at=sqrt(ys), labels=format(ys)))) dev.off() ?trellis.args ?setpdf library(Hmisc,T) fix(store) store() search() data.restore('/tmp/diabetes.sdd') diabetes <- cleanup.import(diabetes) plot(varclus(~.,data=diabetes)) bwplot(cut2(age,g=5) ~ glyhb | gender*frame, panel=panel.bpplot, data=diabetes, ylab='age', xlab=label(diabetes$glyhb)) # Add panel sample sizes to panel strip labels attach(diabetes) gf <- interaction(gender,frame,sep=' ') tab <- table(gf) levels(gf) <- paste(levels(gf), ' n=', format(tab),sep='') table(gf) bwplot(cut2(age,g=5) ~ glyhb | gf, panel=panel.bpplot, layout=c(2,3), strip=function(...)strip.default(..., style=1)) # Assignment 8 Problem 8 s <- summarize(glyhb, llist(age=cut2(age,g=5), gender, frame), smean.cl.boot) page(s) names(s) # Make error bars stand out by making them thicker # than the default. Need to do this with # trellis device inactive dev.off() w <- trellis.par.get('plot.line') w$lwd <- 4 trellis.par.set('plot.line',w) Dotplot(age ~ Cbind(glyhb,Lower,Upper) | gender*frame, data=s) search() detach(2) ?xYplot set.seed(111) dfr <- expand.grid(month=1:12, year=c(1997,1998), reps=1:100) attach(dfr) y <- abs(month-6.5) + 2*runif(length(month)) + year-1997 s <- summarize(y, llist(month,year), smedian.hilow, conf.int=.5) xYplot(Cbind(y,Lower,Upper) ~ month, groups=year, data=s, keys='lines', method='filled bands', type='b') args(smedian.hilow) Key() detach(2)