library(Hmisc,T) .First <- function() { library(Hmisc,T) invisible() } # hospital$sex <- factor(hospital$sex, 1:2, c('male','female')) hospital <- cleanup.import(hospital) sapply(hospital,storage.mode) # source('/tmp/upData.ssc') hospital2 <- upData(hospital, sex = factor(sex, 1:2, c('male','female')), antibiotic = factor(antibiotic, 1:2, c('yes','no')), bculture = factor(bculture, 1:2, c('yes','no')), service = factor(service, 1:2, c('med','surg')), labels= list(duration ='Duration of hospital stay', temp ='First temperature', wbc ='White Blood Count(x100)', antibiotic='Received antibiotic', bculture ='Received bacterial culture')) hosp3 <- upData(hospital, levels=list(sex=list(male=1,female=2), antibiotic=list(yes=1,no=2), bculture=list(yes=1,no=2), service=list(med=1,surg=2)), labels= list(duration ='Duration of hospital stay', temp ='First temperature', wbc ='White Blood Count(x100)', antibiotic='Received antibiotic', bculture ='Received bacterial culture')) all.equal(hospital2,hosp3) yn <- list(yes=1,no=2) # list('yes'=1,'no'=2) or list(yes='si',...) hosp4 <- upData(hospital, levels=list(sex=list(male=1,female=2), antibiotic=yn, bculture=yn, service=list(med=1,surg=2)), labels= list(duration ='Duration of hospital stay', temp ='First temperature', wbc ='White Blood Count(x100)', antibiotic='Received antibiotic', bculture ='Received bacterial culture')) all.equal(hosp4, hosp3) sapply(hospital2, label) options(width=100) # if problem wrapping page(describe(hospital2),multi=T) table(hospital2$temp) csum <- cumsum(table(hospital2$temp)) csum <- 100*csum/csum[length(csum)] # or /max(csum) csum hospital <- hospital2 rm(hospital2,hosp3,hosp4) datadensity(hospital) hist.data.frame(hospital) ecdf(hospital, datadensity='rug') ecdf(hospital, group=hospital$sex, col=1:2) ecdf(~temp|sex, data=hospital, fun=qnorm) ecdf(~temp, groups=sex, data=hospital, fun=qnorm) ?ecdf a <- 1:2 b <- c('A','B') list(a,b) llist(a,b) d <- expand.grid(age=c(10,20,30),sex=c('f','m')) d$x <- rnorm(nrow(d)) d d$y <- d$age + .5*(d$sex=='f') d search() age <- c(1,2,NA,4) age.i <- impute(age) age.i attributes(age.i) is.imputed(age.i) mean(age.i[!is.imputed(age.i)]) describe(age.i) age.reconstruct <- ifelse(is.imputed(age.i),NA,age.i) age.reconstruct x <- 1:50 set.seed(1) smean.cl.boot(x) args(smean.cl.boot) smean.cl.boot binconf(1, 20, .05) bpower(.2, .3, n=1000) bpower.sim(.2, .3, n=1000) bsamsize(.2, .3, power=.9557) ballocation(.2, .3) ?bpower n <- seq(10, 1000, by=10) OR <- seq(.2,.9,by=.1) pow <- lapply(OR, function(or,n)list(x=n,y=bpower(p1=.1,odds.ratio=or,n=n)), n=n) names(pow) <- format(OR) labcurve(pow, pl=T, xlab='n', ylab='Power') # Example of numerical integration x <- seq(0,1,length=1000) y <- x^3 delta <- x[2]-x[1] delta area <- sum(y)*delta area # area between x=.2 and .3 sum(y[x >= .2 & x < .3])*delta # Example of solving for x in f(x) = constant approx(y, x, xout=1/8)$y # Use root finder uniroot(function(x)x^3 - 1/8, c(0,1))$root names(hospital) s <- spearman2(wbc ~ sex + age + temp + service, data=hospital) s plot(s) s2 <- spearman2(wbc ~ sex + age + temp + service, p=2, data=hospital) s2 plot(s2) # Assignment 4 options(digits=3) # 3. s <- summary(wbc ~ age + sex + antibiotic + bculture + service, data=hospital, g=3) options(width=90) s sb <- update(s, fun=median) sb plot(s) # Confidence intervals for pop. mean using nonparametric bootstrap scl <- update(s, fun=smean.cl.boot) scl show.pch() plot(scl, which=1:3, pch=c(16,91,93)) #, lines=F) #latex(scl) if latex installed, instant previewing #html(latex(scl)) when new Hmisc comes out w <- latex(scl, fi='/tmp/scl.tex') unclass(w) args(latex.summary.formula.response) args(dotchart2) s <- summary(wbc ~ age + sex + antibiotic + bculture + service, data=hospital, g=3, fun=function(y)c(Mean=mean(y),Median=median(y))) s plot(s, pch=1:2, which=1:2, xlab='WBC', main='') # 3f summary(cbind(wbc,temp) ~ age + sex + antibiotic + bculture + service, data=hospital, g=3) # 3f extended to compute mean+median s <- summary(cbind(wbc,temp) ~ age + sex + antibiotic + bculture + service, data=hospital, g=3, fun=function(y)c('Mean WBC'=mean(y[,1]), 'Median WBC'=median(y[,1]), 'Mean Temp'=mean(y[,2]), 'Median Temp'=median(y[,2]))) s par(mfrow=c(1,2)) plot(s, which=1:2, pch=1:2, xlab='WBC', main='') plot(s, which=3:4, pch=1:2, xlab='Temperature',main='') par(mfrow=c(1,1)) #4a s <- summary(temp ~ age + sex, data=hospital, method='cross', fun=median) print(s, min.colwidth=6) #args(print.summary.formula.cross) #4b attach(hospital) find(age) # found a short temporary vector! age rm(age) find(age) summarize(temp, llist(Age=cut2(age,g=4),sex), median) #4c summarize(temp, llist(Age=cut2(age,g=4),sex), quantile, probs=c(.5,.25,.75)) # Variable names not too good summarize(temp, llist(Age=cut2(age,g=4),sex), smedian.hilow, conf.int=.5) # Take total control summarize(temp, llist(Age=cut2(age,g=4),sex), function(y) { z <- quantile(y, (1:3)/4) names(z) <- NULL c(Median=z[2],Q1=z[1],Q3=z[3]) } ) #4d summarize(temp, llist(Age=cut2(age,g=4),sex), smean.sd) #5 s <- summary(service ~ ., data=hospital[,-1], method='reverse') s plot(s) ?summary.formula plot(s, which='cat') # Creates Key function for auto legend Key() Key(locator(1), lev=c('Medical','Surgical')) # Assignment 6 Problem 4 set.seed(11) x <- rnorm(500, 20, 5) x[1:5] <- runif(5, 50, 60) ecdf(~x, q=(1:3)/4, datadensity='hist') # guiModify(....) to change details #guiModify( "GraphSheet", Name = "GSD2", GraphSheetColor = "Transparent") densityplot(~x) #Assignment 7 data.restore('/tmp/pbc.sdd') # or File ... Import ... From File page(describe(pbc),multi=T) datadensity(pbc) na <- naclus(pbc) plot(na) naplot(na) na.pattern(pbc) # builtin function # Find out WHO is missing cholesterol. # Download rpart.zip from lib.stat.cmu.edu/DOS/S library(rpart) f <- rpart(is.na(chol) ~ drug + age, data=pbc) plot(f); text(f) title('Patterns of NAs for Chol.') title(sub='Used drug and age only', adj=0, cex=.6) pstamp() # Attach randomized subset attach(pbc[pbc$drug != 'not randomized',]) search()[1:2] find(age) # 2b bwplot(drug ~ chol) bwplot(drug ~ chol, panel=panel.bpplot) ?panel.bpplot bwplot(drug ~ chol|edema, panel=panel.bpplot) # 3 ecdf(~ chol|edema, groups=drug, label.curve=F) Key() histbackback(chol, drug, seq(min(chol,na.rm=T),max(chol,na.rm=T),length=15)) # looks wrong # 4 ecdf(~ sgot | edema*drug, q=.5, datadensity='hist') ecdf(~ sgot, fun=qnorm) qqmath(~ sgot | edema) # qqmath is builtin to Trellis # Assignment 8 Problem 5 data.restore('/tmp/olympics.1996.sdd') detach(2); detach(2) attach(olympics.1996) plot(population, medals,log='x') plsmo(log(population), medals, datadensity=T) plot(population, medals/population, log='x') plsmo(log(population), medals/population, datadensity=T) detach(2) # Problem 1 data.restore('/tmp/diabetes.sdd') datadensity(diabetes) ecdf(diabetes, group=diabetes$gender, label.curve=F, col=1:2) v <- varclus(~., data=diabetes) plot(v) attach(diabetes) s <- summarize(glyhb, llist(frame,gender), median, na.rm=T) Dotplot(frame ~ glyhb | gender, data=s) Dotplot(frame ~ glyhb, groups=gender, data=s); Key() detach(2) # Assignment 10 # Click on .ssc file for automatic opening of script window dput(jcetable) Dotplot(event ~y | method*what, groups=sex, data=jcetable, pch=1:2) # Best if major interest is female:male Key(-.54, 1.06) Dotplot(event ~y | method*what*sex, data=jcetable) # 2 pages Dotplot(event ~y | sex*what, groups=method, data=jcetable, pch=1:2) # Best if major interest is mail:telephone # Now sort levels by percentages if ignore method,what,sex -sort(-tapply(jcetable$y, jcetable$event, mean)) event2 <- reorder.factor(jcetable$event, jcetable$y, mean) levels(event2) Dotplot(event2 ~ y | sex*what, groups=method, data=jcetable, pch=1:2) table(abbreviate(jcetable$event), abbreviate(event2)) # Assignment 9 Problem 3 # Function to classify a single variable as numeric continuous is.cont <- function(x) is.numeric(x) && length(unique(x))>=10 ic <- which(sapply(diabetes,is.cont)) # which is a builtin function that translates a series of T/F # to integers corresponding to the TRUEs ic # Scatterplot matrix only of continuous variables pairs(diabetes[ic]) # Divide into 2 pages pairs(diabetes[ic[1:8]]) pairs(diabetes[ic[9:length(ic)]]) # More concise: show SIMILARITIES of variables, not raw data plot(varclus(~., data=diabetes)) # Does pairwise deletion of NAs in figuring correlation coefficients # Make attempt at plotting pairs of variables that # are highly correlated par(mfrow=c(3,3)) nam <- names(diabetes) for(i in 1:(length(nam)-1)) { for(j in (i+1):length(nam)) { x <- diabetes[,i] y <- diabetes[,j] if(is.factor(x) || is.factor(y)) next # punt rho2 <- spearman2(x, y)['rho2'] if(rho2 > .2) { plot(x, y, xlab=nam[i], ylab=nam[j]) title(paste('rho^2=',format(rho2))) }}} # Assignment 9 Prob. 5 s <- summary(glyhb ~ age + frame + gender + chol + weight, fun=quantile, data=diabetes) options(width=90, digits=4) s class(s) methods(class=class(s)) par(mfrow=c(1,1)) plot(s, which=2:4, pch=c(91,16,93), xlim=c(4,8), xlab='Hemoglobin H1b', main='') locator(1)$x # check one of the plotted x-coordinates # Prob. 6 s <- summary(gender ~ glyhb + age + frame + chol + weight, method='reverse', data=diabetes) s plot(s) # Prob. 7 plsmo(age, glyhb, group=gender, datadensity=T) # Means and nonparametric (bootstrap) CLs for population mean glyhb # stratified by quartile of cholesterol and gender ?summarize s <- summarize(glyhb, llist(gender,chol=cut2(chol,g=4)), smean.cl.boot) s ?Dotplot Dotplot(chol ~ Cbind(glyhb,Lower,Upper) | gender, data=s) # Error bars on an xyplot dfr <- expand.grid(month=1:12, continent=c('Europe','USA'), sex=c('female','male')) attach(dfr) set.seed(13) y <- month/10 + 1*(sex=='female') + 2*(continent=='Europe') + runif(48,-.15,.15) lower <- y - runif(48,.05,.15) upper <- y + runif(48,.05,.15) xYplot(Cbind(y,lower,upper) ~ month,subset=sex=='male'&continent=='USA') # add ,label.curves=F to suppress use of labcurve to label curves where farthest apart xYplot(Cbind(y,lower,upper) ~ month|continent,subset=sex=='male') xYplot(Cbind(y,lower,upper) ~ month|continent,groups=sex); Key() xYplot(Cbind(y,lower,upper) ~ month,groups=sex,subset=continent=='Europe') xYplot(Cbind(y,lower,upper) ~ month,groups=sex,subset=continent=='Europe',keys='lines') # keys='lines' causes labcurve to draw a legend where the panel is most empty xYplot(Cbind(y,lower,upper) ~ month,groups=sex,subset=continent=='Europe',method='bands') xYplot(Cbind(y,lower,upper) ~ month,groups=sex,subset=continent=='Europe',method='upper') label(y) <- 'Quality of Life Score' # label is in Hmisc library = attr(y,'label') <- 'Quality...'; will be y-axis label # can also specify Cbind('Quality of Life Score'=y,lower,upper) xYplot(Cbind(y,lower,upper) ~ month, groups=sex, subset=continent=='Europe', method='alt bars', offset=.4) # offset passed to labcurve to label .4 y units away from curve detach(2) xYplot(glyhb ~ chol|gender, method='quantile') # Generate data for 12 months, 2 treatments, 100 obs. per combo d <- expand.grid(month=1:12, treat=c('a','b'), rep=1:100) d$response <- rnorm(nrow(d))+(d$treat=='b') # treatment effect=1 # For each combo get the mean +- 2 s.d. attach(d) s <- summarize(response, llist(month,treat), smean.sdl) xYplot(Cbind(response,Lower,Upper)~month|treat, data=s)