R Code Used in Analyzing German Breast Cancer Data

library(Design)
Load(brca)
latex(describe(brca))
datadensity(brca)
pred <- ~chemo+hormon+x1+x2+x3+x4+x5+x6+x7
dd <- datadist(brca); options(datadist='dd')
attach(brca)
plot(varclus(pred))
par(mfrow=c(3,3))
w <- transcan(pred, data=brca)
par(mfrow=c(1,1))
summary(w)
library(lattice)
bwplot(x2 ~ x1, panel=panel.bpplot, xlab=label(x1))
stripplot(x2 ~ x1)
xYplot(sqrt(x6) ~ sqrt(x7))
rectime <- rectime/365.25
units(rectime) <- 'Year'
S <- Surv(rectime, censrec=='recurrence')

g <- function(x) round(rcorr.cens(x, S)['Dxy'],3)
xs <- paste('x', 1:7, sep='')
dxy <- sapply(xs, function(z) g(get(z))); names(dxy) <- xs
dxy
dotchart2(-sort(-dxy), xlab=expression(D[xy]), reset.par=TRUE)

a <- aregImpute(~x1+x2+x3+x4+x5+x6+x7+rectime*censrec, n.impute=5,x=TRUE)
tevent <- rectime*(censrec=='recurrence')
a <- aregImpute(~x1+x2+x3+x4+x5+x6+x7+rectime + censrec + tevent, n.impute=5,x=TRUE)

d <- brca; d$tevent <- tevent
f <- fit.mult.impute(S ~ chemo+hormon+rcs(x1,5)+x2+rcs(x3,5)+x4+rcs(x5,5)+rcs(x6,5)+rcs(x7,5),
         cph, a, tol=1e-13, data=d)
plot(anova(f))

# Compare with dataset used in JRSSA article which excluded cases with NAs
Load(breastcancer)
g <- cph(Surv(rectime, censrec=='recurrence') ~ hormon+rcs(x1,5)+x2+rcs(x3,5)+x4+rcs(x5,5)+rcs(x6,5)+rcs(x7,5),
    tol=1e-13, data=breastcancer)
par(mfrow=c(2,1)); 
plot(anova(f), main='Multiply Imputed')
plot(anova(g), main='Casewise Deletion (published)')
par(mfrow=c(1,1))

f <- cph(S ~ rcs(x1,5) + x2 + x3 + x4 + rcs(x5,5) + rcs(x6,4) +
         x7 + chemo + hormon, tol=1e-13)
f$stats
latex(anova(f))
f <- cph(S ~ pol(x1,2) + x2 + x3 + x4 + sqrt(x5) + sqrt(x6) + sqrt(x7)
         + hormon, x=TRUE, y=TRUE)
zph <- cox.zph(f, transform='identity')
par(mfrow=c(3,4))
plot(zph, resid=FALSE)
par(mfrow=c(1,1))
fastbw(f)
f <- cph(S ~ pol(x1,2) + sqrt(x5) + sqrt(x6) + hormon, x=TRUE, y=TRUE)
cox.zph(f)

f <- cph(S ~ rcs(x1,5) + x2 + x3 + x4 + rcs(sqrt(x5),5) +
         rcs(sqrt(x6),4) + sqrt(x7) + hormon + chemo,  surv=TRUE, time.inc=3,
         x=TRUE, y=TRUE)
v <- validate(f, B=100, dxy=TRUE)
options(digits=3)
v
pred3 <- survest(f, linear.predictors=f$linear.predictors, times=3)$surv
cal <- calibrate(f, B=40, u=3,
                 cuts=quantile(pred3, (1:5)/6))
plot(cal)

f <- fit.mult.impute(S ~ rcs(x1,5) + x2 + x3 + x4 + rcs(sqrt(x5),5) +
         rcs(sqrt(x6),4) + sqrt(x7) + hormon + chemo,  cph, a, data=d, surv=TRUE)

f
anova(f)
plot(anova(f))
par(mfrow=c(3,3))
plot(f, ref.zero=TRUE, ylim=c(-1.5,1.5))
par(mfrow=c(1,1))
plot(f, x1=NA)
plot(summary(f, vnames='labels'), log=TRUE)
latex(f)

surv <- Survival(f)
quant <- Quantile(f)
quantile(rectime[censrec=='censored'])
mn <- Mean(f, tmax=5)
surv2 <- function(x) surv(times=2, lp=x)
surv5 <- function(x) surv(times=5, lp=x)
med <- function(x) quant(lp=x)
nomogram(f, fun=list('2y Survival'=surv2, '5y Survival'=surv5,
              'Median Lifelength'=med, 'Mean Restricted Life'=mn))
Topic revision: r1 - 09 Jul 2006, FrankHarrell
 

This site is powered by FoswikiCopyright © 2013-2017 by the contributing authors. All material on this collaboration platform is the property of the contributing authors.
Ideas, requests, problems regarding Vanderbilt Biostatistics Wiki? Send feedback