library(Design)

x <- csv.get("data.csv", header=TRUE)
# Data set is the one used in Puzzler 1
#    --> data was simulated such that there is a nonlinear
#    interaction between sex and age

# MODEL TREATING ALL CONTINUOUS VARIABLES CONTINUOUS
ff <- lrm(dz ~ sex*rcs(age, 4) + rcs(sysbp, 4), data=x)
anova(ff) # Total D.F. = 10
# sex*age interaction is significant (p < 0.05)

# CATEGORIZE THE CONTINUOUS VARIABLES
x <- upData(x,
   agecut = cut(age,
      breaks=quantile(subset(age, dz==0), 
         probs=c(0, 0.25, 0.50, 0.75, 1), na.rm=TRUE)),
   sysbpcut = cut(sysbp,
      breaks=quantile(subset(sysbp, dz==0), 
         probs=c(0, 0.25, 0.50, 0.75, 1), na.rm=TRUE))
)

# NOTE: agecut, sysbpcut, are factors ==> need them as factors

# MODEL TREATING ALL CONTINUOUS VARIABLES AS CATEGORICAL
fcut <- lrm(dz ~ sex*agecut + sysbpcut, data=x)
anova(fcut) # TOTAL D.F. = 10
# sex*age interaction no longer significant (p > 0.05)

# PLOT THE PREDICTED EFFECT OF AGE ACROSS SEX IN EACH MODEL
dd <- datadist(x)
options (datadist = 'dd')
par(mfrow=c(1,2))
plot(ff, age=NA, sex=NA, conf.int=FALSE)
plot(fcut, age=NA, sex=NA, conf.int=FALSE)

# VIEW WHAT'S ACTUALLY BEING PLOTTED
print.plot.Design(plot(ff, age=NA, sex=NA, conf.int=FALSE))
# Adjusted to medians: sysbp = 120
print.plot.Design(plot(fcut, age=NA, sex=NA, conf.int=FALSE))
# Adjusted to most frequent level: sysbpcut=(117, 128]

# MAKE SURE BOTH MODELS ARE ADJUSTED TO THE SAME THINGS
# --> median and percentile that spans the median
# i.e., if fcut wasn't adjusted to the quantile that spanned 120,
#   would use: 
# par(mfrow=c(1,2))
# plot(ff, age=NA, sex=NA, conf.int=FALSE)
# plot(fcut, age=NA, sex=NA, conf.int=FALSE,
#    sysbpcut="(117,128]")

# PUT THE TWO PLOTS TOGETHER
plff <- plot(ff, age=NA, sex=NA, conf.int=FALSE)
plfcut <- plot(fcut, age=NA, sex=NA, conf.int=FALSE)

par (mfrow =c(1,1), mar=c(5,5,4,2)+0.1, lwd=2)
# First plot the spline regression results: age across sex
plot(ff, age=NA, sex=NA, conf.int=FALSE,
#   ylim = range(c(range(plff$x.xbeta[["log odds"]]), 
#      range(plfcut$x.xbeta[["log odds"]]))),
   ylim = c(-2, 2),
   label.curves=FALSE, lty=1, col=c("gray", "black"))
# Now plot the categorical regression results: age across sex
Fstep <- stepfun(x=with(x, quantile(subset(age, dz==0),
   prob=c(0.25, 0.50, 0.75), na.rm=TRUE)), 
   y = subset(plfcut$x.xbeta, sex=="female")$"log odds")
par(new=TRUE, col="gray", lty=2)
lines(Fstep, xlim=c(0, 150))
Mstep <- stepfun(x=with(x, quantile(subset(age, dz==0),
   prob=c(0.25, 0.50, 0.75), na.rm=TRUE)), 
   y = subset(plfcut$x.xbeta, sex=="male")$"log odds")
par(new=TRUE, col="black", lty=2)
lines(Mstep, xlim=c(0, 150))
par(new=TRUE, col="black", lty=1, lwd=1)
legend(x=40, y=2,
#   y=max(c(range(plff$x.xbeta[["log odds"]]), 
#      range(plfcut$x.xbeta[["log odds"]]))), 
   xjust=0, yjust=1,
   legend = c("Female - Spline Regression", 
      "Male - Spline Regression",
      "Female - Categorical Regression", 
      "Male - Categorical Regression"),
   lty=rep(1:2, each=2), col = rep(c("gray", "black"), 2), lwd=2)
Edit | Attach | Print version | History: r2 < r1 | Backlinks | View wiki text | Edit WikiText | More topic actions...
Topic revision: r1 - 08 Nov 2006, TheresaScott
 

This site is powered by FoswikiCopyright © 2013-2017 by the contributing authors. All material on this collaboration platform is the property of the contributing authors.
Ideas, requests, problems regarding Vanderbilt Biostatistics Wiki? Send feedback