|
|
|
||||
|
|
|||||
S-Plus Textbook Examples
Applied Survival Analysis
Chapter 5: Model
Development
Sections noted by R indicate differences between R and SPLUS.
SPLUS program
R program
To input the uis.csv data set in R use the following code which must be executed before running the R program:
>uis <- read.csv("d:/uis.csv", header=T)
Here is the the SPLUS data set uis.
Table 5.1, p. 166.
Creating the dummy variables for the variable hercoc.
uis$hercoc1 <- (uis$hercoc==1) uis$hercoc2 <- (uis$hercoc==2) uis$hercoc3 <- (uis$hercoc==3) uis$hercoc4 <- (uis$hercoc==4)
The Log-Rank test and the Likelihood Ratio test for hercoc.
hercoc.ph <- coxph( Surv(time, censor) ~ hercoc2+hercoc3+hercoc4, uis, method="breslow", na.action=na.exclude) summary(hercoc.ph) <output omitted> n=610 (18 observations deleted due to missing values) Likelihood ratio test= 7.76 on 3 df, p=0.0513 Wald test = 7.88 on 3 df, p=0.0486 Score (logrank) test = 7.92 on 3 df, p=0.0476
The median time to drug use for each level of hercoc and the 95% confidence intervals.
hercoc.surv <- survfit( Surv(time, censor) ~ hercoc, uis, na.action=na.exclude)
hercoc.surv
18 observations deleted due to missing values
n events mean se(mean) median 0.95LCL 0.95UCL
hercoc=1 111 92 238 22.4 150 107 198
hercoc=2 114 100 272 33.2 146 110 184
hercoc=3 178 136 316 21.8 183 148 231
hercoc=4 207 165 274 18.7 181 155 220
Creating the dummy variables for ivhx.
uis$ivhx1 <- 1*(uis$ivhx==1) uis$ivhx2 <- 1*(uis$ivhx==2) uis$ivhx3 <- 1*(uis$ivhx==3)
The Log-Rank test and the Likelihood Ratio test for ivhx.
ivhx.ph<-coxph( Surv(time, censor) ~ ivhx2+ivhx3, uis, method="breslow", na.action=na.exclude) summary(ivhx.ph) <output omitted> Likelihood ratio test= 14.6 on 2 df, p=0.000663 Wald test = 14.5 on 2 df, p=0.000706 Score (logrank) test = 14.7 on 2 df, p=0.000654
The median time to drug use for each level of ivhx and 95% confidence interval.
ivhx.surv <-survfit( Surv(time, censor) ~ ivhx, uis, na.action=na.exclude)
ivhx.surv
n events mean se(mean) median 0.95LCL 0.95UCL
ivhx=1 233 173 407 32.8 194 175 231
ivhx=2 115 93 273 24.2 170 130 231
ivhx=3 262 227 224 13.6 148 115 168
The Log-Rank test and the Likelihood Ratio test for race.
race.ph<-coxph( Surv(time, censor) ~ race, uis, method="breslow", na.action=na.exclude) summary(race.ph) <output omitted> Likelihood ratio test= 7.57 on 1 df, p=0.00593 Wald test = 7.2 on 1 df, p=0.00727 Score (logrank) test = 7.26 on 1 df, p=0.00706
The median time to drug use for each level of race and 95% confidence interval.
race.surv<-survfit( Surv(time, censor) ~ race, uis, na.action=na.exclude)
race.surv
6 observations deleted due to missing values
n events mean se(mean) median 0.95LCL 0.95UCL
race=0 467 388 318 18.6 152 126 175
race=1 155 116 321 24.0 193 168 239
The Log-Rank test and the Likelihood Ratio test for treat.
treat.ph<-coxph( Surv(time, censor) ~ treat, uis, method="breslow", na.action=na.exclude) summary(treat.ph) <output omitted> Likelihood ratio test= 6.75 on 1 df, p=0.00937 Wald test = 6.74 on 1 df, p=0.00941 Score (logrank) test = 6.77 on 1 df, p=0.00926
The median time to drug use for each level of treat and 95% confidence interval.
treat.surv<-survfit( Surv(time, censor) ~ treat, uis,
na.action=na.exclude)
treat.surv
n events mean se(mean) median 0.95LCL 0.95UCL
treat=0 320 265 250 15.1 131 115 156
treat=1 308 243 356 31.7 190 175 226
The Log-Rank test and the Likelihood Ratio test for site.
site.ph<-coxph( Surv(time, censor) ~ site, uis,method="breslow", na.action=na.exclude) summary(site.ph) <output omitted> Likelihood ratio test= 2.4 on 1 df, p=0.121 Wald test = 2.35 on 1 df, p=0.125 Score (logrank) test = 2.36 on 1 df, p=0.125
The median time to drug use for each level of site and 95% confidence interval.
site.surv<-survfit( Surv(time, censor) ~ site, uis, na.action=na.exclude)
site.surv
n events mean se(mean) median 0.95LCL 0.95UCL
site=0 444 364 321 20.8 156 132 175
site=1 184 144 272 16.9 198 161 232
Creating the categorical variable of age called agecat and the dummy variables for agecat.
uis$agecat[uis$age < 28] <- 1 uis$agecat[uis$age >= 28 & uis$age < 33] <- 2 uis$agecat[uis$age >= 33 & uis$age < 38] <- 3 uis$agecat[uis$age >= 38] <- 4 uis$agecat1 <- (uis$age < 28) uis$agecat2 <- (uis$age >= 28 & uis$age < 33) uis$agecat3 <- (uis$age >= 33 & uis$age < 38) uis$agecat4 <- (uis$age >= 38)
The Log-Rank test and the Likelihood Ratio test for agecat.
agecat.ph<-coxph( Surv(time, censor) ~ agecat2+agecat3+agecat4, uis, method="breslow", na.action=na.exclude) summary(agecat.ph) <output omitted> Likelihood ratio test= 3.81 on 3 df, p=0.282 Wald test = 3.79 on 3 df, p=0.285 Score (logrank) test = 3.8 on 3 df, p=0.284
The median time to drug use for each level of agecat and 95% confidence interval.
agecat.surv<-survfit( Surv(time, censor) ~ agecat, uis, na.action=na.exclude)
agecat.surv
5 observations deleted due to missing values
n events mean se(mean) median 0.95LCL 0.95UCL
agecat=1 158 128 244 18.0 164 122 203
agecat=2 158 135 245 20.0 148 123 182
agecat=3 184 145 367 31.9 162 124 216
agecat=4 123 96 286 21.9 189 168 243
Creating the categorical variable of becktota called beckcat and the dummy variables for beckcat.
uis$beckcat[uis$becktota < 10] <- 1 uis$beckcat[uis$becktota >= 10 & uis$becktota < 15] <- 2 uis$beckcat[uis$becktota >= 15 & uis$becktota < 25] <- 3 uis$beckcat[uis$becktota >= 25] <- 4 uis$beckcat1 <- (uis$becktota < 10) uis$beckcat2 <- (uis$becktota >= 10 & uis$becktota < 15) uis$beckcat3 <- (uis$becktota >= 15 & uis$becktota < 25) uis$beckcat4 <- (uis$becktota >= 25)
The Log-Rank test and the Likelihood Ratio test for beckcat.
beckcat.ph<-coxph( Surv(time, censor) ~ beckcat2+beckcat3+beckcat4,
uis, method="breslow", na.action=na.exclude)
summary(beckcat.ph)
<output omitted>
Likelihood ratio test= 4.41 on 3 df, p=0.221
Wald test = 4.37 on 3 df, p=0.225
Score (logrank) test = 4.38 on 3 df, p=0.223
The median time to drug use for each level of beckcat and 95% confidence interval.
beckcat.surv<-survfit( Surv(time, censor) ~ beckcat, uis, na.action=na.exclude)
beckcat.surv
33 observations deleted due to missing values
n events mean se(mean) median 0.95LCL 0.95UCL
beckcat=1 135 104 294 21.0 211 170 248
beckcat=2 102 78 389 44.0 169 133 232
beckcat=3 226 185 272 18.1 168 139 200
beckcat=4 132 111 232 18.4 147 115 193
Creating the categorical variable of ndrugtx called drugcat and the dummy variables for drugcat.
uis$drugcat[uis$ndrugtx < 2] <- 1 uis$drugcat[uis$ndrugtx >= 2 & uis$ndrugtx < 4] <- 2 uis$drugcat[uis$ndrugtx >= 4 & uis$ndrugtx < 7] <- 3 uis$drugcat[uis$ndrugtx >= 7] <- 4 uis$drugcat1 <- (uis$ndrugtx < 2) uis$drugcat2 <- (uis$ndrugtx >= 2 & uis$ndrugtx < 4) uis$drugcat3 <- (uis$ndrugtx >= 4 & uis$ndrugtx < 7) uis$drugcat4 <- (uis$ndrugtx >= 7)
drugcat.ph<-coxph( Surv(time, censor) ~ drugcat2+drugcat3+drugcat4, uis,
method="breslow", na.action=na.exclude)
summary(drugcat.ph)
<output omitted>
Likelihood ratio test= 14.5 on 3 df, p=0.0023
Wald test = 14.7 on 3 df, p=0.00206
Score (logrank) test = 14.9 on 3 df, p=0.00192
The median time to drug use for each level of drugcat and 95% confidence interval.
drugcat.surv<-survfit( Surv(time, censor) ~ drugcat, uis, na.action=na.exclude)
drugcat.surv
17 observations deleted due to missing values
n events mean se(mean) median 0.95LCL 0.95UCL
drugcat=1 183 141 302 21.7 170 143 227
drugcat=2 165 123 413 35.3 177 162 211
drugcat=3 138 119 227 18.8 136 106 187
drugcat=4 125 113 193 15.3 123 110 186
Table 5.2, p. 167.
First we create new variables for age, becktota and ndrugtx that have better scales.
uis$age5 <- uis$age/5
uis$beck10 <- uis$becktota/10
uis$drug5 <- uis$ndrugtx/5
age5.<-coxph( Surv(time, censor) ~ age5, uis, method="breslow", na.action=na.exclude)
summary(age5.ph)
n=623 (5 observations deleted due to missing values)
coef exp(coef) se(coef) z p
age5 -0.0643 0.938 0.0359 -1.79 0.074
exp(coef) exp(-coef) lower .95 upper .95
age5 0.938 1.07 0.874 1.01
Rsquare= 0.005 (max possible= 1 )
Likelihood ratio test= 3.24 on 1 df, p=0.0719
Wald test = 3.2 on 1 df, p=0.0735
Score (logrank) test = 3.2 on 1 df, p=0.0735
beck10.ph<-coxph( Surv(time, censor) ~ beck10, uis, method="breslow", na.action=na.exclude)
summary(beck10.ph)
n=595 (33 observations deleted due to missing values)
coef exp(coef) se(coef) z p
beck10 0.11 1.12 0.0472 2.32 0.02
exp(coef) exp(-coef) lower .95 upper .95
beck10 1.12 0.896 1.02 1.22
Rsquare= 0.009 (max possible= 1 )
Likelihood ratio test= 5.32 on 1 df, p=0.0211
Wald test = 5.4 on 1 df, p=0.0201
Score (logrank) test = 5.41 on 1 df, p=0.0201
drug5.ph<-coxph( Surv(time, censor) ~ drug5, uis, method="breslow", na.action=na.exclude)
summary(drug5.ph)
n=611 (17 observations deleted due to missing values)
coef exp(coef) se(coef) z p
drug5 0.147 1.16 0.0375 3.92 0.000089
exp(coef) exp(-coef) lower .95 upper .95
drug5 1.16 0.863 1.08 1.25
Rsquare= 0.022 (max possible= 1 )
Likelihood ratio test= 13.4 on 1 df, p=0.000259
Wald test = 15.4 on 1 df, p=0.0000895
Score (logrank) test = 15.5 on 1 df, p=0.0000844
Table 5.3, p. 168.
full.ph <- coxph( Surv(time, censor) ~ age + becktota + ndrugtx + hercoc2 +
hercoc3 + hercoc4 + ivhx2 + ivhx3 + race + treat + site, uis, method="breslow",
na.action=na.exclude)
summary(full.ph)
<output omitted>
n=575 (53 observations deleted due to missing values)
coef exp(coef) se(coef) z p
age -0.02887 0.972 0.00817 -3.533 0.00041
becktota 0.00834 1.008 0.00498 1.677 0.09400
ndrugtx 0.02837 1.029 0.00831 3.416 0.00064
hercoc2 0.06532 1.067 0.15001 0.435 0.66000
hercoc3 -0.09362 0.911 0.16547 -0.566 0.57000
hercoc4 0.02798 1.028 0.16028 0.175 0.86000
ivhx2 0.17439 1.191 0.13864 1.258 0.21000
ivhx3 0.28071 1.324 0.14693 1.911 0.05600
race -0.20289 0.816 0.11669 -1.739 0.08200
treat -0.23995 0.787 0.09437 -2.543 0.01100
site -0.10249 0.903 0.10927 -0.938 0.35000
Rsquare= 0.08 (max possible= 1 )
Likelihood ratio test= 47.9 on 11 df, p=1.48e-006
Wald test = 48.8 on 11 df, p=1.04e-006
Score (logrank) test = 49.4 on 11 df, p=8.17e-007
Table 5.4, p. 168.
reduced1.ph <- coxph( Surv(time, censor) ~ age + becktota + ndrugtx +
ivhx2 + ivhx3 + race + treat + site, uis, method="breslow",
na.action=na.exclude)
summary(reduced1.ph)
<output omitted>
n=575 (53 observations deleted due to missing values)
coef exp(coef) se(coef) z p
age -0.02822 0.972 0.00817 -3.454 0.00055
becktota 0.00794 1.008 0.00497 1.598 0.11000
ndrugtx 0.02776 1.028 0.00829 3.351 0.00081
ivhx2 0.19599 1.217 0.13721 1.428 0.15000
ivhx3 0.33280 1.395 0.11991 2.775 0.00550
race -0.20925 0.811 0.11589 -1.805 0.07100
treat -0.23177 0.793 0.09371 -2.473 0.01300
site -0.09946 0.905 0.10854 -0.916 0.36000
Rsquare= 0.078 (max possible= 1 )
Likelihood ratio test= 46.5 on 8 df, p=1.9e-007
Wald test = 47.5 on 8 df, p=1.24e-007
Score (logrank) test = 48 on 8 df, p=9.92e-008
Table 5.5, p. 168.
reduced2.ph <- coxph( Surv(time, censor) ~ age + becktota + ndrugtx +
ivhx3 + race + treat + site, uis, method="breslow", na.action=na.exclude)
summary(reduced2.ph)
<output omitted>
n=575 (53 observations deleted due to missing values)
coef exp(coef) se(coef) z p
age -0.0262 0.974 0.00805 -3.249 0.0012
becktota 0.0084 1.008 0.00495 1.696 0.0900
ndrugtx 0.0291 1.030 0.00821 3.540 0.0004
ivhx3 0.2561 1.292 0.10630 2.409 0.0160
race -0.2245 0.799 0.11527 -1.947 0.0510
treat -0.2324 0.793 0.09373 -2.480 0.0130
site -0.0867 0.917 0.10786 -0.804 0.4200
Rsquare= 0.074 (max possible= 1 )
Likelihood ratio test= 44.5 on 7 df, p=1.7e-007
Wald test = 45.6 on 7 df, p=1.07e-007
Score (logrank) test = 46 on 7 df, p=8.67e-008
Table 5.6, p. 168.
cat.ph <- coxph(formula=Surv(time, censor) ~ agecat2+agecat3+agecat4+beckcat2+beckcat3+
beckcat4+drugcat2+drugcat3+drugcat4+ ivhx3+site+race+treat, method="breslow", uis,
na.action=na.exclude)
summary(cat.ph)
<output omitted>
n=575 (53 observations deleted due to missing values)
coef exp(coef) se(coef) z p
agecat2 0.0374 1.038 0.1317 0.284 0.7800
agecat3 -0.2410 0.786 0.1312 -1.836 0.0660
agecat4 -0.4004 0.670 0.1507 -2.657 0.0079
beckcat2 0.0464 1.048 0.1527 0.304 0.7600
beckcat3 0.1078 1.114 0.1250 0.863 0.3900
beckcat4 0.2098 1.233 0.1403 1.495 0.1300
drugcat2 -0.0915 0.913 0.1294 -0.707 0.4800
drugcat3 0.2427 1.275 0.1337 1.815 0.0690
drugcat4 0.3706 1.449 0.1404 2.639 0.0083
ivhx3 0.2417 1.273 0.1068 2.264 0.0240
site -0.0835 0.920 0.1085 -0.770 0.4400
race -0.2511 0.778 0.1164 -2.158 0.0310
treat -0.2173 0.805 0.0941 -2.309 0.0210
Rsquare= 0.079 (max possible= 1 )
Likelihood ratio test= 47.2 on 13 df, p=8.8e-006
Wald test = 47.4 on 13 df, p=8.24e-006
Score (logrank) test = 48 on 13 df, p=6.59e-006
Fig. 5.2a, p. 175.
The fitted line appears to be approximately linear thus there is no transformation of age needed.
age.ph <- coxph( Surv(time, censor) ~ becktota + ndrugtx +
ivhx3 + race + treat + site, uis, method="breslow", na.action=na.exclude)
uis$resid <- residuals.coxph(age.ph, type="martingale")
plot(uis$age, uis$resid, xlab="Age",ylab="Martingale Residuals", ylim=c(-4, 1.0))
lines(lowess(uis$age[!is.na(uis$age) & !is.na(uis$resid)],
uis$resid[!is.na(uis$age) & !is.na(uis$resid)]))
Fig. 5.3a, p. 176.
The fitted line appears to be approximately linear thus there is no transformation of becktota needed.
becktota.ph <- coxph( Surv(time, censor) ~ age + ndrugtx +
ivhx3 + race + treat + site, uis, method="breslow", na.action=na.exclude)
uis$resid <- residuals.coxph(becktota.ph, type="martingale")
plot(uis$becktota, uis$resid, xlab="Becktota",ylab="Martingale Residuals", ylim=c(-4, 1.0))
lines(lowess(uis$becktota[!is.na(uis$becktota) & !is.na(uis$resid)],
uis$resid[!is.na(uis$becktota) & !is.na(uis$resid)]))
Fig. 5.4a, p. 177.
The fitted line has a noticeable squiggle in the beginning which makes us very nervous and it is an indicator that we can't just include ndrugtx in the model; we should transform it before including it in the final model.
ndrugtx.ph <- coxph(formula = Surv(time, censor) ~ age + becktota +
ivhx3 + race + treat + site, data=uis, method="breslow",
na.action=na.exclude)
uis$resid <- residuals.coxph(ndrugtx.ph, type="martingale")
plot(uis$ndrugtx, uis$resid, xlab="Ndrugtx",ylab="Martingale Residuals", ylim=c(-4, 1.0))
lines(lowess(uis$ndrugtx[!is.na(uis$ndrugtx) & !is.na(uis$resid)],
uis$resid[!is.na(uis$ndrugtx) & !is.na(uis$resid)]))
Creating ndrugfp1 and ndrugfp2, p. 174.
uis$ndrugfp1 <- 1/((uis$ndrugtx+1)/10) uis$ndrugfp2 <- (1/((uis$ndrugtx+1)/10))*log((uis$ndrugtx+1)/10)
Table 5.8, p. 174.
reduced3.ph <- coxph( Surv(time, censor)~ age + becktota + ndrugfp1 + ndrugfp2 +
ivhx3 + race + treat + site, uis, method="breslow", na.action=na.exclude)
summary(reduced3.ph)
<output omitted>
n=575 (53 observations deleted due to missing values)
coef exp(coef) se(coef) z p
age -0.02815 0.972 0.00813 -3.461 0.000540
becktota 0.00916 1.009 0.00499 1.837 0.066000
ndrugfp1 -0.52284 0.593 0.12441 -4.202 0.000026
ndrugfp2 -0.19478 0.823 0.04825 -4.037 0.000054
ivhx3 0.25858 1.295 0.10802 2.394 0.017000
race -0.24220 0.785 0.11547 -2.098 0.036000
treat -0.21084 0.810 0.09369 -2.250 0.024000
site -0.10532 0.900 0.10915 -0.965 0.330000
Rsquare= 0.086 (max possible= 1 )
Likelihood ratio test= 51.4 on 8 df, p=2.17e-008
Wald test = 51.4 on 8 df, p=2.17e-008
Score (logrank) test = 52 on 8 df, p=1.72e-008
Creating the interaction variables, p. 178.
uis$agesite <- uis$age*uis$site uis$racesite <- uis$race*uis$site uis$agefp1 <- uis$age*uis$ndrugfp1 uis$agefp2 <- uis$age*uis$ndrugfp2
Table 5.10, p. 179.
interaction.ph <- coxph(formula=Surv(time, censor) ~ age + becktota + + ndrugfp1 + ndrugfp2 +
ivhx3 + race + treat + site + agesite + racesite + agefp1 + agefp2,
data=uis, method="breslow", na.action=na.exclude)
summary(interaction.ph)
<output omitted>
n=575 (53 observations deleted due to missing values)
coef exp(coef) se(coef) z p
age -0.05431 0.947 0.02803 -1.9372 0.05300
becktota 0.01005 1.010 0.00499 2.0129 0.04400
ndrugfp1 -0.67435 0.509 0.64447 -1.0464 0.30000
ndrugfp2 -0.17217 0.842 0.25234 -0.6823 0.50000
ivhx3 0.22935 1.258 0.10793 2.1250 0.03400
race -0.48774 0.614 0.13461 -3.6233 0.00029
treat -0.24205 0.785 0.09455 -2.5600 0.01000
site -1.11940 0.326 0.54607 -2.0499 0.04000
agesite 0.02644 1.027 0.01658 1.5949 0.11000
racesite 0.86274 2.370 0.24810 3.4773 0.00051
agefp1 0.00163 1.002 0.01945 0.0838 0.93000
agefp2 -0.00198 0.998 0.00766 -0.2589 0.80000
Rsquare= 0.119 (max possible= 1 )
Likelihood ratio test= 73.1 on 12 df, p=8.3e-011
Wald test = 73 on 12 df, p=8.61e-011
Score (logrank) test = 73.9 on 12 df, p=5.99e-011
Table 5.11, p. 179.
reducedinter.ph <- coxph(formula=Surv(time, censor) ~ age + becktota + + ndrugfp1 + ndrugfp2 +
ivhx3 + race + treat + site + agesite + racesite, uis, method="breslow", na.action=na.exclude)
summary(reducedinter.ph)
<output omitted>
n=575 (53 observations deleted due to missing values)
coef exp(coef) se(coef) z p
age -0.04138 0.959 0.00991 -4.17 3.0e-005
becktota 0.00874 1.009 0.00497 1.76 7.8e-002
ndrugfp1 -0.57473 0.563 0.12519 -4.59 4.4e-006
ndrugfp2 -0.21470 0.807 0.04859 -4.42 9.9e-006
ivhx3 0.22776 1.256 0.10857 2.10 3.6e-002
race -0.46661 0.627 0.13475 -3.46 5.3e-004
treat -0.24667 0.781 0.09434 -2.61 8.9e-003
site -1.31517 0.268 0.53143 -2.47 1.3e-002
agesite 0.03235 1.033 0.01608 2.01 4.4e-002
racesite 0.85014 2.340 0.24774 3.43 6.0e-004
Rsquare= 0.11 (max possible= 1 )
Likelihood ratio test= 67.1 on 10 df, p=1.58e-010
Wald test = 66.5 on 10 df, p=2.07e-010
Score (logrank) test = 67.4 on 10 df, p=1.38e-010
UCLA Researchers are invited to our Statistical Consulting Services
We recommend others to our list of Other Resources for Statistical Computing Help
These pages are Copyrighted (c) by UCLA Academic Technology Services