options nocenter nonumber nodate;
*Note: the herco variable was created: hercoc=1 or 3 then herco=1 else herco=hercoc;

*Getting the data into SAS.;
data uis;
  set 'c:\survival\uis_small';
run;
*Looking at the first 10 obs of the data.;
proc print data=uis (obs=10);
run;
*Univariate test and K-M curve for categorical variables.;
proc lifetest data=uis plots=(s);
  time time*censor(0);
  strata treat;
run;
proc lifetest data=uis plots=(s);
  time time*censor(0);
  strata site;
run;
proc lifetest data=uis plots=(s);
  time time*censor(0);
  strata herco;
run;
*Univariate tests for continuous variables, no K-M curves.;
proc phreg data=uis;
  model time*censor(0) = ndrugtx;
run;
proc phreg data=uis;
  model time*censor(0) = age;
run;
*Every predictor in the big model;
proc phreg data=uis;
  model time*censor(0) = age ndrugtx treat site herco2 herco3;
  herco2 = herco=2;
  herco3 = herco=3;
  herco: test herco2, herco3;
run;
*Drop herco since not sig.;
*We keep site because we know from previous research that it is very important;
*It could be just by chance that in our data it is not sig.;
*Without herco it has smaller p-value;
proc phreg data=uis;
  model time*censor(0) = age ndrugtx  treat site;
run;
*Looking at all the interactions.;
*Often in medical data there is no prior research done and we have do to some "fishing".;
proc phreg data=uis;
  model time*censor(0) = age ndrugtx treat site drugage;
  drugage = ndrugtx*age;
run;
proc phreg data=uis;
  model time*censor(0) = age ndrugtx treat site drugtreat;
  drugtreat = ndrugtx*treat;
run;
proc phreg data=uis;
  model time*censor(0) = age ndrugtx treat site drugsite;
  drugsite = ndrugtx*site;
run;
proc phreg data=uis;
  model time*censor(0) = age ndrugtx treat site agetreat;
  agetreat = age*treat;
run;
proc phreg data=uis;
  model time*censor(0) = age ndrugtx treat site agesite;
  agesite = age*site;
run;
proc phreg data=uis;
  model time*censor(0) = age ndrugtx treat site treatsite;
  treatsite = treat*site;
run;
*Final model with interactions;
*Now, we see that it was important to include site just as research indicated;
*since it is exactly the interaction with site that was sig.;
proc phreg data=uis;
  model time*censor(0) = age ndrugtx treat site agesite;
  agesite = age*site;
run;
*Testing for proportionality using time-dependent variables.;
proc phreg data=uis;
model time*censor(0) = age ndrugtx treat site agesite aget drugt treatt sitet;
  agesite = age*site;
  aget = age*log(time);
  drugt = ndrugtx*log(time);
  treatt = treat*log(time);
  sitet = site*log(time);
  test_proportionality: test aget, drugt, treatt, sitet;
run;
*Conclusion: all are proportional.;
*If we had a problem, two solutions: use stratification or parametric models.;
*Example, 'spoze that treat was not proportional then stratify on treat;
*Restraint is that effects are equal across strata but baseline hazards differ.;
proc phreg data=sorted;
  model time*censor(0) = age ndrugtx site agesite; 
  agesite = age*site;
  strata treat;
run; 
*Compare stratified model to non-stratified model;
proc phreg data=uis;
  model time*censor(0) = age ndrugtx treat site agesite; 
  agesite = age*site;
run; 
*One of the down-falls of SAS over other packages is that it is not easy to get;
*it to produce a nice graph after using the phreg procedure.;
*The following code will produce the survival function for a specific group of subjects.;
*Remember that each covariate pattern will have its own distinct survival function. ;
*We want to look at the survival function for people who are 30 yrs old, have only 5 ;
*previous drug treatments, received their treatment at site A and received the long treat. ;
data cov_treat1;
  age = 30;
  ndrugtx = 5;
  treat = 1;
  site = 0;
  agesite = 0;
run;
proc phreg data=uis noprint;
  model time*censor(0) = age ndrugtx treat site agesite; 
  agesite = age*site;
  baseline out=surv1 covariates=cov_treat1 survival=surv / nomean;
run;
goptions reset=all;
symbol1 c=red v=triangle h=.8 i=stepjll;
symbol2 c=blue v=circle h=.8 i=stepjll;
axis1 label=(a=90 'Survivorship function');
proc gplot data=surv1;
 plot surv*time / vaxis=axis1;
run;
quit;
* To look at survival functions for different groups we need to output baseline function ;
* multiple times, merge the data sets and the plot them.;
*Let's compare the survival function of the same people as above but for the short treatment ;
*group;
data cov_treat0;
  age = 30;
  ndrugtx = 5;
  treat = 0;
  site = 0;
  agesite = 0;
run;
proc phreg data=uis noprint;
  model time*censor(0) = age ndrugtx treat site agesite; 
  agesite = age*site;
  baseline out=surv0 covariates=cov_treat0 survival=surv / nomean;
run;
*Stacking the two data sets.;
data combo;
  set surv1 surv0;
run;
goptions reset=all;
symbol1 c=red v=triangle h=.8 i=stepjll;
symbol2 c=blue v=circle h=.8 i=stepjll;
axis1 label=(a=90 'Survivorship function');
proc gplot data=combo;
 plot surv*time=treat / vaxis=axis1;
run;
quit;
*We could have obtained the survival functions for both levels of treat (holding all ;
*other variables constant) in fewer steps.  First, we create a data set which contains both ;
*covariate patterns.  Then we use this data set in the covariate option in the baseline;
*statement in proc phreg which will generate the survival function for both covariate ;
*patterns in one long data set.;
data cov_treat;
  if _n_= 1 then do;
   age = 30;
   ndrugtx = 5;
   treat = 1;
   site = 0;
   agesite = 0;
  end;
  output;
  if _n_= 1 then do;
   age = 30;
   ndrugtx = 5;
   treat = 0;
   site = 0;
   agesite = 0;
  end;
  output;
run;
proc phreg data=uis noprint;
  model time*censor(0) = age ndrugtx treat site agesite; 
  agesite = age*site;
  baseline out=surv covariates=cov_treat survival=surv / nomean;
run;
goptions reset=all;
symbol1 c=red v=triangle h=.8 i=stepjll;
symbol2 c=blue v=circle h=.8 i=stepjll;
axis1 label=(a=90 'Survivorship function');
proc gplot data=surv;
 plot surv*time=treat / vaxis=axis1;
run;
quit;
*Including the last censored observation.;
*Note the last censored data is missing but we can fill that in to make the graph look nice. ;
*All we have to do is include an extra data point for each group where time=max(time) and ;
*survival = last survival value. ;
*First we figure out the value of the survival time at the next to last time point;
proc print data=combo ;
  where time > 600;
run;
*Then we want to determine the last time point for each level of treat.;
proc sort data=uis out=sorted;
  by treat;
run;
proc means data=sorted max;
  by treat;
  var time;
run;
*Add the two extra observations to the data set.;
data combo1;
  if _n_ = 1 then do;
  treat=0;
  time = 805;
  surv =  0.08429;
  treat = 0; 
  output;
  treat=1;
  time = 1172;
  surv = 0.15060;
  output;
  end;
  set combo;
  output;
run;
*To verify that everything went as expected.;
proc print data=combo1 ;
  where time > 600;
run;
proc sort data=combo1;
  by time;
run;
goptions reset=all;
symbol1 c=red v=triangle h=.8 i=stepjll;
symbol2 c=blue v=circle h=.8 i=stepjll;
axis1 label=(a=90 'Survivorship function');
proc gplot data=combo1;
 plot surv*time=treat / vaxis=axis1 ;
run;
quit;

