/***************************************************************************************************************
 * SurvC1_SAS - SAS language implementation of R macro for calculating cscore making use of time to event
 *              information
 *
 * Version 1.0, 05-Feb-2013
 *
 * Brian Starr, Statistical Analyst (external) at Boehringer-Ingelheim.
 * brian.starr.ext@boehringer-ingelheim.com
 *
 * Based on original macro, SurvC1 written by Hajime Uno, PhD, Harvard school of public health
 * http://www.hsph.harvard.edu/hajime-uno/
 *
 * R package location:
 * http://cran.r-project.org/web/packages/survC1/index.html
 *
 * Free use permitted. Please acknowledge authors if included or utilized in published works.
 ***************************************************************************************************************
 * CScore: Macro for non-parametric calculation of c-scores using time to event data
 *
 * Parameters:
 *   dsin       - input dataset name with time to event, score and category fields
 *   dsout      - output dataset name with c-score, confidence interval, STD and category field.
 *                (default=cscore)
 *   iterations - number of iterations for bootstrapped confidence interval calculation (default=500)
 *   cats       - Optional numeric or character field allows input data to be separated into analysis by
 *                groups, such as treatment, etc. Can be a composite of multiple fields, in a concatenated
 *                character string. (default=1, i.e. a single group)
 *   score      - The risk ranking field whose c-score is being assessed
 *   where      - optional condition for selecting a sub-set of the data for analysis. (default=1, all rows)
 *   seed       - optional seed value for the confidence interval bootstrap. Default=0 (from system time.)
 *   evt        - the numeric field that specifies if event occurred (1), or subject was censored (0).
 *                Default=evt
 *   time       - the numeric field that specifies the time to event or censoring. Default=time
 *
 * Note: Wherever the term "field" was used, any valid SAS expression of the correct type may be substituted
 *
 * Rows whose score, time or event indicator evaluates to missing are excluded from the analysis.
 *
 * Output dataset (dsout) has one row per value of cats with the variables:
 *   cats, cscore, cscore_lcl (2.5th bootstrap percentile), cscore_ucl (97.5th bootstrap percentile),
 *   cscore_std (bootstrap standard deviation).
 *
 * Side effects: the macro creates (and leaves behind) the one-level datasets evtdata, boot1, evtdata1, dist,
 * dist1, dist2, weights, hadevent, hadevent1, hadevent2, cscore1 and cscore2.
 *
 * Recommended options:
 *   options nofmterr threads compress=yes;
 *
 * Example call:
 *   %cscore(dsin=heartpatients, score=Framingham, evt=MI, time=TimeToMI);
 ***************************************************************************************************************/
%macro cscore(dsin=, dsout=cscore, iterations=500, cats=1, score=, where=1, seed=0, evt=evt, time=time);

    /* Array dimensions computed below; kept local so a caller's macro variables are never overwritten. */
    %local maxn maxn2;

    /* Evaluate the user expressions once into fixed variable names. Replicate=0 marks the original data. */
    data evtdata;
        set &dsin(where=(&where));
        score = (&score);
        cats  = (&cats);
        evt   = (&evt);
        time  = (&time);
        /* A row is only usable when score, time and event indicator are all known. */
        if missing(score) or missing(time) or missing(evt) then delete;
        replicate = 0;
        keep replicate cats score time evt;
    run;

    proc sort data=evtdata;
        by cats time score evt;
    run;

    /* Create bootstrap: in the following code, Replicate holds the bootstrap iteration or 0 for the      */
    /* original data. method=urs samples with replacement; samprate=1 requests a 100% sampling rate       */
    /* (SURVEYSELECT reads the value 1 as 100%). Output carries one row per selected record plus          */
    /* NumberHits, the number of times it was drawn.                                                      */
    /* NOTE(review): there is no STRATA statement, so resampling is over the whole input, not within cats. */
    proc surveyselect data=evtdata(drop=replicate) out=boot1 noprint
                      seed=&seed method=urs samprate=1 rep=&iterations;
    run;

    proc sort data=boot1;
        by Replicate cats time score evt;
    run;

    /* Compress any indistinguishable patient records into 1 record plus a count (numberhits). */
    data evtdata1;
        set evtdata(in=b) boot1(rename=(numberhits=_numberhits));
        by Replicate cats time score evt;
        if first.evt then numberhits = 0;
        if b then numberhits + 1;              /* original data: every row counts once       */
        else numberhits + _numberhits;         /* bootstrap data: add the draw count         */
        if last.evt;
        keep Replicate cats score time evt numberhits;
    run;

    proc sort data=evtdata1 out=dist(keep=Replicate cats time) nodupkey;
        by Replicate cats time;
    run;

    /* Create horizontal array of unique times, plus placeholders yi and di (initialised to 0).          */
    /* maxn receives the array dimension: the largest number of distinct times in any category of the    */
    /* original data (Replicate=0). Bootstrap replicates only contain times drawn from the original, so  */
    /* they can never need more slots.                                                                   */
    data dist1;
        set dist;
        by Replicate cats;
        retain max;
        if first.cats then n = 0;
        n + 1;
        var = "time"; output;
        var = "yi"; time = 0; output;
        var = "di"; output;
        if last.cats and Replicate = 0;
        max = max(max, n);
        /* Store the maximum over ALL categories, not the count of the last category only. */
        if last.Replicate then call symputx("maxn", max, "L");
    run;

    /* One row per Replicate/cats with columns time1.., yi1.., di1.. (ID values var and n concatenated). */
    proc transpose data=dist1 out=dist2(drop=_:);
        by Replicate cats;
        var time;
        id var n;
    run;

    /* Create the weights by time. See Hajime Uno paper for details.                                      */
    /* dist2 contributes one row per BY group, so its yi/di values are retained across the many evtdata1 */
    /* rows of that group and act as accumulators:                                                        */
    /*   yi(n) = number of subjects with time >= time(n)          (at risk)                               */
    /*   di(n) = number of subjects censored (evt=0) at time(n)                                           */
    data weights;
        merge dist2 evtdata1(keep=Replicate cats time NumberHits evt);
        by Replicate cats;
        array timen{*} time1-time&maxn;
        array yin{*}   yi1-yi&maxn;
        array din{*}   di1-di&maxn;

        do n = 1 to &maxn while ((not missing(timen(n))) and timen(n) <= time);
            if (time = timen(n) and evt = 0) then din(n) = din(n) + NumberHits;
            yin(n) = yin(n) + NumberHits;
        end;

        /* At the end of the group emit one row per distinct time. surv is the product-limit estimate of */
        /* remaining uncensored just BEFORE that time (the hazard of the current time is applied on the  */
        /* next iteration), and the weight is wi = 1/surv**2.                                            */
        if last.cats then do;
            do n = 1 to &maxn while (not missing(timen(n)));
                time = timen(n);
                if n = 1 then do;
                    surv   = 1;
                    wi     = 1;
                    nel_wk = 0;
                end;
                else do;
                    surv = surv * (1 - nel_wk);
                    wi   = 1 / (surv * surv);
                end;
                output;
                nel_wk = din(n) / yin(n);
            end;
        end;
        keep Replicate cats time wi;
    run;

    /* Merge the weights with times where an event occurred and scale each event record's count by its  */
    /* weight. n numbers the event records within a category. maxn2 receives the largest number of      */
    /* event records in any category of the original data (Replicate=0).                                */
    data hadevent;
        merge evtdata1(in=b where=(evt)) weights;
        by Replicate cats time;
        retain max;
        if first.cats then n = 0;
        if b then do;
            n + 1;
            numberhits = numberhits * wi;
            output;
        end;
        if last.cats and Replicate = 0;
        max = max(max, n);
        /* Store the maximum over ALL categories, not the count of the last category only. */
        if last.Replicate then call symputx("maxn2", max, "L");
    run;

    /* Two transposes turn the event records into one row per Replicate/cats with columns               */
    /* time1.., score1.., numberhits1.. .                                                               */
    proc transpose data=hadevent name=name out=hadevent1;
        by Replicate cats n;
        var time score numberhits;
    run;

    proc transpose data=hadevent1 out=hadevent2(drop=_:);
        by Replicate cats;
        id name n;
        var col1;
    run;

    /* Calculate the cscores.                                                                             */
    /* Every subject row is paired with each event record that occurred earlier. A pair is usable when   */
    /* the event time is strictly before the subject's time, or equal to it when the subject is censored */
    /* (evt=0). Event records are in ascending time order, so the loop can stop at the first failure.    */
    /* Each pair adds 2 to t_score when the earlier event has the higher score, 1 when scores tie.       */
    data cscore1;
        merge evtdata1 hadevent2;
        by Replicate cats;
        array timen{*}       time1-time&maxn2;
        array scoren{*}      score1-score&maxn2;
        array numberhitsn{*} numberhits1-numberhits&maxn2;

        if first.cats then do;
            t_n     = 0;
            t_score = 0;
        end;

        /* Explicit comparison instead of (timen(n)+evt)<=time, which is only valid for integer times. */
        do n = 1 to &maxn2
            while (not missing(timen(n))
                   and (timen(n) < time or (timen(n) = time and evt = 0)));
            nhtemp = numberhits * numberhitsn(n);
            t_n + nhtemp;
            t_score + nhtemp * ((scoren(n) >= score) + (scoren(n) > score));
        end;

        if last.cats;
        /* No usable pair leaves cscore missing (avoids a division-by-zero note in the log). */
        if t_n > 0 then cscore = t_score / (2 * t_n);
        keep Replicate cats cscore;
    run;

    proc sort data=cscore1;
        by cats;
    run;

    /* Calculate the bootstrapped confidence interval and STD from the bootstrap replicates only; the   */
    /* estimate from the original data (Replicate=0) is not a bootstrap draw.                           */
    proc univariate data=cscore1(where=(Replicate > 0)) noprint;
        by cats;
        var cscore;
        output out=cscore2 pctlpts=2.5 97.5 pctlpre=cscore_ pctlname=lcl ucl std=cscore_std;
    run;

    /* Final result: point estimate from the original data alongside the bootstrap summary. */
    data &dsout;
        merge cscore1(where=(Replicate=0)) cscore2;
        by cats;
        format cscore cscore_lcl cscore_ucl cscore_std 5.3;
        drop replicate;
    run;

%mend cscore;