Title: | Risk Regression Models and Prediction Scores for Survival Analysis with Competing Risks |
---|---|
Description: | Implementation of the following methods for event history analysis. Risk regression models for survival endpoints also in the presence of competing risks are fitted using binomial regression based on a time sequence of binary event status variables. A formula interface for the Fine-Gray regression model and an interface for the combination of cause-specific Cox regression models. A toolbox for assessing and comparing performance of risk predictions (risk markers and risk prediction models). Prediction performance is measured by the Brier score and the area under the ROC curve for binary possibly time-dependent outcome. Inverse probability of censoring weighting and pseudo values are used to deal with right censored data. Lists of risk markers and lists of risk models are assessed simultaneously. Cross-validation repeatedly splits the data, trains the risk prediction models on one part of each split and then summarizes and compares the performance across splits. |
Authors: | Thomas Alexander Gerds [aut, cre], Johan Sebastian Ohlendorff [aut], Paul Blanche [ctb], Rikke Mortensen [ctb], Marvin Wright [ctb], Nikolaj Tollenaar [ctb], John Muschelli [ctb], Ulla Brasch Mogensen [ctb], Brice Ozenne [aut] |
Maintainer: | Thomas Alexander Gerds <[email protected]> |
License: | GPL (>= 2) |
Version: | 2023.12.21 |
Built: | 2024-11-14 06:54:14 UTC |
Source: | CRAN |
Comparison of risk differences or risk ratios over all timepoints.
## S3 method for class 'ate' anova( object, allContrast = NULL, type = "diff", estimator = object$estimator[1], test = "CvM", transform = NULL, alternative = "two.sided", n.sim = 10000, print = TRUE, ... )
## S3 method for class 'ate' anova( object, allContrast = NULL, type = "diff", estimator = object$estimator[1], test = "CvM", transform = NULL, alternative = "two.sided", n.sim = 10000, print = TRUE, ... )
object |
A |
allContrast |
[matrix] contrast for which the risks should be compared. Matrix with two rows, the first being the sequence of reference treatments and the second the sequence of alternative treatments. |
type |
[character vector] the functionnal used to compare the risks: |
estimator |
[character] The type of estimator relative to which the comparison should be performed. |
test |
[character] The type of statistic used to compare the risks over times:
|
transform |
[character] Should a transformation be used, e.g. the test is performed after log-transformation of the estimate, standard error, and influence function. |
alternative |
[character] a character string specifying the alternative hypothesis, must be one of |
n.sim |
[integer, >0] the number of simulations used to compute the p-values. |
print |
[logical] should the results be displayed? |
... |
Not used. |
Experimental!!!
library(survival) library(data.table) ## Not run: ## simulate data set.seed(12) n <- 200 dtS <- sampleData(n,outcome="survival") dtS$X12 <- LETTERS[as.numeric(as.factor(paste0(dtS$X1,dtS$X2)))] dtS <- dtS[dtS$X12!="D"] ## model fit fit <- cph(formula = Surv(time,event)~ X1+X6,data=dtS,y=TRUE,x=TRUE) seqTime <- 1:10 ateFit <- ate(fit, data = dtS, treatment = "X1", contrasts = NULL, times = seqTime, B = 0, iid = TRUE, se = TRUE, verbose = TRUE, band = TRUE) ## display autoplot(ateFit) ## inference (two sided) statistic <- ateFit$diffRisk$estimate/ateFit$diffRisk$se confint(ateFit, p.value = TRUE, method.band = "bonferroni")$diffRisk confint(ateFit, p.value = TRUE, method.band = "maxT-simulation")$diffRisk anova(ateFit, test = "KS") anova(ateFit, test = "CvM") anova(ateFit, test = "sum") ## manual calculation (one sided) n.sim <- 1e4 statistic <- ateFit$diffRisk[, estimate/se] iid.norm <- scale(ateFit$iid$GFORMULA[["1"]]-ateFit$iid$GFORMULA[["0"]], scale = ateFit$diffRisk$se) ls.out <- lapply(1:n.sim, function(iSim){ iG <- rnorm(NROW(iid.norm)) iCurve <- t(iid.norm) %*% iG data.table(max = max(iCurve), L2 = sum(iCurve^2), sum = sum(iCurve), maxC = max(iCurve) - max(statistic), L2C = sum(iCurve^2) - sum(statistic^2), sumC = sum(iCurve) - sum(statistic), sim = iSim) }) dt.out <- do.call(rbind,ls.out) dt.out[,.(max = mean(.SD$maxC>=0), L2 = mean(.SD$L2C>=0), sum = mean(.SD$sumC>=0))] ## permutation n.sim <- 250 stats.perm <- vector(mode = "list", length = n.sim) pb <- txtProgressBar(max = n.sim, style=3) treatVar <- ateFit$variables["treatment"] for(iSim in 1:n.sim){ ## iSim <- 1 iData <- copy(dtS) iIndex <- sample.int(NROW(iData), replace = FALSE) iData[, c(treatVar) := .SD[[treatVar]][iIndex]] iFit <- update(fit, data = iData) iAteSim <- ate(iFit, data = iData, treatment = treatVar, times = seqTime, verbose = FALSE) iStatistic <- iAteSim$diffRisk[,estimate/se] stats.perm[[iSim]] <- cbind(iAteSim$diffRisk[,.(max = max(iStatistic), L2 = sum(iStatistic^2), sum = sum(iStatistic))], sim = iSim) stats.perm[[iSim]]$maxC <- stats.perm[[iSim]]$max - max(statistic) stats.perm[[iSim]]$L2C <- stats.perm[[iSim]]$L2 - sum(statistic^2) stats.perm[[iSim]]$sumC <- stats.perm[[iSim]]$sum - sum(statistic) setTxtProgressBar(pb, iSim) } dtstats.perm <- do.call(rbind,stats.perm) dtstats.perm[,.(max = mean(.SD$maxC>=0), L2 = mean(.SD$L2C>=0), sum = mean(.SD$sumC>=0))] ## End(Not run)
library(survival) library(data.table) ## Not run: ## simulate data set.seed(12) n <- 200 dtS <- sampleData(n,outcome="survival") dtS$X12 <- LETTERS[as.numeric(as.factor(paste0(dtS$X1,dtS$X2)))] dtS <- dtS[dtS$X12!="D"] ## model fit fit <- cph(formula = Surv(time,event)~ X1+X6,data=dtS,y=TRUE,x=TRUE) seqTime <- 1:10 ateFit <- ate(fit, data = dtS, treatment = "X1", contrasts = NULL, times = seqTime, B = 0, iid = TRUE, se = TRUE, verbose = TRUE, band = TRUE) ## display autoplot(ateFit) ## inference (two sided) statistic <- ateFit$diffRisk$estimate/ateFit$diffRisk$se confint(ateFit, p.value = TRUE, method.band = "bonferroni")$diffRisk confint(ateFit, p.value = TRUE, method.band = "maxT-simulation")$diffRisk anova(ateFit, test = "KS") anova(ateFit, test = "CvM") anova(ateFit, test = "sum") ## manual calculation (one sided) n.sim <- 1e4 statistic <- ateFit$diffRisk[, estimate/se] iid.norm <- scale(ateFit$iid$GFORMULA[["1"]]-ateFit$iid$GFORMULA[["0"]], scale = ateFit$diffRisk$se) ls.out <- lapply(1:n.sim, function(iSim){ iG <- rnorm(NROW(iid.norm)) iCurve <- t(iid.norm) %*% iG data.table(max = max(iCurve), L2 = sum(iCurve^2), sum = sum(iCurve), maxC = max(iCurve) - max(statistic), L2C = sum(iCurve^2) - sum(statistic^2), sumC = sum(iCurve) - sum(statistic), sim = iSim) }) dt.out <- do.call(rbind,ls.out) dt.out[,.(max = mean(.SD$maxC>=0), L2 = mean(.SD$L2C>=0), sum = mean(.SD$sumC>=0))] ## permutation n.sim <- 250 stats.perm <- vector(mode = "list", length = n.sim) pb <- txtProgressBar(max = n.sim, style=3) treatVar <- ateFit$variables["treatment"] for(iSim in 1:n.sim){ ## iSim <- 1 iData <- copy(dtS) iIndex <- sample.int(NROW(iData), replace = FALSE) iData[, c(treatVar) := .SD[[treatVar]][iIndex]] iFit <- update(fit, data = iData) iAteSim <- ate(iFit, data = iData, treatment = treatVar, times = seqTime, verbose = FALSE) iStatistic <- iAteSim$diffRisk[,estimate/se] stats.perm[[iSim]] <- cbind(iAteSim$diffRisk[,.(max = max(iStatistic), L2 = sum(iStatistic^2), sum = sum(iStatistic))], sim = iSim) stats.perm[[iSim]]$maxC <- stats.perm[[iSim]]$max - max(statistic) stats.perm[[iSim]]$L2C <- stats.perm[[iSim]]$L2 - sum(statistic^2) stats.perm[[iSim]]$sumC <- stats.perm[[iSim]]$sum - sum(statistic) setTxtProgressBar(pb, iSim) } dtstats.perm <- do.call(rbind,stats.perm) dtstats.perm[,.(max = mean(.SD$maxC>=0), L2 = mean(.SD$L2C>=0), sum = mean(.SD$sumC>=0))] ## End(Not run)
data.table
Turn ate object into a data.table
.
## S3 method for class 'ate' as.data.table( x, keep.rownames = FALSE, estimator = x$estimator, type = c("meanRisk", "diffRisk", "ratioRisk"), ... )
## S3 method for class 'ate' as.data.table( x, keep.rownames = FALSE, estimator = x$estimator, type = c("meanRisk", "diffRisk", "ratioRisk"), ... )
x |
object obtained with function |
keep.rownames |
Not used. |
estimator |
[character] The type of estimator relative to which the estimates should be output. |
type |
[character vector] The type of risk to export.
Can be |
... |
Not used. |
data.table
Turn influenceTest object into a data.table
.
## S3 method for class 'influenceTest' as.data.table(x, keep.rownames = FALSE, se = TRUE, ...)
## S3 method for class 'influenceTest' as.data.table(x, keep.rownames = FALSE, se = TRUE, ...)
x |
object obtained with function |
keep.rownames |
Not used. |
se |
[logical] Should standard errors/quantile for confidence bands be displayed? |
... |
Not used. |
data.table
Turn predictCox object into a data.table
.
## S3 method for class 'predictCox' as.data.table(x, keep.rownames = FALSE, se = TRUE, ...)
## S3 method for class 'predictCox' as.data.table(x, keep.rownames = FALSE, se = TRUE, ...)
x |
object obtained with function |
keep.rownames |
Not used. |
se |
[logical] Should standard errors/quantile for confidence bands be displayed? |
... |
Not used. |
data.table
Turn predictCSC object into a data.table
.
## S3 method for class 'predictCSC' as.data.table(x, keep.rownames = FALSE, se = TRUE, ...)
## S3 method for class 'predictCSC' as.data.table(x, keep.rownames = FALSE, se = TRUE, ...)
x |
object obtained with function |
keep.rownames |
not used |
se |
should standard errors/quantile for confidence bands be displayed? |
... |
not used |
Use the g-formula or the IPW or the double robust estimator to estimate the average treatment effect (absolute risk difference or ratio) based on Cox regression with or without competing risks.
ate( event, treatment, censor = NULL, data, data.index = NULL, formula = NULL, estimator = NULL, strata = NULL, contrasts = NULL, allContrasts = NULL, times, cause = NA, landmark = NULL, se = TRUE, iid = (B == 0) && (se || band), known.nuisance = FALSE, band = FALSE, B = 0, seed, handler = "foreach", mc.cores = 1, cl = NULL, verbose = TRUE, ... )
ate( event, treatment, censor = NULL, data, data.index = NULL, formula = NULL, estimator = NULL, strata = NULL, contrasts = NULL, allContrasts = NULL, times, cause = NA, landmark = NULL, se = TRUE, iid = (B == 0) && (se || band), known.nuisance = FALSE, band = FALSE, B = 0, seed, handler = "foreach", mc.cores = 1, cl = NULL, verbose = TRUE, ... )
event |
Outcome model which describes how the probability of experiencing a terminal event depends
on treatment and covariates. The object carry its own call and
have a |
treatment |
Treatment model which describes how the probability of being allocated to a treatment group depends
on covariates. The object must be a |
censor |
Censoring model which describes how the probability of being censored depends
on treatment and covariates. The object must be a |
data |
[data.frame or data.table] Data set in which to evaluate risk predictions based on the outcome model |
data.index |
[numeric vector] Position of the observation in argument data relative to the dataset used to obtain the argument event, treatment, censor. Only necessary for the standard errors when computing the Average Treatment Effects on a subset of the data set. |
formula |
For analyses with time-dependent covariates, the response formula. See examples. |
estimator |
[character] The type of estimator used to compute the average treatment effect.
Can be |
strata |
[character] Strata variable on which to compute the average risk. Incompatible with treatment. Experimental. |
contrasts |
[character vector] levels of the treatment variable for which the risks should be assessed and compared. Default is to consider all levels. |
allContrasts |
[2-row character matrix] levels of the treatment variable to be compared. Default is to consider all pairwise comparisons. |
times |
[numeric vector] Time points at which to evaluate average treatment effects. |
cause |
[integer/character] the cause of interest. |
landmark |
for models with time-dependent covariates the landmark time(s) of evaluation.
In this case, argument |
se |
[logical] If |
iid |
[logical] If |
known.nuisance |
[logical] If |
band |
[logical] If |
B |
[integer, >0] the number of bootstrap replications used to compute the confidence intervals. If it equals 0, then the influence function is used to compute Wald-type confidence intervals/bands. |
seed |
[integer, >0] sed number used to generate seeds for bootstrap and to achieve reproducible results. |
handler |
[character] Parallel handler for bootstrap.
|
mc.cores |
[integer, >0] The number of cores to use,
i.e., the upper limit for the number of child processes that run simultaneously.
Passed to |
cl |
A parallel socket cluster used to perform cluster calculation in parallel (output by |
verbose |
[logical] If |
... |
passed to predictRisk |
Brice Ozenne [email protected] and Thomas Alexander Gerds [email protected]
Brice Maxime Hugues Ozenne, Thomas Harder Scheike, Laila Staerk, and Thomas Alexander Gerds. On the estimation of average treatment effects with right- censored time to event outcome and competing risks. Biometrical Journal, 62 (3):751–763, 2020.
as.data.table
to extract the estimates in a data.table
object.
autoplot.ate
for a graphical representation the standardized risks.
confint.ate
to compute (pointwise/simultaneous) confidence intervals and (unadjusted/adjusted) p-values, possibly using a transformation.
summary.ate
for a table containing the standardized risks over time and treatment/strata.
library(survival) library(rms) library(prodlim) library(data.table) set.seed(10) #### Survival settings #### #### ATE with Cox model #### ## generate data n <- 100 dtS <- sampleData(n, outcome="survival") dtS$time <- round(dtS$time,1) dtS$X1 <- factor(rbinom(n, prob = c(0.3,0.4) , size = 2), labels = paste0("T",0:2)) ## estimate the Cox model fit <- cph(formula = Surv(time,event)~ X1+X2,data=dtS,y=TRUE,x=TRUE) ## compute the ATE at times 5, 6, 7, and 8 using X1 as the treatment variable ## standard error computed using the influence function ## confidence intervals / p-values based on asymptotic results ateFit1a <- ate(fit, data = dtS, treatment = "X1", times = 5:8) summary(ateFit1a) summary(ateFit1a, short = TRUE, type = "meanRisk") summary(ateFit1a, short = TRUE, type = "diffRisk") summary(ateFit1a, short = TRUE, type = "ratioRisk") ## Not run: ## same as before with in addition the confidence bands / adjusted p-values ## (argument band = TRUE) ateFit1b <- ate(fit, data = dtS, treatment = "X1", times = 5:8, band = TRUE) summary(ateFit1b) ## by default bands/adjuste p-values computed separately for each treatment modality summary(ateFit1b, band = 1, se = FALSE, type = "diffRisk", short = TRUE, quantile = TRUE) ## adjustment over treatment and time using the band argument of confint summary(ateFit1b, band = 2, se = FALSE, type = "diffRisk", short = TRUE, quantile = TRUE) ## confidence intervals / p-values computed using 1000 bootstrap samples ## (argument se = TRUE and B = 1000) ateFit1c <- ate(fit, data = dtS, treatment = "X1", times = 5:8, se = TRUE, B = 50, handler = "mclapply") ## NOTE: for real applications 50 bootstrap samples is not enough ## same but using 2 cpus for generating and analyzing the bootstrap samples ## (parallel computation, argument mc.cores = 2) ateFit1d <- ate(fit, data = dtS, treatment = "X1", times = 5:8, se = TRUE, B = 50, mc.cores = 2) ## manually defining the cluster to be used ## useful when specific packages need to be loaded in each cluster fit <- cph(formula = Surv(time,event)~ X1+X2+rcs(X6),data=dtS,y=TRUE,x=TRUE) cl <- parallel::makeCluster(2) parallel::clusterEvalQ(cl, library(rms)) ateFit1e <- ate(fit, data = dtS, treatment = "X1", times = 5:8, se = TRUE, B = 50, handler = "foreach", cl = cl) ## End(Not run) #### Survival settings without censoring #### #### ATE with glm #### ## generate data n <- 100 dtB <- sampleData(n, outcome="binary") dtB[, X2 := as.numeric(X2)] ## estimate a logistic regression model fit <- glm(formula = Y ~ X1+X2, data=dtB, family = "binomial") ## compute the ATE using X1 as the treatment variable ## only point estimate (argument se = FALSE) ateFit1a <- ate(fit, data = dtB, treatment = "X1", se = FALSE) ateFit1a ## Not run: ## with confidence intervals ateFit1b <- ate(fit, data = dtB, treatment = "X1", times = 5) ## just for having a nice output not used in computations summary(ateFit1b, short = TRUE) ## using the lava package library(lava) ateLava <- estimate(fit, function(p, data){ a <- p["(Intercept)"] ; b <- p["X11"] ; c <- p["X2"] ; R.X11 <- expit(a + b + c * data[["X2"]]) R.X10 <- expit(a + c * data[["X2"]]) list(risk0=R.X10,risk1=R.X11,riskdiff=R.X11-R.X10)}, average=TRUE) ateLava ## End(Not run) ## see wglm for handling right-censoring with glm #### Competing risks settings #### #### ATE with cause specific Cox regression #### ## generate data n <- 500 set.seed(10) dt <- sampleData(n, outcome="competing.risks") dt$X1 <- factor(rbinom(n, prob = c(0.2,0.3) , size = 2), labels = paste0("T",0:2)) ## estimate cause specific Cox model fitCR <- CSC(Hist(time,event)~ X1+X8,data=dt,cause=1) ## compute the ATE at times 1, 5, 10 using X1 as the treatment variable ateFit2a <- ate(fitCR, data = dt, treatment = "X1", times = c(1,5,10), cause = 1, se = TRUE, band = TRUE) summary(ateFit2a) as.data.table(ateFit2a) #### Double robust estimator #### ## Not run: ## generate data n <- 500 set.seed(10) dt <- sampleData(n, outcome="competing.risks") dt$time <- round(dt$time,1) dt$X1 <- factor(rbinom(n, prob = c(0.4) , size = 1), labels = paste0("T",0:1)) ## working models m.event <- CSC(Hist(time,event)~ X1+X2+X3+X5+X8,data=dt) m.censor <- coxph(Surv(time,event==0)~ X1+X2+X3+X5+X8,data=dt, x = TRUE, y = TRUE) m.treatment <- glm(X1~X2+X3+X5+X8,data=dt,family=binomial(link="logit")) ## prediction + average ateRobust <- ate(event = m.event, treatment = m.treatment, censor = m.censor, data = dt, times = 5:10, cause = 1, band = TRUE) ## compare various estimators ateRobust3 <- ate(event = m.event, treatment = m.treatment, censor = m.censor, estimator = c("GFORMULA","IPTW","AIPTW"), data = dt, times = c(5:10), cause = 1, se = TRUE) print(setkeyv(as.data.table(ateRobust3, type = "meanRisk"),"time")) print(setkeyv(as.data.table(ateRobust3, type = "diffRisk"),"time")) ## End(Not run) #### time-dependent covariates ### ## Not run: library(survival) fit <- coxph(Surv(time, status) ~ celltype+karno + age + trt, veteran) vet2 <- survSplit(Surv(time, status) ~., veteran, cut=c(60, 120), episode ="timegroup") fitTD <- coxph(Surv(tstart, time, status) ~ celltype +karno + age + trt, data= vet2,x=1) set.seed(16) resVet <- ate(fitTD,formula=Hist(entry=tstart,time=time,event=status)~1, data = vet2, treatment = "celltype", times=5,verbose=1, landmark = c(0,30,60,90), cause = 1, B = 50, se = 1, band = FALSE, mc.cores=1) summary(resVet) ## End(Not run) ## Not run: set.seed(137) d=sampleDataTD(127) library(survival) d[,status:=1*(event==1)] d[,X3:=as.factor(X3)] ## ignore competing risks cox1TD <- coxph(Surv(start,time, status,type="counting") ~ X3+X5+X6+X8, data=d, x = TRUE) resTD1 <- ate(cox1TD,formula=Hist(entry=start,time=time,event=status)~1, data = d, treatment = "X3", contrasts = NULL, times=.5,verbose=1, landmark = c(0,0.5,1), B = 20, se = 1, band = FALSE, mc.cores=1) resTD1 ## account for competing risks cscTD <- CSC(Hist(time=time, event=event,entry=start) ~ X3+X5+X6+X8, data=d) set.seed(16) resTD <- ate(cscTD,formula=Hist(entry=start,time=time,event=event)~1, data = d, treatment = "X3", contrasts = NULL, times=.5,verbose=1, landmark = c(0,0.5,1), cause = 1, B = 20, se = 1, band = FALSE, mc.cores=1) resTD ## End(Not run)
library(survival) library(rms) library(prodlim) library(data.table) set.seed(10) #### Survival settings #### #### ATE with Cox model #### ## generate data n <- 100 dtS <- sampleData(n, outcome="survival") dtS$time <- round(dtS$time,1) dtS$X1 <- factor(rbinom(n, prob = c(0.3,0.4) , size = 2), labels = paste0("T",0:2)) ## estimate the Cox model fit <- cph(formula = Surv(time,event)~ X1+X2,data=dtS,y=TRUE,x=TRUE) ## compute the ATE at times 5, 6, 7, and 8 using X1 as the treatment variable ## standard error computed using the influence function ## confidence intervals / p-values based on asymptotic results ateFit1a <- ate(fit, data = dtS, treatment = "X1", times = 5:8) summary(ateFit1a) summary(ateFit1a, short = TRUE, type = "meanRisk") summary(ateFit1a, short = TRUE, type = "diffRisk") summary(ateFit1a, short = TRUE, type = "ratioRisk") ## Not run: ## same as before with in addition the confidence bands / adjusted p-values ## (argument band = TRUE) ateFit1b <- ate(fit, data = dtS, treatment = "X1", times = 5:8, band = TRUE) summary(ateFit1b) ## by default bands/adjuste p-values computed separately for each treatment modality summary(ateFit1b, band = 1, se = FALSE, type = "diffRisk", short = TRUE, quantile = TRUE) ## adjustment over treatment and time using the band argument of confint summary(ateFit1b, band = 2, se = FALSE, type = "diffRisk", short = TRUE, quantile = TRUE) ## confidence intervals / p-values computed using 1000 bootstrap samples ## (argument se = TRUE and B = 1000) ateFit1c <- ate(fit, data = dtS, treatment = "X1", times = 5:8, se = TRUE, B = 50, handler = "mclapply") ## NOTE: for real applications 50 bootstrap samples is not enough ## same but using 2 cpus for generating and analyzing the bootstrap samples ## (parallel computation, argument mc.cores = 2) ateFit1d <- ate(fit, data = dtS, treatment = "X1", times = 5:8, se = TRUE, B = 50, mc.cores = 2) ## manually defining the cluster to be used ## useful when specific packages need to be loaded in each cluster fit <- cph(formula = Surv(time,event)~ X1+X2+rcs(X6),data=dtS,y=TRUE,x=TRUE) cl <- parallel::makeCluster(2) parallel::clusterEvalQ(cl, library(rms)) ateFit1e <- ate(fit, data = dtS, treatment = "X1", times = 5:8, se = TRUE, B = 50, handler = "foreach", cl = cl) ## End(Not run) #### Survival settings without censoring #### #### ATE with glm #### ## generate data n <- 100 dtB <- sampleData(n, outcome="binary") dtB[, X2 := as.numeric(X2)] ## estimate a logistic regression model fit <- glm(formula = Y ~ X1+X2, data=dtB, family = "binomial") ## compute the ATE using X1 as the treatment variable ## only point estimate (argument se = FALSE) ateFit1a <- ate(fit, data = dtB, treatment = "X1", se = FALSE) ateFit1a ## Not run: ## with confidence intervals ateFit1b <- ate(fit, data = dtB, treatment = "X1", times = 5) ## just for having a nice output not used in computations summary(ateFit1b, short = TRUE) ## using the lava package library(lava) ateLava <- estimate(fit, function(p, data){ a <- p["(Intercept)"] ; b <- p["X11"] ; c <- p["X2"] ; R.X11 <- expit(a + b + c * data[["X2"]]) R.X10 <- expit(a + c * data[["X2"]]) list(risk0=R.X10,risk1=R.X11,riskdiff=R.X11-R.X10)}, average=TRUE) ateLava ## End(Not run) ## see wglm for handling right-censoring with glm #### Competing risks settings #### #### ATE with cause specific Cox regression #### ## generate data n <- 500 set.seed(10) dt <- sampleData(n, outcome="competing.risks") dt$X1 <- factor(rbinom(n, prob = c(0.2,0.3) , size = 2), labels = paste0("T",0:2)) ## estimate cause specific Cox model fitCR <- CSC(Hist(time,event)~ X1+X8,data=dt,cause=1) ## compute the ATE at times 1, 5, 10 using X1 as the treatment variable ateFit2a <- ate(fitCR, data = dt, treatment = "X1", times = c(1,5,10), cause = 1, se = TRUE, band = TRUE) summary(ateFit2a) as.data.table(ateFit2a) #### Double robust estimator #### ## Not run: ## generate data n <- 500 set.seed(10) dt <- sampleData(n, outcome="competing.risks") dt$time <- round(dt$time,1) dt$X1 <- factor(rbinom(n, prob = c(0.4) , size = 1), labels = paste0("T",0:1)) ## working models m.event <- CSC(Hist(time,event)~ X1+X2+X3+X5+X8,data=dt) m.censor <- coxph(Surv(time,event==0)~ X1+X2+X3+X5+X8,data=dt, x = TRUE, y = TRUE) m.treatment <- glm(X1~X2+X3+X5+X8,data=dt,family=binomial(link="logit")) ## prediction + average ateRobust <- ate(event = m.event, treatment = m.treatment, censor = m.censor, data = dt, times = 5:10, cause = 1, band = TRUE) ## compare various estimators ateRobust3 <- ate(event = m.event, treatment = m.treatment, censor = m.censor, estimator = c("GFORMULA","IPTW","AIPTW"), data = dt, times = c(5:10), cause = 1, se = TRUE) print(setkeyv(as.data.table(ateRobust3, type = "meanRisk"),"time")) print(setkeyv(as.data.table(ateRobust3, type = "diffRisk"),"time")) ## End(Not run) #### time-dependent covariates ### ## Not run: library(survival) fit <- coxph(Surv(time, status) ~ celltype+karno + age + trt, veteran) vet2 <- survSplit(Surv(time, status) ~., veteran, cut=c(60, 120), episode ="timegroup") fitTD <- coxph(Surv(tstart, time, status) ~ celltype +karno + age + trt, data= vet2,x=1) set.seed(16) resVet <- ate(fitTD,formula=Hist(entry=tstart,time=time,event=status)~1, data = vet2, treatment = "celltype", times=5,verbose=1, landmark = c(0,30,60,90), cause = 1, B = 50, se = 1, band = FALSE, mc.cores=1) summary(resVet) ## End(Not run) ## Not run: set.seed(137) d=sampleDataTD(127) library(survival) d[,status:=1*(event==1)] d[,X3:=as.factor(X3)] ## ignore competing risks cox1TD <- coxph(Surv(start,time, status,type="counting") ~ X3+X5+X6+X8, data=d, x = TRUE) resTD1 <- ate(cox1TD,formula=Hist(entry=start,time=time,event=status)~1, data = d, treatment = "X3", contrasts = NULL, times=.5,verbose=1, landmark = c(0,0.5,1), B = 20, se = 1, band = FALSE, mc.cores=1) resTD1 ## account for competing risks cscTD <- CSC(Hist(time=time, event=event,entry=start) ~ X3+X5+X6+X8, data=d) set.seed(16) resTD <- ate(cscTD,formula=Hist(entry=start,time=time,event=event)~1, data = d, treatment = "X3", contrasts = NULL, times=.5,verbose=1, landmark = c(0,0.5,1), cause = 1, B = 20, se = 1, band = FALSE, mc.cores=1) resTD ## End(Not run)
Plot average risks.
## S3 method for class 'ate' autoplot( object, type = "meanRisk", first.derivative = FALSE, estimator = object$estimator[1], ci = object$inference$ci, band = object$inference$band, plot.type = "1", plot = TRUE, smooth = FALSE, digits = 2, alpha = NA, ylab = NULL, ... )
## S3 method for class 'ate' autoplot( object, type = "meanRisk", first.derivative = FALSE, estimator = object$estimator[1], ci = object$inference$ci, band = object$inference$band, plot.type = "1", plot = TRUE, smooth = FALSE, digits = 2, alpha = NA, ylab = NULL, ... )
object |
Object obtained with the function |
type |
[character vector] what to displayed.
Can be |
first.derivative |
[logical] If |
estimator |
[character] The type of estimator relative to which the risks should be displayed. |
ci |
[logical] If |
band |
[logical] If |
plot.type |
[character] Type of plot to be used.
|
plot |
[logical] Should the graphic be plotted. |
smooth |
[logical] Should a smooth version of the risk function be plotted instead of a simple function? |
digits |
[integer, >0] Number of decimal places. |
alpha |
[numeric, 0-1] Transparency of the confidence bands. Argument passed to |
ylab |
[character] Label for the y axis. |
... |
Additional parameters to cutomize the display. |
Invisible. A list containing:
plot: the ggplot object.
data: the data used to create the plot.
ate
to compute average risks.
library(survival) library(rms) library(ggplot2) #### simulate data #### n <- 1e2 set.seed(10) dtS <- sampleData(n,outcome="survival") seqTimes <- c(0,sort(dtS$time[dtS$event==1]),max(dtS$time)) #### Cox model #### fit <- cph(formula = Surv(time,event)~ X1+X2,data=dtS,y=TRUE,x=TRUE) #### plot.type = 1: for few timepoints #### ateFit <- ate(fit, data = dtS, treatment = "X1", times = c(1,2,5,10), se = TRUE, band = TRUE) ggplot2::autoplot(ateFit) ## Not run: ggplot2::autoplot(ateFit, band = FALSE) ggplot2::autoplot(ateFit, type = "diffRisk") ggplot2::autoplot(ateFit, type = "ratioRisk") ## End(Not run) #### plot.type = 2: when looking at all jump times #### ## Not run: ateFit <- ate(fit, data = dtS, treatment = "X1", times = seqTimes, se = TRUE, band = TRUE) ggplot2::autoplot(ateFit, plot.type = "2") ## customize plot outGG <- ggplot2::autoplot(ateFit, plot.type = "2", alpha = 0.25) outGG$plot + facet_wrap(~X1, labeller = label_both) ## Looking at the difference after smoothing outGGS <- ggplot2::autoplot(ateFit, plot.type = "2", alpha = NA, smooth = TRUE) outGGS$plot + facet_wrap(~X1, labeller = label_both) ## first derivative ## (computation of the confidence intervals takes time) ## (based on simulation - n.sim parameter) ggplot2::autoplot(ateFit, plot.type = "2", smooth = TRUE, band = FALSE, type = "diffRisk") ggplot2::autoplot(ateFit, plot.type = "2", smooth = TRUE, first.derivative = TRUE, band = FALSE, type = "diffRisk") ## End(Not run)
library(survival) library(rms) library(ggplot2) #### simulate data #### n <- 1e2 set.seed(10) dtS <- sampleData(n,outcome="survival") seqTimes <- c(0,sort(dtS$time[dtS$event==1]),max(dtS$time)) #### Cox model #### fit <- cph(formula = Surv(time,event)~ X1+X2,data=dtS,y=TRUE,x=TRUE) #### plot.type = 1: for few timepoints #### ateFit <- ate(fit, data = dtS, treatment = "X1", times = c(1,2,5,10), se = TRUE, band = TRUE) ggplot2::autoplot(ateFit) ## Not run: ggplot2::autoplot(ateFit, band = FALSE) ggplot2::autoplot(ateFit, type = "diffRisk") ggplot2::autoplot(ateFit, type = "ratioRisk") ## End(Not run) #### plot.type = 2: when looking at all jump times #### ## Not run: ateFit <- ate(fit, data = dtS, treatment = "X1", times = seqTimes, se = TRUE, band = TRUE) ggplot2::autoplot(ateFit, plot.type = "2") ## customize plot outGG <- ggplot2::autoplot(ateFit, plot.type = "2", alpha = 0.25) outGG$plot + facet_wrap(~X1, labeller = label_both) ## Looking at the difference after smoothing outGGS <- ggplot2::autoplot(ateFit, plot.type = "2", alpha = NA, smooth = TRUE) outGGS$plot + facet_wrap(~X1, labeller = label_both) ## first derivative ## (computation of the confidence intervals takes time) ## (based on simulation - n.sim parameter) ggplot2::autoplot(ateFit, plot.type = "2", smooth = TRUE, band = FALSE, type = "diffRisk") ggplot2::autoplot(ateFit, plot.type = "2", smooth = TRUE, first.derivative = TRUE, band = FALSE, type = "diffRisk") ## End(Not run)
Plot predictions from a Cox model.
## S3 method for class 'predictCox' autoplot( object, type = NULL, ci = object$se, band = object$band, plot = TRUE, smooth = NULL, digits = 2, alpha = NA, group.by = "row", reduce.data = FALSE, ylab = NULL, first.derivative = FALSE, ... )
## S3 method for class 'predictCox' autoplot( object, type = NULL, ci = object$se, band = object$band, plot = TRUE, smooth = NULL, digits = 2, alpha = NA, group.by = "row", reduce.data = FALSE, ylab = NULL, first.derivative = FALSE, ... )
object |
Object obtained with the function |
type |
[character] The type of predicted value to display.
Choices are:
|
ci |
[logical] If |
band |
[logical] If |
plot |
[logical] Should the graphic be plotted. |
smooth |
[logical] Should a smooth version of the risk function be plotted instead of a simple function? |
digits |
[integer] Number of decimal places when displaying the values of the covariates in the caption. |
alpha |
[numeric, 0-1] Transparency of the confidence bands. Argument passed to |
group.by |
[character] The grouping factor used to color the prediction curves. Can be |
reduce.data |
[logical] If |
ylab |
[character] Label for the y axis. |
first.derivative |
[logical] If |
... |
Additional parameters to cutomize the display. |
Invisible. A list containing:
plot: the ggplot object.
data: the data used to create the plot.
predictCox
to compute cumulative hazard and survival based on a Cox model.
library(survival) library(ggplot2) #### simulate data #### set.seed(10) d <- sampleData(1e2, outcome = "survival") seqTau <- c(0,sort(unique(d$time[d$event==1])), max(d$time)) #### Cox model #### m.cox <- coxph(Surv(time,event)~ X1 + X2 + X3, data = d, x = TRUE, y = TRUE) ## display baseline hazard e.basehaz <- predictCox(m.cox) autoplot(e.basehaz, type = "cumhazard") ## Not run: autoplot(e.basehaz, type = "cumhazard", size.point = 0) ## without points autoplot(e.basehaz, type = "cumhazard", smooth = TRUE) autoplot(e.basehaz, type = "cumhazard", smooth = TRUE, first.derivative = TRUE) ## End(Not run) ## display baseline hazard with type of event ## Not run: e.basehaz <- predictCox(m.cox, keep.newdata = TRUE) autoplot(e.basehaz, type = "cumhazard") autoplot(e.basehaz, type = "cumhazard", shape.point = c(3,NA)) ## End(Not run) ## display predicted survival ## Not run: pred.cox <- predictCox(m.cox, newdata = d[1:2,], times = seqTau, type = "survival", keep.newdata = TRUE) autoplot(pred.cox) autoplot(pred.cox, smooth = TRUE) autoplot(pred.cox, group.by = "covariates") autoplot(pred.cox, group.by = "covariates", reduce.data = TRUE) autoplot(pred.cox, group.by = "X1", reduce.data = TRUE) ## End(Not run) ## predictions with confidence interval/bands ## Not run: pred.cox <- predictCox(m.cox, newdata = d[1:2,,drop=FALSE], times = seqTau, type = "survival", band = TRUE, se = TRUE, keep.newdata = TRUE) res <- autoplot(pred.cox, ci = TRUE, band = TRUE, plot = FALSE) res$plot + facet_wrap(~row) res2 <- autoplot(pred.cox, ci = TRUE, band = TRUE, alpha = 0.1, plot = FALSE) res2$plot + facet_wrap(~row) ## End(Not run) #### Stratified Cox model #### ## Not run: m.cox.strata <- coxph(Surv(time,event)~ strata(X1) + strata(X2) + X3 + X4, data = d, x = TRUE, y = TRUE) ## baseline hazard pred.baseline <- predictCox(m.cox.strata, keep.newdata = TRUE, type = "survival") res <- autoplot(pred.baseline) res$plot + facet_wrap(~strata, labeller = label_both) ## predictions pred.cox.strata <- predictCox(m.cox.strata, newdata = d[1:3,,drop=FALSE], time = seqTau, keep.newdata = TRUE, se = TRUE) res2 <- autoplot(pred.cox.strata, type = "survival", group.by = "strata", plot = FALSE) res2$plot + facet_wrap(~strata, labeller = label_both) + theme(legend.position="bottom") ## smooth version autoplot(pred.cox.strata, type = "survival", group.by = "strata", smooth = TRUE, ci = FALSE) ## End(Not run) #### Cox model with splines #### ## Not run: require(splines) m.cox.spline <- coxph(Surv(time,event)~ X1 + X2 + ns(X6,4), data = d, x = TRUE, y = TRUE) grid <- data.frame(X1 = factor(0,0:1), X2 = factor(0,0:1), X6 = seq(min(d$X6),max(d$X6), length.out = 100)) pred.spline <- predictCox(m.cox.spline, newdata = grid, keep.newdata = TRUE, se = TRUE, band = TRUE, centered = TRUE, type = "lp") autoplot(pred.spline, group.by = "X6") autoplot(pred.spline, group.by = "X6", alpha = 0.5) grid2 <- data.frame(X1 = factor(1,0:1), X2 = factor(0,0:1), X6 = seq(min(d$X6),max(d$X6), length.out = 100)) pred.spline <- predictCox(m.cox.spline, newdata = rbind(grid,grid2), keep.newdata = TRUE, se = TRUE, band = TRUE, centered = TRUE, type = "lp") autoplot(pred.spline, group.by = c("X6","X1"), alpha = 0.5, plot = FALSE)$plot + facet_wrap(~X1) ## End(Not run)
library(survival) library(ggplot2) #### simulate data #### set.seed(10) d <- sampleData(1e2, outcome = "survival") seqTau <- c(0,sort(unique(d$time[d$event==1])), max(d$time)) #### Cox model #### m.cox <- coxph(Surv(time,event)~ X1 + X2 + X3, data = d, x = TRUE, y = TRUE) ## display baseline hazard e.basehaz <- predictCox(m.cox) autoplot(e.basehaz, type = "cumhazard") ## Not run: autoplot(e.basehaz, type = "cumhazard", size.point = 0) ## without points autoplot(e.basehaz, type = "cumhazard", smooth = TRUE) autoplot(e.basehaz, type = "cumhazard", smooth = TRUE, first.derivative = TRUE) ## End(Not run) ## display baseline hazard with type of event ## Not run: e.basehaz <- predictCox(m.cox, keep.newdata = TRUE) autoplot(e.basehaz, type = "cumhazard") autoplot(e.basehaz, type = "cumhazard", shape.point = c(3,NA)) ## End(Not run) ## display predicted survival ## Not run: pred.cox <- predictCox(m.cox, newdata = d[1:2,], times = seqTau, type = "survival", keep.newdata = TRUE) autoplot(pred.cox) autoplot(pred.cox, smooth = TRUE) autoplot(pred.cox, group.by = "covariates") autoplot(pred.cox, group.by = "covariates", reduce.data = TRUE) autoplot(pred.cox, group.by = "X1", reduce.data = TRUE) ## End(Not run) ## predictions with confidence interval/bands ## Not run: pred.cox <- predictCox(m.cox, newdata = d[1:2,,drop=FALSE], times = seqTau, type = "survival", band = TRUE, se = TRUE, keep.newdata = TRUE) res <- autoplot(pred.cox, ci = TRUE, band = TRUE, plot = FALSE) res$plot + facet_wrap(~row) res2 <- autoplot(pred.cox, ci = TRUE, band = TRUE, alpha = 0.1, plot = FALSE) res2$plot + facet_wrap(~row) ## End(Not run) #### Stratified Cox model #### ## Not run: m.cox.strata <- coxph(Surv(time,event)~ strata(X1) + strata(X2) + X3 + X4, data = d, x = TRUE, y = TRUE) ## baseline hazard pred.baseline <- predictCox(m.cox.strata, keep.newdata = TRUE, type = "survival") res <- autoplot(pred.baseline) res$plot + facet_wrap(~strata, labeller = label_both) ## predictions pred.cox.strata <- predictCox(m.cox.strata, newdata = d[1:3,,drop=FALSE], time = seqTau, keep.newdata = TRUE, se = TRUE) res2 <- autoplot(pred.cox.strata, type = "survival", group.by = "strata", plot = FALSE) res2$plot + facet_wrap(~strata, labeller = label_both) + theme(legend.position="bottom") ## smooth version autoplot(pred.cox.strata, type = "survival", group.by = "strata", smooth = TRUE, ci = FALSE) ## End(Not run) #### Cox model with splines #### ## Not run: require(splines) m.cox.spline <- coxph(Surv(time,event)~ X1 + X2 + ns(X6,4), data = d, x = TRUE, y = TRUE) grid <- data.frame(X1 = factor(0,0:1), X2 = factor(0,0:1), X6 = seq(min(d$X6),max(d$X6), length.out = 100)) pred.spline <- predictCox(m.cox.spline, newdata = grid, keep.newdata = TRUE, se = TRUE, band = TRUE, centered = TRUE, type = "lp") autoplot(pred.spline, group.by = "X6") autoplot(pred.spline, group.by = "X6", alpha = 0.5) grid2 <- data.frame(X1 = factor(1,0:1), X2 = factor(0,0:1), X6 = seq(min(d$X6),max(d$X6), length.out = 100)) pred.spline <- predictCox(m.cox.spline, newdata = rbind(grid,grid2), keep.newdata = TRUE, se = TRUE, band = TRUE, centered = TRUE, type = "lp") autoplot(pred.spline, group.by = c("X6","X1"), alpha = 0.5, plot = FALSE)$plot + facet_wrap(~X1) ## End(Not run)
Plot predictions from a Cause-specific Cox proportional hazard regression.
## S3 method for class 'predictCSC' autoplot( object, ci = object$se, band = object$band, plot = TRUE, smooth = FALSE, digits = 2, alpha = NA, group.by = "row", reduce.data = FALSE, ... )
## S3 method for class 'predictCSC' autoplot( object, ci = object$se, band = object$band, plot = TRUE, smooth = FALSE, digits = 2, alpha = NA, group.by = "row", reduce.data = FALSE, ... )
object |
Object obtained with the function |
ci |
[logical] If |
band |
[logical] If |
plot |
[logical] Should the graphic be plotted. |
smooth |
[logical] Should a smooth version of the risk function be plotted instead of a simple function? |
digits |
[integer] Number of decimal places. |
alpha |
[numeric, 0-1] Transparency of the confidence bands. Argument passed to |
group.by |
[character] The grouping factor used to color the prediction curves. Can be |
reduce.data |
[logical] If |
... |
Additional parameters to cutomize the display. |
Invisible. A list containing:
plot: the ggplot object.
data: the data used to create the plot.
predict.CauseSpecificCox
to compute risks based on a CSC model.
library(survival) library(rms) library(ggplot2) library(prodlim) #### simulate data #### set.seed(10) d <- sampleData(1e2, outcome = "competing.risks") seqTau <- c(0,unique(sort(d[d$event==1,time])), max(d$time)) #### CSC model #### m.CSC <- CSC(Hist(time,event)~ X1 + X2 + X6, data = d) pred.CSC <- predict(m.CSC, newdata = d[1:2,], time = seqTau, cause = 1, band = TRUE) autoplot(pred.CSC, alpha = 0.2) #### stratified CSC model #### m.SCSC <- CSC(Hist(time,event)~ strata(X1) + strata(X2) + X6, data = d) pred.SCSC <- predict(m.SCSC, time = seqTau, newdata = d[1:4,], cause = 1, keep.newdata = TRUE, keep.strata = TRUE) autoplot(pred.SCSC, group.by = "strata")
library(survival) library(rms) library(ggplot2) library(prodlim) #### simulate data #### set.seed(10) d <- sampleData(1e2, outcome = "competing.risks") seqTau <- c(0,unique(sort(d[d$event==1,time])), max(d$time)) #### CSC model #### m.CSC <- CSC(Hist(time,event)~ X1 + X2 + X6, data = d) pred.CSC <- predict(m.CSC, newdata = d[1:2,], time = seqTau, cause = 1, band = TRUE) autoplot(pred.CSC, alpha = 0.2) #### stratified CSC model #### m.SCSC <- CSC(Hist(time,event)~ strata(X1) + strata(X2) + X6, data = d) pred.SCSC <- predict(m.SCSC, time = seqTau, newdata = d[1:4,], cause = 1, keep.newdata = TRUE, keep.strata = TRUE) autoplot(pred.SCSC, group.by = "strata")
ggplot AUC curves
## S3 method for class 'Score' autoplot( object, models, type = "score", lwd = 2, xlim, ylim, axes = TRUE, conf.int = FALSE, ... )
## S3 method for class 'Score' autoplot( object, models, type = "score", lwd = 2, xlim, ylim, axes = TRUE, conf.int = FALSE, ... )
object |
Object obtained with |
models |
Choice of models to plot |
type |
Character. Either |
lwd |
Line width |
xlim |
Limits for x-axis |
ylim |
Limits for y-axis |
axes |
Logical. If |
conf.int |
Logical. If |
... |
Not yet used |
library(survival) library(ggplot2) set.seed(10) d=sampleData(100,outcome="survival") nd=sampleData(100,outcome="survival") f1=coxph(Surv(time,event)~X1+X6+X8,data=d,x=TRUE,y=TRUE) f2=coxph(Surv(time,event)~X2+X5+X9,data=d,x=TRUE,y=TRUE) xx=Score(list(f1,f2), formula=Surv(time,event)~1, data=nd, metrics="auc", null.model=FALSE, times=seq(3:10)) g <- autoplot(xx) print(g) aucgraph <- plotAUC(xx) plotAUC(xx,conf.int=TRUE) plotAUC(xx,which="contrasts") plotAUC(xx,which="contrasts",conf.int=TRUE)
library(survival) library(ggplot2) set.seed(10) d=sampleData(100,outcome="survival") nd=sampleData(100,outcome="survival") f1=coxph(Surv(time,event)~X1+X6+X8,data=d,x=TRUE,y=TRUE) f2=coxph(Surv(time,event)~X2+X5+X9,data=d,x=TRUE,y=TRUE) xx=Score(list(f1,f2), formula=Surv(time,event)~1, data=nd, metrics="auc", null.model=FALSE, times=seq(3:10)) g <- autoplot(xx) print(g) aucgraph <- plotAUC(xx) plotAUC(xx,conf.int=TRUE) plotAUC(xx,which="contrasts") plotAUC(xx,which="contrasts",conf.int=TRUE)
C++ function to estimate the baseline hazard from a Cox Model
baseHaz_cpp( starttimes, stoptimes, status, eXb, strata, predtimes, emaxtimes, nPatients, nStrata, cause, Efron )
baseHaz_cpp( starttimes, stoptimes, status, eXb, strata, predtimes, emaxtimes, nPatients, nStrata, cause, Efron )
starttimes |
a vector of times (begin at risk period). |
stoptimes |
a vector of times (end at risk period). |
status |
a vector indicating censoring or event. |
eXb |
a numeric vector (exponential of the linear predictor). |
strata |
a vector of integers (index of the strata for each observation). |
predtimes |
a vector of times (time at which to evaluate the hazard). Must be sorted. |
emaxtimes |
another vector of times, one per strata (last observation time in each strata). |
nPatients |
number of observations. |
nStrata |
number of strata |
cause |
the status value corresponding to event. |
Efron |
whether Efron or Breslow estimator should be used in presence of ties. |
WARNING stoptimes status eXb and strata must be sorted by strata, stoptimes, and status
Compute the p.value associated with the estimated statistic using a bootstrap sample of its distribution under H1.
boot2pvalue( x, null, estimate = NULL, alternative = "two.sided", FUN.ci = quantileCI, tol = .Machine$double.eps^0.5 )
boot2pvalue( x, null, estimate = NULL, alternative = "two.sided", FUN.ci = quantileCI, tol = .Machine$double.eps^0.5 )
x |
[numeric vector] a vector of bootstrap estimates of the statistic. |
null |
[numeric] value of the statistic under the null hypothesis. |
estimate |
[numeric] the estimated statistic. |
alternative |
[character] a character string specifying the alternative hypothesis, must be one of "two.sided" (default), "greater" or "less". |
FUN.ci |
[function] the function used to compute the confidence interval.
Must take |
tol |
[numeric] the absolute convergence tolerance. |
For test statistic close to 0, this function returns 1.
For positive test statistic, this function search the quantile alpha such that:
quantile(x, probs = alpha)=0
when the argument alternative is set to "greater"
.
quantile(x, probs = 0.5*alpha)=0
when the argument alternative is set to "two.sided"
.
If the argument alternative is set to "less"
, it returns 1.
For negative test statistic, this function search the quantile alpha such that:
quantile(x, probs = 1-alpha=0
when the argument alternative is set to "less"
.
quantile(x, probs = 1-0.5*alpha=0
when the argument alternative is set to "two.sided"
.
If the argument alternative is set to "greater"
, it returns 1.
set.seed(10) #### no effect #### x <- rnorm(1e3) boot2pvalue(x, null = 0, estimate = mean(x), alternative = "two.sided") ## expected value of 1 boot2pvalue(x, null = 0, estimate = mean(x), alternative = "greater") ## expected value of 0.5 boot2pvalue(x, null = 0, estimate = mean(x), alternative = "less") ## expected value of 0.5 #### positive effect #### x <- rnorm(1e3, mean = 1) boot2pvalue(x, null = 0, estimate = 1, alternative = "two.sided") ## expected value of 0.32 = 2*pnorm(q = 0, mean = -1) = 2*mean(x<=0) boot2pvalue(x, null = 0, estimate = 1, alternative = "greater") ## expected value of 0.16 = pnorm(q = 0, mean = 1) = mean(x<=0) boot2pvalue(x, null = 0, estimate = 1, alternative = "less") ## expected value of 0.84 = 1-pnorm(q = 0, mean = 1) = mean(x>=0) #### negative effect #### x <- rnorm(1e3, mean = -1) boot2pvalue(x, null = 0, estimate = -1, alternative = "two.sided") ## expected value of 0.32 = 2*(1-pnorm(q = 0, mean = -1)) = 2*mean(x>=0) boot2pvalue(x, null = 0, estimate = -1, alternative = "greater") ## expected value of 0.84 = pnorm(q = 0, mean = -1) = mean(x<=0) boot2pvalue(x, null = 0, estimate = -1, alternative = "less") # pnorm(q = 0, mean = -1) ## expected value of 0.16 = 1-pnorm(q = 0, mean = -1) = mean(x>=0)
set.seed(10) #### no effect #### x <- rnorm(1e3) boot2pvalue(x, null = 0, estimate = mean(x), alternative = "two.sided") ## expected value of 1 boot2pvalue(x, null = 0, estimate = mean(x), alternative = "greater") ## expected value of 0.5 boot2pvalue(x, null = 0, estimate = mean(x), alternative = "less") ## expected value of 0.5 #### positive effect #### x <- rnorm(1e3, mean = 1) boot2pvalue(x, null = 0, estimate = 1, alternative = "two.sided") ## expected value of 0.32 = 2*pnorm(q = 0, mean = -1) = 2*mean(x<=0) boot2pvalue(x, null = 0, estimate = 1, alternative = "greater") ## expected value of 0.16 = pnorm(q = 0, mean = 1) = mean(x<=0) boot2pvalue(x, null = 0, estimate = 1, alternative = "less") ## expected value of 0.84 = 1-pnorm(q = 0, mean = 1) = mean(x>=0) #### negative effect #### x <- rnorm(1e3, mean = -1) boot2pvalue(x, null = 0, estimate = -1, alternative = "two.sided") ## expected value of 0.32 = 2*(1-pnorm(q = 0, mean = -1)) = 2*mean(x>=0) boot2pvalue(x, null = 0, estimate = -1, alternative = "greater") ## expected value of 0.84 = pnorm(q = 0, mean = -1) = mean(x<=0) boot2pvalue(x, null = 0, estimate = -1, alternative = "less") # pnorm(q = 0, mean = -1) ## expected value of 0.16 = 1-pnorm(q = 0, mean = -1) = mean(x>=0)
Retrospective boxplots of risk quantiles conditional on outcome
## S3 method for class 'Score' boxplot( x, model, reference, type = "risk", timepoint, overall = 1L, lwd = 3, xlim, xlab = "", main, outcome.label, outcome.label.offset = 0, event.labels, refline = (type != "risk"), add = FALSE, ... )
## S3 method for class 'Score' boxplot( x, model, reference, type = "risk", timepoint, overall = 1L, lwd = 3, xlim, xlab = "", main, outcome.label, outcome.label.offset = 0, event.labels, refline = (type != "risk"), add = FALSE, ... )
x |
Score object obtained by calling function |
model |
Choice of risk prediction model |
reference |
Choice of reference risk prediction model for calculation of risk differences. |
type |
Either |
timepoint |
time point specifying the prediction horizon |
overall |
Logical. Tag to be documented. |
lwd |
line width |
xlim |
x-axis limits |
xlab |
x-axis label |
main |
title of plot |
outcome.label |
Title label for column which shows the outcome status |
outcome.label.offset |
Vertical offset for outcome.label |
event.labels |
Labels for the different events (causes). |
refline |
Logical, for |
add |
Logical. Tag to be documented. |
... |
not used |
# binary outcome library(data.table) library(prodlim) set.seed(10) db=sampleData(40,outcome="binary") fitconv=glm(Y~X3+X5,data=db,family=binomial) fitnew=glm(Y~X1+X3+X5+X6+X7,data=db,family=binomial) x=Score(list(new=fitnew,conv=fitconv), formula=Y~1,contrasts=list(c(2,1)), data=db,plots="box",null.model=FALSE) boxplot(x) # survival outcome library(survival) ds=sampleData(40,outcome="survival") fit=coxph(Surv(time,event)~X6+X9,data=ds,x=TRUE,y=TRUE) ## Not run: scoreobj=Score(list("Cox"=fit), formula=Hist(time,event)~1, data=ds, metrics=NULL, plots="box", times=c(1,5),null.model=FALSE) boxplot(scoreobj,timepoint=5) boxplot(scoreobj,timepoint=1) ## End(Not run) # competing risks outcome library(survival) data(Melanoma, package = "riskRegression") fit = CSC(Hist(time,event,cens.code="censored")~invasion+age+sex,data=Melanoma) scoreobj=Score(list("CSC"=fit), formula=Hist(time,event,cens.code="censored")~1, data=Melanoma,plots="box",times=5*365.25,null.model=FALSE) par(mar=c(4,12,4,4)) boxplot(scoreobj,timepoint=5*365.25) # more than 2 competing risks m=lava::lvm(~X1+X2+X3) lava::distribution(m, "eventtime1") <- lava::coxWeibull.lvm(scale = 1/100) lava::distribution(m, "eventtime2") <- lava::coxWeibull.lvm(scale = 1/100) lava::distribution(m, "eventtime3") <- lava::coxWeibull.lvm(scale = 1/100) lava::distribution(m, "censtime") <- lava::coxWeibull.lvm(scale = 1/100) lava::regression(m,eventtime2~X3)=1.3 m <- lava::eventTime(m, time ~ min(eventtime1 = 1, eventtime2 = 2, eventtime3 = 3, censtime = 0), "event") set.seed(101) dcr=as.data.table(lava::sim(m,101)) fit = CSC(Hist(time,event)~X1+X2+X3,data=dcr) scoreobj=Score(list("my model"=fit), formula=Hist(time,event)~1, data=dcr,plots="box",times=5,null.model=FALSE) boxplot(scoreobj)
# binary outcome library(data.table) library(prodlim) set.seed(10) db=sampleData(40,outcome="binary") fitconv=glm(Y~X3+X5,data=db,family=binomial) fitnew=glm(Y~X1+X3+X5+X6+X7,data=db,family=binomial) x=Score(list(new=fitnew,conv=fitconv), formula=Y~1,contrasts=list(c(2,1)), data=db,plots="box",null.model=FALSE) boxplot(x) # survival outcome library(survival) ds=sampleData(40,outcome="survival") fit=coxph(Surv(time,event)~X6+X9,data=ds,x=TRUE,y=TRUE) ## Not run: scoreobj=Score(list("Cox"=fit), formula=Hist(time,event)~1, data=ds, metrics=NULL, plots="box", times=c(1,5),null.model=FALSE) boxplot(scoreobj,timepoint=5) boxplot(scoreobj,timepoint=1) ## End(Not run) # competing risks outcome library(survival) data(Melanoma, package = "riskRegression") fit = CSC(Hist(time,event,cens.code="censored")~invasion+age+sex,data=Melanoma) scoreobj=Score(list("CSC"=fit), formula=Hist(time,event,cens.code="censored")~1, data=Melanoma,plots="box",times=5*365.25,null.model=FALSE) par(mar=c(4,12,4,4)) boxplot(scoreobj,timepoint=5*365.25) # more than 2 competing risks m=lava::lvm(~X1+X2+X3) lava::distribution(m, "eventtime1") <- lava::coxWeibull.lvm(scale = 1/100) lava::distribution(m, "eventtime2") <- lava::coxWeibull.lvm(scale = 1/100) lava::distribution(m, "eventtime3") <- lava::coxWeibull.lvm(scale = 1/100) lava::distribution(m, "censtime") <- lava::coxWeibull.lvm(scale = 1/100) lava::regression(m,eventtime2~X3)=1.3 m <- lava::eventTime(m, time ~ min(eventtime1 = 1, eventtime2 = 2, eventtime3 = 3, censtime = 0), "event") set.seed(101) dcr=as.data.table(lava::sim(m,101)) fit = CSC(Hist(time,event)~X1+X2+X3,data=dcr) scoreobj=Score(list("my model"=fit), formula=Hist(time,event)~1, data=dcr,plots="box",times=5,null.model=FALSE) boxplot(scoreobj)
Compute the standard error associated to the predictions from Cox regression model using a first order von Mises expansion of the functional (cumulative hazard or survival).
calcSeCox( object, times, nTimes, type, diag, Lambda0, object.n, object.time, object.eXb, object.strata, nStrata, new.n, new.eXb, new.LPdata, new.strata, new.survival, nVar.lp, export, store.iid )
calcSeCox( object, times, nTimes, type, diag, Lambda0, object.n, object.time, object.eXb, object.strata, nStrata, new.n, new.eXb, new.LPdata, new.strata, new.survival, nVar.lp, export, store.iid )
object |
The fitted Cox regression model object either
obtained with |
times |
Vector of times at which to return the estimated hazard/survival. |
nTimes |
the length of the argument |
type |
One or several strings that match (either in lower or upper case or mixtures) one
or several of the strings |
diag |
[logical] when |
Lambda0 |
the baseline hazard estimate returned by |
object.n |
the number of observations in the dataset used to estimate the object. |
object.time |
the time to event of the observations used to estimate the object. |
object.eXb |
the exponential of the linear predictor relative to the observations used to estimate the object. |
object.strata |
the strata index of the observations used to estimate the object. |
nStrata |
the number of strata. |
new.n |
the number of observations for which the prediction was performed. |
new.eXb |
the linear predictor evaluated for the new observations. |
new.LPdata |
the variables involved in the linear predictor for the new observations. |
new.strata |
the strata indicator for the new observations. |
new.survival |
the survival evaluated for the new observations. |
nVar.lp |
the number of variables that form the linear predictor. |
export |
can be "iid" to return the value of the influence function for each observation. "se" to return the standard error for a given timepoint. "average.iid" to return the value of the average influence function over the observations for which the prediction was performed. |
store.iid |
Implementation used to estimate the influence function and the standard error.
Can be |
store.iid="full"
compute the influence function for each observation at each time in the argument times
before computing the standard error / influence functions.
store.iid="minimal"
recompute for each subject specific prediction the influence function for the baseline hazard.
This avoid to store all the influence functions but may lead to repeated evaluation of the influence function.
This solution is therefore more efficient in memory usage but may not be in terms of computation time.
A list optionally containing the standard error for the survival, cumulative hazard and hazard.
Brice Ozenne [email protected], Thomas A. Gerds [email protected]
Standard error of the absolute risk predicted from cause-specific Cox models using a first order von Mises expansion of the absolute risk functional.
calcSeCSC( object, cif, hazard, cumhazard, survival, object.time, object.maxtime, eXb, new.LPdata, new.strata, times, surv.type, ls.infoVar, new.n, cause, nCause, nVar.lp, export, store.iid, diag )
calcSeCSC( object, cif, hazard, cumhazard, survival, object.time, object.maxtime, eXb, new.LPdata, new.strata, times, surv.type, ls.infoVar, new.n, cause, nCause, nVar.lp, export, store.iid, diag )
object |
The fitted cause specific Cox model |
cif |
the cumulative incidence function at each prediction time for each individual. |
hazard |
list containing the baseline hazard for each cause in a matrix form. Columns correspond to the strata. |
cumhazard |
list containing the cumulative baseline hazard for each cause in a matrix form. Columns correspond to the strata. |
survival |
list containing the (all cause) survival in a matrix form at t-. Columns correspond to event times. |
object.time |
a vector containing all the events regardless to the cause. |
object.maxtime |
a matrix containing the latest event in the strata of the observation for each cause. |
eXb |
a matrix containing the exponential of the linear predictor evaluated for the new observations (rows) for each cause (columns) |
new.LPdata |
a list of design matrices for the new observations for each cause. |
new.strata |
a matrix containing the strata indicator for each observation and each cause. |
times |
the time points at which to evaluate the predictions. |
surv.type |
see the surv.type argument of |
ls.infoVar |
A list containing the output of |
new.n |
the number of new observations. |
cause |
the cause of interest. |
nCause |
the number of causes. |
nVar.lp |
the number of variables that form the linear predictor in each Cox model |
export |
can be "iid" to return the value of the influence function for each observation "se" to return the standard error for a given timepoint |
store.iid |
the method used to compute the influence function and the standard error.
Can be |
diag |
[logical] when |
Can also return the empirical influence function of the functionals cumulative hazard or survival or the sum over the observations of the empirical influence function.
store.iid="full"
compute the influence function for each observation at each time in the argument times
before computing the standard error / influence functions.
store.iid="minimal"
recompute for each subject specific prediction the influence function for the baseline hazard.
This avoid to store all the influence functions but may lead to repeated evaluation of the influence function.
This solution is therefore efficient more efficient in memory usage but may not be in term of computation time.
S3-wrapper function for cforest from the party package
Cforest(formula, data, ...)
Cforest(formula, data, ...)
formula |
Passed on as is. See |
data |
Passed on as is. See |
... |
Passed on as they are. See |
See cforest
of the party
package.
list with two elements: cforest and call
Ulla B. Mogensen, Hemant Ishwaran, Thomas A. Gerds (2012). Evaluating Random Forests for Survival Analysis Using Prediction Error Curves. Journal of Statistical Software, 50(11), 1-23. URL http://www.jstatsoft.org/v50/i11/.
Extract coefficients from a Cause-Specific Cox regression model
## S3 method for class 'CauseSpecificCox' coef(object, ...)
## S3 method for class 'CauseSpecificCox' coef(object, ...)
object |
Object obtained with CSC |
... |
not used |
Extract coefficients from riskRegression model
## S3 method for class 'riskRegression' coef(object, digits = 3, eps = 10^-4, ...)
## S3 method for class 'riskRegression' coef(object, digits = 3, eps = 10^-4, ...)
object |
Object obtained with |
digits |
Number of digits |
eps |
P-values below this number are shown as |
... |
not used |
Fast computation of sweep(X, MARGIN = 1, FUN = "-", STATS = center)
colCenter_cpp(X, center)
colCenter_cpp(X, center)
X |
A matrix. |
center |
a numeric vector of length equal to the number of rows of |
A matrix of same size as X.
Brice Ozenne <[email protected]>
x <- matrix(1,6,5) sweep(x, MARGIN = 1, FUN = "-", STATS = 1:6) colCenter_cpp(x, 1:6 )
x <- matrix(1,6,5) sweep(x, MARGIN = 1, FUN = "-", STATS = 1:6) colCenter_cpp(x, 1:6 )
Fast computation of apply(x,2,cumsum)
colCumSum(x)
colCumSum(x)
x |
A matrix. |
A matrix of same size as x.
Thomas Alexander Gerds <[email protected]>
x <- matrix(1:8,ncol=2) colCumSum(x)
x <- matrix(1:8,ncol=2) colCumSum(x)
Fast computation of sweep(X, MARGIN = 1, FUN = "*", STATS = scale)
colMultiply_cpp(X, scale)
colMultiply_cpp(X, scale)
X |
A matrix. |
scale |
a numeric vector of length equal to the number of rows of |
A matrix of same size as X.
Brice Ozenne <[email protected]>
x <- matrix(1,6,5) sweep(x, MARGIN = 1, FUN = "*", STATS = 1:6) colMultiply_cpp(x, 1:6 )
x <- matrix(1,6,5) sweep(x, MARGIN = 1, FUN = "*", STATS = 1:6) colMultiply_cpp(x, 1:6 )
Fast computation of sweep(X, MARGIN = 1, FUN = "/", STATS = scale)
colScale_cpp(X, scale)
colScale_cpp(X, scale)
X |
A matrix. |
scale |
a numeric vector of length equal to the number of rows of |
A matrix of same size as X.
Brice Ozenne <[email protected]>
x <- matrix(1,6,5) sweep(x, MARGIN = 1, FUN = "/", STATS = 1:6) colScale_cpp(x, 1:6 )
x <- matrix(1,6,5) sweep(x, MARGIN = 1, FUN = "/", STATS = 1:6) colScale_cpp(x, 1:6 )
Confidence intervals and confidence Bands for the predicted absolute risk (cumulative incidence function).
## S3 method for class 'ate' confint( object, parm = NULL, level = 0.95, n.sim = 10000, estimator = object$estimator, contrasts = object$contrasts, allContrasts = object$allContrasts, meanRisk.transform = "none", diffRisk.transform = "none", ratioRisk.transform = "none", seed = NA, ci = object$inference$se, band = object$inference$band, p.value = TRUE, method.band = "maxT-simulation", alternative = "two.sided", bootci.method = "perc", ... )
## S3 method for class 'ate' confint( object, parm = NULL, level = 0.95, n.sim = 10000, estimator = object$estimator, contrasts = object$contrasts, allContrasts = object$allContrasts, meanRisk.transform = "none", diffRisk.transform = "none", ratioRisk.transform = "none", seed = NA, ci = object$inference$se, band = object$inference$band, p.value = TRUE, method.band = "maxT-simulation", alternative = "two.sided", bootci.method = "perc", ... )
object |
A |
parm |
not used. For compatibility with the generic method. |
level |
[numeric, 0-1] Level of confidence. |
n.sim |
[integer, >0] the number of simulations used to compute the quantiles for the confidence bands and/or perform adjustment for multiple comparisons. |
estimator |
[character] The type of estimator relative to which the estimates should be displayed. |
contrasts |
[character vector] levels of the treatment variable for which the risks should be assessed and compared. Default is to consider all levels. |
allContrasts |
[2-row character matrix] levels of the treatment variable to be compared. Default is to consider all pairwise comparisons. |
meanRisk.transform |
[character] the transformation used to improve coverage
of the confidence intervals for the mean risk in small samples.
Can be |
diffRisk.transform |
[character] the transformation used to improve coverage
of the confidence intervals for the risk difference in small samples.
Can be |
ratioRisk.transform |
[character] the transformation used to improve coverage
of the confidence intervals for the risk ratio in small samples.
Can be |
seed |
[integer, >0] seed number set when performing simulation for the confidence bands. If not given or NA no seed is set. |
ci |
[logical] should the confidence intervals be computed? |
band |
[logical] should the confidence bands be computed? |
p.value |
[logical] should the p-values/adjusted p-values be computed?
Requires argument |
method.band |
[character] method used to adjust for multiple comparisons.
Can be any element of |
alternative |
[character] a character string specifying the alternative hypothesis,
must be one of |
bootci.method |
[character] Method for constructing bootstrap confidence intervals. Either "perc" (the default), "norm", "basic", "stud", or "bca". |
... |
not used. |
Argument ci
, band
, p.value
, method.band
, alternative
, meanRisk.transform
, diffRisk.transform
, ratioRisk.transform
are only active when the ate
object contains the influence function.
Argument bootci.method
is only active when the ate
object contains bootstrap samples.
Influence function: confidence bands and confidence intervals computed via the influence function are automatically restricted to the interval of definition of the parameter (e.g. [0;1] for the average risk).
Single step max adjustment for multiple comparisons, i.e. accounting for the correlation between the test statistics but not for the ordering of the tests, can be performed setting the arguemnt method.band
to "maxT-integration"
or "maxT-simulation"
. The former uses numerical integration (pmvnorm
and qmvnorm
to perform the adjustment while the latter using simulation. Both assume that the test statistics are jointly normally distributed.
Bootstrap: confidence intervals obtained via bootstrap are computed
using the boot.ci
function of the boot
package.
p-value are obtained using test inversion method
(finding the smallest confidence level such that the interval contain the null hypothesis).
Brice Ozenne
library(survival) library(data.table) ## ## generate data #### set.seed(10) d <- sampleData(70,outcome="survival") d[, X1 := paste0("T",rbinom(.N, size = 2, prob = c(0.51)))] ## table(d$X1) #### stratified Cox model #### fit <- coxph(Surv(time,event)~X1 + strata(X2) + X6, data=d, ties="breslow", x = TRUE, y = TRUE) #### average treatment effect #### fit.ate <- ate(fit, treatment = "X1", times = 1:3, data = d, se = TRUE, iid = TRUE, band = TRUE) summary(fit.ate) dt.ate <- as.data.table(fit.ate) ## manual calculation of se dd <- copy(d) dd$X1 <- rep(factor("T0", levels = paste0("T",0:2)), NROW(dd)) out <- predictCox(fit, newdata = dd, se = TRUE, times = 1:3, average.iid = TRUE) term1 <- -out$survival.average.iid term2 <- sweep(1-out$survival, MARGIN = 2, FUN = "-", STATS = colMeans(1-out$survival)) sqrt(colSums((term1 + term2/NROW(d))^2)) ## fit.ate$meanRisk[treatment=="T0",se] ## note out2 <- predictCox(fit, newdata = dd, se = TRUE, times = 1:3, iid = TRUE) mean(out2$survival.iid[1,1,]) out$survival.average.iid[1,1] ## check confidence intervals (no transformation) dt.ate[,.(lower = pmax(0,estimate + qnorm(0.025) * se), lower2 = lower, upper = estimate + qnorm(0.975) * se, upper2 = upper)] ## add confidence intervals computed on the log-log scale ## and backtransformed outCI <- confint(fit.ate, meanRisk.transform = "loglog", diffRisk.transform = "atanh", ratioRisk.transform = "log") summary(outCI, type = "risk", short = TRUE) dt.ate[type == "meanRisk", newse := se/(estimate*log(estimate))] dt.ate[type == "meanRisk", .(lower = exp(-exp(log(-log(estimate)) - 1.96 * newse)), upper = exp(-exp(log(-log(estimate)) + 1.96 * newse)))]
library(survival) library(data.table) ## ## generate data #### set.seed(10) d <- sampleData(70,outcome="survival") d[, X1 := paste0("T",rbinom(.N, size = 2, prob = c(0.51)))] ## table(d$X1) #### stratified Cox model #### fit <- coxph(Surv(time,event)~X1 + strata(X2) + X6, data=d, ties="breslow", x = TRUE, y = TRUE) #### average treatment effect #### fit.ate <- ate(fit, treatment = "X1", times = 1:3, data = d, se = TRUE, iid = TRUE, band = TRUE) summary(fit.ate) dt.ate <- as.data.table(fit.ate) ## manual calculation of se dd <- copy(d) dd$X1 <- rep(factor("T0", levels = paste0("T",0:2)), NROW(dd)) out <- predictCox(fit, newdata = dd, se = TRUE, times = 1:3, average.iid = TRUE) term1 <- -out$survival.average.iid term2 <- sweep(1-out$survival, MARGIN = 2, FUN = "-", STATS = colMeans(1-out$survival)) sqrt(colSums((term1 + term2/NROW(d))^2)) ## fit.ate$meanRisk[treatment=="T0",se] ## note out2 <- predictCox(fit, newdata = dd, se = TRUE, times = 1:3, iid = TRUE) mean(out2$survival.iid[1,1,]) out$survival.average.iid[1,1] ## check confidence intervals (no transformation) dt.ate[,.(lower = pmax(0,estimate + qnorm(0.025) * se), lower2 = lower, upper = estimate + qnorm(0.975) * se, upper2 = upper)] ## add confidence intervals computed on the log-log scale ## and backtransformed outCI <- confint(fit.ate, meanRisk.transform = "loglog", diffRisk.transform = "atanh", ratioRisk.transform = "log") summary(outCI, type = "risk", short = TRUE) dt.ate[type == "meanRisk", newse := se/(estimate*log(estimate))] dt.ate[type == "meanRisk", .(lower = exp(-exp(log(-log(estimate)) - 1.96 * newse)), upper = exp(-exp(log(-log(estimate)) + 1.96 * newse)))]
Confidence intervals and confidence Bands for the difference between two estimates.
## S3 method for class 'influenceTest' confint( object, parm = NULL, level = 0.95, n.sim = 10000, transform = "none", seed = NA, ... )
## S3 method for class 'influenceTest' confint( object, parm = NULL, level = 0.95, n.sim = 10000, transform = "none", seed = NA, ... )
object |
A |
parm |
not used. For compatibility with the generic method. |
level |
[numeric, 0-1] Level of confidence. |
n.sim |
[integer, >0] the number of simulations used to compute the quantiles for the confidence bands. |
transform |
[character] the transformation used to improve coverage of the confidence intervals.
Can be |
seed |
[integer, >0] seed number set before performing simulations for the confidence bands. If not given or NA no seed is set. |
... |
not used. |
Except for the cumulative hazard, the confidence bands and confidence intervals are automatically restricted to the interval [-1;1].
Brice Ozenne
Confidence intervals and confidence Bands for the predicted survival/cumulative Hazard.
## S3 method for class 'predictCox' confint( object, parm = NULL, level = 0.95, n.sim = 10000, cumhazard.transform = "log", survival.transform = "loglog", seed = NA, ... )
## S3 method for class 'predictCox' confint( object, parm = NULL, level = 0.95, n.sim = 10000, cumhazard.transform = "log", survival.transform = "loglog", seed = NA, ... )
object |
A |
parm |
[character] the type of predicted value for which the confidence intervals should be output.
Can be |
level |
[numeric, 0-1] Level of confidence. |
n.sim |
[integer, >0] the number of simulations used to compute the quantiles for the confidence bands. |
cumhazard.transform |
[character] the transformation used to improve coverage
of the confidence intervals for the cumlative hazard in small samples.
Can be |
survival.transform |
[character] the transformation used to improve coverage
of the confidence intervals for the survival in small samples.
Can be |
seed |
[integer, >0] seed number set before performing simulations for the confidence bands. If not given or NA no seed is set. |
... |
not used. |
The confidence bands and confidence intervals are automatically restricted to the interval of definition of the statistic, i.e. a confidence interval for the survival of [0.5;1.2] will become [0.5;1].
Brice Ozenne
library(survival) #### generate data #### set.seed(10) d <- sampleData(40,outcome="survival") #### estimate a stratified Cox model #### fit <- coxph(Surv(time,event)~X1 + strata(X2) + X6, data=d, ties="breslow", x = TRUE, y = TRUE) #### compute individual specific survival probabilities fit.pred <- predictCox(fit, newdata=d[1:3], times=c(3,8), type = "survival", se = TRUE, iid = TRUE, band = TRUE) fit.pred ## check standard error sqrt(rowSums(fit.pred$survival.iid[,,1]^2)) ## se for individual 1 ## check confidence interval newse <- fit.pred$survival.se/(-fit.pred$survival*log(fit.pred$survival)) cbind(lower = as.double(exp(-exp(log(-log(fit.pred$survival)) + 1.96 * newse))), upper = as.double(exp(-exp(log(-log(fit.pred$survival)) - 1.96 * newse))) ) #### compute confidence intervals without transformation confint(fit.pred, survival.transform = "none") cbind(lower = as.double(fit.pred$survival - 1.96 * fit.pred$survival.se), upper = as.double(fit.pred$survival + 1.96 * fit.pred$survival.se) )
library(survival) #### generate data #### set.seed(10) d <- sampleData(40,outcome="survival") #### estimate a stratified Cox model #### fit <- coxph(Surv(time,event)~X1 + strata(X2) + X6, data=d, ties="breslow", x = TRUE, y = TRUE) #### compute individual specific survival probabilities fit.pred <- predictCox(fit, newdata=d[1:3], times=c(3,8), type = "survival", se = TRUE, iid = TRUE, band = TRUE) fit.pred ## check standard error sqrt(rowSums(fit.pred$survival.iid[,,1]^2)) ## se for individual 1 ## check confidence interval newse <- fit.pred$survival.se/(-fit.pred$survival*log(fit.pred$survival)) cbind(lower = as.double(exp(-exp(log(-log(fit.pred$survival)) + 1.96 * newse))), upper = as.double(exp(-exp(log(-log(fit.pred$survival)) - 1.96 * newse))) ) #### compute confidence intervals without transformation confint(fit.pred, survival.transform = "none") cbind(lower = as.double(fit.pred$survival - 1.96 * fit.pred$survival.se), upper = as.double(fit.pred$survival + 1.96 * fit.pred$survival.se) )
Confidence intervals and confidence Bands for the predicted absolute risk (cumulative incidence function).
## S3 method for class 'predictCSC' confint( object, parm = NULL, level = 0.95, n.sim = 10000, absRisk.transform = "loglog", seed = NA, ... )
## S3 method for class 'predictCSC' confint( object, parm = NULL, level = 0.95, n.sim = 10000, absRisk.transform = "loglog", seed = NA, ... )
object |
A |
parm |
not used. For compatibility with the generic method. |
level |
[numeric, 0-1] Level of confidence. |
n.sim |
[integer, >0] the number of simulations used to compute the quantiles for the confidence bands. |
absRisk.transform |
[character] the transformation used to improve coverage
of the confidence intervals for the predicted absolute risk in small samples.
Can be |
seed |
[integer, >0] seed number set before performing simulations for the confidence bands. If not given or NA no seed is set. |
... |
not used. |
The confidence bands and confidence intervals are automatically restricted to the interval [0;1].
Brice Ozenne
library(survival) library(prodlim) #### generate data #### set.seed(10) d <- sampleData(100) #### estimate a stratified CSC model ### fit <- CSC(Hist(time,event)~ X1 + strata(X2) + X6, data=d) #### compute individual specific risks fit.pred <- predict(fit, newdata=d[1:3], times=c(3,8), cause = 1, se = TRUE, iid = TRUE, band = TRUE) fit.pred ## check confidence intervals newse <- fit.pred$absRisk.se/(-fit.pred$absRisk*log(fit.pred$absRisk)) cbind(lower = as.double(exp(-exp(log(-log(fit.pred$absRisk)) + 1.96 * newse))), upper = as.double(exp(-exp(log(-log(fit.pred$absRisk)) - 1.96 * newse))) ) #### compute confidence intervals without transformation confint(fit.pred, absRisk.transform = "none") cbind(lower = as.double(fit.pred$absRisk - 1.96 * fit.pred$absRisk.se), upper = as.double(fit.pred$absRisk + 1.96 * fit.pred$absRisk.se) )
library(survival) library(prodlim) #### generate data #### set.seed(10) d <- sampleData(100) #### estimate a stratified CSC model ### fit <- CSC(Hist(time,event)~ X1 + strata(X2) + X6, data=d) #### compute individual specific risks fit.pred <- predict(fit, newdata=d[1:3], times=c(3,8), cause = 1, se = TRUE, iid = TRUE, band = TRUE) fit.pred ## check confidence intervals newse <- fit.pred$absRisk.se/(-fit.pred$absRisk*log(fit.pred$absRisk)) cbind(lower = as.double(exp(-exp(log(-log(fit.pred$absRisk)) + 1.96 * newse))), upper = as.double(exp(-exp(log(-log(fit.pred$absRisk)) - 1.96 * newse))) ) #### compute confidence intervals without transformation confint(fit.pred, absRisk.transform = "none") cbind(lower = as.double(fit.pred$absRisk - 1.96 * fit.pred$absRisk.se), upper = as.double(fit.pred$absRisk + 1.96 * fit.pred$absRisk.se) )
Extract the type of estimator for the baseline hazard
coxBaseEstimator(object) ## S3 method for class 'coxph' coxBaseEstimator(object) ## S3 method for class 'phreg' coxBaseEstimator(object)
coxBaseEstimator(object) ## S3 method for class 'coxph' coxBaseEstimator(object) ## S3 method for class 'phreg' coxBaseEstimator(object)
object |
The fitted Cox regression model object either
obtained with |
Brice Ozenne [email protected]
Extract the mean value of the covariates
coxCenter(object) ## S3 method for class 'cph' coxCenter(object) ## S3 method for class 'coxph' coxCenter(object) ## S3 method for class 'phreg' coxCenter(object)
coxCenter(object) ## S3 method for class 'cph' coxCenter(object) ## S3 method for class 'coxph' coxCenter(object) ## S3 method for class 'phreg' coxCenter(object)
object |
The fitted Cox regression model object either
obtained with |
Brice Ozenne [email protected]
Extract the formula from a Cox model
coxFormula(object) ## S3 method for class 'cph' coxFormula(object) ## S3 method for class 'coxph' coxFormula(object) ## S3 method for class 'phreg' coxFormula(object) ## S3 method for class 'glm' coxFormula(object)
coxFormula(object) ## S3 method for class 'cph' coxFormula(object) ## S3 method for class 'coxph' coxFormula(object) ## S3 method for class 'phreg' coxFormula(object) ## S3 method for class 'glm' coxFormula(object)
object |
The fitted Cox regression model object either
obtained with |
Brice Ozenne [email protected]
Compute the linear predictor of a Cox model
coxLP(object, data, center) ## S3 method for class 'cph' coxLP(object, data, center) ## S3 method for class 'coxph' coxLP(object, data, center) ## S3 method for class 'phreg' coxLP(object, data, center)
coxLP(object, data, center) ## S3 method for class 'cph' coxLP(object, data, center) ## S3 method for class 'coxph' coxLP(object, data, center) ## S3 method for class 'phreg' coxLP(object, data, center)
object |
The fitted Cox regression model object either
obtained with |
data |
a |
center |
should the linear predictor be computed after centering the covariates |
In case of empty linear predictor returns a vector of 0 with the same length as the number of rows of the dataset
Brice Ozenne [email protected]
Extract the design matrix used to train a Cox model. Should contain the time of event, the type of event, the variable for the linear predictor, the strata variables and the date of entry (in case of delayed entry).
coxModelFrame(object, center) ## S3 method for class 'coxph' coxModelFrame(object, center = FALSE) ## S3 method for class 'cph' coxModelFrame(object, center = FALSE) ## S3 method for class 'phreg' coxModelFrame(object, center = FALSE)
coxModelFrame(object, center) ## S3 method for class 'coxph' coxModelFrame(object, center = FALSE) ## S3 method for class 'cph' coxModelFrame(object, center = FALSE) ## S3 method for class 'phreg' coxModelFrame(object, center = FALSE)
object |
The fitted Cox regression model object either
obtained with |
center |
[logical] Should the variables of the linear predictor be added ? |
Brice Ozenne [email protected]
Extract the number of observations from a Cox model
coxN(object) ## S3 method for class 'cph' coxN(object) ## S3 method for class 'coxph' coxN(object) ## S3 method for class 'phreg' coxN(object) ## S3 method for class 'CauseSpecificCox' coxN(object) ## S3 method for class 'glm' coxN(object)
coxN(object) ## S3 method for class 'cph' coxN(object) ## S3 method for class 'coxph' coxN(object) ## S3 method for class 'phreg' coxN(object) ## S3 method for class 'CauseSpecificCox' coxN(object) ## S3 method for class 'glm' coxN(object)
object |
The fitted Cox regression model object either
obtained with |
Brice Ozenne [email protected]
Return the special character(s) of the Cox model, e.g. used to indicate the strata variables.
coxSpecial(object) ## S3 method for class 'coxph' coxSpecial(object) ## S3 method for class 'cph' coxSpecial(object) ## S3 method for class 'phreg' coxSpecial(object)
coxSpecial(object) ## S3 method for class 'coxph' coxSpecial(object) ## S3 method for class 'cph' coxSpecial(object) ## S3 method for class 'phreg' coxSpecial(object)
object |
The fitted Cox regression model object either
obtained with |
Must return a list with at least one element strata indicating the character in the formula marking the variable(s) defining the strata.
Brice Ozenne [email protected]
Define the strata in a dataset to match those of a stratified Cox model
coxStrata(object, data, sterms, strata.vars, strata.levels) ## S3 method for class 'cph' coxStrata(object, data, sterms, strata.vars, strata.levels) ## S3 method for class 'coxph' coxStrata(object, data, sterms, strata.vars, strata.levels) ## S3 method for class 'phreg' coxStrata(object, data, sterms, strata.vars, strata.levels)
coxStrata(object, data, sterms, strata.vars, strata.levels) ## S3 method for class 'cph' coxStrata(object, data, sterms, strata.vars, strata.levels) ## S3 method for class 'coxph' coxStrata(object, data, sterms, strata.vars, strata.levels) ## S3 method for class 'phreg' coxStrata(object, data, sterms, strata.vars, strata.levels)
object |
The fitted Cox regression model object either
obtained with |
data |
a |
sterms |
terms in the formula corresponding to the strata variables |
strata.vars |
the name of the variables used to define the strata |
strata.levels |
a named list containing for each variable used to form the strata all its possible levels |
levels |
the strata levels that have been used to fit the Cox model |
if no strata variables returns a vector of "1"
(factor).
Brice Ozenne [email protected]
Return the name of the strata in Cox model
coxStrataLevel(object) ## S3 method for class 'coxph' coxStrataLevel(object) ## S3 method for class 'cph' coxStrataLevel(object) ## S3 method for class 'phreg' coxStrataLevel(object)
coxStrataLevel(object) ## S3 method for class 'coxph' coxStrataLevel(object) ## S3 method for class 'cph' coxStrataLevel(object) ## S3 method for class 'phreg' coxStrataLevel(object)
object |
The fitted Cox regression model object either
obtained with |
Brice Ozenne [email protected]
Extract the variance covariance matrix of the beta from a Cox model
coxVarCov(object) ## S3 method for class 'cph' coxVarCov(object) ## S3 method for class 'coxph' coxVarCov(object) ## S3 method for class 'phreg' coxVarCov(object)
coxVarCov(object) ## S3 method for class 'cph' coxVarCov(object) ## S3 method for class 'coxph' coxVarCov(object) ## S3 method for class 'phreg' coxVarCov(object)
object |
The fitted Cox regression model object either
obtained with |
Should return NULL
if the Cox model has no covariate.
The rows and columns of the variance covariance matrix must be named with the names used in the design matrix.
Brice Ozenne [email protected]
Extract the name of the variables belonging to the linear predictor or used to form the strata
coxVariableName(object, model.frame)
coxVariableName(object, model.frame)
object |
The fitted Cox regression model object either
obtained with |
model.frame |
[data.frame] dataset containing all the relevant variables (entry, time to event, type of event, variables in the linear predictor, strata).
Output from |
Brice Ozenne [email protected]
Interface for fitting cause-specific Cox proportional hazard regression models in competing risk.
CSC(formula, data, cause, surv.type = "hazard", fitter = "coxph", ...)
CSC(formula, data, cause, surv.type = "hazard", fitter = "coxph", ...)
formula |
Either a single |
data |
A data in which to fit the models. |
cause |
The cause of interest. Defaults to the first cause (see Details). |
surv.type |
Either |
fitter |
Routine to fit the Cox regression models.
If |
... |
Arguments given to |
The causes and their order
are determined by prodlim::getStates()
applied to the Hist object.
models |
a list with the fitted (cause-specific) Cox regression objects |
response |
the event history response |
eventTimes |
the sorted (unique) event times |
surv.type |
the
value of |
theCause |
the cause of interest. see
|
causes |
the other causes |
Thomas A. Gerds [email protected] and Ulla B. Mogensen
B. Ozenne, A. L. Soerensen, T.H. Scheike, C.T. Torp-Pedersen, and T.A. Gerds. riskregression: Predicting the risk of an event using Cox regression models. R Journal, 9(2):440–460, 2017.
J Benichou and Mitchell H Gail. Estimates of absolute cause-specific risk in cohort studies. Biometrics, pages 813–826, 1990.
T.A. Gerds, T.H. Scheike, and P.K. Andersen. Absolute risk regression for competing risks: Interpretation, link functions, and prediction. Statistics in Medicine, 31(29):3921–3930, 2012.
library(prodlim) library(survival) data(Melanoma) ## fit two cause-specific Cox models ## different formula for the two causes fit1 <- CSC(list(Hist(time,status)~sex+age,Hist(time,status)~invasion+epicel+log(thick)), data=Melanoma) print(fit1) ## Not run: library(Publish) publish(fit1) ## End(Not run) ## model hazard of all cause mortality instead of hazard of type 2 fit1a <- CSC(list(Hist(time,status)~sex+age,Hist(time,status)~invasion+epicel+log(thick)), data=Melanoma, surv.type="surv") ## the predicted probabilities are similar plot(predictRisk(fit1,times=500,cause=1,newdata=Melanoma), predictRisk(fit1a,times=500,cause=1,newdata=Melanoma)) ## special case where cause 2 has no covariates fit1b <- CSC(list(Hist(time,status)~sex+age,Hist(time,status)~1), data=Melanoma) print(fit1b) predict(fit1b,cause=1,times=100,newdata=Melanoma) ## same formula for both causes fit2 <- CSC(Hist(time,status)~invasion+epicel+age, data=Melanoma) print(fit2) ## combine a cause-specific Cox regression model for cause 2 ## and a Cox regression model for the event-free survival: ## different formula for cause 2 and event-free survival fit3 <- CSC(list(Hist(time,status)~sex+invasion+epicel+age, Hist(time,status)~invasion+epicel+age), surv.type="surv", data=Melanoma) print(fit3) ## same formula for both causes fit4 <- CSC(Hist(time,status)~invasion+epicel+age, data=Melanoma, surv.type="surv") print(fit4) ## strata fit5 <- CSC(Hist(time,status)~invasion+epicel+age+strata(sex), data=Melanoma, surv.type="surv") print(fit5) ## sanity checks cox1 <- coxph(Surv(time,status==1)~invasion+epicel+age+strata(sex),data=Melanoma) cox2 <- coxph(Surv(time,status!=0)~invasion+epicel+age+strata(sex),data=Melanoma) all.equal(coef(cox1),coef(fit5$models[[1]])) all.equal(coef(cox2),coef(fit5$models[[2]])) ## predictions ## ## surv.type = "hazard": predictions for both causes can be extracted ## from the same fit fit2 <- CSC(Hist(time,status)~invasion+epicel+age, data=Melanoma) predict(fit2,cause=1,newdata=Melanoma[c(17,99,108),],times=c(100,1000,10000)) predictRisk(fit2,cause=1,newdata=Melanoma[c(17,99,108),],times=c(100,1000,10000)) predictRisk(fit2,cause=2,newdata=Melanoma[c(17,99,108),],times=c(100,1000,10000)) predict(fit2,cause=1,newdata=Melanoma[c(17,99,108),],times=c(100,1000,10000)) predict(fit2,cause=2,newdata=Melanoma[c(17,99,108),],times=c(100,1000,10000)) ## surv.type = "surv" we need to change the cause of interest library(survival) fit5.2 <- CSC(Hist(time,status)~invasion+epicel+age+strata(sex), data=Melanoma, surv.type="surv",cause=2) ## now this does not work try(predictRisk(fit5.2,cause=1,newdata=Melanoma,times=4)) ## but this does predictRisk(fit5.2,cause=2,newdata=Melanoma,times=100) predict(fit5.2,cause=2,newdata=Melanoma,times=100) predict(fit5.2,cause=2,newdata=Melanoma[4,],times=100)
library(prodlim) library(survival) data(Melanoma) ## fit two cause-specific Cox models ## different formula for the two causes fit1 <- CSC(list(Hist(time,status)~sex+age,Hist(time,status)~invasion+epicel+log(thick)), data=Melanoma) print(fit1) ## Not run: library(Publish) publish(fit1) ## End(Not run) ## model hazard of all cause mortality instead of hazard of type 2 fit1a <- CSC(list(Hist(time,status)~sex+age,Hist(time,status)~invasion+epicel+log(thick)), data=Melanoma, surv.type="surv") ## the predicted probabilities are similar plot(predictRisk(fit1,times=500,cause=1,newdata=Melanoma), predictRisk(fit1a,times=500,cause=1,newdata=Melanoma)) ## special case where cause 2 has no covariates fit1b <- CSC(list(Hist(time,status)~sex+age,Hist(time,status)~1), data=Melanoma) print(fit1b) predict(fit1b,cause=1,times=100,newdata=Melanoma) ## same formula for both causes fit2 <- CSC(Hist(time,status)~invasion+epicel+age, data=Melanoma) print(fit2) ## combine a cause-specific Cox regression model for cause 2 ## and a Cox regression model for the event-free survival: ## different formula for cause 2 and event-free survival fit3 <- CSC(list(Hist(time,status)~sex+invasion+epicel+age, Hist(time,status)~invasion+epicel+age), surv.type="surv", data=Melanoma) print(fit3) ## same formula for both causes fit4 <- CSC(Hist(time,status)~invasion+epicel+age, data=Melanoma, surv.type="surv") print(fit4) ## strata fit5 <- CSC(Hist(time,status)~invasion+epicel+age+strata(sex), data=Melanoma, surv.type="surv") print(fit5) ## sanity checks cox1 <- coxph(Surv(time,status==1)~invasion+epicel+age+strata(sex),data=Melanoma) cox2 <- coxph(Surv(time,status!=0)~invasion+epicel+age+strata(sex),data=Melanoma) all.equal(coef(cox1),coef(fit5$models[[1]])) all.equal(coef(cox2),coef(fit5$models[[2]])) ## predictions ## ## surv.type = "hazard": predictions for both causes can be extracted ## from the same fit fit2 <- CSC(Hist(time,status)~invasion+epicel+age, data=Melanoma) predict(fit2,cause=1,newdata=Melanoma[c(17,99,108),],times=c(100,1000,10000)) predictRisk(fit2,cause=1,newdata=Melanoma[c(17,99,108),],times=c(100,1000,10000)) predictRisk(fit2,cause=2,newdata=Melanoma[c(17,99,108),],times=c(100,1000,10000)) predict(fit2,cause=1,newdata=Melanoma[c(17,99,108),],times=c(100,1000,10000)) predict(fit2,cause=2,newdata=Melanoma[c(17,99,108),],times=c(100,1000,10000)) ## surv.type = "surv" we need to change the cause of interest library(survival) fit5.2 <- CSC(Hist(time,status)~invasion+epicel+age+strata(sex), data=Melanoma, surv.type="surv",cause=2) ## now this does not work try(predictRisk(fit5.2,cause=1,newdata=Melanoma,times=4)) ## but this does predictRisk(fit5.2,cause=2,newdata=Melanoma,times=100) predict(fit5.2,cause=2,newdata=Melanoma,times=100) predict(fit5.2,cause=2,newdata=Melanoma[4,],times=100)
The call is added to an ctree object
Ctree(...)
Ctree(...)
... |
passed to ctree |
list with two elements: ctree and call
Thomas A. Gerds <[email protected]>
Cforest
if (require("party",quietly=TRUE)){ library(prodlim) library(party) library(survival) set.seed(50) d <- SimSurv(50) nd <- data.frame(X1=c(0,1,0),X2=c(-1,0,1)) f <- Ctree(Surv(time,status)~X1+X2,data=d) predictRisk(f,newdata=nd,times=c(3,8)) }
if (require("party",quietly=TRUE)){ library(prodlim) library(party) library(survival) set.seed(50) d <- SimSurv(50) nd <- data.frame(X1=c(0,1,0),X2=c(-1,0,1)) f <- Ctree(Surv(time,status)~X1+X2,data=d) predictRisk(f,newdata=nd,times=c(3,8)) }
Find the root of a monotone function on a discrete grid of value using dichotomic search
discreteRoot( fn, grid, increasing = TRUE, check = TRUE, tol = .Machine$double.eps^0.5 )
discreteRoot( fn, grid, increasing = TRUE, check = TRUE, tol = .Machine$double.eps^0.5 )
fn |
[function] objective function to minimize in absolute value. |
grid |
[vector] possible minimizers. |
increasing |
[logical] is the function fn increasing? |
check |
[logical] should the program check that fn takes a different sign for the first vs. the last value of the grid? |
tol |
[numeric] the absolute convergence tolerance. |
Formula interface for Fine-Gray regression competing risk models.
FGR(formula, data, cause = 1, y = TRUE, ...)
FGR(formula, data, cause = 1, y = TRUE, ...)
formula |
A formula whose left hand side is a |
data |
A data.frame in which all the variables of |
cause |
The failure type of interest. Defaults to |
y |
logical value: if |
... |
... |
Formula interface for the function crr
from the cmprsk
package.
The function crr
allows to multiply some covariates by time before
they enter the linear predictor. This can be achieved with the formula
interface, however, the code becomes a little cumbersome. See the examples.
Note that FGR does not allow for delayed entry (left-truncation).
The assumed value for indicating censored observations in the event variable
is 0
. The function Hist
has an argument cens.code
which can change this (if you do not want to change the event variable).
See crr
.
Thomas Alexander Gerds [email protected]
Gerds, TA and Scheike, T and Andersen, PK (2011) Absolute risk regression for competing risks: interpretation, link functions and prediction Research report 11/7. Department of Biostatistics, University of Copenhagen
library(prodlim) library(survival) library(cmprsk) library(lava) d <- prodlim::SimCompRisk(100) f1 <- FGR(Hist(time,cause)~X1+X2,data=d) print(f1) ## crr allows that some covariates are multiplied by ## a function of time (see argument tf of crr) ## by FGR uses the identity matrix f2 <- FGR(Hist(time,cause)~cov2(X1)+X2,data=d) print(f2) ## same thing, but more explicit: f3 <- FGR(Hist(time,cause)~cov2(X1)+cov1(X2),data=d) print(f3) ## both variables can enter cov2: f4 <- FGR(Hist(time,cause)~cov2(X1)+cov2(X2),data=d) print(f4) ## change the function of time qFun <- function(x){x^2} noFun <- function(x){x} sqFun <- function(x){x^0.5} ## multiply X1 by time^2 and X2 by time: f5 <- FGR(Hist(time,cause)~cov2(X1,tf=qFun)+cov2(X2),data=d) print(f5) print(f5$crrFit) ## same results as crr with(d,crr(ftime=time, fstatus=cause, cov2=d[,c("X1","X2")], tf=function(time){cbind(qFun(time),time)})) ## still same result, but more explicit f5a <- FGR(Hist(time,cause)~cov2(X1,tf=qFun)+cov2(X2,tf=noFun),data=d) f5a$crrFit ## multiply X1 by time^2 and X2 by sqrt(time) f5b <- FGR(Hist(time,cause)~cov2(X1,tf=qFun)+cov2(X2,tf=sqFun),data=d,cause=1) ## additional arguments for crr f6<- FGR(Hist(time,cause)~X1+X2,data=d, cause=1,gtol=1e-5) f6 f6a<- FGR(Hist(time,cause)~X1+X2,data=d, cause=1,gtol=0.1) f6a
library(prodlim) library(survival) library(cmprsk) library(lava) d <- prodlim::SimCompRisk(100) f1 <- FGR(Hist(time,cause)~X1+X2,data=d) print(f1) ## crr allows that some covariates are multiplied by ## a function of time (see argument tf of crr) ## by FGR uses the identity matrix f2 <- FGR(Hist(time,cause)~cov2(X1)+X2,data=d) print(f2) ## same thing, but more explicit: f3 <- FGR(Hist(time,cause)~cov2(X1)+cov1(X2),data=d) print(f3) ## both variables can enter cov2: f4 <- FGR(Hist(time,cause)~cov2(X1)+cov2(X2),data=d) print(f4) ## change the function of time qFun <- function(x){x^2} noFun <- function(x){x} sqFun <- function(x){x^0.5} ## multiply X1 by time^2 and X2 by time: f5 <- FGR(Hist(time,cause)~cov2(X1,tf=qFun)+cov2(X2),data=d) print(f5) print(f5$crrFit) ## same results as crr with(d,crr(ftime=time, fstatus=cause, cov2=d[,c("X1","X2")], tf=function(time){cbind(qFun(time),time)})) ## still same result, but more explicit f5a <- FGR(Hist(time,cause)~cov2(X1,tf=qFun)+cov2(X2,tf=noFun),data=d) f5a$crrFit ## multiply X1 by time^2 and X2 by sqrt(time) f5b <- FGR(Hist(time,cause)~cov2(X1,tf=qFun)+cov2(X2,tf=sqFun),data=d,cause=1) ## additional arguments for crr f6<- FGR(Hist(time,cause)~X1+X2,data=d, cause=1,gtol=1e-5) f6 f6a<- FGR(Hist(time,cause)~X1+X2,data=d, cause=1,gtol=0.1) f6a
Parse hyperparameters for data splitting algorithm
getSplitMethod(split.method, B, N, M, seed)
getSplitMethod(split.method, B, N, M, seed)
split.method |
A character string specifying the algorithm for data splitting:
|
B |
Number of repetitions of bootstrap or k-fold cross-validation |
N |
Sample size |
M |
Subsample size. Default is N (no subsampling). |
seed |
Integer passed to set.seed. If not given or NA no seed is set. |
A list with the following elements:
split.methodName: the print name of the algorithm
split.method: the internal name of the algorithm
index: the index for data splitting. For bootstrap splitting this is a matrix with B columns and M rows identifying the in-bag subjects. For k-fold cross-validation this is a matrix with B columns identifying the membership to the k groups.
k: the k of k-fold cross-validation
N: the sample size
M: the subsample size
Thomas A. Gerds <[email protected]>
Score
# 3-fold crossvalidation getSplitMethod("cv3",B=4,N=37) # bootstrap with replacement getSplitMethod("loob",B=4,N=37) # bootstrap without replacement getSplitMethod("loob",B=4,N=37,M=20)
# 3-fold crossvalidation getSplitMethod("cv3",B=4,N=37) # bootstrap with replacement getSplitMethod("loob",B=4,N=37) # bootstrap without replacement getSplitMethod("loob",B=4,N=37,M=20)
Fit GLMnet models via a formula and a data set for use with predictRisk
.
GLMnet( formula, data, lambda = NULL, cv = TRUE, alpha = 1, nfolds = 10, type.measure = "deviance", family, ... )
GLMnet( formula, data, lambda = NULL, cv = TRUE, alpha = 1, nfolds = 10, type.measure = "deviance", family, ... )
formula |
A formula. |
data |
The data on which to fit the model. |
lambda |
The tuning parameters for GLMnet. If set to NULL, then it the parameters are chosen for you. |
cv |
Whether to use cross-validation or not. Default is TRUE. |
alpha |
The elasticnet mixing parameter. See the ?glmnet for more details. |
nfolds |
Number of folds for cross-validation. Default is 10. |
type.measure |
loss to use for cross-validation. Default is deviance. |
family |
passed to |
... |
Additional arguments that are passed on to the glmnet. |
Fit HAL models via a formula and a data set for use with predictRisk
.
Hal9001(formula, data, lambda = NULL, ...)
Hal9001(formula, data, lambda = NULL, ...)
formula |
A formula. |
data |
The data on which to fit the model. |
lambda |
The tuning parameters for HAL. If set to NULL, then it the parameters are chosen for you. |
... |
Additional arguments that are passed on to the hal_fit. |
Compute the decomposition in iid elements of the ML estimor of IPCW logistic regressions.
## S3 method for class 'wglm' iid(x, times = NULL, simplifies = TRUE, ...)
## S3 method for class 'wglm' iid(x, times = NULL, simplifies = TRUE, ...)
x |
a wglm object. |
times |
[numeric vector] time points at which the iid should be output. |
simplifies |
[logical] should the ouput be converted to a matrix when only one timepoint is requested. Otherwise will always return a list. |
... |
Not used. |
Compute the influence function for each observation used to estimate the model
iidCox( object, newdata, baseline.iid, tau.hazard, tau.max, store.iid, keep.times, return.object ) ## S3 method for class 'coxph' iidCox( object, newdata = NULL, baseline.iid = TRUE, tau.hazard = NULL, tau.max = NULL, store.iid = "full", keep.times = TRUE, return.object = TRUE ) ## S3 method for class 'cph' iidCox( object, newdata = NULL, baseline.iid = TRUE, tau.hazard = NULL, tau.max = NULL, store.iid = "full", keep.times = TRUE, return.object = TRUE ) ## S3 method for class 'phreg' iidCox( object, newdata = NULL, baseline.iid = TRUE, tau.hazard = NULL, tau.max = NULL, store.iid = "full", keep.times = TRUE, return.object = TRUE ) ## S3 method for class 'CauseSpecificCox' iidCox( object, newdata = NULL, baseline.iid = TRUE, tau.hazard = NULL, tau.max = NULL, store.iid = "full", keep.times = TRUE, return.object = TRUE )
iidCox( object, newdata, baseline.iid, tau.hazard, tau.max, store.iid, keep.times, return.object ) ## S3 method for class 'coxph' iidCox( object, newdata = NULL, baseline.iid = TRUE, tau.hazard = NULL, tau.max = NULL, store.iid = "full", keep.times = TRUE, return.object = TRUE ) ## S3 method for class 'cph' iidCox( object, newdata = NULL, baseline.iid = TRUE, tau.hazard = NULL, tau.max = NULL, store.iid = "full", keep.times = TRUE, return.object = TRUE ) ## S3 method for class 'phreg' iidCox( object, newdata = NULL, baseline.iid = TRUE, tau.hazard = NULL, tau.max = NULL, store.iid = "full", keep.times = TRUE, return.object = TRUE ) ## S3 method for class 'CauseSpecificCox' iidCox( object, newdata = NULL, baseline.iid = TRUE, tau.hazard = NULL, tau.max = NULL, store.iid = "full", keep.times = TRUE, return.object = TRUE )
object |
object The fitted Cox regression model object either
obtained with |
newdata |
[data.frame] Optional new data at which to do iid decomposition |
baseline.iid |
[logical] Should the influence function for the baseline hazard be computed. |
tau.hazard |
[numeric vector] the vector of times at which the i.i.d decomposition of the baseline hazard will be computed |
tau.max |
[numeric] latest time at which the i.i.d decomposition of the baseline hazard will be computed. Alternative to |
store.iid |
[character] the method used to compute the influence function and the standard error.
Can be |
keep.times |
[logical] If |
return.object |
[logical] If |
This function implements the first three formula (no number,10,11) of the subsection "Empirical estimates" in Ozenne et al. (2017). If there is no event in a strata, the influence function for the baseline hazard is set to 0.
Argument store.iid:
If n
denotes the sample size, J
the number of jump times, and p
the number of coefficients:
store.iid="full"
exports the influence function for the coefficients and the baseline hazard at each event time.
store.iid="minimal"
exports the influence function for the coefficients. For the
baseline hazard it only computes the quantities necessary to compute the influence function in order to save memory.
More details can be found in appendix B of Ozenne et al. (2017).
For Cox models, it returns the object with an additional iid slot (i.e. object$iid
).
It is a list containing:
IFbeta: Influence function for the regression coefficient.
IFhazard: Time differential of the influence function of the hazard.
IFcumhazard: Influence function of the cumulative hazard.
calcIFhazard: Elements used to compute the influence function at a given time.
time: Times at which the influence function has been evaluated.
etime1.min: Time of first event (i.e. jump) in each strata.
etime.max: Last observation time (i.e. jump or censoring) in each strata.
indexObs: Index of the observation in the original dataset.
For Cause-Specific Cox models,
it returns the object with an additional iid slot for each model (e.g. object$models[[1]]iid
).
Brice Ozenne, Anne Lyngholm Sorensen, Thomas Scheike, Christian Torp-Pedersen and Thomas Alexander Gerds. riskRegression: Predicting the Risk of an Event using Cox Regression Models. The R Journal (2017) 9:2, pages 440-460.
library(survival) library(data.table) library(prodlim) set.seed(10) d <- sampleData(100, outcome = "survival")[,.(eventtime,event,X1,X6)] setkey(d, eventtime) m.cox <- coxph(Surv(eventtime, event) ~ X1+X6, data = d, y = TRUE, x = TRUE) system.time(IF.cox <- iidCox(m.cox)) IF.cox.all <- iidCox(m.cox, tau.hazard = sort(unique(c(7,d$eventtime)))) IF.cox.beta <- iidCox(m.cox, baseline.iid = FALSE)
library(survival) library(data.table) library(prodlim) set.seed(10) d <- sampleData(100, outcome = "survival")[,.(eventtime,event,X1,X6)] setkey(d, eventtime) m.cox <- coxph(Surv(eventtime, event) ~ X1+X6, data = d, y = TRUE, x = TRUE) system.time(IF.cox <- iidCox(m.cox)) IF.cox.all <- iidCox(m.cox, tau.hazard = sort(unique(c(7,d$eventtime)))) IF.cox.beta <- iidCox(m.cox, baseline.iid = FALSE)
Compare two estimates using their influence function
influenceTest(object, ...) ## S3 method for class 'list' influenceTest( object, newdata, times, type, cause, keep.newdata = TRUE, keep.strata = FALSE, ... ) ## Default S3 method: influenceTest(object, object2, band = TRUE, ...)
influenceTest(object, ...) ## S3 method for class 'list' influenceTest( object, newdata, times, type, cause, keep.newdata = TRUE, keep.strata = FALSE, ... ) ## Default S3 method: influenceTest(object, object2, band = TRUE, ...)
object |
either a list of models or an object of class predictCox or predictCSC. |
... |
additional arguments to be passed to lower level functions. |
newdata |
[data.frame or data.table] Contain the values of the predictor variables defining subject specific predictions. |
times |
[numeric vector] Time points at which to return the estimated absolute risk. |
type |
[character]the type of predicted value. |
cause |
[integer/character] Identifies the cause of interest among the competing events. |
keep.newdata |
[logical] If |
keep.strata |
[logical] If |
object2 |
same as predict1 but for another model. |
band |
[logical] If |
library(lava) library(survival) library(prodlim) library(data.table) n <- 100 #### Under H1 set.seed(1) newdata <- data.frame(X1=0:1) ## simulate non proportional hazard using lava m <- lvm() regression(m) <- y ~ 1 regression(m) <- s ~ exp(-2*X1) distribution(m,~X1) <- binomial.lvm() distribution(m,~cens) <- coxWeibull.lvm(scale=1) distribution(m,~y) <- coxWeibull.lvm(scale=1,shape=~s) eventTime(m) <- eventtime ~ min(y=1,cens=0) d <- as.data.table(sim(m,n)) setkey(d, eventtime) ## fit cox models m.cox <- coxph(Surv(eventtime, status) ~ X1, data = d, y = TRUE, x = TRUE) mStrata.cox <- coxph(Surv(eventtime, status) ~ strata(X1), data = d, y = TRUE, x = TRUE) ## compare models # one time point outIF <- influenceTest(list(m.cox, mStrata.cox), type = "survival", newdata = newdata, times = 0.5) confint(outIF) # several timepoints outIF <- influenceTest(list(m.cox, mStrata.cox), type = "survival", newdata = newdata, times = c(0.5,1,1.5)) confint(outIF) #### Under H0 (Cox) #### set.seed(1) ## simulate proportional hazard using lava m <- lvm() regression(m) <- y ~ 1 distribution(m,~X1) <- binomial.lvm() distribution(m,~cens) <- coxWeibull.lvm() distribution(m,~y) <- coxWeibull.lvm() eventTime(m) <- eventtime ~ min(y=1,cens=0) d <- as.data.table(sim(m,n)) setkey(d, eventtime) ## fit cox models Utime <- sort(unique(d$eventtime)) m.cox <- coxph(Surv(eventtime, status) ~ X1, data = d, y = TRUE, x = TRUE) mStrata.cox <- coxph(Surv(eventtime, status) ~ strata(X1), data = d, y = TRUE, x = TRUE) p.cox <- predictCox(m.cox, newdata = newdata, time = Utime, type = "survival") p.coxStrata <- predictCox(mStrata.cox, newdata = newdata, time = Utime, type = "survival") ## display library(ggplot2) autoplot(p.cox) autoplot(p.coxStrata) ## compare models outIF <- influenceTest(list(m.cox, mStrata.cox), type = "survival", newdata = newdata, times = Utime[1:6]) confint(outIF) #### Under H0 (CSC) #### set.seed(1) ff <- ~ f(X1,2) + f(X2,-0.033) ff <- update(ff, ~ .+ f(X3,0) + f(X4,0) + f(X5,0)) ff <- update(ff, ~ .+ f(X6,0) + f(X7,0) + f(X8,0) + f(X9,0)) d <- sampleData(n, outcome = "competing.risk", formula = ff) d[,X1:=as.numeric(as.character(X1))] d[,X2:=as.numeric(as.character(X2))] d[,X3:=as.numeric(as.character(X3))] d[,X4:=as.numeric(as.character(X4))] d[,X5:=as.numeric(as.character(X5))] setkey(d, time) Utime <- sort(unique(d$time)) ## fit cox models m.CSC <- CSC(Hist(time, event) ~ X1 + X2, data = d) mStrata.CSC <- CSC(Hist(time, event) ~ strata(X1) + X2 + X3, data = d) ## compare models outIF <- influenceTest(list(m.CSC, mStrata.CSC), cause = 1, newdata = unique(d[,.(X1,X2,X3)]), times = Utime[1:5]) confint(outIF)
library(lava) library(survival) library(prodlim) library(data.table) n <- 100 #### Under H1 set.seed(1) newdata <- data.frame(X1=0:1) ## simulate non proportional hazard using lava m <- lvm() regression(m) <- y ~ 1 regression(m) <- s ~ exp(-2*X1) distribution(m,~X1) <- binomial.lvm() distribution(m,~cens) <- coxWeibull.lvm(scale=1) distribution(m,~y) <- coxWeibull.lvm(scale=1,shape=~s) eventTime(m) <- eventtime ~ min(y=1,cens=0) d <- as.data.table(sim(m,n)) setkey(d, eventtime) ## fit cox models m.cox <- coxph(Surv(eventtime, status) ~ X1, data = d, y = TRUE, x = TRUE) mStrata.cox <- coxph(Surv(eventtime, status) ~ strata(X1), data = d, y = TRUE, x = TRUE) ## compare models # one time point outIF <- influenceTest(list(m.cox, mStrata.cox), type = "survival", newdata = newdata, times = 0.5) confint(outIF) # several timepoints outIF <- influenceTest(list(m.cox, mStrata.cox), type = "survival", newdata = newdata, times = c(0.5,1,1.5)) confint(outIF) #### Under H0 (Cox) #### set.seed(1) ## simulate proportional hazard using lava m <- lvm() regression(m) <- y ~ 1 distribution(m,~X1) <- binomial.lvm() distribution(m,~cens) <- coxWeibull.lvm() distribution(m,~y) <- coxWeibull.lvm() eventTime(m) <- eventtime ~ min(y=1,cens=0) d <- as.data.table(sim(m,n)) setkey(d, eventtime) ## fit cox models Utime <- sort(unique(d$eventtime)) m.cox <- coxph(Surv(eventtime, status) ~ X1, data = d, y = TRUE, x = TRUE) mStrata.cox <- coxph(Surv(eventtime, status) ~ strata(X1), data = d, y = TRUE, x = TRUE) p.cox <- predictCox(m.cox, newdata = newdata, time = Utime, type = "survival") p.coxStrata <- predictCox(mStrata.cox, newdata = newdata, time = Utime, type = "survival") ## display library(ggplot2) autoplot(p.cox) autoplot(p.coxStrata) ## compare models outIF <- influenceTest(list(m.cox, mStrata.cox), type = "survival", newdata = newdata, times = Utime[1:6]) confint(outIF) #### Under H0 (CSC) #### set.seed(1) ff <- ~ f(X1,2) + f(X2,-0.033) ff <- update(ff, ~ .+ f(X3,0) + f(X4,0) + f(X5,0)) ff <- update(ff, ~ .+ f(X6,0) + f(X7,0) + f(X8,0) + f(X9,0)) d <- sampleData(n, outcome = "competing.risk", formula = ff) d[,X1:=as.numeric(as.character(X1))] d[,X2:=as.numeric(as.character(X2))] d[,X3:=as.numeric(as.character(X3))] d[,X4:=as.numeric(as.character(X4))] d[,X5:=as.numeric(as.character(X5))] setkey(d, time) Utime <- sort(unique(d$time)) ## fit cox models m.CSC <- CSC(Hist(time, event) ~ X1 + X2, data = d) mStrata.CSC <- CSC(Hist(time, event) ~ strata(X1) + X2 + X3, data = d) ## compare models outIF <- influenceTest(list(m.CSC, mStrata.CSC), cause = 1, newdata = unique(d[,.(X1,X2,X3)]), times = Utime[1:5]) confint(outIF)
Compute the information (i.e. opposit of the expectation of the second derivative of the log-likelihood) for IPCW logistic regressions.
## S3 method for class 'wglm' information(x, times = NULL, simplifies = TRUE, ...)
## S3 method for class 'wglm' information(x, times = NULL, simplifies = TRUE, ...)
x |
a wglm object. |
times |
[numeric vector] time points at which the score should be output. |
simplifies |
[logical] should the ouput be converted to a matrix when only one timepoint is requested. Otherwise will always return a list. |
... |
Not used. |
Index of Prediction Accuracy: General R^2 for binary outcome and right censored time to event (survival) outcome also with competing risks
rsquared(object,...) IPA(object,...) ## Default S3 method: rsquared(object,formula,newdata,times,cause,...) ## S3 method for class 'glm' rsquared(object,formula,newdata,...) ## S3 method for class 'coxph' rsquared(object,formula,newdata,times,...) ## S3 method for class 'CauseSpecificCox' rsquared(object,formula,newdata,times,cause,...) ## Default S3 method: IPA(object,formula,newdata,times,cause,...) ## S3 method for class 'glm' IPA(object,formula,newdata,...) ## S3 method for class 'coxph' IPA(object,formula,newdata,times,...) ## S3 method for class 'CauseSpecificCox' IPA(object,formula,newdata,times,cause,...)
rsquared(object,...) IPA(object,...) ## Default S3 method: rsquared(object,formula,newdata,times,cause,...) ## S3 method for class 'glm' rsquared(object,formula,newdata,...) ## S3 method for class 'coxph' rsquared(object,formula,newdata,times,...) ## S3 method for class 'CauseSpecificCox' rsquared(object,formula,newdata,times,cause,...) ## Default S3 method: IPA(object,formula,newdata,times,cause,...) ## S3 method for class 'glm' IPA(object,formula,newdata,...) ## S3 method for class 'coxph' IPA(object,formula,newdata,times,...) ## S3 method for class 'CauseSpecificCox' IPA(object,formula,newdata,times,cause,...)
object |
Model for which we want IPA. |
... |
passed to |
newdata |
Optional validation data set in which to compute IPA |
formula |
Formula passed to |
cause |
For competing risk models the event of interest |
times |
Vector of time points used as prediction horizon for the computation of Brier scores. |
IPA (R^2) is calculated based on the model's predicted risks. The Brier score of the model is compared to the Brier score of the null model.
Data frame with explained variation values for the full model.
Thomas A. Gerds <[email protected]>
Score
library(prodlim) library(data.table) # binary outcome library(lava) set.seed(18) learndat <- sampleData(48,outcome="binary") lr1 = glm(Y~X1+X2+X7+X9,data=learndat,family=binomial) IPA(lr1) ## validation data valdat=sampleData(94,outcome="binary") IPA(lr1,newdata=valdat) ## predicted risks externally given p1=predictRisk(lr1,newdata=valdat) IPA(p1,formula=Y~1,valdat) # survival library(survival) data(pbc) pbc=na.omit(pbc) pbctest=(1:NROW(pbc)) %in% sample(1:NROW(pbc),size=.632*NROW(pbc)) pbclearn=pbc[pbctest,] cox1= coxph(Surv(time,status!=0)~age+sex+log(bili)+log(albumin)+log(protime), data=pbclearn,x=TRUE) ## same data IPA(cox1,formula=Surv(time,status!=0)~1,times=1000) ## validation data pbcval=pbc[!pbctest,] IPA(cox1,formula=Surv(time,status!=0)~1,newdata=pbcval,times=1000) ## predicted risks externally given p2=predictRisk(cox1,newdata=pbcval,times=1000) IPA(cox1,formula=Surv(time,status!=0)~1,newdata=pbcval,times=1000) # competing risks data(Melanoma) Melanomatest=(1:NROW(Melanoma)) %in% sample(1:NROW(Melanoma),size=.632*NROW(Melanoma)) Melanomalearn=Melanoma[Melanomatest,] fit1 <- CSC(list(Hist(time,status)~sex, Hist(time,status)~invasion+epicel+age), data=Melanoma) IPA(fit1,times=1000,cause=2) ## validation data Melanomaval=Melanoma[!Melanomatest,] IPA(fit1,formula=Hist(time,status)~1,newdata=Melanomaval,times=1000) ## predicted risks externally given p3= predictRisk(fit1,cause=1,newdata=Melanomaval,times=1000) IPA(p3,formula=Hist(time,status)~1,cause=1,newdata=Melanomaval,times=1000)
library(prodlim) library(data.table) # binary outcome library(lava) set.seed(18) learndat <- sampleData(48,outcome="binary") lr1 = glm(Y~X1+X2+X7+X9,data=learndat,family=binomial) IPA(lr1) ## validation data valdat=sampleData(94,outcome="binary") IPA(lr1,newdata=valdat) ## predicted risks externally given p1=predictRisk(lr1,newdata=valdat) IPA(p1,formula=Y~1,valdat) # survival library(survival) data(pbc) pbc=na.omit(pbc) pbctest=(1:NROW(pbc)) %in% sample(1:NROW(pbc),size=.632*NROW(pbc)) pbclearn=pbc[pbctest,] cox1= coxph(Surv(time,status!=0)~age+sex+log(bili)+log(albumin)+log(protime), data=pbclearn,x=TRUE) ## same data IPA(cox1,formula=Surv(time,status!=0)~1,times=1000) ## validation data pbcval=pbc[!pbctest,] IPA(cox1,formula=Surv(time,status!=0)~1,newdata=pbcval,times=1000) ## predicted risks externally given p2=predictRisk(cox1,newdata=pbcval,times=1000) IPA(cox1,formula=Surv(time,status!=0)~1,newdata=pbcval,times=1000) # competing risks data(Melanoma) Melanomatest=(1:NROW(Melanoma)) %in% sample(1:NROW(Melanoma),size=.632*NROW(Melanoma)) Melanomalearn=Melanoma[Melanomatest,] fit1 <- CSC(list(Hist(time,status)~sex, Hist(time,status)~invasion+epicel+age), data=Melanoma) IPA(fit1,times=1000,cause=2) ## validation data Melanomaval=Melanoma[!Melanomatest,] IPA(fit1,formula=Hist(time,status)~1,newdata=Melanomaval,times=1000) ## predicted risks externally given p3= predictRisk(fit1,cause=1,newdata=Melanomaval,times=1000) IPA(p3,formula=Hist(time,status)~1,cause=1,newdata=Melanomaval,times=1000)
This function is used internally to obtain inverse of the probability of censoring weights.
ipcw( formula, data, method, args, times, subject.times, lag = 1, what, keep = NULL )
ipcw( formula, data, method, args, times, subject.times, lag = 1, what, keep = NULL )
formula |
A survival formula like, |
data |
The data used for fitting the censoring model |
method |
Censoring model used for estimation of the (conditional) censoring distribution. |
args |
A list of arguments which is passed to method |
times |
For |
subject.times |
For |
lag |
If equal to |
what |
Decide about what to do: If equal to
|
keep |
Which elements to add to the output. Any subset of the vector |
Inverse of the probability of censoring weights (IPCW) usually refer to the probabilities of not being censored at certain time points. These probabilities are also the values of the conditional survival function of the censoring time given covariates. The function ipcw estimates the conditional survival function of the censoring times and derives the weights.
IMPORTANT: the data set should be ordered, order(time,-status)
in
order to get the values IPCW.subject.times
in the right order for some
choices of method
.
A list with elements depending on argument keep
.
times |
The times at which weights are estimated |
IPCW.times |
Estimated weights at |
IPCW.subject.times |
Estimated weights at individual time values
|
fit |
The fitted censoring model |
method |
The method for modelling the censoring distribution |
call |
The call |
Thomas A. Gerds [email protected]
library(prodlim) library(rms) dat=SimSurv(30) dat <- dat[order(dat$time),] # using the marginal Kaplan-Meier for the censoring times WKM=ipcw(Hist(time,status)~X2, data=dat, method="marginal", times=sort(unique(dat$time)), subject.times=dat$time,keep=c("fit")) plot(WKM$fit) WKM$fit # using the Cox model for the censoring times given X2 library(survival) WCox=ipcw(Hist(time=time,event=status)~X2, data=dat, method="cox", times=sort(unique(dat$time)), subject.times=dat$time,keep=c("fit")) WCox$fit plot(WKM$fit) lines(sort(unique(dat$time)), 1-WCox$IPCW.times[1,], type="l", col=2, lty=3, lwd=3) lines(sort(unique(dat$time)), 1-WCox$IPCW.times[5,], type="l", col=3, lty=3, lwd=3) # using the stratified Kaplan-Meier # for the censoring times given X2 WKM2=ipcw(Hist(time,status)~X2, data=dat, method="nonpar", times=sort(unique(dat$time)), subject.times=dat$time,keep=c("fit")) plot(WKM2$fit,add=FALSE)
library(prodlim) library(rms) dat=SimSurv(30) dat <- dat[order(dat$time),] # using the marginal Kaplan-Meier for the censoring times WKM=ipcw(Hist(time,status)~X2, data=dat, method="marginal", times=sort(unique(dat$time)), subject.times=dat$time,keep=c("fit")) plot(WKM$fit) WKM$fit # using the Cox model for the censoring times given X2 library(survival) WCox=ipcw(Hist(time=time,event=status)~X2, data=dat, method="cox", times=sort(unique(dat$time)), subject.times=dat$time,keep=c("fit")) WCox$fit plot(WKM$fit) lines(sort(unique(dat$time)), 1-WCox$IPCW.times[1,], type="l", col=2, lty=3, lwd=3) lines(sort(unique(dat$time)), 1-WCox$IPCW.times[5,], type="l", col=3, lty=3, lwd=3) # using the stratified Kaplan-Meier # for the censoring times given X2 WKM2=ipcw(Hist(time,status)~X2, data=dat, method="nonpar", times=sort(unique(dat$time)), subject.times=dat$time,keep=c("fit")) plot(WKM2$fit,add=FALSE)
In the period 1962-77, 205 patients with malignant melanoma (cancer of the skin) had a radical operation performed at Odense University Hospital, Denmark. All patients were followed until the end of 1977 by which time 134 were still alive while 71 had died (of out whom 57 had died from cancer and 14 from other causes).
A data frame with 205 observations on the following 12 variables.
time in days from operation
a numeric with values 0=censored
1=death.malignant.melanoma
2=death.other.causes
a factor with levels censored
death.malignant.melanoma
death.other.causes
a factor with levels level.0
, level.1
, level.2
inflammatory cell infiltration (IFI): 0, 1, 2 or 3
a factor with levels not present
present
a factor with levels not present
present
tumour thickness (in 1/100 mm)
a factor with levels Female
Male
age at operation (years)
tumour thickness on log-scale
The object of the study was to assess the effect of risk factors on survival. Among such risk factors were the sex and age of the patients and the histological variables tumor thickness and ulceration (absent vs. present).
Regression with linear predictors (2010)
Andersen, P.K. and Skovgaard, L.T.
Springer Verlag
data(Melanoma)
data(Melanoma)
Extract design matrix for cph objects
## S3 method for class 'cph' model.matrix(object, data, ...)
## S3 method for class 'cph' model.matrix(object, data, ...)
object |
a cph object. |
data |
a dataset. |
... |
not used |
Extract design matrix for phreg objects
## S3 method for class 'phreg' model.matrix(object, data, ...)
## S3 method for class 'phreg' model.matrix(object, data, ...)
object |
a phreg object. |
data |
a dataset. |
... |
not used |
mainly a copy paste of the begining of the phreg
function.
PAQUID is a prospective cohort study initiated in 1988 in South Western France to explore functional and cerebral ageing. This sample includes n=2561 subjects. Data contains a time-to-event, a type of event and two cognitive scores measured at baseline.
A data frame with 2561 observations on the following 4 variables.
time
the time-to-event (in years).
status
the type of event 0
= censored, 1
= dementia onset and 2
= death without dementia.
DSST
score at the Digit Symbol Substitution Score Test. This test explores attention and psychomotor speed.
MMSE
score at the Mini Mental State Examination. This test is often used as an index of global cognitive performance.
The data have been first made publicly available via the package timeROC.
Dartigues, J., Gagnon, M., Barberger-Gateau, P., Letenneur, L., Commenges, D., Sauvel, C., Michel, P., and Salamon, R. (1992). The paquid epidemiological program on brain ageing. Neuroepidemiology, 11(1):14–18.
Blanche, P., Dartigues, J. F., & Jacqmin-Gadda, H. (2013). Estimating and comparing time-dependent areas under receiver operating characteristic curves for censored event times with competing risks. Statistics in Medicine, 32(30), 5381-5397.
data(Paquid)
data(Paquid)
S3-wrapper for S4 function penalized
penalizedS3(formula, data, type = "elastic.net", lambda1, lambda2, fold, ...)
penalizedS3(formula, data, type = "elastic.net", lambda1, lambda2, fold, ...)
formula |
Communicated outcome and explanatory variables. See examples. |
data |
Data set in which formula is to be interpreted |
type |
String specifying the type of penalization. Should match one of the following values:
|
lambda1 |
Lasso penalty |
lambda2 |
ridge penalty |
fold |
passed to |
... |
Arguments passed to penalized |
library(prodlim) ## Not run: ## too slow if (require("penalized",quietly=TRUE)){ library(penalized) set.seed(8) d <- sampleData(200,outcome="binary") newd <- sampleData(80,outcome="binary") fitridge <- penalizedS3(Y~X1+X2+pen(7:8), data=d, type="ridge", standardize=TRUE, model="logistic",trace=FALSE) fitlasso <- penalizedS3(Y~X1+X2+pen(7:8), data=d, type="lasso", standardize=TRUE, model="logistic",trace=FALSE) # fitnet <- penalizedS3(Y~X1+X2+pen(7:8), data=d, type="elastic.net", # standardize=TRUE, model="logistic",trace=FALSE) predictRisk(fitridge,newdata=newd) predictRisk(fitlasso,newdata=newd) # predictRisk(fitnet,newdata=newd) Score(list(fitridge),data=newd,formula=Y~1) Score(list(fitridge),data=newd,formula=Y~1,split.method="bootcv",B=2) data(nki70) ## S4 fit fitS4 <- penalized(Surv(time, event), penalized = nki70[,8:77], unpenalized = ~ER+Age+Diam+N+Grade, data = nki70, lambda1 = 1) fitS3 <- penalizedS3(Surv(time,event)~ER+Age+Diam+pen(8:77)+N+Grade, data=nki70, lambda1=1) ## or penS3 <- penalizedS3(Surv(time,event)~ER+pen(TSPYL5,Contig63649_RC)+pen(10:77)+N+Grade, data=nki70, lambda1=1) ## also this works penS3 <- penalizedS3(Surv(time,event)~ER+Age+pen(8:33)+Diam+pen(34:77)+N+Grade, data=nki70, lambda1=1) } ## End(Not run)
library(prodlim) ## Not run: ## too slow if (require("penalized",quietly=TRUE)){ library(penalized) set.seed(8) d <- sampleData(200,outcome="binary") newd <- sampleData(80,outcome="binary") fitridge <- penalizedS3(Y~X1+X2+pen(7:8), data=d, type="ridge", standardize=TRUE, model="logistic",trace=FALSE) fitlasso <- penalizedS3(Y~X1+X2+pen(7:8), data=d, type="lasso", standardize=TRUE, model="logistic",trace=FALSE) # fitnet <- penalizedS3(Y~X1+X2+pen(7:8), data=d, type="elastic.net", # standardize=TRUE, model="logistic",trace=FALSE) predictRisk(fitridge,newdata=newd) predictRisk(fitlasso,newdata=newd) # predictRisk(fitnet,newdata=newd) Score(list(fitridge),data=newd,formula=Y~1) Score(list(fitridge),data=newd,formula=Y~1,split.method="bootcv",B=2) data(nki70) ## S4 fit fitS4 <- penalized(Surv(time, event), penalized = nki70[,8:77], unpenalized = ~ER+Age+Diam+N+Grade, data = nki70, lambda1 = 1) fitS3 <- penalizedS3(Surv(time,event)~ER+Age+Diam+pen(8:77)+N+Grade, data=nki70, lambda1=1) ## or penS3 <- penalizedS3(Surv(time,event)~ER+pen(TSPYL5,Contig63649_RC)+pen(10:77)+N+Grade, data=nki70, lambda1=1) ## also this works penS3 <- penalizedS3(Surv(time,event)~ER+Age+pen(8:33)+Diam+pen(34:77)+N+Grade, data=nki70, lambda1=1) } ## End(Not run)
Show predicted risk obtained by a risk prediction model as a function of time.
## S3 method for class 'riskRegression' plot(x, cause, newdata, xlab, ylab, xlim, ylim, lwd, col, lty, axes=TRUE, percent=TRUE, legend=TRUE, add=FALSE, ...)
## S3 method for class 'riskRegression' plot(x, cause, newdata, xlab, ylab, xlim, ylim, lwd, col, lty, axes=TRUE, percent=TRUE, legend=TRUE, add=FALSE, ...)
x |
Fitted object obtained with one of |
cause |
For CauseSpecificCox models the cause of interest. |
newdata |
A data frame containing predictor variable combinations for which to compute predicted risk. |
xlab |
See |
ylab |
See |
xlim |
See |
ylim |
See |
lwd |
A vector of line thicknesses for the regression coefficients. |
col |
A vector of colors for the regression coefficients. |
lty |
A vector of line types for the regression coefficients. |
axes |
Logical. If |
percent |
If true the y-axis is labeled in percent. |
legend |
If true draw a legend. |
add |
Logical. If |
... |
Used for transclusion of smart arguments for |
Thomas Alexander Gerds <[email protected]>
library(survival) library(prodlim) data(Melanoma) fit.arr <- ARR(Hist(time,status)~invasion+age+strata(sex),data=Melanoma,cause=1) plot(fit.arr,xlim=c(500,3000))
library(survival) library(prodlim) data(Melanoma) fit.arr <- ARR(Hist(time,status)~invasion+age+strata(sex),data=Melanoma,cause=1) plot(fit.arr,xlim=c(500,3000))
Plot of time-dependent AUC curves
plotAUC( x, models, which = "score", xlim, ylim, xlab, ylab, col, lwd, lty = 1, cex = 1, pch = 1, type = "l", axes = 1L, percent = 1L, conf.int = 0L, legend = 1L, ... )
plotAUC( x, models, which = "score", xlim, ylim, xlab, ylab, col, lwd, lty = 1, cex = 1, pch = 1, type = "l", axes = 1L, percent = 1L, conf.int = 0L, legend = 1L, ... )
x |
Object obtained with |
models |
Choice of models to plot |
which |
Character. Either |
xlim |
Limits for x-axis |
ylim |
Limits for y-axis |
xlab |
Label for x-axis |
ylab |
Label for y-axis |
col |
line color |
lwd |
line width |
lty |
line style |
cex |
point size |
pch |
point style |
type |
line type |
axes |
Logical. If |
percent |
Logical. If |
conf.int |
Logical. If |
legend |
Logical. If |
... |
Used for additional control of the subroutines: plot, |
set.seed(9) library(survival) library(prodlim) set.seed(10) d=sampleData(100,outcome="survival") nd=sampleData(100,outcome="survival") f1=coxph(Surv(time,event)~X1+X6+X8,data=d,x=TRUE,y=TRUE) f2=coxph(Surv(time,event)~X2+X5+X9,data=d,x=TRUE,y=TRUE) xx=Score(list("X1+X6+X8"=f1,"X2+X5+X9"=f2), formula=Surv(time,event)~1, data=nd, metrics="auc", null.model=FALSE, times=seq(3:10)) aucgraph <- plotAUC(xx) plotAUC(xx,conf.int=TRUE) ## difference between plotAUC(xx,which="contrasts",conf.int=TRUE)
set.seed(9) library(survival) library(prodlim) set.seed(10) d=sampleData(100,outcome="survival") nd=sampleData(100,outcome="survival") f1=coxph(Surv(time,event)~X1+X6+X8,data=d,x=TRUE,y=TRUE) f2=coxph(Surv(time,event)~X2+X5+X9,data=d,x=TRUE,y=TRUE) xx=Score(list("X1+X6+X8"=f1,"X2+X5+X9"=f2), formula=Surv(time,event)~1, data=nd, metrics="auc", null.model=FALSE, times=seq(3:10)) aucgraph <- plotAUC(xx) plotAUC(xx,conf.int=TRUE) ## difference between plotAUC(xx,which="contrasts",conf.int=TRUE)
Plot Brier score curves
plotBrier( x, models, which = "score", xlim, ylim, xlab, ylab, col, lwd, lty = 1, cex = 1, pch = 1, type = "l", axes = 1L, percent = 1L, conf.int = 0L, legend = 1L, ... )
plotBrier( x, models, which = "score", xlim, ylim, xlab, ylab, col, lwd, lty = 1, cex = 1, pch = 1, type = "l", axes = 1L, percent = 1L, conf.int = 0L, legend = 1L, ... )
x |
Object obtained with |
models |
Choice of models to plot |
which |
Character. Either |
xlim |
Limits for x-axis |
ylim |
Limits for y-axis |
xlab |
Label for x-axis |
ylab |
Label for y-axis |
col |
line color |
lwd |
line width |
lty |
line style |
cex |
point size |
pch |
point style |
type |
line type |
axes |
Logical. If |
percent |
Logical. If |
conf.int |
Logical. If |
legend |
Logical. If |
... |
Used for additional control of the subroutines: plot,
axis, lines, legend. See |
# survival library(survival) library(prodlim) set.seed(7) ds1=sampleData(40,outcome="survival") ds2=sampleData(40,outcome="survival") f1 <- coxph(Surv(time,event)~X1+X3+X5+X7+X9,data=ds1,x=TRUE) f2 <- coxph(Surv(time,event)~X2+X4+X6+X8+X10,data=ds1,x=TRUE) xscore <- Score(list(f1,f2),formula=Hist(time,event)~1,data=ds2,times=0:12,metrics="brier") plotBrier(xscore)
# survival library(survival) library(prodlim) set.seed(7) ds1=sampleData(40,outcome="survival") ds2=sampleData(40,outcome="survival") f1 <- coxph(Surv(time,event)~X1+X3+X5+X7+X9,data=ds1,x=TRUE) f2 <- coxph(Surv(time,event)~X2+X4+X6+X8+X10,data=ds1,x=TRUE) xscore <- Score(list(f1,f2),formula=Hist(time,event)~1,data=ds2,times=0:12,metrics="brier") plotBrier(xscore)
Plot Calibration curves for risk prediction models
plotCalibration( x, models, times, method = "nne", cens.method, round = TRUE, bandwidth = NULL, q = 10, bars = FALSE, hanging = FALSE, names = "quantiles", pseudo = FALSE, rug, show.frequencies = FALSE, plot = TRUE, add = FALSE, diag = !add, legend = !add, auc.in.legend, brier.in.legend, axes = !add, xlim = c(0, 1), ylim = c(0, 1), xlab = ifelse(bars, "Risk groups", "Predicted risk"), ylab, col, lwd, lty, pch, type, percent = TRUE, na.action = na.fail, cex = 1, ... )
plotCalibration( x, models, times, method = "nne", cens.method, round = TRUE, bandwidth = NULL, q = 10, bars = FALSE, hanging = FALSE, names = "quantiles", pseudo = FALSE, rug, show.frequencies = FALSE, plot = TRUE, add = FALSE, diag = !add, legend = !add, auc.in.legend, brier.in.legend, axes = !add, xlim = c(0, 1), ylim = c(0, 1), xlab = ifelse(bars, "Risk groups", "Predicted risk"), ylab, col, lwd, lty, pch, type, percent = TRUE, na.action = na.fail, cex = 1, ... )
x |
Object obtained with function |
models |
Choice of models to plot |
times |
Time point specifying the prediction horizon. |
method |
The method for estimating the calibration curve(s):
|
cens.method |
For right censored data only. How observed proportions are calculated. Either
|
round |
If |
bandwidth |
The bandwidth for |
q |
The number of quantiles for |
bars |
If |
hanging |
Barplots only. If |
names |
Barplots only. Names argument passed to
|
pseudo |
If |
rug |
If |
show.frequencies |
Barplots only. If |
plot |
If |
add |
If |
diag |
If |
legend |
Logical. If |
auc.in.legend |
Logical. If |
brier.in.legend |
Logical. If |
axes |
If |
xlim |
Limits of x-axis. |
ylim |
Limits of y-axis. |
xlab |
Label for y-axis. |
ylab |
Label for x-axis. |
col |
Vector with colors, one for each element of
object. Passed to |
lwd |
Vector with line widths, one for each element of
object. Passed to |
lty |
lwd Vector with line style, one for each element of
object. Passed to |
pch |
Passed to |
type |
Passed to |
percent |
If TRUE axes labels are multiplied by 100 and thus interpretable on a percent scale. |
na.action |
what to do with NA values. Passed to
|
cex |
Default cex used for legend and labels. |
... |
Used to control the subroutines: plot, axis, lines,
barplot, legend, addtable2plot, points (pseudo values), rug. See
|
In uncensored data, the observed frequency of the outcome event is calculated locally at the predicted risk. In right censored data with and without competing risks, the actual risk is calculated using the Kaplan-Meier and the Aalen-Johansen method, respectively, locally at the predicted risk.
library(prodlim) # binary set.seed(10) db=sampleData(100,outcome="binary") fb1=glm(Y~X1+X5+X7,data=db,family="binomial") fb2=glm(Y~X1+X3+X6+X7,data=db,family="binomial") xb=Score(list(model1=fb1,model2=fb2),Y~1,data=db, plots="cal") plotCalibration(xb,brier.in.legend=TRUE) plotCalibration(xb,bars=TRUE,model="model1") plotCalibration(xb,models=1,bars=TRUE,names.cex=1.3) # survival library(survival) library(prodlim) dslearn=sampleData(56,outcome="survival") dstest=sampleData(100,outcome="survival") fs1=coxph(Surv(time,event)~X1+X5+X7,data=dslearn,x=1) fs2=coxph(Surv(time,event)~strata(X1)+X3+X6+X7,data=dslearn,x=1) xs=Score(list(Cox1=fs1,Cox2=fs2),Surv(time,event)~1,data=dstest, plots="cal",metrics=NULL) plotCalibration(xs) plotCalibration(xs,cens.method="local",pseudo=1) plotCalibration(xs,method="quantile") # competing risks ## Not run: data(Melanoma) f1 <- CSC(Hist(time,status)~age+sex+epicel+ulcer,data=Melanoma) f2 <- CSC(Hist(time,status)~age+sex+logthick+epicel+ulcer,data=Melanoma) x <- Score(list(model1=f1,model2=f2),Hist(time,status)~1,data=Melanoma, cause= 2,times=5*365.25,plots="cal") plotCalibration(x) ## End(Not run)
library(prodlim) # binary set.seed(10) db=sampleData(100,outcome="binary") fb1=glm(Y~X1+X5+X7,data=db,family="binomial") fb2=glm(Y~X1+X3+X6+X7,data=db,family="binomial") xb=Score(list(model1=fb1,model2=fb2),Y~1,data=db, plots="cal") plotCalibration(xb,brier.in.legend=TRUE) plotCalibration(xb,bars=TRUE,model="model1") plotCalibration(xb,models=1,bars=TRUE,names.cex=1.3) # survival library(survival) library(prodlim) dslearn=sampleData(56,outcome="survival") dstest=sampleData(100,outcome="survival") fs1=coxph(Surv(time,event)~X1+X5+X7,data=dslearn,x=1) fs2=coxph(Surv(time,event)~strata(X1)+X3+X6+X7,data=dslearn,x=1) xs=Score(list(Cox1=fs1,Cox2=fs2),Surv(time,event)~1,data=dstest, plots="cal",metrics=NULL) plotCalibration(xs) plotCalibration(xs,cens.method="local",pseudo=1) plotCalibration(xs,method="quantile") # competing risks ## Not run: data(Melanoma) f1 <- CSC(Hist(time,status)~age+sex+epicel+ulcer,data=Melanoma) f2 <- CSC(Hist(time,status)~age+sex+logthick+epicel+ulcer,data=Melanoma) x <- Score(list(model1=f1,model2=f2),Hist(time,status)~1,data=Melanoma, cause= 2,times=5*365.25,plots="cal") plotCalibration(x) ## End(Not run)
Plot time-varying effects from a risk regression model.
plotEffects( x, formula, level, ref.line = TRUE, conf.int = 0.95, xlim, ylim, xlab = "Time", ylab = "Cumulative coefficient", col, lty, lwd, add = FALSE, legend, axes = TRUE, ... )
plotEffects( x, formula, level, ref.line = TRUE, conf.int = 0.95, xlim, ylim, xlab = "Time", ylab = "Cumulative coefficient", col, lty, lwd, add = FALSE, legend, axes = TRUE, ... )
x |
Fitted object obtained with one of |
formula |
A formula to specify the variable(s) whose regression coefficients should be plotted. |
level |
For categorical variables the level (group) whose contrast to the reference level (group) should be plotted. |
ref.line |
Logical. If |
conf.int |
Logical. If |
xlim |
See |
ylim |
See |
xlab |
See |
ylab |
See |
col |
A vector of colors for the regression coefficients. |
lty |
A vector of line types for the regression coefficients. |
lwd |
A vector of line thicknesses for the regression coefficients. |
add |
Logical. If |
legend |
Logical. If |
axes |
Logical. If |
... |
Used for transclusion of smart arguments for |
Thomas H. Scheike [email protected]
Thomas A. Gerds [email protected]
library(survival) library(prodlim) data(Melanoma) fit.tarr <- ARR(Hist(time,status)~strata(sex), data=Melanoma, cause=1) plotEffects(fit.tarr) fit.tarr <- ARR(Hist(time,status)~strata(sex)+strata(invasion), data=Melanoma, cause=1, times=seq(800,3000,20)) plotEffects(fit.tarr,formula=~sex) plotEffects(fit.tarr,formula=~invasion) plotEffects(fit.tarr, formula=~invasion, level="invasionlevel.1") ## legend arguments are transcluded: plotEffects(fit.tarr, formula=~invasion, legend.bty="b", legend.cex=1) ## and other smart arguments too: plotEffects(fit.tarr, formula=~invasion, legend.bty="b", axis2.las=2, legend.cex=1)
library(survival) library(prodlim) data(Melanoma) fit.tarr <- ARR(Hist(time,status)~strata(sex), data=Melanoma, cause=1) plotEffects(fit.tarr) fit.tarr <- ARR(Hist(time,status)~strata(sex)+strata(invasion), data=Melanoma, cause=1, times=seq(800,3000,20)) plotEffects(fit.tarr,formula=~sex) plotEffects(fit.tarr,formula=~invasion) plotEffects(fit.tarr, formula=~invasion, level="invasionlevel.1") ## legend arguments are transcluded: plotEffects(fit.tarr, formula=~invasion, legend.bty="b", legend.cex=1) ## and other smart arguments too: plotEffects(fit.tarr, formula=~invasion, legend.bty="b", axis2.las=2, legend.cex=1)
Time-dependent event risk predictions.
plotPredictRisk( x, newdata, times, cause = 1, xlim, ylim, xlab, ylab, axes = TRUE, col, density, lty, lwd, add = FALSE, legend = TRUE, percent = FALSE, ... )
plotPredictRisk( x, newdata, times, cause = 1, xlim, ylim, xlab, ylab, axes = TRUE, col, density, lty, lwd, add = FALSE, legend = TRUE, percent = FALSE, ... )
x |
Object specifying an event risk prediction model. |
newdata |
A data frame with the same variable names as those that were
used to fit the model |
times |
Vector of times at which to return the estimated probabilities. |
cause |
Show predicted risk of events of this cause |
xlim |
Plotting range on the x-axis. |
ylim |
Plotting range on the y-axis. |
xlab |
Label given to the x-axis. |
ylab |
Label given to the y-axis. |
axes |
Logical. If |
col |
Vector of colors given to the survival curve. |
density |
Densitiy of the color – useful for showing many (overlapping) curves. |
lty |
Vector of lty's given to the survival curve. |
lwd |
Vector of lwd's given to the survival curve. |
add |
Logical. If |
legend |
Logical. If TRUE a legend is plotted by calling the function
legend. Optional arguments of the function |
percent |
Logical. If |
... |
Parameters that are filtered by |
Arguments for the invoked functions legend
and axis
can be
specified as legend.lty=2
. The specification is not case sensitive,
thus Legend.lty=2
or LEGEND.lty=2
will have the same effect.
The function axis
is called twice, and arguments of the form
axis1.labels
, axis1.at
are used for the time axis whereas
axis2.pos
, axis1.labels
, etc., are used for the y-axis.
These arguments are processed via ...{}
of
plotPredictRisk
and inside by using the function
SmartControl
.
The (invisible) object.
Ulla B. Mogensen and Thomas A. Gerds [email protected]
Ulla B. Mogensen, Hemant Ishwaran, Thomas A. Gerds (2012). Evaluating Random Forests for Survival Analysis Using Prediction Error Curves. Journal of Statistical Software, 50(11), 1-23. URL http://www.jstatsoft.org/v50/i11/.
library(survival) # generate survival data # no effect set.seed(8) d <- sampleData(80,outcome="survival",formula = ~f(X6, 0) + f(X7, 0)) d[,table(event)] f <- coxph(Surv(time,event)~X6+X7,data=d,x=1) plotPredictRisk(f) # large effect set.seed(8) d <- sampleData(80,outcome="survival",formula = ~f(X6, 0.1) + f(X7, -0.1)) d[,table(event)] f <- coxph(Surv(time,event)~X6+X7,data=d,x=1) plotPredictRisk(f) # generate competing risk data # small effect set.seed(8) d <- sampleData(40,formula = ~f(X6, 0.01) + f(X7, -0.01)) d[,table(event)] f <- CSC(Hist(time,event)~X5+X6,data=d) plotPredictRisk(f) # large effect set.seed(8) d <- sampleData(40,formula = ~f(X6, 0.1) + f(X7, -0.1)) d[,table(event)] f <- CSC(Hist(time,event)~X5+X6,data=d) plotPredictRisk(f)
library(survival) # generate survival data # no effect set.seed(8) d <- sampleData(80,outcome="survival",formula = ~f(X6, 0) + f(X7, 0)) d[,table(event)] f <- coxph(Surv(time,event)~X6+X7,data=d,x=1) plotPredictRisk(f) # large effect set.seed(8) d <- sampleData(80,outcome="survival",formula = ~f(X6, 0.1) + f(X7, -0.1)) d[,table(event)] f <- coxph(Surv(time,event)~X6+X7,data=d,x=1) plotPredictRisk(f) # generate competing risk data # small effect set.seed(8) d <- sampleData(40,formula = ~f(X6, 0.01) + f(X7, -0.01)) d[,table(event)] f <- CSC(Hist(time,event)~X5+X6,data=d) plotPredictRisk(f) # large effect set.seed(8) d <- sampleData(40,formula = ~f(X6, 0.1) + f(X7, -0.1)) d[,table(event)] f <- CSC(Hist(time,event)~X5+X6,data=d) plotPredictRisk(f)
plot predicted risks
plotRisk( x, models, times, xlim = c(0, 1), ylim = c(0, 1), xlab, ylab, col, pch, cex = 1, preclipse = 0, preclipse.shade = FALSE, ... )
plotRisk( x, models, times, xlim = c(0, 1), ylim = c(0, 1), xlab, ylab, col, pch, cex = 1, preclipse = 0, preclipse.shade = FALSE, ... )
x |
Object obtained with function |
models |
Choice of two models to plot. The predicted risks of the first (second) are shown along the x-axis (y-axis). |
times |
Time point specifying the prediction horizon. |
xlim |
x-axis limits |
ylim |
y-axis limits |
xlab |
x-axis labels |
ylab |
y-axis labels |
col |
Colors used according to the outcome. binary outcome (two colors: no event, event), survival outcome (three colors: censored, event, no event) competing risk outcome (4 or more colors: event, competing risk 1, ..., competing risk k, censored, no event) |
pch |
Symbols used according to the outcome binary outcome (two symbols: no event, event), survival outcome (three symbols: censored, event, no event) competing risk outcome (4 or more symbols: event, competing risk 1, ..., competing risk k, censored, no event) |
cex |
point size |
preclipse |
Value between 0 and 1 defining the preclipse area |
preclipse.shade |
Logical. If |
... |
Used to control the subroutines: plot, axis, lines,
barplot, legend. See |
Two rival prediction models are applied to the same data.
a nice graph
Thomas A. Gerds <[email protected]>
library(prodlim) ## uncensored set.seed(10) learndat = sampleData(40,outcome="binary") testdat = sampleData(40,outcome="binary") lr1 = glm(Y~X1+X2+X7+X9,data=learndat,family="binomial") lr2 = glm(Y~X3+X5+X6,data=learndat,family="binomial") xb=Score(list("LR(X1+X2+X7+X9)"=lr1,"LR(X3+X5+X6)"=lr2),formula=Y~1, data=testdat,summary="risks",null.model=0L) plotRisk(xb) ## survival library(survival) set.seed(10) learndat = sampleData(40,outcome="survival") testdat = sampleData(40,outcome="survival") cox1 = coxph(Surv(time,event)~X1+X2+X7+X9,data=learndat,x=TRUE) cox2 = coxph(Surv(time,event)~X3+X5+X6,data=learndat,x=TRUE) xs=Score(list("Cox(X1+X2+X7+X9)"=cox1,"Cox(X3+X5+X6)"=cox2),formula=Surv(time,event)~1, data=testdat,summary="risks",null.model=0L,times=c(3,5,6)) plotRisk(xs,times=5) ## competing risk ## Not run: library(prodlim) library(survival) set.seed(8) learndat = sampleData(80,outcome="competing.risk") testdat = sampleData(140,outcome="competing.risk") m1 = FGR(Hist(time,event)~X2+X7+X9,data=learndat,cause=1) m2 = CSC(Hist(time,event)~X2+X7+X9,data=learndat,cause=1) xcr=Score(list("FGR"=m1,"CSC"=m2),formula=Hist(time,event)~1, data=testdat,summary="risks",null.model=0L,times=c(3,5)) plotRisk(xcr,times=3) ## End(Not run)
library(prodlim) ## uncensored set.seed(10) learndat = sampleData(40,outcome="binary") testdat = sampleData(40,outcome="binary") lr1 = glm(Y~X1+X2+X7+X9,data=learndat,family="binomial") lr2 = glm(Y~X3+X5+X6,data=learndat,family="binomial") xb=Score(list("LR(X1+X2+X7+X9)"=lr1,"LR(X3+X5+X6)"=lr2),formula=Y~1, data=testdat,summary="risks",null.model=0L) plotRisk(xb) ## survival library(survival) set.seed(10) learndat = sampleData(40,outcome="survival") testdat = sampleData(40,outcome="survival") cox1 = coxph(Surv(time,event)~X1+X2+X7+X9,data=learndat,x=TRUE) cox2 = coxph(Surv(time,event)~X3+X5+X6,data=learndat,x=TRUE) xs=Score(list("Cox(X1+X2+X7+X9)"=cox1,"Cox(X3+X5+X6)"=cox2),formula=Surv(time,event)~1, data=testdat,summary="risks",null.model=0L,times=c(3,5,6)) plotRisk(xs,times=5) ## competing risk ## Not run: library(prodlim) library(survival) set.seed(8) learndat = sampleData(80,outcome="competing.risk") testdat = sampleData(140,outcome="competing.risk") m1 = FGR(Hist(time,event)~X2+X7+X9,data=learndat,cause=1) m2 = CSC(Hist(time,event)~X2+X7+X9,data=learndat,cause=1) xcr=Score(list("FGR"=m1,"CSC"=m2),formula=Hist(time,event)~1, data=testdat,summary="risks",null.model=0L,times=c(3,5)) plotRisk(xcr,times=3) ## End(Not run)
Plot ROC curve
plotROC( x, models, times, xlab = "1-Specificity", ylab = "Sensitivity", col, lwd, lty = 1, cex = 1, pch = 1, legend = !add, auc.in.legend = TRUE, brier.in.legend = FALSE, add = FALSE, ... )
plotROC( x, models, times, xlab = "1-Specificity", ylab = "Sensitivity", col, lwd, lty = 1, cex = 1, pch = 1, legend = !add, auc.in.legend = TRUE, brier.in.legend = FALSE, add = FALSE, ... )
x |
Object obtained with function |
models |
Choice of models to plot |
times |
Time point(s) specifying the prediction horizon |
xlab |
Label for x-axis |
ylab |
Label for y-axis |
col |
line color |
lwd |
line width |
lty |
line style |
cex |
point size |
pch |
point style |
legend |
logical. If |
auc.in.legend |
Logical. If |
brier.in.legend |
Logical. If |
add |
logical. If |
... |
Used for additional control of the subroutines: plot,
axis, lines, legend, addtable2plot. See |
## binary set.seed(18) if (require("randomForest",quietly=TRUE)){ library(randomForest) library(prodlim) bdl <- sampleData(40,outcome="binary") bdt <- sampleData(58,outcome="binary") bdl[,y:=factor(Y)] bdt[,y:=factor(Y)] fb1 <- glm(y~X1+X2+X3+X4+X5+X6+X7+X8+X9+X10,data=bdl,family="binomial") fb2 <- randomForest(y~X1+X2+X3+X4+X5+X6+X7+X8+X9+X10,data=bdl) xb <- Score(list("glm"=fb1,"rf"=fb2),y~1,data=bdt, plots="roc",metrics=c("auc","brier")) plotROC(xb,brier.in.legend=1L) # with cross-validation ## Not run: xb3 <- Score(list("glm"=fb1,"rf"=fb2),y~1,data=bdl, plots="roc",B=3,split.method="bootcv", metrics=c("auc")) ## End(Not run) } ## survival set.seed(18) library(survival) sdl <- sampleData(40,outcome="survival") sdt <- sampleData(58,outcome="survival") fs1 <- coxph(Surv(time,event)~X3+X5+X6+X7+X8+X10,data=sdl,x=TRUE) fs2 <- coxph(Surv(time,event)~X1+X2+X9,data=sdl,x=TRUE) xs <- Score(list(model1=fs1,model2=fs2),Hist(time,event)~1,data=sdt, times=5,plots="roc",metrics="auc") plotROC(xs) ## competing risks data(Melanoma) f1 <- CSC(Hist(time,status)~age+sex+epicel+ulcer,data=Melanoma) f2 <- CSC(Hist(time,status)~age+sex+logthick+epicel+ulcer,data=Melanoma) x <- Score(list(model1=f1,model2=f2),Hist(time,status)~1,data=Melanoma, cause=1,times=5*365.25,plots="roc",metrics="auc") plotROC(x)
## binary set.seed(18) if (require("randomForest",quietly=TRUE)){ library(randomForest) library(prodlim) bdl <- sampleData(40,outcome="binary") bdt <- sampleData(58,outcome="binary") bdl[,y:=factor(Y)] bdt[,y:=factor(Y)] fb1 <- glm(y~X1+X2+X3+X4+X5+X6+X7+X8+X9+X10,data=bdl,family="binomial") fb2 <- randomForest(y~X1+X2+X3+X4+X5+X6+X7+X8+X9+X10,data=bdl) xb <- Score(list("glm"=fb1,"rf"=fb2),y~1,data=bdt, plots="roc",metrics=c("auc","brier")) plotROC(xb,brier.in.legend=1L) # with cross-validation ## Not run: xb3 <- Score(list("glm"=fb1,"rf"=fb2),y~1,data=bdl, plots="roc",B=3,split.method="bootcv", metrics=c("auc")) ## End(Not run) } ## survival set.seed(18) library(survival) sdl <- sampleData(40,outcome="survival") sdt <- sampleData(58,outcome="survival") fs1 <- coxph(Surv(time,event)~X3+X5+X6+X7+X8+X10,data=sdl,x=TRUE) fs2 <- coxph(Surv(time,event)~X1+X2+X9,data=sdl,x=TRUE) xs <- Score(list(model1=fs1,model2=fs2),Hist(time,event)~1,data=sdt, times=5,plots="roc",metrics="auc") plotROC(xs) ## competing risks data(Melanoma) f1 <- CSC(Hist(time,status)~age+sex+epicel+ulcer,data=Melanoma) f2 <- CSC(Hist(time,status)~age+sex+logthick+epicel+ulcer,data=Melanoma) x <- Score(list(model1=f1,model2=f2),Hist(time,status)~1,data=Melanoma, cause=1,times=5*365.25,plots="roc",metrics="auc") plotROC(x)
Apply formula to combine two or more Cox models into absolute risk (cumulative incidence function).
## S3 method for class 'CauseSpecificCox' predict( object, newdata, times, cause, type = "absRisk", landmark = NA, keep.times = 1L, keep.newdata = 1L, keep.strata = 1L, se = FALSE, band = FALSE, iid = FALSE, confint = (se + band) > 0, average.iid = FALSE, product.limit = TRUE, store.iid = "full", diag = FALSE, max.time = NULL, ... )
## S3 method for class 'CauseSpecificCox' predict( object, newdata, times, cause, type = "absRisk", landmark = NA, keep.times = 1L, keep.newdata = 1L, keep.strata = 1L, se = FALSE, band = FALSE, iid = FALSE, confint = (se + band) > 0, average.iid = FALSE, product.limit = TRUE, store.iid = "full", diag = FALSE, max.time = NULL, ... )
object |
The fitted cause specific Cox model |
newdata |
[data.frame or data.table] Contain the values of the predictor variables
defining subject specific predictions relative to each cause.
Should have the same structure as the data set used to fit the |
times |
[numeric vector] Time points at which to return the estimated absolute risk. |
cause |
[integer/character] Identifies the cause of interest among the competing events. |
type |
[character] Can be changed to |
landmark |
[integer] The starting time for the computation of the cumulative risk. |
keep.times |
[logical] If |
keep.newdata |
[logical] If |
keep.strata |
[logical] If |
se |
[logical] If |
band |
[logical] If |
iid |
[logical] If |
confint |
[logical] If |
average.iid |
[logical]. If |
product.limit |
[logical]. If |
store.iid |
[character] Implementation used to estimate the influence function and the standard error.
Can be |
diag |
[logical] when |
max.time |
[numeric] maximum time of the response of the fitted data. Only relevant if
model |
... |
not used. |
This function computes the absolute risk as given by formula 2 of (Ozenne et al., 2017). Confidence intervals and confidence bands can be computed using a first order von Mises expansion. See the section "Construction of the confidence intervals" in (Ozenne et al., 2017).
A detailed explanation about the meaning of the argument store.iid
can be found
in (Ozenne et al., 2017) Appendix B "Saving the influence functions".
Note: for Cox regression models with time varying covariates it does not make sense to use this function, because the predicted risk has to be a measurable function of the data available at the time origin.
The iid decomposition is output using an array containing the value of the influence of each subject used to fit the object (dim 1), for each subject in newdata (dim 3), and each time (dim 2).
Brice Ozenne [email protected], Thomas A. Gerds [email protected]
Brice Ozenne, Anne Lyngholm Sorensen, Thomas Scheike, Christian Torp-Pedersen and Thomas Alexander Gerds. riskRegression: Predicting the Risk of an Event using Cox Regression Models. The R Journal (2017) 9:2, pages 440-460.
confint.predictCSC
to compute confidence intervals/bands.
autoplot.predictCSC
to display the predictions.
library(survival) library(prodlim) #### generate data #### set.seed(5) d <- sampleData(80,outcome="comp") ## training dataset nd <- sampleData(4,outcome="comp") ## validation dataset d$time <- round(d$time,1) ## create tied events ttt <- sort(sample(x = unique(d$time), size = 10)) ## estimate a CSC model based on the coxph function CSC.fit <- CSC(Hist(time,event)~ X3+X8, data=d, method = "breslow") ## compute the absolute risk of cause 1, in the validation dataset ## at time 1:10 CSC.risk <- predict(CSC.fit, newdata=nd, times=1:10, cause=1) CSC.risk ## compute absolute risks with CI for cause 2 ## (without displaying the value of the covariates) predict(CSC.fit,newdata=nd,times=1:10,cause=2,se=TRUE, keep.newdata = FALSE) ## other example library(survival) CSC.fit.s <- CSC(list(Hist(time,event)~ strata(X1)+X2+X9, Hist(time,event)~ X2+strata(X4)+X8+X7),data=d, method = "breslow") predict(CSC.fit.s,cause=1,times=ttt,se=1L) ## note: absRisk>1 due to small number of observations ## using the cph function instead of coxph CSC.cph <- CSC(Hist(time,event)~ X1+X2,data=d, method = "breslow", fitter = "cph")#' predict(CSC.cph, newdata = d, cause = 2, times = ttt) ## landmark analysis T0 <- 1 predCSC.afterT0 <- predict(CSC.fit, newdata = d, cause = 2, times = ttt[ttt>T0], landmark = T0) predCSC.afterT0
library(survival) library(prodlim) #### generate data #### set.seed(5) d <- sampleData(80,outcome="comp") ## training dataset nd <- sampleData(4,outcome="comp") ## validation dataset d$time <- round(d$time,1) ## create tied events ttt <- sort(sample(x = unique(d$time), size = 10)) ## estimate a CSC model based on the coxph function CSC.fit <- CSC(Hist(time,event)~ X3+X8, data=d, method = "breslow") ## compute the absolute risk of cause 1, in the validation dataset ## at time 1:10 CSC.risk <- predict(CSC.fit, newdata=nd, times=1:10, cause=1) CSC.risk ## compute absolute risks with CI for cause 2 ## (without displaying the value of the covariates) predict(CSC.fit,newdata=nd,times=1:10,cause=2,se=TRUE, keep.newdata = FALSE) ## other example library(survival) CSC.fit.s <- CSC(list(Hist(time,event)~ strata(X1)+X2+X9, Hist(time,event)~ X2+strata(X4)+X8+X7),data=d, method = "breslow") predict(CSC.fit.s,cause=1,times=ttt,se=1L) ## note: absRisk>1 due to small number of observations ## using the cph function instead of coxph CSC.cph <- CSC(Hist(time,event)~ X1+X2,data=d, method = "breslow", fitter = "cph")#' predict(CSC.cph, newdata = d, cause = 2, times = ttt) ## landmark analysis T0 <- 1 predCSC.afterT0 <- predict(CSC.fit, newdata = d, cause = 2, times = ttt[ttt>T0], landmark = T0) predCSC.afterT0
Predict subject specific risks (cumulative incidence) based on Fine-Gray regression model
## S3 method for class 'FGR' predict(object, newdata, times, ...)
## S3 method for class 'FGR' predict(object, newdata, times, ...)
object |
Result of call to |
newdata |
Predictor values of subjects for who to predict risks |
times |
Time points at which to evaluate the risks |
... |
passed to predict.crr |
library(prodlim) library(survival) set.seed(10) d <- sampleData(101, outcome = "competing.risk") tFun<-function(t) {t} fgr<-FGR(Hist(time, event)~X1+strata(X2)+X6+cov2(X7, tf=tFun), data=d, cause=1) predictRisk(fgr,times=5,newdata=d[1:10])
library(prodlim) library(survival) set.seed(10) d <- sampleData(101, outcome = "competing.risk") tFun<-function(t) {t} fgr<-FGR(Hist(time, event)~X1+strata(X2)+X6+cov2(X7, tf=tFun), data=d, cause=1) predictRisk(fgr,times=5,newdata=d[1:10])
Extract predictions from a risk prediction model.
## S3 method for class 'riskRegression' predict(object, newdata, ...)
## S3 method for class 'riskRegression' predict(object, newdata, ...)
object |
Fitted object obtained with one of |
newdata |
A data frame containing predictor variable combinations for which to compute predicted risk. |
... |
not used |
Thomas H. Scheike [email protected]
Thomas A. Gerds [email protected]
Gerds, TA and Scheike, T and Andersen, PK (2011) Absolute risk regression for competing risks: interpretation, link functions and prediction Research report 11/8. Department of Biostatistics, University of Copenhagen
data(Melanoma) library(prodlim) library(survival) fit.tarr <- ARR(Hist(time,status)~age+invasion+strata(sex),data=Melanoma,cause=1) predict(fit.tarr,newdata=data.frame(age=48, invasion=factor("level.1", levels=levels(Melanoma$invasion)), sex=factor("Female",levels=levels(Melanoma$sex)))) predict(fit.tarr,newdata=data.frame(age=48, invasion=factor("level.1", levels=levels(Melanoma$invasion)), sex=factor("Male",levels=levels(Melanoma$sex)))) predict(fit.tarr,newdata=data.frame(age=c(48,58,68), invasion=factor("level.1", levels=levels(Melanoma$invasion)), sex=factor("Male",levels=levels(Melanoma$sex)))) predict(fit.tarr,newdata=Melanoma[1:4,])
data(Melanoma) library(prodlim) library(survival) fit.tarr <- ARR(Hist(time,status)~age+invasion+strata(sex),data=Melanoma,cause=1) predict(fit.tarr,newdata=data.frame(age=48, invasion=factor("level.1", levels=levels(Melanoma$invasion)), sex=factor("Female",levels=levels(Melanoma$sex)))) predict(fit.tarr,newdata=data.frame(age=48, invasion=factor("level.1", levels=levels(Melanoma$invasion)), sex=factor("Male",levels=levels(Melanoma$sex)))) predict(fit.tarr,newdata=data.frame(age=c(48,58,68), invasion=factor("level.1", levels=levels(Melanoma$invasion)), sex=factor("Male",levels=levels(Melanoma$sex)))) predict(fit.tarr,newdata=Melanoma[1:4,])
Fast routine to get baseline hazards and subject specific hazards
as well as survival probabilities from a survival::coxph
or rms::cph
object
predictCox( object, times, newdata = NULL, centered = TRUE, type = c("cumhazard", "survival"), keep.strata = TRUE, keep.times = TRUE, keep.newdata = FALSE, keep.infoVar = FALSE, se = FALSE, band = FALSE, iid = FALSE, confint = (se + band) > 0, diag = FALSE, average.iid = FALSE, store.iid = "full" )
predictCox( object, times, newdata = NULL, centered = TRUE, type = c("cumhazard", "survival"), keep.strata = TRUE, keep.times = TRUE, keep.newdata = FALSE, keep.infoVar = FALSE, se = FALSE, band = FALSE, iid = FALSE, confint = (se + band) > 0, diag = FALSE, average.iid = FALSE, store.iid = "full" )
object |
The fitted Cox regression model object either
obtained with |
times |
[numeric vector] Time points at which to return the estimated hazard/cumulative hazard/survival. |
newdata |
[data.frame or data.table] Contain the values of the predictor variables
defining subject specific predictions.
Should have the same structure as the data set used to fit the |
centered |
[logical] If |
type |
[character vector] the type of predicted value. Choices are
Several choices can be
combined in a vector of strings that match (no matter the case)
strings |
keep.strata |
[logical] If |
keep.times |
[logical] If |
keep.newdata |
[logical] If |
keep.infoVar |
[logical] For internal use. |
se |
[logical] If |
band |
[logical] If |
iid |
[logical] If |
confint |
[logical] If |
diag |
[logical] when |
average.iid |
[logical] If |
store.iid |
[character] Implementation used to estimate the influence function and the standard error.
Can be |
When the argument newdata
is not specified, the function computes the baseline hazard estimate.
See (Ozenne et al., 2017) section "Handling of tied event times".
Otherwise the function computes survival probabilities with confidence intervals/bands. See (Ozenne et al., 2017) section "Confidence intervals and confidence bands for survival probabilities". The survival is computed using the exponential approximation (equation 3).
A detailed explanation about the meaning of the argument store.iid
can be found
in (Ozenne et al., 2017) Appendix B "Saving the influence functions".
The function is not compatible with time varying predictor variables.
The centered argument enables us to reproduce the results obtained with the basehaz
function from the survival package but should not be modified by the user.
The iid decomposition is output using an array containing the value of the influence of each subject used to fit the object (dim 1), for each subject in newdata (dim 3), and each time (dim 2).
Brice Ozenne [email protected], Thomas A. Gerds [email protected]
Brice Ozenne, Anne Lyngholm Sorensen, Thomas Scheike, Christian Torp-Pedersen and Thomas Alexander Gerds. riskRegression: Predicting the Risk of an Event using Cox Regression Models. The R Journal (2017) 9:2, pages 440-460.
confint.predictCox
to compute confidence intervals/bands.
autoplot.predictCox
to display the predictions.
library(survival) library(data.table) #### generate data #### set.seed(10) d <- sampleData(40,outcome="survival") ## training dataset nd <- sampleData(4,outcome="survival") ## validation dataset d$time <- round(d$time,1) ## create tied events # table(duplicated(d$time)) #### stratified Cox model #### fit <- coxph(Surv(time,event)~X1 + strata(X2) + X6, data=d, ties="breslow", x = TRUE, y = TRUE) ## compute the baseline cumulative hazard fit.haz <- predictCox(fit) cbind(survival::basehaz(fit), fit.haz$cumhazard) ## compute individual specific cumulative hazard and survival probabilities fit.pred <- predictCox(fit, newdata=nd, times=c(3,8), se = TRUE, band = TRUE) fit.pred #### other examples #### # one strata variable fitS <- coxph(Surv(time,event)~strata(X1)+X2, data=d, ties="breslow", x = TRUE, y = TRUE) predictCox(fitS) predictCox(fitS, newdata=nd, times = 1) # two strata variables set.seed(1) d$U=sample(letters[1:5],replace=TRUE,size=NROW(d)) d$V=sample(letters[4:10],replace=TRUE,size=NROW(d)) nd$U=sample(letters[1:5],replace=TRUE,size=NROW(nd)) nd$V=sample(letters[4:10],replace=TRUE,size=NROW(nd)) fit2S <- coxph(Surv(time,event)~X1+strata(U)+strata(V)+X2, data=d, ties="breslow", x = TRUE, y = TRUE) cbind(survival::basehaz(fit2S),predictCox(fit2S,type="cumhazard")$cumhazard) predictCox(fit2S) predictCox(fitS, newdata=nd, times = 3) # left truncation test2 <- list(start=c(1,2,5,2,1,7,3,4,8,8), stop=c(2,3,6,7,8,9,9,9,14,17), event=c(1,1,1,1,1,1,1,0,0,0), x=c(1,0,0,1,0,1,1,1,0,0)) m.cph <- coxph(Surv(start, stop, event) ~ 1, test2, x = TRUE) as.data.table(predictCox(m.cph)) basehaz(m.cph)
library(survival) library(data.table) #### generate data #### set.seed(10) d <- sampleData(40,outcome="survival") ## training dataset nd <- sampleData(4,outcome="survival") ## validation dataset d$time <- round(d$time,1) ## create tied events # table(duplicated(d$time)) #### stratified Cox model #### fit <- coxph(Surv(time,event)~X1 + strata(X2) + X6, data=d, ties="breslow", x = TRUE, y = TRUE) ## compute the baseline cumulative hazard fit.haz <- predictCox(fit) cbind(survival::basehaz(fit), fit.haz$cumhazard) ## compute individual specific cumulative hazard and survival probabilities fit.pred <- predictCox(fit, newdata=nd, times=c(3,8), se = TRUE, band = TRUE) fit.pred #### other examples #### # one strata variable fitS <- coxph(Surv(time,event)~strata(X1)+X2, data=d, ties="breslow", x = TRUE, y = TRUE) predictCox(fitS) predictCox(fitS, newdata=nd, times = 1) # two strata variables set.seed(1) d$U=sample(letters[1:5],replace=TRUE,size=NROW(d)) d$V=sample(letters[4:10],replace=TRUE,size=NROW(d)) nd$U=sample(letters[1:5],replace=TRUE,size=NROW(nd)) nd$V=sample(letters[4:10],replace=TRUE,size=NROW(nd)) fit2S <- coxph(Surv(time,event)~X1+strata(U)+strata(V)+X2, data=d, ties="breslow", x = TRUE, y = TRUE) cbind(survival::basehaz(fit2S),predictCox(fit2S,type="cumhazard")$cumhazard) predictCox(fit2S) predictCox(fitS, newdata=nd, times = 3) # left truncation test2 <- list(start=c(1,2,5,2,1,7,3,4,8,8), stop=c(2,3,6,7,8,9,9,9,14,17), event=c(1,1,1,1,1,1,1,0,0,0), x=c(1,0,0,1,0,1,1,1,0,0)) m.cph <- coxph(Surv(start, stop, event) ~ 1, test2, x = TRUE) as.data.table(predictCox(m.cph)) basehaz(m.cph)
Same as predictCox except that the survival is estimated using the product limit estimator.
predictCoxPL( object, times, newdata = NULL, type = c("cumhazard", "survival"), keep.strata = TRUE, keep.infoVar = FALSE, ... )
predictCoxPL( object, times, newdata = NULL, type = c("cumhazard", "survival"), keep.strata = TRUE, keep.infoVar = FALSE, ... )
object |
The fitted Cox regression model object either
obtained with |
times |
[numeric vector] Time points at which to return the estimated hazard/cumulative hazard/survival. |
newdata |
[data.frame or data.table] Contain the values of the predictor variables
defining subject specific predictions.
Should have the same structure as the data set used to fit the |
type |
[character vector] the type of predicted value. Choices are
Several choices can be
combined in a vector of strings that match (no matter the case)
strings |
keep.strata |
[logical] If |
keep.infoVar |
[logical] For internal use. |
... |
additional arguments to be passed to |
Note: the iid and standard errors are computed using the exponential approximation.
library(survival) #### generate data #### set.seed(10) d <- sampleData(40,outcome="survival") nd <- sampleData(4,outcome="survival") d$time <- round(d$time,1) #### Cox model #### fit <- coxph(Surv(time,event)~ X1 + X2 + X6, data=d, ties="breslow", x = TRUE, y = TRUE) ## exponential approximation predictCox(fit, newdata = d, times = 1:5) ## product limit predictCoxPL(fit, newdata = d, times = 1:5) #### stratified Cox model #### fitS <- coxph(Surv(time,event)~ X1 + strata(X2) + X6, data=d, ties="breslow", x = TRUE, y = TRUE) ## exponential approximation predictCox(fitS, newdata = d, times = 1:5) ## product limit predictCoxPL(fitS, newdata = d, times = 1:5) #### fully stratified Cox model #### fitS <- coxph(Surv(time,event)~ 1, data=d, ties="breslow", x = TRUE, y = TRUE) ## product limit GS <- survfit(Surv(time,event)~1, data = d) range(predictCoxPL(fitS)$survival - GS$surv) fitS <- coxph(Surv(time,event)~ strata(X2), data=d, ties="breslow", x = TRUE, y = TRUE) ## product limit GS <- survfit(Surv(time,event)~X2, data = d) range(predictCoxPL(fitS)$survival - GS$surv)
library(survival) #### generate data #### set.seed(10) d <- sampleData(40,outcome="survival") nd <- sampleData(4,outcome="survival") d$time <- round(d$time,1) #### Cox model #### fit <- coxph(Surv(time,event)~ X1 + X2 + X6, data=d, ties="breslow", x = TRUE, y = TRUE) ## exponential approximation predictCox(fit, newdata = d, times = 1:5) ## product limit predictCoxPL(fit, newdata = d, times = 1:5) #### stratified Cox model #### fitS <- coxph(Surv(time,event)~ X1 + strata(X2) + X6, data=d, ties="breslow", x = TRUE, y = TRUE) ## exponential approximation predictCox(fitS, newdata = d, times = 1:5) ## product limit predictCoxPL(fitS, newdata = d, times = 1:5) #### fully stratified Cox model #### fitS <- coxph(Surv(time,event)~ 1, data=d, ties="breslow", x = TRUE, y = TRUE) ## product limit GS <- survfit(Surv(time,event)~1, data = d) range(predictCoxPL(fitS)$survival - GS$surv) fitS <- coxph(Surv(time,event)~ strata(X2), data=d, ties="breslow", x = TRUE, y = TRUE) ## product limit GS <- survfit(Surv(time,event)~X2, data = d) range(predictCoxPL(fitS)$survival - GS$surv)
Extract event probabilities from fitted regression models and machine learning objects.
The function predictRisk is a generic function, meaning that it invokes
specifically designed functions depending on the 'class' of the first
argument. See predictRisk
.
predictRisk(object, newdata, ...) ## Default S3 method: predictRisk(object, newdata, times, cause, ...) ## S3 method for class 'double' predictRisk(object, newdata, times, cause, ...) ## S3 method for class 'integer' predictRisk(object, newdata, times, cause, ...) ## S3 method for class 'factor' predictRisk(object, newdata, times, cause, ...) ## S3 method for class 'numeric' predictRisk(object, newdata, times, cause, ...) ## S3 method for class 'glm' predictRisk(object, newdata, iid = FALSE, average.iid = FALSE, ...) ## S3 method for class 'multinom' predictRisk( object, newdata, iid = FALSE, average.iid = FALSE, cause = NULL, ... ) ## S3 method for class 'formula' predictRisk(object, newdata, ...) ## S3 method for class 'BinaryTree' predictRisk(object, newdata, ...) ## S3 method for class 'lrm' predictRisk(object, newdata, ...) ## S3 method for class 'rpart' predictRisk(object, newdata, ...) ## S3 method for class 'randomForest' predictRisk(object, newdata, ...) ## S3 method for class 'matrix' predictRisk(object, newdata, times, cause, ...) ## S3 method for class 'aalen' predictRisk(object, newdata, times, ...) ## S3 method for class 'cox.aalen' predictRisk(object, newdata, times, ...) ## S3 method for class 'comprisk' predictRisk(object, newdata, times, ...) ## S3 method for class 'coxph' predictRisk( object, newdata, times, product.limit = FALSE, diag = FALSE, iid = FALSE, average.iid = FALSE, ... ) ## S3 method for class 'coxphTD' predictRisk(object, newdata, times, landmark, ...) ## S3 method for class 'CSCTD' predictRisk(object, newdata, times, cause, landmark, ...) ## S3 method for class 'coxph.penal' predictRisk(object, newdata, times, ...) ## S3 method for class 'cph' predictRisk( object, newdata, times, product.limit = FALSE, diag = FALSE, iid = FALSE, average.iid = FALSE, ... ) ## S3 method for class 'selectCox' predictRisk(object, newdata, times, ...) ## S3 method for class 'prodlim' predictRisk(object, newdata, times, cause, ...) ## S3 method for class 'survfit' predictRisk(object, newdata, times, ...) ## S3 method for class 'psm' predictRisk(object, newdata, times, ...) ## S3 method for class 'ranger' predictRisk(object, newdata, times, cause, ...) ## S3 method for class 'rfsrc' predictRisk(object, newdata, times, cause, ...) ## S3 method for class 'FGR' predictRisk(object, newdata, times, cause, ...) ## S3 method for class 'riskRegression' predictRisk(object, newdata, times, cause, ...) ## S3 method for class 'ARR' predictRisk(object, newdata, times, cause, ...) ## S3 method for class 'CauseSpecificCox' predictRisk( object, newdata, times, cause, product.limit = TRUE, diag = FALSE, iid = FALSE, average.iid = FALSE, truncate = FALSE, ... ) ## S3 method for class 'penfitS3' predictRisk(object, newdata, times, ...) ## S3 method for class 'SuperPredictor' predictRisk(object, newdata, ...) ## S3 method for class 'gbm' predictRisk(object, newdata, times, ...) ## S3 method for class 'flexsurvreg' predictRisk(object, newdata, times, ...) ## S3 method for class 'Hal9001' predictRisk(object, newdata, times, cause, ...) ## S3 method for class 'GLMnet' predictRisk(object, newdata, times = NA, ...) ## S3 method for class 'singleEventCB' predictRisk(object, newdata, times, cause, ...) ## S3 method for class 'CoxConfidential' predictRisk(object, newdata, ...) ## S3 method for class 'wglm' predictRisk( object, newdata, times = NULL, product.limit = FALSE, diag = FALSE, iid = FALSE, average.iid = FALSE, ... )
predictRisk(object, newdata, ...) ## Default S3 method: predictRisk(object, newdata, times, cause, ...) ## S3 method for class 'double' predictRisk(object, newdata, times, cause, ...) ## S3 method for class 'integer' predictRisk(object, newdata, times, cause, ...) ## S3 method for class 'factor' predictRisk(object, newdata, times, cause, ...) ## S3 method for class 'numeric' predictRisk(object, newdata, times, cause, ...) ## S3 method for class 'glm' predictRisk(object, newdata, iid = FALSE, average.iid = FALSE, ...) ## S3 method for class 'multinom' predictRisk( object, newdata, iid = FALSE, average.iid = FALSE, cause = NULL, ... ) ## S3 method for class 'formula' predictRisk(object, newdata, ...) ## S3 method for class 'BinaryTree' predictRisk(object, newdata, ...) ## S3 method for class 'lrm' predictRisk(object, newdata, ...) ## S3 method for class 'rpart' predictRisk(object, newdata, ...) ## S3 method for class 'randomForest' predictRisk(object, newdata, ...) ## S3 method for class 'matrix' predictRisk(object, newdata, times, cause, ...) ## S3 method for class 'aalen' predictRisk(object, newdata, times, ...) ## S3 method for class 'cox.aalen' predictRisk(object, newdata, times, ...) ## S3 method for class 'comprisk' predictRisk(object, newdata, times, ...) ## S3 method for class 'coxph' predictRisk( object, newdata, times, product.limit = FALSE, diag = FALSE, iid = FALSE, average.iid = FALSE, ... ) ## S3 method for class 'coxphTD' predictRisk(object, newdata, times, landmark, ...) ## S3 method for class 'CSCTD' predictRisk(object, newdata, times, cause, landmark, ...) ## S3 method for class 'coxph.penal' predictRisk(object, newdata, times, ...) ## S3 method for class 'cph' predictRisk( object, newdata, times, product.limit = FALSE, diag = FALSE, iid = FALSE, average.iid = FALSE, ... ) ## S3 method for class 'selectCox' predictRisk(object, newdata, times, ...) ## S3 method for class 'prodlim' predictRisk(object, newdata, times, cause, ...) ## S3 method for class 'survfit' predictRisk(object, newdata, times, ...) ## S3 method for class 'psm' predictRisk(object, newdata, times, ...) ## S3 method for class 'ranger' predictRisk(object, newdata, times, cause, ...) ## S3 method for class 'rfsrc' predictRisk(object, newdata, times, cause, ...) ## S3 method for class 'FGR' predictRisk(object, newdata, times, cause, ...) ## S3 method for class 'riskRegression' predictRisk(object, newdata, times, cause, ...) ## S3 method for class 'ARR' predictRisk(object, newdata, times, cause, ...) ## S3 method for class 'CauseSpecificCox' predictRisk( object, newdata, times, cause, product.limit = TRUE, diag = FALSE, iid = FALSE, average.iid = FALSE, truncate = FALSE, ... ) ## S3 method for class 'penfitS3' predictRisk(object, newdata, times, ...) ## S3 method for class 'SuperPredictor' predictRisk(object, newdata, ...) ## S3 method for class 'gbm' predictRisk(object, newdata, times, ...) ## S3 method for class 'flexsurvreg' predictRisk(object, newdata, times, ...) ## S3 method for class 'Hal9001' predictRisk(object, newdata, times, cause, ...) ## S3 method for class 'GLMnet' predictRisk(object, newdata, times = NA, ...) ## S3 method for class 'singleEventCB' predictRisk(object, newdata, times, cause, ...) ## S3 method for class 'CoxConfidential' predictRisk(object, newdata, ...) ## S3 method for class 'wglm' predictRisk( object, newdata, times = NULL, product.limit = FALSE, diag = FALSE, iid = FALSE, average.iid = FALSE, ... )
object |
A fitted model from which to extract predicted event probabilities. |
newdata |
A data frame containing predictor variable combinations for which to compute predicted event probabilities. |
... |
Additional arguments that are passed on to the current method. |
times |
A vector of times in the range of the response variable, for which the cumulative incidences event probabilities are computed. |
cause |
Identifies the cause of interest among the competing events. |
iid |
Should the iid decomposition be output using an attribute? |
average.iid |
Should the average iid decomposition be output using an attribute? |
product.limit |
If |
diag |
when |
landmark |
The starting time for the computation of the cumulative risk. |
truncate |
If |
In uncensored binary outcome data there is no need to choose a time point.
When operating on models for survival analysis (without competing risks) the function still predicts the risk, as 1 - S(t|X) where S(t|X) is survival chance of a subject characterized by X.
When there are competing risks (and the data are right censored) one needs to specify both the time horizon for prediction (can be a vector) and the cause of the event. The function then extracts the absolute risks F_c(t|X) aka the cumulative incidence of an event of type/cause c until time t for a subject characterized by X. Depending on the model it may or not be possible to predict the risk of all causes in a competing risks setting. For example. a cause-specific Cox (CSC) object allows to predict both cases whereas a Fine-Gray regression model (FGR) is specific to one of the causes.
For binary outcome a vector with predicted risks. For survival outcome with and without
competing risks
a matrix with as many rows as NROW(newdata)
and as many
columns as length(times)
. Each entry is a probability and in
rows the values should be increasing.
Thomas A. Gerds [email protected]
## binary outcome library(rms) set.seed(7) d <- sampleData(80,outcome="binary") nd <- sampleData(80,outcome="binary") fit <- lrm(Y~X1+X8,data=d) predictRisk(fit,newdata=nd) ## survival outcome # generate survival data library(prodlim) set.seed(100) d <- sampleData(100,outcome="survival") d[,X1:=as.numeric(as.character(X1))] d[,X2:=as.numeric(as.character(X2))] # then fit a Cox model library(rms) cphmodel <- cph(Surv(time,event)~X1+X2,data=d,surv=TRUE,x=TRUE,y=TRUE) # or via survival library(survival) coxphmodel <- coxph(Surv(time,event)~X1+X2,data=d,x=TRUE,y=TRUE) # Extract predicted survival probabilities # at selected time-points: ttt <- quantile(d$time) # for selected predictor values: ndat <- data.frame(X1=c(0.25,0.25,-0.05,0.05),X2=c(0,1,0,1)) # as follows predictRisk(cphmodel,newdata=ndat,times=ttt) predictRisk(coxphmodel,newdata=ndat,times=ttt) ## simulate learning and validation data set.seed(10) learndat <- sampleData(80,outcome="survival") valdat <- sampleData(10,outcome="survival") ## use the learning data to fit a Cox model library(survival) fitCox <- coxph(Surv(time,event)~X6+X2,data=learndat,x=TRUE,y=TRUE) ## suppose we want to predict the survival probabilities for all subjects ## in the validation data at the following time points: ## 0, 1, 2, 3, 4 psurv <- predictRisk(fitCox,newdata=valdat,times=seq(0,4,1)) ## This is a matrix with event probabilities (1-survival) ## one column for each of the 5 time points ## one row for each validation set individual ## competing risks library(survival) library(riskRegression) library(prodlim) set.seed(8) train <- sampleData(80) test <- sampleData(10) cox.fit <- CSC(Hist(time,event)~X1+X6,data=train,cause=1) predictRisk(cox.fit,newdata=test,times=seq(1:10),cause=1) ## with strata cox.fit2 <- CSC(list(Hist(time,event)~strata(X1)+X6, Hist(time,cause)~X1+X6),data=train) predictRisk(cox.fit2,newdata=test,times=seq(1:10),cause=1)
## binary outcome library(rms) set.seed(7) d <- sampleData(80,outcome="binary") nd <- sampleData(80,outcome="binary") fit <- lrm(Y~X1+X8,data=d) predictRisk(fit,newdata=nd) ## survival outcome # generate survival data library(prodlim) set.seed(100) d <- sampleData(100,outcome="survival") d[,X1:=as.numeric(as.character(X1))] d[,X2:=as.numeric(as.character(X2))] # then fit a Cox model library(rms) cphmodel <- cph(Surv(time,event)~X1+X2,data=d,surv=TRUE,x=TRUE,y=TRUE) # or via survival library(survival) coxphmodel <- coxph(Surv(time,event)~X1+X2,data=d,x=TRUE,y=TRUE) # Extract predicted survival probabilities # at selected time-points: ttt <- quantile(d$time) # for selected predictor values: ndat <- data.frame(X1=c(0.25,0.25,-0.05,0.05),X2=c(0,1,0,1)) # as follows predictRisk(cphmodel,newdata=ndat,times=ttt) predictRisk(coxphmodel,newdata=ndat,times=ttt) ## simulate learning and validation data set.seed(10) learndat <- sampleData(80,outcome="survival") valdat <- sampleData(10,outcome="survival") ## use the learning data to fit a Cox model library(survival) fitCox <- coxph(Surv(time,event)~X6+X2,data=learndat,x=TRUE,y=TRUE) ## suppose we want to predict the survival probabilities for all subjects ## in the validation data at the following time points: ## 0, 1, 2, 3, 4 psurv <- predictRisk(fitCox,newdata=valdat,times=seq(0,4,1)) ## This is a matrix with event probabilities (1-survival) ## one column for each of the 5 time points ## one row for each validation set individual ## competing risks library(survival) library(riskRegression) library(prodlim) set.seed(8) train <- sampleData(80) test <- sampleData(10) cox.fit <- CSC(Hist(time,event)~X1+X6,data=train,cause=1) predictRisk(cox.fit,newdata=test,times=seq(1:10),cause=1) ## with strata cox.fit2 <- CSC(list(Hist(time,event)~strata(X1)+X6, Hist(time,cause)~X1+X6),data=train) predictRisk(cox.fit2,newdata=test,times=seq(1:10),cause=1)
Print average treatment effects.
## S3 method for class 'ate' print(x, estimator = x$estimator, ...)
## S3 method for class 'ate' print(x, estimator = x$estimator, ...)
x |
object obtained with function |
estimator |
[character] The type of estimator relative to which the risks should be output. |
... |
for internal use |
summary.ate
to obtained a more detailed output
confint.ate
to compute confidence intervals/bands.
ate
to compute the average treatment effects.
Print of a Cause-Specific Cox regression model
## S3 method for class 'CauseSpecificCox' print(x, ...)
## S3 method for class 'CauseSpecificCox' print(x, ...)
x |
Object obtained with CSC |
... |
Passed to print |
Print of a Fine-Gray regression model
## S3 method for class 'FGR' print(x, ...)
## S3 method for class 'FGR' print(x, ...)
x |
Object fitted with function FGR |
... |
passed to cmprsk::summary.crr |
Output of the difference between two estimates.
## S3 method for class 'influenceTest' print(x, digits = 3, ...)
## S3 method for class 'influenceTest' print(x, digits = 3, ...)
x |
object obtained with the function |
digits |
[integer, >0] indicating the number of decimal places. |
... |
Passed to print. |
to display confidence intervals/bands,
the confint
method needs to be applied on the object.
confint.influenceTest
to compute confidence intervals/bands.
influenceTest
to perform the comparison.
Print method for IPA
## S3 method for class 'IPA' print(x, digits = 2, ...)
## S3 method for class 'IPA' print(x, digits = 2, ...)
x |
Object obtained with |
digits |
Number of digits |
... |
passed to print |
Thomas A. Gerds <[email protected]>
Print predictions from a Cox model.
## S3 method for class 'predictCox' print(x, digits = 3, ...)
## S3 method for class 'predictCox' print(x, digits = 3, ...)
x |
object obtained with the function |
digits |
[integer, >0] indicating the number of decimal places. |
... |
Passed to print. |
to display confidence intervals/bands,
the confint
method needs to be applied on the object.
confint.predictCox
to compute confidence intervals/bands.
predictCox
to compute the predicted cumulative hazard/survival.
Print predictions from a Cause-specific Cox proportional hazard regression.
## S3 method for class 'predictCSC' print(x, digits = 3, ...)
## S3 method for class 'predictCSC' print(x, digits = 3, ...)
x |
object obtained with the function |
digits |
[integer, >0] indicating the number of decimal places. |
... |
Passed to print. |
to display confidence intervals/bands,
the confint
method needs to be applied on the object.
confint.predictCSC
to compute confidence intervals/bands.
predict.CauseSpecificCox
to compute the predicted risks.
Print function for riskRegression models
## S3 method for class 'riskRegression' print(x, times, digits = 3, eps = 10^-4, verbose = TRUE, conf.int = 0.95, ...)
## S3 method for class 'riskRegression' print(x, times, digits = 3, eps = 10^-4, verbose = TRUE, conf.int = 0.95, ...)
x |
Object obtained with ARR, LRR or riskRegression |
times |
Time points at which to show time-dependent coefficients |
digits |
Number of digits for all numbers but p-values |
eps |
p-values smaller than this number are shown as such |
verbose |
Level of verbosity |
conf.int |
level of confidence. default is 0.95 |
... |
not used |
Print method for risk prediction scores
## S3 method for class 'Score' print(x, digits, ...)
## S3 method for class 'Score' print(x, digits, ...)
x |
Object obtained with |
digits |
Number of digits |
... |
passed to print |
Print subject weights
## S3 method for class 'subjectWeights' print(x, digits = 3, ...)
## S3 method for class 'subjectWeights' print(x, digits = 3, ...)
x |
Subject weights |
digits |
Digits |
... |
not used |
Reconstruct the original dataset from the elements stored in the coxph object
reconstructData(object)
reconstructData(object)
object |
a coxph object. |
Brice Ozenne [email protected] and Thomas A. Gerds [email protected]
Level plots for predicted risks
riskLevelPlot( object, formula, data = parent.frame(), horizon = NULL, cause = 1, ... )
riskLevelPlot( object, formula, data = parent.frame(), horizon = NULL, cause = 1, ... )
object |
risk prediction model object |
formula |
formula |
data |
data |
horizon |
time point |
cause |
cause of interst |
... |
passed to lattice::levelplot |
Level plots for predicted risks
Thomas A. Gerds <[email protected]>
# ---------- logistic regression -------------------- expit <- function(x){exp(x)/(1+exp(x))} partyData <- function(N){ Age <- runif(N,.5,15) Parasites <- rnorm(N,mean=3.5-0.03*Age) Fever <- factor(rbinom(N,1,expit(-3.5-.3*Age+.55*Parasites+0.15*Age*Parasites))) data.frame(Fever,Age,Parasites) } d <- partyData(100) f <- glm(Fever~Age+Parasites,data=d,family="binomial") riskLevelPlot(f,Fever~Age+Parasites,d) ## Not run: if (require("randomForest",quietly=TRUE)){ rf <- randomForest::randomForest(Fever~Age+Parasites,data=d) riskLevelPlot(f,Fever~Age+Parasites,d) riskLevelPlot(rf,Fever~Age+Parasites,d) } ## End(Not run) # ---------- survival analysis -------------------- # --simulate an artificial data frame # with survival response and three predictors library(survival) library(prodlim) set.seed(140515) sdat <- sampleData(43,outcome="survival") # -- fit a Cox regression model survForm = Surv(time,event) ~ X8 + X9 cox <- coxph(survForm, data = sdat,x=TRUE) # --choose a time horizon for the predictions and plot the risks timeHorizon <- floor(median(sdat$time)) riskLevelPlot(cox, survForm, data = sdat, horizon = timeHorizon) # ---------- competing risks -------------------- # -- simulate an artificial data frame # with competing cause response and three predictors library(cmprsk) library(riskRegression) set.seed(140515) crdat <- sampleData(49) # -- fit a cause-specific Cox regression model crForm <- Hist(time,event)~X8+X9 csCox <- CSC(crForm, data=crdat) # -- choose a time horizon and plot the risk for a given cause timeHorizon <- floor(median(crdat$time)) riskLevelPlot(csCox, crForm, data = crdat, horizon = timeHorizon, cause = 1)
# ---------- logistic regression -------------------- expit <- function(x){exp(x)/(1+exp(x))} partyData <- function(N){ Age <- runif(N,.5,15) Parasites <- rnorm(N,mean=3.5-0.03*Age) Fever <- factor(rbinom(N,1,expit(-3.5-.3*Age+.55*Parasites+0.15*Age*Parasites))) data.frame(Fever,Age,Parasites) } d <- partyData(100) f <- glm(Fever~Age+Parasites,data=d,family="binomial") riskLevelPlot(f,Fever~Age+Parasites,d) ## Not run: if (require("randomForest",quietly=TRUE)){ rf <- randomForest::randomForest(Fever~Age+Parasites,data=d) riskLevelPlot(f,Fever~Age+Parasites,d) riskLevelPlot(rf,Fever~Age+Parasites,d) } ## End(Not run) # ---------- survival analysis -------------------- # --simulate an artificial data frame # with survival response and three predictors library(survival) library(prodlim) set.seed(140515) sdat <- sampleData(43,outcome="survival") # -- fit a Cox regression model survForm = Surv(time,event) ~ X8 + X9 cox <- coxph(survForm, data = sdat,x=TRUE) # --choose a time horizon for the predictions and plot the risks timeHorizon <- floor(median(sdat$time)) riskLevelPlot(cox, survForm, data = sdat, horizon = timeHorizon) # ---------- competing risks -------------------- # -- simulate an artificial data frame # with competing cause response and three predictors library(cmprsk) library(riskRegression) set.seed(140515) crdat <- sampleData(49) # -- fit a cause-specific Cox regression model crForm <- Hist(time,event)~X8+X9 csCox <- CSC(crForm, data=crdat) # -- choose a time horizon and plot the risk for a given cause timeHorizon <- floor(median(crdat$time)) riskLevelPlot(csCox, crForm, data = crdat, horizon = timeHorizon, cause = 1)
This is a wrapper for the function comp.risk
from the timereg package.
The main difference is one marks variables in the formula that should have a
time-dependent effect whereas in comp.risk
one marks variables that
should have a time constant (proportional) effect.
riskRegression( formula, data, times, link = "relative", cause, conf.int = TRUE, cens.model, cens.formula, max.iter = 50, conservative = TRUE, ... )
riskRegression( formula, data, times, link = "relative", cause, conf.int = TRUE, cens.model, cens.formula, max.iter = 50, conservative = TRUE, ... )
formula |
Formula where the left hand side specifies the event history event.history and the right hand side the linear predictor. See examples. |
data |
The data for fitting the model in which includes all the variables included in formula. |
times |
Vector of times. For each time point in |
link |
|
cause |
The cause of interest. |
conf.int |
If |
cens.model |
Specified the model for the (conditional) censoring distribution used for deriving weights (IFPW). Defaults to "KM" (the Kaplan-Meier method ignoring covariates) alternatively it may be "Cox" (Cox regression). |
cens.formula |
Right hand side of the formula used for fitting
the censoring model. If not specified the right hand side of
|
max.iter |
Maximal number of iterations. |
conservative |
If |
... |
Further arguments passed to |
Thomas A. Gerds [email protected], Thomas H. Scheike [email protected]
Thomas A Gerds, Thomas H Scheike, and Per K Andersen. Absolute risk regression for competing risks: interpretation, link functions, and prediction. Statistics in medicine, 31(29):3921–3930, 2012.
Scheike, Zhang and Gerds (2008), Predicting cumulative incidence probability by direct binomial regression, Biometrika, 95, 205-220.
Scheike and Zhang (2007), Flexible competing risks regression modelling and goodness of fit, LIDA, 14, 464-483.
Martinussen and Scheike (2006), Dynamic regression models for survival data, Springer.
library(prodlim) data(Melanoma,package="riskRegression") ## tumor thickness on the log-scale Melanoma$logthick <- log(Melanoma$thick) # Single binary factor ## absolute risk regression library(survival) library(prodlim) fit.arr <- ARR(Hist(time,status)~sex,data=Melanoma,cause=1) print(fit.arr) # show predicted cumulative incidences plot(fit.arr,col=3:4,newdata=data.frame(sex=c("Female","Male"))) ## compare with non-parametric Aalen-Johansen estimate library(prodlim) fit.aj <- prodlim(Hist(time,status)~sex,data=Melanoma) plot(fit.aj,conf.int=FALSE) plot(fit.arr,add=TRUE,col=3:4,newdata=data.frame(sex=c("Female","Male"))) ## with time-dependent effect fit.tarr <- ARR(Hist(time,status)~strata(sex),data=Melanoma,cause=1) plot(fit.tarr,newdata=data.frame(sex=c("Female","Male"))) ## logistic risk regression fit.lrr <- LRR(Hist(time,status)~sex,data=Melanoma,cause=1) summary(fit.lrr) # Single continuous factor ## tumor thickness on the log-scale Melanoma$logthick <- log(Melanoma$thick) ## absolute risk regression fit2.arr <- ARR(Hist(time,status)~logthick,data=Melanoma,cause=1) print(fit2.arr) # show predicted cumulative incidences plot(fit2.arr,col=1:5,newdata=data.frame(logthick=quantile(Melanoma$logthick))) ## comparison with nearest neighbor non-parametric Aalen-Johansen estimate library(prodlim) fit2.aj <- prodlim(Hist(time,status)~logthick,data=Melanoma) plot(fit2.aj,conf.int=FALSE,newdata=data.frame(logthick=quantile(Melanoma$logthick))) plot(fit2.arr,add=TRUE,col=1:5,lty=3,newdata=data.frame(logthick=quantile(Melanoma$logthick))) ## logistic risk regression fit2.lrr <- LRR(Hist(time,status)~logthick,data=Melanoma,cause=1) summary(fit2.lrr) ## change model for censoring weights library(rms) fit2a.lrr <- LRR(Hist(time,status)~logthick, data=Melanoma, cause=1, cens.model="cox", cens.formula=~sex+epicel+ulcer+age+logthick) summary(fit2a.lrr) ## compare prediction performance Score(list(ARR=fit2.arr,AJ=fit2.aj,LRR=fit2.lrr),formula=Hist(time,status)~1,data=Melanoma) # multiple regression library(riskRegression) library(prodlim) # absolute risk model multi.arr <- ARR(Hist(time,status)~logthick+sex+age+ulcer,data=Melanoma,cause=1) # stratified model allowing different baseline risk for the two gender multi.arr <- ARR(Hist(time,status)~thick+strata(sex)+age+ulcer,data=Melanoma,cause=1) # stratify by a continuous variable: strata(age) multi.arr <- ARR(Hist(time,status)~tp(thick,power=0)+strata(age)+sex+ulcer, data=Melanoma, cause=1) fit.arr2a <- ARR(Hist(time,status)~tp(thick,power=1),data=Melanoma,cause=1) summary(fit.arr2a) fit.arr2b <- ARR(Hist(time,status)~timevar(thick),data=Melanoma,cause=1) summary(fit.arr2b) ## logistic risk model fit.lrr <- LRR(Hist(time,status)~thick,data=Melanoma,cause=1) summary(fit.lrr) ## nearest neighbor non-parametric Aalen-Johansen estimate library(prodlim) fit.aj <- prodlim(Hist(time,status)~thick,data=Melanoma) plot(fit.aj,conf.int=FALSE) # prediction performance x <- Score(list(fit.arr2a,fit.arr2b,fit.lrr), data=Melanoma, formula=Hist(time,status)~1, cause=1, split.method="none")
library(prodlim) data(Melanoma,package="riskRegression") ## tumor thickness on the log-scale Melanoma$logthick <- log(Melanoma$thick) # Single binary factor ## absolute risk regression library(survival) library(prodlim) fit.arr <- ARR(Hist(time,status)~sex,data=Melanoma,cause=1) print(fit.arr) # show predicted cumulative incidences plot(fit.arr,col=3:4,newdata=data.frame(sex=c("Female","Male"))) ## compare with non-parametric Aalen-Johansen estimate library(prodlim) fit.aj <- prodlim(Hist(time,status)~sex,data=Melanoma) plot(fit.aj,conf.int=FALSE) plot(fit.arr,add=TRUE,col=3:4,newdata=data.frame(sex=c("Female","Male"))) ## with time-dependent effect fit.tarr <- ARR(Hist(time,status)~strata(sex),data=Melanoma,cause=1) plot(fit.tarr,newdata=data.frame(sex=c("Female","Male"))) ## logistic risk regression fit.lrr <- LRR(Hist(time,status)~sex,data=Melanoma,cause=1) summary(fit.lrr) # Single continuous factor ## tumor thickness on the log-scale Melanoma$logthick <- log(Melanoma$thick) ## absolute risk regression fit2.arr <- ARR(Hist(time,status)~logthick,data=Melanoma,cause=1) print(fit2.arr) # show predicted cumulative incidences plot(fit2.arr,col=1:5,newdata=data.frame(logthick=quantile(Melanoma$logthick))) ## comparison with nearest neighbor non-parametric Aalen-Johansen estimate library(prodlim) fit2.aj <- prodlim(Hist(time,status)~logthick,data=Melanoma) plot(fit2.aj,conf.int=FALSE,newdata=data.frame(logthick=quantile(Melanoma$logthick))) plot(fit2.arr,add=TRUE,col=1:5,lty=3,newdata=data.frame(logthick=quantile(Melanoma$logthick))) ## logistic risk regression fit2.lrr <- LRR(Hist(time,status)~logthick,data=Melanoma,cause=1) summary(fit2.lrr) ## change model for censoring weights library(rms) fit2a.lrr <- LRR(Hist(time,status)~logthick, data=Melanoma, cause=1, cens.model="cox", cens.formula=~sex+epicel+ulcer+age+logthick) summary(fit2a.lrr) ## compare prediction performance Score(list(ARR=fit2.arr,AJ=fit2.aj,LRR=fit2.lrr),formula=Hist(time,status)~1,data=Melanoma) # multiple regression library(riskRegression) library(prodlim) # absolute risk model multi.arr <- ARR(Hist(time,status)~logthick+sex+age+ulcer,data=Melanoma,cause=1) # stratified model allowing different baseline risk for the two gender multi.arr <- ARR(Hist(time,status)~thick+strata(sex)+age+ulcer,data=Melanoma,cause=1) # stratify by a continuous variable: strata(age) multi.arr <- ARR(Hist(time,status)~tp(thick,power=0)+strata(age)+sex+ulcer, data=Melanoma, cause=1) fit.arr2a <- ARR(Hist(time,status)~tp(thick,power=1),data=Melanoma,cause=1) summary(fit.arr2a) fit.arr2b <- ARR(Hist(time,status)~timevar(thick),data=Melanoma,cause=1) summary(fit.arr2b) ## logistic risk model fit.lrr <- LRR(Hist(time,status)~thick,data=Melanoma,cause=1) summary(fit.lrr) ## nearest neighbor non-parametric Aalen-Johansen estimate library(prodlim) fit.aj <- prodlim(Hist(time,status)~thick,data=Melanoma) plot(fit.aj,conf.int=FALSE) # prediction performance x <- Score(list(fit.arr2a,fit.arr2b,fit.lrr), data=Melanoma, formula=Hist(time,status)~1, cause=1, split.method="none")
riskRegression
Output and set global options for the riskRegression
package.
riskRegression.options(...)
riskRegression.options(...)
... |
for now limited to |
only used by the ate
function.
options <- riskRegression.options() ## add new method.predictRiskIID riskRegression.options(method.predictRiskIID = c(options$method.predictRiskIID,"xx")) riskRegression.options()
options <- riskRegression.options() ## add new method.predictRiskIID riskRegression.options(method.predictRiskIID = c(options$method.predictRiskIID,"xx")) riskRegression.options()
Fast computation of sweep(X, MARGIN = 2, FUN = "-", STATS = center)
rowCenter_cpp(X, center)
rowCenter_cpp(X, center)
X |
A matrix. |
center |
a numeric vector of length equal to the number of rows of |
A matrix of same size as X.
Brice Ozenne <[email protected]>
x <- matrix(1,6,5) sweep(x, MARGIN = 2, FUN = "-", STATS = 1:5) rowCenter_cpp(x, 1:5 ) rowCenter_cpp(x, colMeans(x) )
x <- matrix(1,6,5) sweep(x, MARGIN = 2, FUN = "-", STATS = 1:5) rowCenter_cpp(x, 1:5 ) rowCenter_cpp(x, colMeans(x) )
Fast computation of t(apply(x,1,cumsum))
rowCumSum(x)
rowCumSum(x)
x |
A matrix. |
A matrix of same size as x.
Thomas Alexander Gerds <[email protected]>
x <- matrix(1:8,ncol=2) rowCumSum(x)
x <- matrix(1:8,ncol=2) rowCumSum(x)
Fast computation of sweep(X, MARGIN = 2, FUN = "*", STATS = scale)
rowMultiply_cpp(X, scale)
rowMultiply_cpp(X, scale)
X |
A matrix. |
scale |
a numeric vector of length equal to the number of rows of |
A matrix of same size as X.
Brice Ozenne <[email protected]>
x <- matrix(1,6,5) sweep(x, MARGIN = 2, FUN = "*", STATS = 1:5) rowMultiply_cpp(x, 1:5 ) rowMultiply_cpp(x, 1/colMeans(x) )
x <- matrix(1,6,5) sweep(x, MARGIN = 2, FUN = "*", STATS = 1:5) rowMultiply_cpp(x, 1:5 ) rowMultiply_cpp(x, 1/colMeans(x) )
Collapse rows of characters. Fast alternative to apply(x,1,paste0,collapse="")
rowPaste(object)
rowPaste(object)
object |
A matrix/data.frame/list containing the characters. |
## Not run: M <- matrix(letters,nrow = 26, ncol = 2) rowPaste(M) ## End(Not run)
## Not run: M <- matrix(letters,nrow = 26, ncol = 2) rowPaste(M) ## End(Not run)
Fast computation of sweep(X, MARGIN = 2, FUN = "/", STATS = scale)
rowScale_cpp(X, scale)
rowScale_cpp(X, scale)
X |
A matrix. |
scale |
a numeric vector of length equal to the number of rows of |
A matrix of same size as X.
Brice Ozenne <[email protected]>
x <- matrix(1,6,5) sweep(x, MARGIN = 2, FUN = "/", STATS = 1:5) rowScale_cpp(x, 1:5 ) rowScale_cpp(x, colMeans(x) )
x <- matrix(1,6,5) sweep(x, MARGIN = 2, FUN = "/", STATS = 1:5) rowScale_cpp(x, 1:5 ) rowScale_cpp(x, colMeans(x) )
Fast computation of crossprod(rowSums(X),Y)
rowSumsCrossprod(X, Y, transposeY)
rowSumsCrossprod(X, Y, transposeY)
X |
A matrix with dimensions n*k. Hence the result of |
Y |
A matrix with dimenions n*m. Can be a matrix with dimension m*n but then |
transposeY |
Logical. If |
A vector of length m.
Thomas Alexander Gerds <[email protected]>
x <- matrix(1:10,nrow=5) y <- matrix(1:20,ncol=4) rowSumsCrossprod(x,y,0) x <- matrix(1:10,nrow=5) y <- matrix(1:20,ncol=5) rowSumsCrossprod(x,y,1)
x <- matrix(1:10,nrow=5) y <- matrix(1:20,ncol=4) rowSumsCrossprod(x,y,0) x <- matrix(1:10,nrow=5) y <- matrix(1:20,ncol=5) rowSumsCrossprod(x,y,1)
Simulate data with binary outcome and 10 covariates.
sampleData(n,outcome="competing.risks", formula= ~ f(X1,2)+f(X2,-0.033)+f(X3,0.4)+f(X6,.1)+f(X7,-.1)+f(X8,.5)+f(X9,-1), intercept=0) sampleDataTD(n,n.intervals=5,outcome="competing.risks", formula= ~ f(X1,2)+f(X2,-0.033)+f(X3,0.4)+f(X6,.1)+f(X7,-.1)+f(X8,.5)+f(X9,-1))
sampleData(n,outcome="competing.risks", formula= ~ f(X1,2)+f(X2,-0.033)+f(X3,0.4)+f(X6,.1)+f(X7,-.1)+f(X8,.5)+f(X9,-1), intercept=0) sampleDataTD(n,n.intervals=5,outcome="competing.risks", formula= ~ f(X1,2)+f(X2,-0.033)+f(X3,0.4)+f(X6,.1)+f(X7,-.1)+f(X8,.5)+f(X9,-1))
n |
Sample size |
outcome |
Character vector. Response variables are generated
according to keywords: |
formula |
Specify regression coefficients |
intercept |
For binary outcome the intercept of the logistic regression. |
n.intervals |
|
For the actual lava::regression parameters see the function definition.
Simulated data as data.table with n rows and the following columns: Y (binary outcome), time (non-binary outcome), event (non-binary outcome), X1-X5 (binary predictors), X6-X10 (continous predictors)
Thomas A. Gerds <[email protected]>
lvm
set.seed(10) sampleData(10,outcome="binary") sampleData(10,outcome="survival") sampleData(10,outcome="competing.risks")
set.seed(10) sampleData(10,outcome="binary") sampleData(10,outcome="survival") sampleData(10,outcome="competing.risks")
Save confidential Cox objects
saveCoxConfidential(object, times)
saveCoxConfidential(object, times)
object |
An object of class |
times |
The times at which we want to predict risk. |
This function can save coxph
objects such that we do not need to export
the data on which it was fitted at given times.
library(survival) library(lava) set.seed(18) trainSurv <- sampleData(300,outcome="survival") testSurv <- sampleData(40,outcome="survival") fit = coxph(Surv(time,event)~X1+X2+X3+X7+X9,data=trainSurv, y=TRUE, x = TRUE) u=saveCoxConfidential(fit,times=3) ## Not run: # write object as plain text file sink("~/tmp/u.R") cat("U <- ") dput(u) sink(NULL) # reload object source("~/tmp/u.R") class(u) <- "CoxConfidential" ## End(Not run) predictRisk(u,newdata=testSurv) cox1 = coxph(Surv(time,event)~strata(X1)+X2+X3+X7+X9,data=trainSurv, y=TRUE, x = TRUE) z<-saveCoxConfidential(cox1,c(2,5)) dput(z) ## get output to copy object all.equal(predictRisk(z,newdata=testSurv), predictRisk(cox1,newdata=testSurv,times=c(2,5))) cox2 = coxph(Surv(time,event)~X1+X2+X7+X9,data=trainSurv, y=TRUE, x = TRUE) z<-saveCoxConfidential(cox2,c(2,5)) all.equal(predictRisk(z,testSurv),predictRisk(z,testSurv,c(2,5)))
library(survival) library(lava) set.seed(18) trainSurv <- sampleData(300,outcome="survival") testSurv <- sampleData(40,outcome="survival") fit = coxph(Surv(time,event)~X1+X2+X3+X7+X9,data=trainSurv, y=TRUE, x = TRUE) u=saveCoxConfidential(fit,times=3) ## Not run: # write object as plain text file sink("~/tmp/u.R") cat("U <- ") dput(u) sink(NULL) # reload object source("~/tmp/u.R") class(u) <- "CoxConfidential" ## End(Not run) predictRisk(u,newdata=testSurv) cox1 = coxph(Surv(time,event)~strata(X1)+X2+X3+X7+X9,data=trainSurv, y=TRUE, x = TRUE) z<-saveCoxConfidential(cox1,c(2,5)) dput(z) ## get output to copy object all.equal(predictRisk(z,newdata=testSurv), predictRisk(cox1,newdata=testSurv,times=c(2,5))) cox2 = coxph(Surv(time,event)~X1+X2+X7+X9,data=trainSurv, y=TRUE, x = TRUE) z<-saveCoxConfidential(cox2,c(2,5)) all.equal(predictRisk(z,testSurv),predictRisk(z,testSurv,c(2,5)))
Methods to score the predictive performance of risk markers and risk prediction models
Score(object, ...) ## S3 method for class 'list' Score( object, formula, data, metrics = c("auc", "brier"), summary = NULL, plots = NULL, cause, times, landmarks, use.event.times = FALSE, null.model = TRUE, se.fit = TRUE, conservative = FALSE, multi.split.test = FALSE, conf.int = 0.95, contrasts = TRUE, probs = c(0, 0.25, 0.5, 0.75, 1), cens.method = "ipcw", cens.model = "cox", split.method, B, M, seed, trainseeds, parallel = c("no", "multicore", "snow", "as.registered"), ncpus = 1, cl = NULL, progress.bar = 3, errorhandling = "pass", keep, predictRisk.args, debug = 0L, censoring.save.memory = FALSE, breaks = seq(0, 1, 0.01), roc.method = "vertical", roc.grid = switch(roc.method, vertical = seq(0, 1, 0.01), horizontal = seq(1, 0, -0.01)), cutpoints = NULL, ... )
Score(object, ...) ## S3 method for class 'list' Score( object, formula, data, metrics = c("auc", "brier"), summary = NULL, plots = NULL, cause, times, landmarks, use.event.times = FALSE, null.model = TRUE, se.fit = TRUE, conservative = FALSE, multi.split.test = FALSE, conf.int = 0.95, contrasts = TRUE, probs = c(0, 0.25, 0.5, 0.75, 1), cens.method = "ipcw", cens.model = "cox", split.method, B, M, seed, trainseeds, parallel = c("no", "multicore", "snow", "as.registered"), ncpus = 1, cl = NULL, progress.bar = 3, errorhandling = "pass", keep, predictRisk.args, debug = 0L, censoring.save.memory = FALSE, breaks = seq(0, 1, 0.01), roc.method = "vertical", roc.grid = switch(roc.method, vertical = seq(0, 1, 0.01), horizontal = seq(1, 0, -0.01)), cutpoints = NULL, ... )
object |
List of risk predictions (see details and examples). |
... |
Named list containing additional arguments that are passed on to the |
formula |
A formula which identifies the outcome (left hand
side). E.g., |
data |
|
metrics |
Character vector specifying which metrics to
apply. Case does not matter. Choices are |
summary |
Character vector specifying which summary
statistics to apply to the predicted risks. Choices are
Set to |
plots |
Character vector specifying for which plots to put data into the result.
Currently implemented are |
cause |
Event of interest. Used for binary outcome |
times |
For survival and competing risks outcome: list of prediction horizons. All times which are greater than the maximal observed time in the data set are automatically removed. Note that the object returned by the function may become huge when the prediction performance is estimated at many prediction horizons. |
landmarks |
Not yet implemented. |
use.event.times |
If |
null.model |
If |
se.fit |
Logical or |
conservative |
Logical, only relevant in right censored data. If |
multi.split.test |
Logical or |
conf.int |
Either logical or a numeric value between 0 and 1. In right censored data,
confidence intervals are based on Blanche et al (see references). Setting |
contrasts |
Either logical or a list of contrasts. A list of contrasts defines which risk prediction models (markers)
should be contrasted with respect to their prediction performance.
If |
probs |
Quantiles for retrospective summary statistics of the
predicted risks. This affects the result of the function |
cens.method |
Method for dealing with right censored
data. Either |
cens.model |
Model for estimating inverse probability of
censored weights (IPCW). Implemented are the Kaplan-Meier method ( |
split.method |
Method for cross-validation. Right now the only choices are |
B |
Number of bootstrap sets for cross-validation. |
M |
Size of subsamples for bootstrap cross-validation. If specified it
has to be an integer smaller than the size of |
seed |
Super seed for setting training data seeds when randomly splitting (bootstrapping) the data during cross-validation. |
trainseeds |
Seeds for training models during cross-validation. |
parallel |
The type of parallel operation to be used (if any). If missing, the default is |
ncpus |
integer: number of processes to be used in parallel operation. |
cl |
An optional |
progress.bar |
Style for |
errorhandling |
Argument passed as |
keep |
list of characters (not case sensitive) which determines additional output.
|
predictRisk.args |
A list of argument-lists to control how risks are predicted.
The names of the lists should be the S3-classes of the A more flexible approach is to write a new predictRisk S3-method. See Details. |
debug |
Logical. If |
censoring.save.memory |
Only relevant in censored data where censoring weigths are obtained with
Cox regression and argument |
breaks |
Break points for computing the Roc curve. Defaults to
|
roc.method |
Method for averaging ROC curves across data splits.
If |
roc.grid |
Grid points for the averaging of ROC curves. A sequence of values at which to compute averages across the ROC curves obtained for different data splits during crossvalidation. |
cutpoints |
If not |
The function implements a toolbox for the risk prediction modeller: all tools work for the three outcomes: (1) binary (uncensored), (2) right censored time to event without competing risks, (3) right censored time to event with competing risks
Computed are the (time-dependent) Brier score and the (time-dependent) area under the ROC curve for a list of risk prediction models either in external validation data or in the learning data using bootstrap cross-validation. The function optionally provides results for plotting (time-point specific) ROC curves, for (time-point specific) calibration curves and for (time-point specific) retrospective boxplots.
For uncensored binary outcome the Delong-Delong test is used to contrast AUC of rival models. In right censored survival data (with and without competing risks) the p-values correspond to Wald tests based on standard errors obtained with an estimate of the influence function as described in detail in the appendix of Blanche et al. (2015).
This function works with one or multiple models that predict the risk of an event R(t|X) for a subject characterized by predictors X at time t. With binary endpoints (outcome 0/1 without time component) the risk is simply R(X). In case of a survival object without competing risks the function still works with predicted event probabilities, i.e., R(t|X)=1-S(t|X) where S(t|X) is the predicted survival chance for subject X at time t.
The already existing predictRisk methods (see methods(predictRisk)) may not cover all models and methods
for predicting risks. But users can quickly extend the package as explained in detail in Mogensen et al. (2012) for
the predecessors pec::predictSurvProb
and pec::predictEventProb
which have been unified as
riskRegression::predictRisk
.
Bootstrap Crossvalidation (see also Gerds & Schumacher 2007 and Mogensen et al. 2012)
B=10, M (not specified or M=NROW(data))
Training of each of the models in each of 10 bootstrap data sets (learning data sets).
Learning data sets are obtained by sampling NROW(data)
subjects of the data set
with replacement. There are roughly .632*NROW(data)
subjects in the learning data (inbag)
and .368*NROW(data)
subjects not in the validation data sets (out-of-bag).
These are used to estimate the scores: AUC, Brier, etc. Reported are averages across the 10 splits.
## Bootstrap with replacement
set.seed(13)
N=17
data = data.frame(id=1:N, y=rbinom(N,1,.3),x=rnorm(N))
boot.index = sample(1:N,size=N,replace=TRUE)
boot.index
inbag = 1:N
outofbag = !inbag
learn.data = data[inbag]
val.data = data[outofbag]
riskRegression:::getSplitMethod("bootcv",B=10,N=17)$index
NOTE: the number .632 is the expected probability to draw one subject (for example subject 1) with
replacement from the data, which does not depend on the sample size:
B=10000
N=137
mean(sapply(1:B, function(b){match(1,sample(1:N,size=N,replace=TRUE),nomatch=0)}))
N=30
mean(sapply(1:B, function(b){match(1,sample(1:N,size=N,replace=TRUE),nomatch=0)}))
N=300
mean(sapply(1:B, function(b){match(1,sample(1:N,size=N,replace=TRUE),nomatch=0)}))
## Bootstrap without replacement (training size set to be 70 percent of data) B=10, M=.7
Training of each of the models in each of 10 bootstrap data sets (learning data sets).
Learning data sets are obtained by sampling round(.8*NROW(data))
subjects of the data set
without replacement. There are NROW(data)-round(.8*NROW(data))
subjects not in the learning data sets.
These are used to estimate the scores: AUC, Brier, etc. Reported are averages across the 10 splits.
set.seed(13)
N=17
data = data.frame(id=1:N, y=rbinom(N,1,.3),x=rnorm(N))
boot.index = sample(1:N,size=M,replace=FALSE)
boot.index
inbag = 1:N
outofbag = !inbag
learn.data = data[inbag]
val.data = data[outofbag]
riskRegression:::getSplitMethod("bootcv",B=10,N=17,M=.7)$index
List with scores and assessments of contrasts, i.e.,
tests and confidence limits for performance and difference in performance (AUC and Brier),
summaries and plots. Most elements are indata.table
format.
Thomas A Gerds [email protected] and Paul Blanche [email protected]
Thomas A. Gerds and Michael W. Kattan (2021). Medical Risk Prediction Models: With Ties to Machine Learning (1st ed.) Chapman and Hall/CRC https://doi.org/10.1201/9781138384484
Ulla B. Mogensen, Hemant Ishwaran, Thomas A. Gerds (2012). Evaluating Random Forests for Survival Analysis Using Prediction Error Curves. Journal of Statistical Software, 50(11), 1-23. URL http://www.jstatsoft.org/v50/i11/.
Paul Blanche, Cecile Proust-Lima, Lucie Loubere, Claudine Berr, Jean- Francois Dartigues, and Helene Jacqmin-Gadda. Quantifying and comparing dynamic predictive accuracy of joint models for longitudinal marker and time-to-event in presence of censoring and competing risks. Biometrics, 71 (1):102–113, 2015.
P. Blanche, J-F Dartigues, and H. Jacqmin-Gadda. Estimating and comparing time-dependent areas under receiver operating characteristic curves for censored event times with competing risks. Statistics in Medicine, 32(30):5381–5397, 2013.
E. Graf et al. (1999), Assessment and comparison of prognostic classification schemes for survival data. Statistics in Medicine, vol 18, pp= 2529–2545.
Efron, Tibshirani (1997) Journal of the American Statistical Association 92, 548–560 Improvement On Cross-Validation: The .632+ Bootstrap Method.
Gerds, Schumacher (2006), Consistent estimation of the expected Brier score in general survival models with right-censored event times. Biometrical Journal, vol 48, 1029–1040.
Thomas A. Gerds, Martin Schumacher (2007) Efron-Type Measures of Prediction Error for Survival Analysis Biometrics, 63(4), 1283–1287 doi:10.1111/j.1541-0420.2007.00832.x
Martin Schumacher, Harald Binder, and Thomas Gerds. Assessment of survival prediction models based on microarray data. Bioinformatics, 23(14):1768-74, 2007.
Mark A. van de Wiel, Johannes Berkhof, and Wessel N. van Wieringen Testing the prediction error difference between 2 predictors Biostatistics (2009) 10(3): 550-560 doi:10.1093/biostatistics/kxp011
Michael W Kattan and Thomas A Gerds. The index of prediction accuracy: an intuitive measure useful for evaluating risk prediction models. Diagnostic and Prognostic Research, 2(1):7, 2018.
Fawcett, T. (2006). An introduction to ROC analysis. Pattern Recognition Letters, 27, 861-874.
# binary outcome library(lava) set.seed(18) learndat <- sampleData(48,outcome="binary") testdat <- sampleData(40,outcome="binary") ## score logistic regression models lr1 = glm(Y~X1+X2+X7+X9,data=learndat,family=binomial) lr2 = glm(Y~X3+X5,data=learndat,family=binomial) Score(list("LR(X1+X2+X7+X9)"=lr1,"LR(X3+X5)"=lr2),formula=Y~1,data=testdat) ## ROC curve and calibration plot xb=Score(list("LR(X1+X2+X7+X9)"=lr1,"LR(X3+X5+X6)"=lr2),formula=Y~1, data=testdat,plots=c("calibration","ROC")) ## Not run: plotROC(xb) plotCalibration(xb) ## End(Not run) ## compute AUC for a list of continuous markers markers = as.list(testdat[,.(X6,X7,X8,X9,X10)]) Score(markers,formula=Y~1,data=testdat,metrics=c("auc")) # cross-validation ## Not run: set.seed(10) learndat=sampleData(400,outcome="binary") lr1a = glm(Y~X6,data=learndat,family=binomial) lr2a = glm(Y~X7+X8+X9,data=learndat,family=binomial) ## bootstrap cross-validation x1=Score(list("LR1"=lr1a,"LR2"=lr2a),formula=Y~1,data=learndat,split.method="bootcv",B=100) x1 ## leave-one-out and leave-pair-out bootstrap x2=Score(list("LR1"=lr1a,"LR2"=lr2a),formula=Y~1,data=learndat, split.method="loob", B=100,plots="calibration") x2 ## 5-fold cross-validation x3=Score(list("LR1"=lr1a,"LR2"=lr2a),formula=Y~1,data=learndat, split.method="cv5", B=1,plots="calibration") x3 ## End(Not run) # survival outcome # Score Cox regression models ## Not run: library(survival) library(rms) library(prodlim) set.seed(18) trainSurv <- sampleData(100,outcome="survival") testSurv <- sampleData(40,outcome="survival") cox1 = coxph(Surv(time,event)~X1+X2+X7+X9,data=trainSurv, y=TRUE, x = TRUE) cox2 = coxph(Surv(time,event)~X3+X5+X6,data=trainSurv, y=TRUE, x = TRUE) x=Score(list("Cox(X1+X2+X7+X9)"=cox1,"Cox(X3+X5+X6)"=cox2), formula=Surv(time,event)~1,data=testSurv,conf.int=FALSE,times=c(5,8)) ## Use Cox to estimate censoring weights y=Score(list("Cox(X1+X2+X7+X9)"=cox1,"Cox(X3+X5+X6)"=cox2), formula=Surv(time,event)~X1+X8,data=testSurv,conf.int=FALSE,times=c(5,8)) ## Use GLMnet to estimate censoring weights z=Score(list("Cox(X1+X2+X7+X9)"=cox1,"Cox(X3+X5+X6)"=cox2), formula=Surv(time,event)~X1+X8,cens.model = "GLMnet",data=testSurv, conf.int=FALSE,times=c(5,8)) ## Use hal9001 to estimate censoring weights w=Score(list("Cox(X1+X2+X7+X9)"=cox1,"Cox(X3+X5+X6)"=cox2), formula=Surv(time,event)~X1+X8,cens.model = "Hal9001",data=testSurv, conf.int=FALSE,times=c(5,8)) x y z w ## End(Not run) ## Not run: library(survival) library(rms) library(prodlim) set.seed(18) trainSurv <- sampleData(100,outcome="survival") testSurv <- sampleData(40,outcome="survival") cox1 = coxph(Surv(time,event)~X1+X2+X7+X9,data=trainSurv, y=TRUE, x = TRUE) cox2 = coxph(Surv(time,event)~X3+X5+X6,data=trainSurv, y=TRUE, x = TRUE) xs=Score(list("Cox(X1+X2+X7+X9)"=cox1,"Cox(X3+X5+X6)"=cox2), formula=Surv(time,event)~1,data=testSurv,conf.int=FALSE,times=c(5,8)) xs ## End(Not run) # Integrated Brier score ## Not run: xs=Score(list("Cox(X1+X2+X7+X9)"=cox1,"Cox(X3+X5+X6)"=cox2), formula=Surv(time,event)~1,data=testSurv,conf.int=FALSE, summary="ibs", times=sort(unique(testSurv$time))) ## End(Not run) # time-dependent AUC for list of markers ## Not run: survmarkers = as.list(testSurv[,.(X6,X7,X8,X9,X10)]) Score(survmarkers, formula=Surv(time,event)~1,metrics="auc",data=testSurv, conf.int=TRUE,times=c(5,8)) # compare models on test data Score(list("Cox(X1+X2+X7+X9)"=cox1,"Cox(X3+X5+X6)"=cox2), formula=Surv(time,event)~1,data=testSurv,conf.int=TRUE,times=c(5,8)) ## End(Not run) # crossvalidation models in traindata ## Not run: library(survival) set.seed(18) trainSurv <- sampleData(400,outcome="survival") cox1 = coxph(Surv(time,event)~X1+X2+X7+X9,data=trainSurv, y=TRUE, x = TRUE) cox2 = coxph(Surv(time,event)~X3+X5+X6,data=trainSurv, y=TRUE, x = TRUE) x1 = Score(list("Cox(X1+X2+X7+X9)"=cox1,"Cox(X3+X5+X6)"=cox2), formula=Surv(time,event)~1,data=trainSurv,conf.int=TRUE,times=c(5,8), split.method="loob",B=100,plots="calibration") x2= Score(list("Cox(X1+X2+X7+X9)"=cox1,"Cox(X3+X5+X6)"=cox2), formula=Surv(time,event)~1,data=trainSurv,conf.int=TRUE,times=c(5,8), split.method="bootcv",B=100) ## End(Not run) # restrict number of comparisons ## Not run: Score(list("Cox(X1+X2+X7+X9)"=cox1,"Cox(X3+X5+X6)"=cox2), formula=Surv(time,event)~1,data=trainSurv,contrasts=TRUE, null.model=FALSE,conf.int=TRUE,times=c(5,8),split.method="bootcv",B=3) # competing risks outcome set.seed(18) trainCR <- sampleData(400,outcome="competing.risks") testCR <- sampleData(400,outcome="competing.risks") library(riskRegression) library(cmprsk) # Cause-specific Cox regression csc1 = CSC(Hist(time,event)~X1+X2+X7+X9,data=trainCR) csc2 = CSC(Hist(time,event)~X3+X5+X6,data=trainCR) # Fine-Gray regression fgr1 = FGR(Hist(time,event)~X1+X2+X7+X9,data=trainCR,cause=1) fgr2 = FGR(Hist(time,event)~X3+X5+X6,data=trainCR,cause=1) Score(list("CSC(X1+X2+X7+X9)"=csc1,"CSC(X3+X5+X6)"=csc2, "FGR(X1+X2+X7+X9)"=fgr1,"FGR(X3+X5+X6)"=fgr2), formula=Hist(time,event)~1,data=testCR,se.fit=1L,times=c(5,8)) ## End(Not run) ## Not run: # reproduce some results of Table IV of Blanche et al. Stat Med 2013 data(Paquid) ResPaquid <- Score(list("DSST"=-Paquid$DSST,"MMSE"=-Paquid$MMSE), formula=Hist(time,status)~1, data=Paquid, null.model = FALSE, conf.int=TRUE, metrics=c("auc"), times=c(3,5,10), plots="ROC") ResPaquid plotROC(ResPaquid,time=5) ## End(Not run) ## Not run: # parallel options # by erikvona: Here is a generic example of using future # and doFuture, works great with the current version: library(riskRegression) library(future) library(foreach) library(doFuture) library(survival) # Register all available cores for parallel operation plan(multiprocess, workers = availableCores()) registerDoFuture() set.seed(10) trainSurv <- sampleData(400,outcome="survival") cox1 = coxph(Surv(time,event)~X1+X2+X7+X9,data=trainSurv, y=TRUE, x = TRUE) # Bootstrapping on multiple cores x1 = Score(list("Cox(X1+X2+X7+X9)"=cox1), formula=Surv(time,event)~1,data=trainSurv, times=c(5,8), parallel = "as.registered", split.method="bootcv",B=100) ## End(Not run)
# binary outcome library(lava) set.seed(18) learndat <- sampleData(48,outcome="binary") testdat <- sampleData(40,outcome="binary") ## score logistic regression models lr1 = glm(Y~X1+X2+X7+X9,data=learndat,family=binomial) lr2 = glm(Y~X3+X5,data=learndat,family=binomial) Score(list("LR(X1+X2+X7+X9)"=lr1,"LR(X3+X5)"=lr2),formula=Y~1,data=testdat) ## ROC curve and calibration plot xb=Score(list("LR(X1+X2+X7+X9)"=lr1,"LR(X3+X5+X6)"=lr2),formula=Y~1, data=testdat,plots=c("calibration","ROC")) ## Not run: plotROC(xb) plotCalibration(xb) ## End(Not run) ## compute AUC for a list of continuous markers markers = as.list(testdat[,.(X6,X7,X8,X9,X10)]) Score(markers,formula=Y~1,data=testdat,metrics=c("auc")) # cross-validation ## Not run: set.seed(10) learndat=sampleData(400,outcome="binary") lr1a = glm(Y~X6,data=learndat,family=binomial) lr2a = glm(Y~X7+X8+X9,data=learndat,family=binomial) ## bootstrap cross-validation x1=Score(list("LR1"=lr1a,"LR2"=lr2a),formula=Y~1,data=learndat,split.method="bootcv",B=100) x1 ## leave-one-out and leave-pair-out bootstrap x2=Score(list("LR1"=lr1a,"LR2"=lr2a),formula=Y~1,data=learndat, split.method="loob", B=100,plots="calibration") x2 ## 5-fold cross-validation x3=Score(list("LR1"=lr1a,"LR2"=lr2a),formula=Y~1,data=learndat, split.method="cv5", B=1,plots="calibration") x3 ## End(Not run) # survival outcome # Score Cox regression models ## Not run: library(survival) library(rms) library(prodlim) set.seed(18) trainSurv <- sampleData(100,outcome="survival") testSurv <- sampleData(40,outcome="survival") cox1 = coxph(Surv(time,event)~X1+X2+X7+X9,data=trainSurv, y=TRUE, x = TRUE) cox2 = coxph(Surv(time,event)~X3+X5+X6,data=trainSurv, y=TRUE, x = TRUE) x=Score(list("Cox(X1+X2+X7+X9)"=cox1,"Cox(X3+X5+X6)"=cox2), formula=Surv(time,event)~1,data=testSurv,conf.int=FALSE,times=c(5,8)) ## Use Cox to estimate censoring weights y=Score(list("Cox(X1+X2+X7+X9)"=cox1,"Cox(X3+X5+X6)"=cox2), formula=Surv(time,event)~X1+X8,data=testSurv,conf.int=FALSE,times=c(5,8)) ## Use GLMnet to estimate censoring weights z=Score(list("Cox(X1+X2+X7+X9)"=cox1,"Cox(X3+X5+X6)"=cox2), formula=Surv(time,event)~X1+X8,cens.model = "GLMnet",data=testSurv, conf.int=FALSE,times=c(5,8)) ## Use hal9001 to estimate censoring weights w=Score(list("Cox(X1+X2+X7+X9)"=cox1,"Cox(X3+X5+X6)"=cox2), formula=Surv(time,event)~X1+X8,cens.model = "Hal9001",data=testSurv, conf.int=FALSE,times=c(5,8)) x y z w ## End(Not run) ## Not run: library(survival) library(rms) library(prodlim) set.seed(18) trainSurv <- sampleData(100,outcome="survival") testSurv <- sampleData(40,outcome="survival") cox1 = coxph(Surv(time,event)~X1+X2+X7+X9,data=trainSurv, y=TRUE, x = TRUE) cox2 = coxph(Surv(time,event)~X3+X5+X6,data=trainSurv, y=TRUE, x = TRUE) xs=Score(list("Cox(X1+X2+X7+X9)"=cox1,"Cox(X3+X5+X6)"=cox2), formula=Surv(time,event)~1,data=testSurv,conf.int=FALSE,times=c(5,8)) xs ## End(Not run) # Integrated Brier score ## Not run: xs=Score(list("Cox(X1+X2+X7+X9)"=cox1,"Cox(X3+X5+X6)"=cox2), formula=Surv(time,event)~1,data=testSurv,conf.int=FALSE, summary="ibs", times=sort(unique(testSurv$time))) ## End(Not run) # time-dependent AUC for list of markers ## Not run: survmarkers = as.list(testSurv[,.(X6,X7,X8,X9,X10)]) Score(survmarkers, formula=Surv(time,event)~1,metrics="auc",data=testSurv, conf.int=TRUE,times=c(5,8)) # compare models on test data Score(list("Cox(X1+X2+X7+X9)"=cox1,"Cox(X3+X5+X6)"=cox2), formula=Surv(time,event)~1,data=testSurv,conf.int=TRUE,times=c(5,8)) ## End(Not run) # crossvalidation models in traindata ## Not run: library(survival) set.seed(18) trainSurv <- sampleData(400,outcome="survival") cox1 = coxph(Surv(time,event)~X1+X2+X7+X9,data=trainSurv, y=TRUE, x = TRUE) cox2 = coxph(Surv(time,event)~X3+X5+X6,data=trainSurv, y=TRUE, x = TRUE) x1 = Score(list("Cox(X1+X2+X7+X9)"=cox1,"Cox(X3+X5+X6)"=cox2), formula=Surv(time,event)~1,data=trainSurv,conf.int=TRUE,times=c(5,8), split.method="loob",B=100,plots="calibration") x2= Score(list("Cox(X1+X2+X7+X9)"=cox1,"Cox(X3+X5+X6)"=cox2), formula=Surv(time,event)~1,data=trainSurv,conf.int=TRUE,times=c(5,8), split.method="bootcv",B=100) ## End(Not run) # restrict number of comparisons ## Not run: Score(list("Cox(X1+X2+X7+X9)"=cox1,"Cox(X3+X5+X6)"=cox2), formula=Surv(time,event)~1,data=trainSurv,contrasts=TRUE, null.model=FALSE,conf.int=TRUE,times=c(5,8),split.method="bootcv",B=3) # competing risks outcome set.seed(18) trainCR <- sampleData(400,outcome="competing.risks") testCR <- sampleData(400,outcome="competing.risks") library(riskRegression) library(cmprsk) # Cause-specific Cox regression csc1 = CSC(Hist(time,event)~X1+X2+X7+X9,data=trainCR) csc2 = CSC(Hist(time,event)~X3+X5+X6,data=trainCR) # Fine-Gray regression fgr1 = FGR(Hist(time,event)~X1+X2+X7+X9,data=trainCR,cause=1) fgr2 = FGR(Hist(time,event)~X3+X5+X6,data=trainCR,cause=1) Score(list("CSC(X1+X2+X7+X9)"=csc1,"CSC(X3+X5+X6)"=csc2, "FGR(X1+X2+X7+X9)"=fgr1,"FGR(X3+X5+X6)"=fgr2), formula=Hist(time,event)~1,data=testCR,se.fit=1L,times=c(5,8)) ## End(Not run) ## Not run: # reproduce some results of Table IV of Blanche et al. Stat Med 2013 data(Paquid) ResPaquid <- Score(list("DSST"=-Paquid$DSST,"MMSE"=-Paquid$MMSE), formula=Hist(time,status)~1, data=Paquid, null.model = FALSE, conf.int=TRUE, metrics=c("auc"), times=c(3,5,10), plots="ROC") ResPaquid plotROC(ResPaquid,time=5) ## End(Not run) ## Not run: # parallel options # by erikvona: Here is a generic example of using future # and doFuture, works great with the current version: library(riskRegression) library(future) library(foreach) library(doFuture) library(survival) # Register all available cores for parallel operation plan(multiprocess, workers = availableCores()) registerDoFuture() set.seed(10) trainSurv <- sampleData(400,outcome="survival") cox1 = coxph(Surv(time,event)~X1+X2+X7+X9,data=trainSurv, y=TRUE, x = TRUE) # Bootstrapping on multiple cores x1 = Score(list("Cox(X1+X2+X7+X9)"=cox1), formula=Surv(time,event)~1,data=trainSurv, times=c(5,8), parallel = "as.registered", split.method="bootcv",B=100) ## End(Not run)
Compute the first derivative of the log-likelihood for IPCW logistic regressions.
## S3 method for class 'wglm' score(x, indiv = FALSE, times = NULL, simplifies = TRUE, ...)
## S3 method for class 'wglm' score(x, indiv = FALSE, times = NULL, simplifies = TRUE, ...)
x |
a wglm object. |
indiv |
[logical] should the individual score be output? Otherwise the total score (i.e. summed over all individuals will be output). |
times |
[numeric vector] time points at which the score should be output. |
simplifies |
[logical] should the ouput be converted to a matrix when only one timepoint is requested. Otherwise will always return a list. |
... |
Not used. |
This is a wrapper function which first selects variables in the Cox
regression model using fastbw
from the rms
package and then
returns a fitted Cox regression model with the selected variables.
selectCox(formula, data, rule = "aic")
selectCox(formula, data, rule = "aic")
formula |
A formula object with a |
data |
Name of an data frame containing all needed variables. |
rule |
The method for selecting variables. See |
This function first calls cph
then fastbw
and finally
cph
again.
Ulla B. Mogensen, Hemant Ishwaran, Thomas A. Gerds (2012). Evaluating Random Forests for Survival Analysis Using Prediction Error Curves. Journal of Statistical Software, 50(11), 1-23. URL http://www.jstatsoft.org/v50/i11/.
library(survival) set.seed(74) d <- sampleData(89,outcome="survival") f <- selectCox(Surv(time,event)~X1+X2+X3+X4+X6+X7+X8+X9, data=d)
library(survival) set.seed(74) d <- sampleData(89,outcome="survival") f <- selectCox(Surv(time,event)~X1+X2+X3+X4+X6+X7+X8+X9, data=d)
Evaluate the influence function at selected times
selectJump(IF, times, type)
selectJump(IF, times, type)
IF |
influence function returned by iidCox |
times |
the times at which the influence function should be assessed |
type |
can be |
An object with the same dimensions as IF
Brice Ozenne [email protected]
Simulate data of a hypothetical active surveillance prostate cancer study
simActiveSurveillance(n)
simActiveSurveillance(n)
n |
sample size |
This is based on the functionality of library(lava)
.
data table of size n
Thomas A. Gerds <[email protected]>
set.seed(71) simActiveSurveillance(3)
set.seed(71) simActiveSurveillance(3)
Simulate data alike the Melanoma data
simMelanoma(n)
simMelanoma(n)
n |
sample size |
This is based on the functionality of library(lava)
.
data table of size n
Thomas A. Gerds <[email protected]>
set.seed(71) simMelanoma(3)
set.seed(71) simMelanoma(3)
This function can be used to simulate data alike the pbc data from the survival package.
simPBC(n)
simPBC(n)
n |
Sample size |
using lava to synthesize data
The simulated data.
Thomas A. Gerds <[email protected]>
library(survival) library(lava) # simulate data alike pbc data set.seed(98) d=simPBC(847) d$protimegrp1 <- d$protimegrp=="10-11" d$protimegrp2 <- d$protimegrp==">11" d$sex <- factor(d$sex,levels=0:1,labels=c("m","f")) sF1 <- survreg(Surv(time,status==1)~sex+age+logbili+protimegrp1+protimegrp2+stage3+stage4, data=d) coxF1 <- coxph(Surv(time,status==1)~sex+age+logbili+protimegrp1+protimegrp2+stage3+stage4, data=d) # load real pbc data data(pbc,package="survival") pbc <- na.omit(pbc[,c("time","status","age","sex","stage","bili","protime","trt")]) pbc$stage <- factor(pbc$stage) levels(pbc$stage) <- list("1/2"=c(1,2),"3"=3,"4"=4) pbc$logbili <- log(pbc$bili) pbc$logprotime <- log(pbc$protime) pbc$stage3 <- 1*(pbc$stage=="3") pbc$stage4 <- 1*(pbc$stage=="4") pbc$protimegrp <- cut(pbc$protime,c(-Inf,10,11,Inf),labels=c("<=10","10-11",">11")) pbc$protimegrp1 <- pbc$protimegrp=="10-11" pbc$protimegrp2 <- pbc$protimegrp==">11" form1=Surv(time,status==1)~sex+age+logbili+protimegrp1+protimegrp2+stage3+stage4 F1 <- survival::survreg(form1,data=pbc) form2=Surv(time,status==2)~sex+age+logbili+protimegrp1+protimegrp2+stage3+stage4 F2 <- survival::survreg(form1,data=pbc) sF2 <- survreg(Surv(time,status==2)~sex+age+logbili+protimegrp1+protimegrp2+stage3+stage4, data=d) G <- survreg(Surv(time,status==0)~sex+age+logbili+protimegrp1+protimegrp2+stage3+stage4, data=pbc) sG <- survreg(Surv(time,status==0)~sex+age+logbili+protimegrp1+protimegrp2+stage3+stage4, data=d) # compare fits in real and simulated pbc data cbind(coef(F1),coef(sF1)) cbind(coef(F2),coef(sF2)) cbind(coef(G),coef(sG)) cbind(coef(glm(protimegrp1~age+sex+logbili,data=pbc,family="binomial")), coef(glm(protimegrp1~age+sex+logbili,data=d,family="binomial"))) cbind(coef(lm(logbili~age+sex,data=pbc)),coef(lm(logbili~age+sex,data=d)))
library(survival) library(lava) # simulate data alike pbc data set.seed(98) d=simPBC(847) d$protimegrp1 <- d$protimegrp=="10-11" d$protimegrp2 <- d$protimegrp==">11" d$sex <- factor(d$sex,levels=0:1,labels=c("m","f")) sF1 <- survreg(Surv(time,status==1)~sex+age+logbili+protimegrp1+protimegrp2+stage3+stage4, data=d) coxF1 <- coxph(Surv(time,status==1)~sex+age+logbili+protimegrp1+protimegrp2+stage3+stage4, data=d) # load real pbc data data(pbc,package="survival") pbc <- na.omit(pbc[,c("time","status","age","sex","stage","bili","protime","trt")]) pbc$stage <- factor(pbc$stage) levels(pbc$stage) <- list("1/2"=c(1,2),"3"=3,"4"=4) pbc$logbili <- log(pbc$bili) pbc$logprotime <- log(pbc$protime) pbc$stage3 <- 1*(pbc$stage=="3") pbc$stage4 <- 1*(pbc$stage=="4") pbc$protimegrp <- cut(pbc$protime,c(-Inf,10,11,Inf),labels=c("<=10","10-11",">11")) pbc$protimegrp1 <- pbc$protimegrp=="10-11" pbc$protimegrp2 <- pbc$protimegrp==">11" form1=Surv(time,status==1)~sex+age+logbili+protimegrp1+protimegrp2+stage3+stage4 F1 <- survival::survreg(form1,data=pbc) form2=Surv(time,status==2)~sex+age+logbili+protimegrp1+protimegrp2+stage3+stage4 F2 <- survival::survreg(form1,data=pbc) sF2 <- survreg(Surv(time,status==2)~sex+age+logbili+protimegrp1+protimegrp2+stage3+stage4, data=d) G <- survreg(Surv(time,status==0)~sex+age+logbili+protimegrp1+protimegrp2+stage3+stage4, data=pbc) sG <- survreg(Surv(time,status==0)~sex+age+logbili+protimegrp1+protimegrp2+stage3+stage4, data=d) # compare fits in real and simulated pbc data cbind(coef(F1),coef(sF1)) cbind(coef(F2),coef(sF2)) cbind(coef(G),coef(sG)) cbind(coef(glm(protimegrp1~age+sex+logbili,data=pbc,family="binomial")), coef(glm(protimegrp1~age+sex+logbili,data=d,family="binomial"))) cbind(coef(lm(logbili~age+sex,data=pbc)),coef(lm(logbili~age+sex,data=d)))
Simulating from a synthesized object
simsynth(object, n = 200, drop.latent = FALSE, ...)
simsynth(object, n = 200, drop.latent = FALSE, ...)
object |
generated with |
n |
sample size |
drop.latent |
if |
... |
additional arguments passed on to |
library(survival) m=synthesize(Surv(time,status)~sex+age+bili,data=pbc) simsynth(m,10,drop.latent=TRUE)
library(survival) m=synthesize(Surv(time,status)~sex+age+bili,data=pbc) simsynth(m,10,drop.latent=TRUE)
TODO
SmcFcs(formula, data, m = 5, method, fitter = "glm", fit.formula, ...)
SmcFcs(formula, data, m = 5, method, fitter = "glm", fit.formula, ...)
formula |
TODO |
data |
TODO |
m |
TODO |
method |
TODO |
fitter |
TODO |
fit.formula |
TODO |
... |
TODO # @export |
Reconstruct each of the strata variables from the strata variable stored in the coxph object.
splitStrataVar(object)
splitStrataVar(object)
object |
a coxph object. |
Brice Ozenne [email protected] and Thomas A. Gerds [email protected]
This function is used internally to contruct pseudo values by inverse of the probability of censoring weights.
subjectWeights( formula, data, method = c("cox", "marginal", "km", "nonpar", "forest", "none"), args, lag = 1 )
subjectWeights( formula, data, method = c("cox", "marginal", "km", "nonpar", "forest", "none"), args, lag = 1 )
formula |
A survival formula like, Surv(time,status)~1 or
Hist(time,status)~1 where status=0 means censored. The status
variable is internally reversed for estimation of censoring rather
than survival probabilities. Some of the available models, see
argument |
data |
The data used for fitting the censoring model |
method |
Censoring model used for estimation of the (conditional) censoring distribution. |
args |
Arguments passed to the fitter of the method. |
lag |
If equal to |
Inverse of the probability of censoring weights usually refer to the probabilities of not being censored at certain time points. These probabilities are also the values of the conditional survival function of the censoring time given covariates. The function subjectWeights estimates the conditional survival function of the censoring times and derives the weights.
IMPORTANT: the data set should be ordered, order(time,-status)
in
order to get the weights
in the right order for some choices of
method
.
times |
The times at which weights are estimated |
weights |
Estimated weights at individual time values
|
lag |
The time lag. |
fit |
The fitted censoring model |
method |
The method for modelling the censoring distribution |
call |
The call |
Thomas A. Gerds [email protected]
library(prodlim) library(survival) dat=SimSurv(300) dat <- dat[order(dat$time,-dat$status),] # using the marginal Kaplan-Meier for the censoring times WKM=subjectWeights(Hist(time,status)~X2,data=dat,method="marginal") plot(WKM$fit) WKM$fit WKM$weights # using the Cox model for the censoring times given X2 WCox=subjectWeights(Surv(time,status)~X2,data=dat,method="cox") WCox plot(WCox$weights,WKM$weights) # using the stratified Kaplan-Meier for the censoring times given X2 WKM2 <- subjectWeights(Surv(time,status)~X2,data=dat,method="nonpar") plot(WKM2$fit,add=FALSE)
library(prodlim) library(survival) dat=SimSurv(300) dat <- dat[order(dat$time,-dat$status),] # using the marginal Kaplan-Meier for the censoring times WKM=subjectWeights(Hist(time,status)~X2,data=dat,method="marginal") plot(WKM$fit) WKM$fit WKM$weights # using the Cox model for the censoring times given X2 WCox=subjectWeights(Surv(time,status)~X2,data=dat,method="cox") WCox plot(WCox$weights,WKM$weights) # using the stratified Kaplan-Meier for the censoring times given X2 WKM2 <- subjectWeights(Surv(time,status)~X2,data=dat,method="nonpar") plot(WKM2$fit,add=FALSE)
Extract specific elements from an object.
subsetIndex(object, index, default, ...) ## Default S3 method: subsetIndex(object, index, default, ...) ## S3 method for class 'matrix' subsetIndex(object, index, default, col = TRUE, ...)
subsetIndex(object, index, default, ...) ## Default S3 method: subsetIndex(object, index, default, ...) ## S3 method for class 'matrix' subsetIndex(object, index, default, col = TRUE, ...)
object |
A vector or a matrix. |
index |
index of the elements to be extracted. 0 indicates that the column should be set to the default value. NA indicates that the column should be set to NA. |
default |
the default value. |
... |
Only used by the generic method. |
col |
If object is a matrix, |
M <- matrix(rnorm(50),5,10) subsetIndex(M, index = c(0,0,1), default = 0) subsetIndex(M, index = c(0,2,3,NA), default = 0) subsetIndex(M, index = c(0,NA,2,3,NA), default = 0) C <- 1:10 subsetIndex(C, index = c(0,0,1,5,NA), default = 0)
M <- matrix(rnorm(50),5,10) subsetIndex(M, index = c(0,0,1), default = 0) subsetIndex(M, index = c(0,2,3,NA), default = 0) subsetIndex(M, index = c(0,NA,2,3,NA), default = 0) C <- 1:10 subsetIndex(C, index = c(0,0,1,5,NA), default = 0)
Summary average treatment effects.
## S3 method for class 'ate' summary( object, estimator = object$estimator[1], short = FALSE, type = c("meanRisk", "diffRisk"), se = FALSE, quantile = FALSE, estimate.boot = TRUE, digits = 3, ... )
## S3 method for class 'ate' summary( object, estimator = object$estimator[1], short = FALSE, type = c("meanRisk", "diffRisk"), se = FALSE, quantile = FALSE, estimate.boot = TRUE, digits = 3, ... )
object |
object obtained with function |
estimator |
[character] The type of estimator relative to which the estimates should be displayed. |
short |
[logical] If |
type |
[character vector] what to displayed.
Can be |
se |
[logical] should the standard error of the risks be displayed? |
quantile |
[logical] should the quantile of the confidence bands be displayed? |
estimate.boot |
[logical] should the average estimate on the bootstrap samples be displayed? |
digits |
[integer, >0] Number of digits. |
... |
passed to confint |
to display confidence intervals/bands and p.value,
the confint
method needs to be applied on the object.
as.data.table
to extract the estimates in a data.table
object.
autoplot.ate
for a graphical representation the standardized risks.
confint.ate
to compute p-values and adjusted p-values
or perform statistical inference using a transformation.
confint.ate
to compute (pointwise/simultaneous) confidence intervals and (unadjusted/adjusted) p-values, possibly using a transformation.
Summary of a Fine-Gray regression model
## S3 method for class 'FGR' summary(object, ...)
## S3 method for class 'FGR' summary(object, ...)
object |
Object fitted with function FGR |
... |
passed to cmprsk::summary.crr |
Summary of a risk regression model
## S3 method for class 'riskRegression' summary( object, times, digits = 3, pvalue.digits = 4, eps = 10^-4, verbose = TRUE, ... )
## S3 method for class 'riskRegression' summary( object, times, digits = 3, pvalue.digits = 4, eps = 10^-4, verbose = TRUE, ... )
object |
Object obtained with ARR, LRR or riskRegression |
times |
Time points at which to show time-dependent coefficients |
digits |
Number of digits for all numbers but p-values |
pvalue.digits |
Number of digits for p-values |
eps |
p-values smaller than this number are shown as such |
verbose |
Level of verbosity |
... |
not used |
Summarizing a Score object
## S3 method for class 'Score' summary( object, times, what = c("score", "contrasts"), models, digits = 1, pvalue.digits = 4, ... )
## S3 method for class 'Score' summary( object, times, what = c("score", "contrasts"), models, digits = 1, pvalue.digits = 4, ... )
object |
Object obtained with |
times |
Select time points |
what |
Either |
models |
Select which models to summarize. Need to be a subset of |
digits |
For rounding everything but p-values |
pvalue.digits |
For rounding p-values |
... |
not used |
The AUC and the Brier score are put into tables
List of tables
Thomas A. Gerds <[email protected]>
Score
Formula interface for SuperLearner::SuperLearner
SuperPredictor( formula, data, family = "binomial", SL.library = c("SL.glm", "SL.glm.interaction", "SL.ranger"), ... )
SuperPredictor( formula, data, family = "binomial", SL.library = c("SL.glm", "SL.glm.interaction", "SL.ranger"), ... )
formula |
where the left hand side specifies the outcome and the right hand side the predictors |
data |
data set in which formula can be evaluated |
family |
the outcome family. default is binomial |
SL.library |
the SuperLearner libraries |
... |
passed to SuperLearner::SuperLearner |
Formula interface for SuperLearner::SuperLearner ##' @param formula
## Not run: if(require("SuperLearner",quietly=TRUE)){ library(SuperLearner) library(data.table) set.seed(10) d = sampleData(338, outcome="binary") spfit = SuperPredictor(Y~X1+X2+X3+X4+X5+X6+X7+X8+X9+X10,data=d) predictRisk(spfit) x <- Score(list(spfit),data=d,formula=Y~1) } ## End(Not run)
## Not run: if(require("SuperLearner",quietly=TRUE)){ library(SuperLearner) library(data.table) set.seed(10) d = sampleData(338, outcome="binary") spfit = SuperPredictor(Y~X1+X2+X3+X4+X5+X6+X7+X8+X9+X10,data=d) predictRisk(spfit) x <- Score(list(spfit),data=d,formula=Y~1) } ## End(Not run)
Extract the time and event variable from a Cox model
SurvResponseVar(formula)
SurvResponseVar(formula)
formula |
a formula |
Brice Ozenne [email protected]
## Not run: SurvResponseVar(Surv(time,event)~X1+X2) SurvResponseVar(Hist(time,event==0)~X1+X2) SurvResponseVar(Surv(start,time, status,type="counting") ~ X3+X5) SurvResponseVar(Surv(start,event=status, time2=time,type="counting") ~ X3+X5) SurvResponseVar(survival::Surv(start,event=status, time2=time,type="counting") ~ X3+X5) SurvResponseVar(status ~ X3+X5) SurvResponseVar(I(status == 1) ~ X3+X5) SurvResponseVar(list(Hist(time, event) ~ X1+X6,Hist(time, event) ~ X6)) ## End(Not run)
## Not run: SurvResponseVar(Surv(time,event)~X1+X2) SurvResponseVar(Hist(time,event==0)~X1+X2) SurvResponseVar(Surv(start,time, status,type="counting") ~ X3+X5) SurvResponseVar(Surv(start,event=status, time2=time,type="counting") ~ X3+X5) SurvResponseVar(survival::Surv(start,event=status, time2=time,type="counting") ~ X3+X5) SurvResponseVar(status ~ X3+X5) SurvResponseVar(I(status == 1) ~ X3+X5) SurvResponseVar(list(Hist(time, event) ~ X1+X6,Hist(time, event) ~ X6)) ## End(Not run)
Fit parametric regression models to the outcome distribution and optionally
also parametric regression models for the joint distribution of the predictors
structural equation models.
Then the function sim.synth
can be called on the resulting object to
to simulate from the parametric model based on the machinery of the lava
package
synthesize(object, data, ...) ## S3 method for class 'formula' synthesize( object, data, recursive = FALSE, max.levels = 10, verbose = FALSE, ... ) ## S3 method for class 'lvm' synthesize( object, data, max.levels = 10, logtrans = NULL, verbose = FALSE, fix.names = FALSE, ... )
synthesize(object, data, ...) ## S3 method for class 'formula' synthesize( object, data, recursive = FALSE, max.levels = 10, verbose = FALSE, ... ) ## S3 method for class 'lvm' synthesize( object, data, max.levels = 10, logtrans = NULL, verbose = FALSE, fix.names = FALSE, ... )
object |
Specification of the synthesizing model structures. Either a |
data |
Data to be synthesized. |
... |
Not used yet. |
recursive |
Let covariates recursively depend on each other. |
max.levels |
Integer used to guess which variables are categorical. When set to |
verbose |
Logical. If |
logtrans |
Vector of covariate names that should be log-transformed. This is primarily for internal use. |
fix.names |
Fix possible problematic covariate names. |
Synthesizes survival data (also works for linear models and generalized linear models).
The idea is to be able to simulate new data sets that mimic the original data.
See the vignette vignette("synthesize",package = "riskRegression")
for more details.
The simulation engine is: lava.
lava object
Thomas A. Gerds <[email protected]>
lvm
# pbc data library(survival) library(lava) data(pbc) pbc <- na.omit(pbc[,c("time","status","sex","age","bili")]) pbc$logbili <- log(pbc$bili) v_synt <- synthesize(object=Surv(time,status)~sex+age+logbili,data=pbc) d <- simsynth(v_synt,1000) fit_sim <- coxph(Surv(time,status==1)~age+sex+logbili,data=d) fit_real <- coxph(Surv(time,status==1)~age+sex+logbili,data=pbc) # compare estimated log-hazard ratios between simulated and real data cbind(coef(fit_sim),coef(fit_real)) u <- lvm() distribution(u,~sex) <- binomial.lvm() distribution(u,~age) <- normal.lvm() distribution(u,~trt) <- binomial.lvm() distribution(u,~logbili) <- normal.lvm() u <-eventTime(u,time~min(time.cens=0,time.transplant=1,time.death=2), "status") lava::regression(u,logbili~age+sex) <- 1 lava::regression(u,time.transplant~sex+age+logbili) <- 1 lava::regression(u,time.death~sex+age+logbili) <- 1 lava::regression(u,time.cens~1) <- 1 transform(u,logbili~bili) <- function(x){log(x)} u_synt <- synthesize(object=u, data=na.omit(pbc)) set.seed(8) d <- simsynth(u_synt,n=1000) # note: synthesize may relabel status variable fit_sim <- coxph(Surv(time,status==1)~age+sex+logbili,data=d) fit_real <- coxph(Surv(time,status==1)~age+sex+log(bili),data=pbc) # compare estimated log-hazard ratios between simulated and real data cbind(coef(fit_sim),coef(fit_real)) # # Cancer data # data(cancer) b <- lvm() distribution(b,~rx) <- binomial.lvm() distribution(b,~age) <- normal.lvm() distribution(b,~resid.ds) <- binomial.lvm() distribution(b,~ecog.ps) <- binomial.lvm() lava::regression(b,time.death~age+rx+resid.ds) <- 1 b<-eventTime(b,futime~min(time.cens=0,time.death=1), "fustat") b_synt <- synthesize(object = b, data = ovarian) D <- simsynth(b_synt,1000) fit_real <- coxph(Surv(futime,fustat)~age+rx+resid.ds, data=ovarian) fit_sim <- coxph(Surv(futime,fustat)~age+rx+resid.ds, data=D) cbind(coef(fit_sim),coef(fit_real)) w_synt <- synthesize(object=Surv(futime,fustat)~age+rx+resid.ds, data=ovarian) D <- simsynth(w_synt,1000) fit_sim <- coxph(Surv(futime,fustat==1)~age+rx+resid.ds,data=D) fit_real <- coxph(Surv(futime,fustat==1)~age+rx+resid.ds,data=ovarian) # compare estimated log-hazard ratios between simulated and real data cbind(coef(fit_sim),coef(fit_real))
# pbc data library(survival) library(lava) data(pbc) pbc <- na.omit(pbc[,c("time","status","sex","age","bili")]) pbc$logbili <- log(pbc$bili) v_synt <- synthesize(object=Surv(time,status)~sex+age+logbili,data=pbc) d <- simsynth(v_synt,1000) fit_sim <- coxph(Surv(time,status==1)~age+sex+logbili,data=d) fit_real <- coxph(Surv(time,status==1)~age+sex+logbili,data=pbc) # compare estimated log-hazard ratios between simulated and real data cbind(coef(fit_sim),coef(fit_real)) u <- lvm() distribution(u,~sex) <- binomial.lvm() distribution(u,~age) <- normal.lvm() distribution(u,~trt) <- binomial.lvm() distribution(u,~logbili) <- normal.lvm() u <-eventTime(u,time~min(time.cens=0,time.transplant=1,time.death=2), "status") lava::regression(u,logbili~age+sex) <- 1 lava::regression(u,time.transplant~sex+age+logbili) <- 1 lava::regression(u,time.death~sex+age+logbili) <- 1 lava::regression(u,time.cens~1) <- 1 transform(u,logbili~bili) <- function(x){log(x)} u_synt <- synthesize(object=u, data=na.omit(pbc)) set.seed(8) d <- simsynth(u_synt,n=1000) # note: synthesize may relabel status variable fit_sim <- coxph(Surv(time,status==1)~age+sex+logbili,data=d) fit_real <- coxph(Surv(time,status==1)~age+sex+log(bili),data=pbc) # compare estimated log-hazard ratios between simulated and real data cbind(coef(fit_sim),coef(fit_real)) # # Cancer data # data(cancer) b <- lvm() distribution(b,~rx) <- binomial.lvm() distribution(b,~age) <- normal.lvm() distribution(b,~resid.ds) <- binomial.lvm() distribution(b,~ecog.ps) <- binomial.lvm() lava::regression(b,time.death~age+rx+resid.ds) <- 1 b<-eventTime(b,futime~min(time.cens=0,time.death=1), "fustat") b_synt <- synthesize(object = b, data = ovarian) D <- simsynth(b_synt,1000) fit_real <- coxph(Surv(futime,fustat)~age+rx+resid.ds, data=ovarian) fit_sim <- coxph(Surv(futime,fustat)~age+rx+resid.ds, data=D) cbind(coef(fit_sim),coef(fit_real)) w_synt <- synthesize(object=Surv(futime,fustat)~age+rx+resid.ds, data=ovarian) D <- simsynth(w_synt,1000) fit_sim <- coxph(Surv(futime,fustat==1)~age+rx+resid.ds,data=D) fit_real <- coxph(Surv(futime,fustat==1)~age+rx+resid.ds,data=ovarian) # compare estimated log-hazard ratios between simulated and real data cbind(coef(fit_sim),coef(fit_real))
Extract terms for phreg objects
## S3 method for class 'phreg' terms(x, ...)
## S3 method for class 'phreg' terms(x, ...)
x |
a phreg object. |
... |
not used. |
Compute confidence intervals/bands and p-values after a transformation
transformCIBP( estimate, se, iid, null, conf.level, alternative, ci, type, min.value, max.value, band, method.band, n.sim, seed, p.value, df = NULL )
transformCIBP( estimate, se, iid, null, conf.level, alternative, ci, type, min.value, max.value, band, method.band, n.sim, seed, p.value, df = NULL )
estimate |
[numeric matrix] the estimate value before transformation. |
se |
[numeric matrix] the standard error before transformation. |
iid |
[numeric array] the iid decomposition before transformation. |
null |
[numeric] the value of the estimate (before transformation) under the null hypothesis. |
conf.level |
[numeric, 0-1] Level of confidence. |
alternative |
[character] a character string specifying the alternative hypothesis,
must be one of |
ci |
[logical] should confidence intervals be computed. |
type |
[character] the transforamtion.
Can be |
min.value |
[numeric] if not |
max.value |
[numeric] if not |
band |
[integer 0,1,2] When non-0, the confidence bands are computed for each contrasts ( |
method.band |
[character] method used to adjust for multiple comparisons.
Can be any element of |
n.sim |
[integer, >0] the number of simulations used to compute the quantiles for the confidence bands. |
seed |
[integer, >0] seed number set before performing simulations for the confidence bands. |
p.value |
[logical] should p-values and adjusted p-values be computed. Only active if |
df |
[integer, >0] optional. Degrees of freedom used for the student distribution of the test statistic. If not specified, use a normal distribution instead. |
The iid decomposition must have dimensions [n.obs,time,n.prediction] while estimate and se must have dimensions [n.prediction,time].
Single step max adjustment for multiple comparisons, i.e. accounting for the correlation between the test statistics but not for the ordering of the tests, can be performed setting the arguemnt method.band
to "maxT-integration"
or "maxT-simulation"
. The former uses numerical integration (pmvnorm
and qmvnorm
to perform the adjustment while the latter using simulation. Both assume that the test statistics are jointly normally distributed.
set.seed(10) n <- 100 X <- rnorm(n) res2sided <- transformCIBP(estimate = mean(X), se = cbind(sd(X)/sqrt(n)), null = 0, type = "none", ci = TRUE, conf.level = 0.95, alternative = "two.sided", min.value = NULL, max.value = NULL, band = FALSE, p.value = TRUE, seed = 10, df = n-1) resLess <- transformCIBP(estimate = mean(X), se = cbind(sd(X)/sqrt(n)), null = 0, type = "none", ci = TRUE, conf.level = 0.95, alternative = "less", min.value = NULL, max.value = NULL, band = FALSE, p.value = TRUE, seed = 10, df = n-1) resGreater <- transformCIBP(estimate = mean(X), se = cbind(sd(X)/sqrt(n)), null = 0, type = "none", ci = TRUE, conf.level = 0.95, alternative = "greater", min.value = NULL, max.value = NULL, band = FALSE, p.value = TRUE, seed = 10, df = n-1) ## comparison with t-test GS <- t.test(X, alternative = "two.sided") res2sided$p.value - GS$p.value unlist(res2sided[c("lower","upper")]) - GS$conf.int GS <- t.test(X, alternative = "less") resLess$p.value - GS$p.value unlist(resLess[c("lower","upper")]) - GS$conf.int GS <- t.test(X, alternative = "greater") resGreater$p.value - GS$p.value unlist(resGreater[c("lower","upper")]) - GS$conf.int
set.seed(10) n <- 100 X <- rnorm(n) res2sided <- transformCIBP(estimate = mean(X), se = cbind(sd(X)/sqrt(n)), null = 0, type = "none", ci = TRUE, conf.level = 0.95, alternative = "two.sided", min.value = NULL, max.value = NULL, band = FALSE, p.value = TRUE, seed = 10, df = n-1) resLess <- transformCIBP(estimate = mean(X), se = cbind(sd(X)/sqrt(n)), null = 0, type = "none", ci = TRUE, conf.level = 0.95, alternative = "less", min.value = NULL, max.value = NULL, band = FALSE, p.value = TRUE, seed = 10, df = n-1) resGreater <- transformCIBP(estimate = mean(X), se = cbind(sd(X)/sqrt(n)), null = 0, type = "none", ci = TRUE, conf.level = 0.95, alternative = "greater", min.value = NULL, max.value = NULL, band = FALSE, p.value = TRUE, seed = 10, df = n-1) ## comparison with t-test GS <- t.test(X, alternative = "two.sided") res2sided$p.value - GS$p.value unlist(res2sided[c("lower","upper")]) - GS$conf.int GS <- t.test(X, alternative = "less") resLess$p.value - GS$p.value unlist(resLess[c("lower","upper")]) - GS$conf.int GS <- t.test(X, alternative = "greater") resGreater$p.value - GS$p.value unlist(resGreater[c("lower","upper")]) - GS$conf.int
Logistic regression over multiple timepoints where right-censoring is handled using inverse probability of censoring weighting (IPCW).
wglm( regressor.event, formula.censor, times, data, cause = NA, fitter = "coxph", product.limit = FALSE )
wglm( regressor.event, formula.censor, times, data, cause = NA, fitter = "coxph", product.limit = FALSE )
regressor.event |
[formula] a formula with empty left hand side and the covariates for the logistic regression on the right hand side. |
formula.censor |
[formula] a formula used to fit the censoring model. |
times |
[numeric vector] time points at which to model the probability of experiencing an event. |
data |
[data.frame] dataset containing the time at which the event occured, the type of event, and regressors used to fit the censoring and logistic models. |
cause |
[character or numeric] the cause of interest. Defaults to the first cause. |
fitter |
[character] routine to fit the Cox regression models. |
product.limit |
[logical] if |
First, a Cox model is fitted (argument formula.censor) and the censoring probabilities are computed relative to each timepoint (argument times) to obtain the censoring weights. Then, for each timepoint, a logistic regression is fitted with the appropriate censoring weights and where the outcome is the indicator of having experience the event of interest (argument cause) at or before the timepoint.
an object of class "wglm"
.
library(survival) set.seed(10) n <- 250 tau <- 1:5 d <- sampleData(n, outcome = "competing.risks") dFull <- d[event!=0] ## remove censoring dSurv <- d[event!=2] ## remove competing risk #### no censoring #### e.wglm <- wglm(regressor.event = ~ X1, formula.censor = Surv(time,event==0) ~ 1, times = tau, data = dFull, product.limit = TRUE) e.wglm ## same as a logistic regression summary(ate(e.wglm, data = dFull, times = tau, treatment = "X1", verbose = FALSE)) #### right-censoring #### ## no covariante in the censoring model (independent censoring) eC.wglm <- wglm(regressor.event = ~ X1, formula.censor = Surv(time,event==0) ~ 1, times = tau, data = dSurv, product.limit = TRUE) eC.wglm ## with covariates in the censoring model eC2.wglm <- wglm(regressor.event = ~ X1 + X8, formula.censor = Surv(time,event==0) ~ X1*X8, times = tau, data = dSurv) eC2.wglm #### Competing risks #### ## here Kaplan-Meier as censoring model eCR.wglm <- wglm(regressor.event = ~ X1, formula.censor = Surv(time,event==0) ~ strata(X1), times = tau, data = d, cause = 1, product.limit = TRUE) eCR.wglm
library(survival) set.seed(10) n <- 250 tau <- 1:5 d <- sampleData(n, outcome = "competing.risks") dFull <- d[event!=0] ## remove censoring dSurv <- d[event!=2] ## remove competing risk #### no censoring #### e.wglm <- wglm(regressor.event = ~ X1, formula.censor = Surv(time,event==0) ~ 1, times = tau, data = dFull, product.limit = TRUE) e.wglm ## same as a logistic regression summary(ate(e.wglm, data = dFull, times = tau, treatment = "X1", verbose = FALSE)) #### right-censoring #### ## no covariante in the censoring model (independent censoring) eC.wglm <- wglm(regressor.event = ~ X1, formula.censor = Surv(time,event==0) ~ 1, times = tau, data = dSurv, product.limit = TRUE) eC.wglm ## with covariates in the censoring model eC2.wglm <- wglm(regressor.event = ~ X1 + X8, formula.censor = Surv(time,event==0) ~ X1*X8, times = tau, data = dSurv) eC2.wglm #### Competing risks #### ## here Kaplan-Meier as censoring model eCR.wglm <- wglm(regressor.event = ~ X1, formula.censor = Surv(time,event==0) ~ strata(X1), times = tau, data = d, cause = 1, product.limit = TRUE) eCR.wglm