--- title: "Conditional Nelson--Aalen and Aalen--Johansen Estimation" author: "Martin Bladt & Christian Furrer" date: "28th of February, 2023" package: "AalenJohansen" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{my-vignette} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ``` This vignette illustrates, through four examples, the potential uses of the R-package $\texttt{AalenJohansen}$, which is an implementation of the conditional Nelson--Aalen and Aalen--Johansen estimators introduced in Bladt \& Furrer (2023). ## 1. Markov model with independent censoring We start out with a simple time-inhomogeneous Markov model: \begin{align*} \frac{\mathrm{d}\Lambda(t)}{\mathrm{d}t}=\lambda(t)=\frac{1}{1+\frac{1}{2}t} \begin{pmatrix} -2 & 1& 1 \\ 2 & -3 & 1 \\ 0 & 0 & 0 \end{pmatrix}\!. \end{align*} ```{r} library(AalenJohansen) set.seed(2) jump_rate <- function(i, t, u){ if(i == 1){ 2 / (1+1/2*t) } else if(i == 2){ 3 / (1+1/2*t) } else{ 0 } } mark_dist <- function(i, s, v){ if(i == 1){ c(0, 1/2, 1/2) } else if(i == 2){ c(2/3, 0, 1/3) } else{ 0 } } lambda <- function(t){ A <- matrix(c(2/(1+1/2*t)*mark_dist(1, t, 0), 3/(1+1/2*t)*mark_dist(2, t, 0), rep(0, 3)), nrow = 3, ncol = 3, byrow = TRUE) diag(A) <- -rowSums(A) A } ``` We simulate $1,000$ independent and identically distributed realizations subject to independent right-censoring. Right-censoring follows the distribution $\text{Unif}(0,5)$. ```{r} n <- 1000 c <- runif(n, 0, 5) sim <- list() for(i in 1:n){ sim[[i]] <- sim_path(sample(1:2, 1), rates = jump_rate, dists = mark_dist, tn = c[i], bs = c(2*c[i], 3*c[i], 0)) } ``` The degree of censoring is ```{r} sum(c == unlist(lapply(sim, FUN = function(z){tail(z$times, 1)}))) / n ``` We fit the classic Nelson--Aalen and Aalen-Johansen estimators. ```{r} fit <- aalen_johansen(sim) ``` For illustrative purposes, we plot $\Lambda_{21}$ and the state occupation probability $p_2$ for both the true model (full) and using the classic estimators (dashed). ```{r, fig.align = 'center', fig.height = 3, fig.width = 6} v1 <- unlist(lapply(fit$Lambda, FUN = function(L) L[2,1])) v0 <- fit$t p <- unlist(lapply(fit$p, FUN = function(L) L[2])) P <- unlist(lapply(prodint(0, 5, 0.01, lambda), FUN = function(L) (c(1/2, 1/2, 0) %*% L)[2])) par(mfrow = c(1, 2)) par(mar = c(2.5, 2.5, 1.5, 1.5)) plot(v0, v1, type = "l", lty = 2, xlab = "", ylab = "", main = "Hazard") lines(v0, 4*log(1+1/2*v0)) plot(v0, p, type = "l", lty = 2, xlab = "", ylab = "", main = "Probability") lines(seq(0, 5, 0.01), P) ``` ## 2. Markov model with independent censoring and covariates We now consider a simple extension with covariates: \begin{align*} \frac{\mathrm{d}\Lambda(t|x)}{\mathrm{d}t}=\lambda(t|x)=\frac{1}{1+x\cdot t} \begin{pmatrix} -2 & 1& 1 \\ 2 & -3 & 1 \\ 0 & 0 & 0 \end{pmatrix}\!. \end{align*} We simulate $10,000$ independent realizations subject to independent right-censoring. Right-censoring follows the distribution $\text{Unif}(0,5)$, while $X\sim\text{Unif}(0,1)$. ```{r} jump_rate <- function(i, t, u){ if(i == 1){ 2 } else if(i == 2){ 3 } else{ 0 } } mark_dist <- function(i, s, v){ if(i == 1){ c(0, 1/2, 1/2) } else if(i == 2){ c(2/3, 0, 1/3) } else{ 0 } } lambda <- function(t, x){ A <- matrix(c(2/(1+x*t)*mark_dist(1, t, 0), 3/(1+x*t)*mark_dist(2, t, 0), rep(0, 3)), nrow = 3, ncol = 3, byrow = TRUE) diag(A) <- -rowSums(A) A } n <- 10000 X <- runif(n) c <- runif(n, 0, 5) sim <- list() for(i in 1:n){ rates <- function(j, y, z){jump_rate(j, y, z)/(1+X[i]*y)} sim[[i]] <- sim_path(sample(1:2, 1), rates = rates, dists = mark_dist, tn = c[i], bs = c(2*c[i], 3*c[i], 0)) sim[[i]]$X <- X[i] } ``` The degree of censoring is ```{r} sum(c == unlist(lapply(sim, FUN = function(z){tail(z$times, 1)}))) / n ``` We fit the conditional Nelson--Aalen and Aalen--Johansen estimators for $x=0.2, 0.8$. ```{r} x1 <- 0.2 x2 <- 0.8 fit1 <- aalen_johansen(sim, x = x1) fit2 <- aalen_johansen(sim, x = x2) ``` For illustrative purposes, we plot $\Lambda_{21}$ and the conditional state occupation probability $p_2$ for both the true model (full) and using the conditional estimators (dashed). This is done for both $x=0.2$ (in red) and $x=0.8$ (in blue). ```{r, fig.align = 'center', fig.height = 3, fig.width = 6} v11 <- unlist(lapply(fit1$Lambda, FUN = function(L) L[2,1])) v10 <- fit1$t v21 <- unlist(lapply(fit2$Lambda, FUN = function(L) L[2,1])) v20 <- fit2$t p1 <- unlist(lapply(fit1$p, FUN = function(L) L[2])) P1 <- unlist(lapply(prodint(0, 5, 0.01, function(t){lambda(t, x = x1)}), FUN = function(L) (c(1/2, 1/2, 0) %*% L)[2])) p2 <- unlist(lapply(fit2$p, FUN = function(L) L[2])) P2 <- unlist(lapply(prodint(0, 5, 0.01, function(t){lambda(t, x = x2)}), FUN = function(L) (c(1/2, 1/2, 0) %*% L)[2])) par(mfrow = c(1, 2)) par(mar = c(2.5, 2.5, 1.5, 1.5)) plot(v10, v11, type = "l", lty = 2, xlab = "", ylab = "", main = "Hazard", col = "red") lines(v10, 2/x1*log(1+x1*v10), col = "red") lines(v20, v21, lty = 2, col = "blue") lines(v20, 2/x2*log(1+x2*v20), col = "blue") plot(v10, p1, type = "l", lty = 2, xlab = "", ylab = "", main = "Probability", col = "red") lines(seq(0, 5, 0.01), P1, col = "red") lines(v20, p2, lty = 2, col = "blue") lines(seq(0, 5, 0.01), P2, col = "blue") ``` ## 3. Markov model with dependent censoring and covariates We consider the same model as before, but now introduce dependent right-censoring. To be precise, we assume that right-censoring occurs at rate $\frac{1}{2}\frac{1}{1+\frac{1}{2}t}$ in the first state, while it occurs at twice this rate in the second state. Finally, all remaining individuals are right-censored at time $t$. We again simulate $10,000$ independent realizations. ```{r} jump_rate_enlarged <- function(i, t, u){ if(i == 1){ 2.5 } else if(i == 2){ 4 } else{ 0 } } mark_dist_enlarged <- function(i, s, v){ if(i == 1){ c(0, 2/5, 2/5, 1/5) } else if(i == 2){ c(2/4, 0, 1/4, 1/4) } else{ 0 } } n <- 10000 X <- runif(n) tn <- 5 sim <- list() for(i in 1:n){ rates <- function(j, y, z){jump_rate_enlarged(j, y, z)/(1+X[i]*y)} sim[[i]] <- sim_path(sample(1:2, 1), rates = rates, dists = mark_dist_enlarged, tn = tn, abs = c(FALSE, FALSE, TRUE, TRUE), bs = c(2.5*tn, 4*tn, 0, 0)) sim[[i]]$X <- X[i] } ``` The degree of censoring is ```{r} sum(tn == unlist(lapply(sim, FUN = function(z){tail(z$times, 1)})) | 4 == unlist(lapply(sim, FUN = function(z){tail(z$states, 1)}))) / n ``` We fit the conditional Nelson--Aalen and Aalen--Johansen estimators for $x=0.2, 0.8$. ```{r} fit1 <- aalen_johansen(sim, x = x1, collapse = TRUE) fit2 <- aalen_johansen(sim, x = x2, collapse = TRUE) ``` For illustrative purposes, we plot $\Lambda_{21}$ and the conditional state occupation probability $p_2$ for both the true model (full) and using the conditional estimators (dashed). This is done for both $x=0.2$ (in red) and $x=0.8$ (in blue). ```{r, fig.align = 'center', fig.height = 3, fig.width = 6} v11 <- unlist(lapply(fit1$Lambda, FUN = function(L) L[2,1])) v10 <- fit1$t v21 <- unlist(lapply(fit2$Lambda, FUN = function(L) L[2,1])) v20 <- fit2$t p1 <- unlist(lapply(fit1$p, FUN = function(L) L[2])) p2 <- unlist(lapply(fit2$p, FUN = function(L) L[2])) par(mfrow = c(1, 2)) par(mar = c(2.5, 2.5, 1.5, 1.5)) plot(v10, v11, type = "l", lty = 2, xlab = "", ylab = "", main = "Hazard", col = "red") lines(v10, 2/x1*log(1+x1*v10), col = "red") lines(v20, v21, lty = 2, col = "blue") lines(v20, 2/x2*log(1+x2*v20), col = "blue") plot(v10, p1, type = "l", lty = 2, xlab = "", ylab = "", main = "Probability", col = "red") lines(seq(0, 5, 0.01), P1, col = "red") lines(v20, p2, lty = 2, col = "blue") lines(seq(0, 5, 0.01), P2, col = "blue") ``` ## 4. Semi-Markov model with independent censoring Last but not least, we consider a time-inhomogeneous semi-Markov model with non-zero transition rates given by \begin{align*} \lambda_{12}(t, u) &= 0.09 + 0.0018t, \\ \lambda_{13}(t, u) &= 0.01 + 0.0002t, \\ \lambda_{23}(t, u) &= 0.09 + 1(u < 4)0.20 + 0.001t. \end{align*} ```{r} jump_rate <- function(i, t, u){ if(i == 1){ 0.1 + 0.002*t } else if(i == 2){ ifelse(u < 4, 0.29, 0.09) + 0.001*t } else{ 0 } } mark_dist <- function(i, s, v){ if(i == 1){ c(0, 0.9, 0.1) } else if(i == 2){ c(0, 0, 1) } else{ 0 } } ``` We simulate $5,000$ independent and identically distributed realizations subject to independent right-censoring. Right-censoring follows the distribution $\text{Unif}(10,40)$. ```{r} n <- 5000 c <- runif(n, 10, 40) sim <- list() for(i in 1:n){ sim[[i]] <- sim_path(1, rates = jump_rate, dists = mark_dist, tn = c[i], bs = c(0.1+0.002*c[i], 0.29+0.001*c[i], 0)) } ``` The degree of censoring is ```{r} sum(c == unlist(lapply(sim, FUN = function(z){tail(z$times, 1)}))) / n ``` We fit the Aalen--Johansen estimator. ```{r} fit <- aalen_johansen(sim) ``` For illustrative purposes, we plot the estimate of the state occupation probability $p_2$ (dashed). The true values (full) are obtained via numerical integration, utilizing that this specific model has a hierarchical structure. ```{r, fig.align = 'center', fig.height = 4.5, fig.width = 4.5} v0 <- fit$t p <- unlist(lapply(fit$p, FUN = function(L) L[2])) integrand <- function(t, s){ exp(-0.1*s-0.001*s^2)*(0.09 + 0.0018*s)*exp(-0.20*pmin(t-s, 4)-0.09*(t-s)-0.0005*(t^2-s^2)) } P <- Vectorize(function(t){ integrate(f = integrand, lower = 0, upper = t, t = t)$value }, vectorize.args = "t") plot(v0, p, type = "l", lty = 2, xlab = "", ylab = "", main = "Probability") lines(seq(0, 40, 0.1), P(seq(0, 40, 0.1))) ``` We now want to estimate the conditional state occupation probability $p_2$, given sojourn in the second state at time $10$ with duration $u=1,5$. For this, we first need to sub-sample the data (landmarking). ```{r} landmark <- sim[unlist(lapply(sim, FUN = function(z){any(z$times <= 10 & c(z$times[-1], Inf) > 10 & z$states == 2)}))] landmark <- lapply(landmark, FUN = function(z){list(times = z$times, states = z$states, X = 10 - z$times[z$times <= 10 & c(z$times[-1], Inf) > 10 & z$states == 2])}) ``` The degree of sub-sampling is ```{r} length(landmark) / n ``` Next, we fit the conditional Aalen--Johansen estimator for $u=1,5$. We also fit the usual landmark Aalen--Johansen estimator. ```{r} u1 <- 1 u2 <- 5 fit1 <- aalen_johansen(landmark, x = u1) fit2 <- aalen_johansen(landmark, x = u2) fit3 <- aalen_johansen(landmark) ``` For illustrative purposes, we plot the conditional state occupation probability $p_2$ using the conditional estimator (dashed), the usual landmark estimator (dotted), and the true model (full). This is done for both $u=1$ (in red) and $u=5$ (in blue). ```{r, fig.align = 'center', fig.height = 4.5, fig.width = 4.5} v10 <- fit1$t v20 <- fit2$t v30 <- fit3$t p1 <- unlist(lapply(fit1$p, FUN = function(L) L[2])) p2 <- unlist(lapply(fit2$p, FUN = function(L) L[2])) p3 <- unlist(lapply(fit3$p, FUN = function(L) L[2])) P <- function(t, u){ exp(-(t-10)*0.09-(t^2-100)*0.0005-pmax(0, pmin(t, 4-(u-10))-10)*0.20) } plot(v10, p1, type = "l", lty = 2, xlab = "", ylab = "", main = "Probability", col = "red", xlim = c(10, 40)) lines(seq(10, 40, 0.1), P(seq(10, 40, 0.1), u1), col = "red") lines(v20, p2, lty = 2, col = "blue") lines(seq(10, 40, 0.1), P(seq(10, 40, 0.1), u2), col = "blue") lines(v30, p3, lty = 3) ```