Conditional Nelson–Aalen and Aalen–Johansen Estimation

This vignette illustrates, through four examples, the potential uses of the R-package AalenJohansen, which is an implementation of the conditional Nelson–Aalen and Aalen–Johansen estimators introduced in Bladt & Furrer (2023).

1. Markov model with independent censoring

We start out with a simple time-inhomogeneous Markov model:

library(AalenJohansen)

set.seed(2)

jump_rate <- function(i, t, u){
  if(i == 1){
    2 / (1+1/2*t)
  } else if(i == 2){
    3 / (1+1/2*t)
  } else{
    0
  }
}

mark_dist <- function(i, s, v){
  if(i == 1){
    c(0, 1/2, 1/2)
  } else if(i == 2){
    c(2/3, 0, 1/3)
  } else{
    0
  } 
}

lambda <- function(t){
  A <- matrix(c(2/(1+1/2*t)*mark_dist(1, t, 0), 3/(1+1/2*t)*mark_dist(2, t, 0), rep(0, 3)),
              nrow = 3, ncol = 3, byrow = TRUE)
  diag(A) <- -rowSums(A)
  A
}

We simulate 1, 000 independent and identically distributed realizations subject to independent right-censoring. Right-censoring follows the distribution Unif(0, 5).

n <- 1000

c <- runif(n, 0, 5)

sim <- list()
for(i in 1:n){
  sim[[i]] <- sim_path(sample(1:2, 1), rates = jump_rate, dists = mark_dist,
                       tn = c[i], bs = c(2*c[i], 3*c[i], 0))
}

The degree of censoring is

sum(c == unlist(lapply(sim, FUN = function(z){tail(z$times, 1)}))) / n
#> [1] 0.309

We fit the classic Nelson–Aalen and Aalen-Johansen estimators.

fit <- aalen_johansen(sim)

For illustrative purposes, we plot Λ21 and the state occupation probability p2 for both the true model (full) and using the classic estimators (dashed).

v1 <- unlist(lapply(fit$Lambda, FUN = function(L) L[2,1]))
v0 <- fit$t
p <- unlist(lapply(fit$p, FUN = function(L) L[2]))
P <- unlist(lapply(prodint(0, 5, 0.01, lambda), FUN = function(L) (c(1/2, 1/2, 0) %*% L)[2]))

par(mfrow = c(1, 2))
par(mar = c(2.5, 2.5, 1.5, 1.5))

plot(v0, v1, type = "l", lty = 2, xlab = "", ylab = "", main = "Hazard")
lines(v0, 4*log(1+1/2*v0))
plot(v0, p, type = "l", lty = 2, xlab = "", ylab = "", main = "Probability")
lines(seq(0, 5, 0.01), P)

2. Markov model with independent censoring and covariates

We now consider a simple extension with covariates:

We simulate 10, 000 independent realizations subject to independent right-censoring. Right-censoring follows the distribution Unif(0, 5), while X ∼ Unif(0, 1).

jump_rate <- function(i, t, u){
  if(i == 1){
    2
  } else if(i == 2){
    3
  } else{
    0
  }
}

mark_dist <- function(i, s, v){
  if(i == 1){
    c(0, 1/2, 1/2)
  } else if(i == 2){
    c(2/3, 0, 1/3)
  } else{
    0
  }
}

lambda <- function(t, x){
  A <- matrix(c(2/(1+x*t)*mark_dist(1, t, 0), 3/(1+x*t)*mark_dist(2, t, 0), rep(0, 3)),
              nrow = 3, ncol = 3, byrow = TRUE)
  diag(A) <- -rowSums(A)
  A
}

n <- 10000

X <- runif(n)
c <- runif(n, 0, 5)

sim <- list()
for(i in 1:n){
  rates <- function(j, y, z){jump_rate(j, y, z)/(1+X[i]*y)}
  sim[[i]] <- sim_path(sample(1:2, 1), rates = rates, dists = mark_dist,
                       tn = c[i], bs = c(2*c[i], 3*c[i], 0))
  sim[[i]]$X <- X[i]
}

The degree of censoring is

sum(c == unlist(lapply(sim, FUN = function(z){tail(z$times, 1)}))) / n
#> [1] 0.2954

We fit the conditional Nelson–Aalen and Aalen–Johansen estimators for x = 0.2, 0.8.

x1 <- 0.2
x2 <- 0.8

fit1 <- aalen_johansen(sim, x = x1)
fit2 <- aalen_johansen(sim, x = x2)

For illustrative purposes, we plot Λ21 and the conditional state occupation probability p2 for both the true model (full) and using the conditional estimators (dashed). This is done for both x = 0.2 (in red) and x = 0.8 (in blue).

v11 <- unlist(lapply(fit1$Lambda, FUN = function(L) L[2,1]))
v10 <- fit1$t
v21 <- unlist(lapply(fit2$Lambda, FUN = function(L) L[2,1]))
v20 <- fit2$t
p1 <- unlist(lapply(fit1$p, FUN = function(L) L[2]))
P1 <- unlist(lapply(prodint(0, 5, 0.01, function(t){lambda(t, x = x1)}),
FUN = function(L) (c(1/2, 1/2, 0) %*% L)[2]))
p2 <- unlist(lapply(fit2$p, FUN = function(L) L[2]))
P2 <- unlist(lapply(prodint(0, 5, 0.01, function(t){lambda(t, x = x2)}),
FUN = function(L) (c(1/2, 1/2, 0) %*% L)[2]))

par(mfrow = c(1, 2))
par(mar = c(2.5, 2.5, 1.5, 1.5))

plot(v10, v11, type = "l", lty = 2, xlab = "", ylab = "", main = "Hazard", col = "red")
lines(v10, 2/x1*log(1+x1*v10), col = "red")
lines(v20, v21, lty = 2, col = "blue")
lines(v20, 2/x2*log(1+x2*v20), col = "blue")

plot(v10, p1, type = "l", lty = 2, xlab = "", ylab = "", main = "Probability", col = "red")
lines(seq(0, 5, 0.01), P1, col = "red")
lines(v20, p2, lty = 2, col = "blue")
lines(seq(0, 5, 0.01), P2, col = "blue")

3. Markov model with dependent censoring and covariates

We consider the same model as before, but now introduce dependent right-censoring. To be precise, we assume that right-censoring occurs at rate $\frac{1}{2}\frac{1}{1+\frac{1}{2}t}$ in the first state, while it occurs at twice this rate in the second state. Finally, all remaining individuals are right-censored at time t.

We again simulate 10, 000 independent realizations.

jump_rate_enlarged <- function(i, t, u){
  if(i == 1){
    2.5
  } else if(i == 2){
    4
  } else{
    0
  }
}

mark_dist_enlarged <- function(i, s, v){
  if(i == 1){
    c(0, 2/5, 2/5, 1/5)
  } else if(i == 2){
    c(2/4, 0, 1/4, 1/4)
  } else{
    0
  }
}

n <- 10000

X <- runif(n)

tn <- 5
sim <- list()
for(i in 1:n){
  rates <- function(j, y, z){jump_rate_enlarged(j, y, z)/(1+X[i]*y)}
  sim[[i]] <- sim_path(sample(1:2, 1), rates = rates, dists = mark_dist_enlarged,
                       tn = tn, abs = c(FALSE, FALSE, TRUE, TRUE),
                       bs = c(2.5*tn, 4*tn, 0, 0))
  sim[[i]]$X <- X[i]
}

The degree of censoring is

sum(tn == unlist(lapply(sim, FUN = function(z){tail(z$times, 1)}))
    | 4 == unlist(lapply(sim, FUN = function(z){tail(z$states, 1)}))) / n
#> [1] 0.4373

We fit the conditional Nelson–Aalen and Aalen–Johansen estimators for x = 0.2, 0.8.

fit1 <- aalen_johansen(sim, x = x1, collapse = TRUE)
fit2 <- aalen_johansen(sim, x = x2, collapse = TRUE)

For illustrative purposes, we plot Λ21 and the conditional state occupation probability p2 for both the true model (full) and using the conditional estimators (dashed). This is done for both x = 0.2 (in red) and x = 0.8 (in blue).

v11 <- unlist(lapply(fit1$Lambda, FUN = function(L) L[2,1]))
v10 <- fit1$t
v21 <- unlist(lapply(fit2$Lambda, FUN = function(L) L[2,1]))
v20 <- fit2$t
p1 <- unlist(lapply(fit1$p, FUN = function(L) L[2]))
p2 <- unlist(lapply(fit2$p, FUN = function(L) L[2]))

par(mfrow = c(1, 2))
par(mar = c(2.5, 2.5, 1.5, 1.5))

plot(v10, v11, type = "l", lty = 2, xlab = "", ylab = "", main = "Hazard", col = "red")
lines(v10, 2/x1*log(1+x1*v10), col = "red")
lines(v20, v21, lty = 2, col = "blue")
lines(v20, 2/x2*log(1+x2*v20), col = "blue")

plot(v10, p1, type = "l", lty = 2, xlab = "", ylab = "", main = "Probability", col = "red")
lines(seq(0, 5, 0.01), P1, col = "red")
lines(v20, p2, lty = 2, col = "blue")
lines(seq(0, 5, 0.01), P2, col = "blue")

4. Semi-Markov model with independent censoring

Last but not least, we consider a time-inhomogeneous semi-Markov model with non-zero transition rates given by

jump_rate <- function(i, t, u){
  if(i == 1){
    0.1 + 0.002*t
  } else if(i == 2){
    ifelse(u < 4, 0.29, 0.09) + 0.001*t
  } else{
    0
  }
}

mark_dist <- function(i, s, v){
  if(i == 1){
    c(0, 0.9, 0.1)
  } else if(i == 2){
    c(0, 0, 1)
  } else{
    0
  }
}

We simulate 5, 000 independent and identically distributed realizations subject to independent right-censoring. Right-censoring follows the distribution Unif(10, 40).

n <- 5000

c <- runif(n, 10, 40)

sim <- list()
for(i in 1:n){
  sim[[i]] <- sim_path(1, rates = jump_rate, dists = mark_dist, tn = c[i],
                       bs = c(0.1+0.002*c[i], 0.29+0.001*c[i], 0))
}

The degree of censoring is

sum(c == unlist(lapply(sim, FUN = function(z){tail(z$times, 1)}))) / n
#> [1] 0.1758

We fit the Aalen–Johansen estimator.

fit <- aalen_johansen(sim)

For illustrative purposes, we plot the estimate of the state occupation probability p2 (dashed). The true values (full) are obtained via numerical integration, utilizing that this specific model has a hierarchical structure.

v0 <- fit$t
p <- unlist(lapply(fit$p, FUN = function(L) L[2]))
integrand <- function(t, s){
  exp(-0.1*s-0.001*s^2)*(0.09 + 0.0018*s)*exp(-0.20*pmin(t-s, 4)-0.09*(t-s)-0.0005*(t^2-s^2))
}
P <- Vectorize(function(t){
  integrate(f = integrand, lower = 0, upper = t, t = t)$value
}, vectorize.args = "t")
plot(v0, p, type = "l", lty = 2, xlab = "", ylab = "", main = "Probability")
lines(seq(0, 40, 0.1), P(seq(0, 40, 0.1)))

We now want to estimate the conditional state occupation probability p2, given sojourn in the second state at time 10 with duration u = 1, 5. For this, we first need to sub-sample the data (landmarking).

landmark <- sim[unlist(lapply(sim, FUN = function(z){any(z$times <= 10
                                                         & c(z$times[-1], Inf) > 10
                                                         & z$states == 2)}))]
landmark <- lapply(landmark, FUN = function(z){list(times = z$times, states = z$states,
                                                    X = 10 - z$times[z$times <= 10
                                                                     & c(z$times[-1], Inf) > 10
                                                                     & z$states == 2])})

The degree of sub-sampling is

length(landmark) / n
#> [1] 0.1852

Next, we fit the conditional Aalen–Johansen estimator for u = 1, 5. We also fit the usual landmark Aalen–Johansen estimator.

u1 <- 1
u2 <- 5

fit1 <- aalen_johansen(landmark, x = u1)
fit2 <- aalen_johansen(landmark, x = u2)
fit3 <- aalen_johansen(landmark)

For illustrative purposes, we plot the conditional state occupation probability p2 using the conditional estimator (dashed), the usual landmark estimator (dotted), and the true model (full). This is done for both u = 1 (in red) and u = 5 (in blue).

v10 <- fit1$t
v20 <- fit2$t
v30 <- fit3$t
p1 <- unlist(lapply(fit1$p, FUN = function(L) L[2]))
p2 <- unlist(lapply(fit2$p, FUN = function(L) L[2]))
p3 <- unlist(lapply(fit3$p, FUN = function(L) L[2]))
P <- function(t, u){
  exp(-(t-10)*0.09-(t^2-100)*0.0005-pmax(0, pmin(t, 4-(u-10))-10)*0.20)  
} 

plot(v10, p1, type = "l", lty = 2, xlab = "", ylab = "", main = "Probability",
     col = "red", xlim = c(10, 40))
lines(seq(10, 40, 0.1), P(seq(10, 40, 0.1), u1), col = "red")
lines(v20, p2, lty = 2, col = "blue")
lines(seq(10, 40, 0.1), P(seq(10, 40, 0.1), u2), col = "blue")
lines(v30, p3, lty = 3)