This vignette illustrates, through
four examples, the potential uses of the R-package AalenJohansen
, which is an
implementation of the conditional Nelson–Aalen and Aalen–Johansen
estimators introduced in Bladt & Furrer (2023).
We start out with a simple time-inhomogeneous Markov model:
library(AalenJohansen)
set.seed(2)
jump_rate <- function(i, t, u){
if(i == 1){
2 / (1+1/2*t)
} else if(i == 2){
3 / (1+1/2*t)
} else{
0
}
}
mark_dist <- function(i, s, v){
if(i == 1){
c(0, 1/2, 1/2)
} else if(i == 2){
c(2/3, 0, 1/3)
} else{
0
}
}
lambda <- function(t){
A <- matrix(c(2/(1+1/2*t)*mark_dist(1, t, 0), 3/(1+1/2*t)*mark_dist(2, t, 0), rep(0, 3)),
nrow = 3, ncol = 3, byrow = TRUE)
diag(A) <- -rowSums(A)
A
}
We simulate 1, 000 independent and identically distributed realizations subject to independent right-censoring. Right-censoring follows the distribution Unif(0, 5).
n <- 1000
c <- runif(n, 0, 5)
sim <- list()
for(i in 1:n){
sim[[i]] <- sim_path(sample(1:2, 1), rates = jump_rate, dists = mark_dist,
tn = c[i], bs = c(2*c[i], 3*c[i], 0))
}
The degree of censoring is
We fit the classic Nelson–Aalen and Aalen-Johansen estimators.
For illustrative purposes, we plot Λ21 and the state occupation probability p2 for both the true model (full) and using the classic estimators (dashed).
v1 <- unlist(lapply(fit$Lambda, FUN = function(L) L[2,1]))
v0 <- fit$t
p <- unlist(lapply(fit$p, FUN = function(L) L[2]))
P <- unlist(lapply(prodint(0, 5, 0.01, lambda), FUN = function(L) (c(1/2, 1/2, 0) %*% L)[2]))
par(mfrow = c(1, 2))
par(mar = c(2.5, 2.5, 1.5, 1.5))
plot(v0, v1, type = "l", lty = 2, xlab = "", ylab = "", main = "Hazard")
lines(v0, 4*log(1+1/2*v0))
plot(v0, p, type = "l", lty = 2, xlab = "", ylab = "", main = "Probability")
lines(seq(0, 5, 0.01), P)
We now consider a simple extension with covariates:
We simulate 10, 000 independent realizations subject to independent right-censoring. Right-censoring follows the distribution Unif(0, 5), while X ∼ Unif(0, 1).
jump_rate <- function(i, t, u){
if(i == 1){
2
} else if(i == 2){
3
} else{
0
}
}
mark_dist <- function(i, s, v){
if(i == 1){
c(0, 1/2, 1/2)
} else if(i == 2){
c(2/3, 0, 1/3)
} else{
0
}
}
lambda <- function(t, x){
A <- matrix(c(2/(1+x*t)*mark_dist(1, t, 0), 3/(1+x*t)*mark_dist(2, t, 0), rep(0, 3)),
nrow = 3, ncol = 3, byrow = TRUE)
diag(A) <- -rowSums(A)
A
}
n <- 10000
X <- runif(n)
c <- runif(n, 0, 5)
sim <- list()
for(i in 1:n){
rates <- function(j, y, z){jump_rate(j, y, z)/(1+X[i]*y)}
sim[[i]] <- sim_path(sample(1:2, 1), rates = rates, dists = mark_dist,
tn = c[i], bs = c(2*c[i], 3*c[i], 0))
sim[[i]]$X <- X[i]
}
The degree of censoring is
We fit the conditional Nelson–Aalen and Aalen–Johansen estimators for x = 0.2, 0.8.
For illustrative purposes, we plot Λ21 and the conditional state occupation probability p2 for both the true model (full) and using the conditional estimators (dashed). This is done for both x = 0.2 (in red) and x = 0.8 (in blue).
v11 <- unlist(lapply(fit1$Lambda, FUN = function(L) L[2,1]))
v10 <- fit1$t
v21 <- unlist(lapply(fit2$Lambda, FUN = function(L) L[2,1]))
v20 <- fit2$t
p1 <- unlist(lapply(fit1$p, FUN = function(L) L[2]))
P1 <- unlist(lapply(prodint(0, 5, 0.01, function(t){lambda(t, x = x1)}),
FUN = function(L) (c(1/2, 1/2, 0) %*% L)[2]))
p2 <- unlist(lapply(fit2$p, FUN = function(L) L[2]))
P2 <- unlist(lapply(prodint(0, 5, 0.01, function(t){lambda(t, x = x2)}),
FUN = function(L) (c(1/2, 1/2, 0) %*% L)[2]))
par(mfrow = c(1, 2))
par(mar = c(2.5, 2.5, 1.5, 1.5))
plot(v10, v11, type = "l", lty = 2, xlab = "", ylab = "", main = "Hazard", col = "red")
lines(v10, 2/x1*log(1+x1*v10), col = "red")
lines(v20, v21, lty = 2, col = "blue")
lines(v20, 2/x2*log(1+x2*v20), col = "blue")
plot(v10, p1, type = "l", lty = 2, xlab = "", ylab = "", main = "Probability", col = "red")
lines(seq(0, 5, 0.01), P1, col = "red")
lines(v20, p2, lty = 2, col = "blue")
lines(seq(0, 5, 0.01), P2, col = "blue")
We consider the same model as before, but now introduce dependent right-censoring. To be precise, we assume that right-censoring occurs at rate $\frac{1}{2}\frac{1}{1+\frac{1}{2}t}$ in the first state, while it occurs at twice this rate in the second state. Finally, all remaining individuals are right-censored at time t.
We again simulate 10, 000 independent realizations.
jump_rate_enlarged <- function(i, t, u){
if(i == 1){
2.5
} else if(i == 2){
4
} else{
0
}
}
mark_dist_enlarged <- function(i, s, v){
if(i == 1){
c(0, 2/5, 2/5, 1/5)
} else if(i == 2){
c(2/4, 0, 1/4, 1/4)
} else{
0
}
}
n <- 10000
X <- runif(n)
tn <- 5
sim <- list()
for(i in 1:n){
rates <- function(j, y, z){jump_rate_enlarged(j, y, z)/(1+X[i]*y)}
sim[[i]] <- sim_path(sample(1:2, 1), rates = rates, dists = mark_dist_enlarged,
tn = tn, abs = c(FALSE, FALSE, TRUE, TRUE),
bs = c(2.5*tn, 4*tn, 0, 0))
sim[[i]]$X <- X[i]
}
The degree of censoring is
sum(tn == unlist(lapply(sim, FUN = function(z){tail(z$times, 1)}))
| 4 == unlist(lapply(sim, FUN = function(z){tail(z$states, 1)}))) / n
#> [1] 0.4373
We fit the conditional Nelson–Aalen and Aalen–Johansen estimators for x = 0.2, 0.8.
fit1 <- aalen_johansen(sim, x = x1, collapse = TRUE)
fit2 <- aalen_johansen(sim, x = x2, collapse = TRUE)
For illustrative purposes, we plot Λ21 and the conditional state occupation probability p2 for both the true model (full) and using the conditional estimators (dashed). This is done for both x = 0.2 (in red) and x = 0.8 (in blue).
v11 <- unlist(lapply(fit1$Lambda, FUN = function(L) L[2,1]))
v10 <- fit1$t
v21 <- unlist(lapply(fit2$Lambda, FUN = function(L) L[2,1]))
v20 <- fit2$t
p1 <- unlist(lapply(fit1$p, FUN = function(L) L[2]))
p2 <- unlist(lapply(fit2$p, FUN = function(L) L[2]))
par(mfrow = c(1, 2))
par(mar = c(2.5, 2.5, 1.5, 1.5))
plot(v10, v11, type = "l", lty = 2, xlab = "", ylab = "", main = "Hazard", col = "red")
lines(v10, 2/x1*log(1+x1*v10), col = "red")
lines(v20, v21, lty = 2, col = "blue")
lines(v20, 2/x2*log(1+x2*v20), col = "blue")
plot(v10, p1, type = "l", lty = 2, xlab = "", ylab = "", main = "Probability", col = "red")
lines(seq(0, 5, 0.01), P1, col = "red")
lines(v20, p2, lty = 2, col = "blue")
lines(seq(0, 5, 0.01), P2, col = "blue")
Last but not least, we consider a time-inhomogeneous semi-Markov model with non-zero transition rates given by
jump_rate <- function(i, t, u){
if(i == 1){
0.1 + 0.002*t
} else if(i == 2){
ifelse(u < 4, 0.29, 0.09) + 0.001*t
} else{
0
}
}
mark_dist <- function(i, s, v){
if(i == 1){
c(0, 0.9, 0.1)
} else if(i == 2){
c(0, 0, 1)
} else{
0
}
}
We simulate 5, 000 independent and identically distributed realizations subject to independent right-censoring. Right-censoring follows the distribution Unif(10, 40).
n <- 5000
c <- runif(n, 10, 40)
sim <- list()
for(i in 1:n){
sim[[i]] <- sim_path(1, rates = jump_rate, dists = mark_dist, tn = c[i],
bs = c(0.1+0.002*c[i], 0.29+0.001*c[i], 0))
}
The degree of censoring is
We fit the Aalen–Johansen estimator.
For illustrative purposes, we plot the estimate of the state occupation probability p2 (dashed). The true values (full) are obtained via numerical integration, utilizing that this specific model has a hierarchical structure.
v0 <- fit$t
p <- unlist(lapply(fit$p, FUN = function(L) L[2]))
integrand <- function(t, s){
exp(-0.1*s-0.001*s^2)*(0.09 + 0.0018*s)*exp(-0.20*pmin(t-s, 4)-0.09*(t-s)-0.0005*(t^2-s^2))
}
P <- Vectorize(function(t){
integrate(f = integrand, lower = 0, upper = t, t = t)$value
}, vectorize.args = "t")
plot(v0, p, type = "l", lty = 2, xlab = "", ylab = "", main = "Probability")
lines(seq(0, 40, 0.1), P(seq(0, 40, 0.1)))
We now want to estimate the conditional state occupation probability p2, given sojourn in the second state at time 10 with duration u = 1, 5. For this, we first need to sub-sample the data (landmarking).
landmark <- sim[unlist(lapply(sim, FUN = function(z){any(z$times <= 10
& c(z$times[-1], Inf) > 10
& z$states == 2)}))]
landmark <- lapply(landmark, FUN = function(z){list(times = z$times, states = z$states,
X = 10 - z$times[z$times <= 10
& c(z$times[-1], Inf) > 10
& z$states == 2])})
The degree of sub-sampling is
Next, we fit the conditional Aalen–Johansen estimator for u = 1, 5. We also fit the usual landmark Aalen–Johansen estimator.
u1 <- 1
u2 <- 5
fit1 <- aalen_johansen(landmark, x = u1)
fit2 <- aalen_johansen(landmark, x = u2)
fit3 <- aalen_johansen(landmark)
For illustrative purposes, we plot the conditional state occupation probability p2 using the conditional estimator (dashed), the usual landmark estimator (dotted), and the true model (full). This is done for both u = 1 (in red) and u = 5 (in blue).
v10 <- fit1$t
v20 <- fit2$t
v30 <- fit3$t
p1 <- unlist(lapply(fit1$p, FUN = function(L) L[2]))
p2 <- unlist(lapply(fit2$p, FUN = function(L) L[2]))
p3 <- unlist(lapply(fit3$p, FUN = function(L) L[2]))
P <- function(t, u){
exp(-(t-10)*0.09-(t^2-100)*0.0005-pmax(0, pmin(t, 4-(u-10))-10)*0.20)
}
plot(v10, p1, type = "l", lty = 2, xlab = "", ylab = "", main = "Probability",
col = "red", xlim = c(10, 40))
lines(seq(10, 40, 0.1), P(seq(10, 40, 0.1), u1), col = "red")
lines(v20, p2, lty = 2, col = "blue")
lines(seq(10, 40, 0.1), P(seq(10, 40, 0.1), u2), col = "blue")
lines(v30, p3, lty = 3)