In this simulation study, we aim to see whether AMIS is capable of recovering the true parameter values used to simulate data.
We assumed that the number of worms X per host has negative binomial distribution NB(W,k), where W is the mean number of worms and k the dispersion parameter describing the degree of clumping of parasites. The probability mass function is given by
As such, the prevalence is given by the probability of observing worms:
We assumed that the mean number of worms is spatially-varying: Wl = 3exp { − 0.5 sl′Σsl}, l = 1, …, L. where sl = (lonl, latl)′ are the spatial coordinates of location l and Σ is a 2 × 2 matrix with entries Σ11 = Σ22 = 1 and Σ12 = Σ21 = 0.7. We generated data on a regular grid of L = 50 × 50 = 2500 coordinate values, with lonl ∈ [−4, 4] and latl ∈ [−2, 2].
Finally, we assumed the dispersion parameter is given by kl = 0.5exp {0.3Wl}, l = 1, …, L, so that it is also spatially-varying.
We assumed each location l has nl hosts, so that the number of worms per host in location l at time t was simulated by Xi, l, t ∼ NB(Wl, kl), i = 1, …, nl, so that the prevalence in location l at time t is given by $$ P_{l,t} = \frac{1}{n_l}\sum_{i=1}^{n_l}\text{I}_{\{X_{i,l,t}>0\}}. $$
We set nl = 500 and generated M = 100 map samples for each location at each time t ∈ {1, 2, 3}. Then, we fit AMIS to these map samples using a maximum of 10 iterations, with 1000 samples per iteration, and putting independent exponential priors on W and k.
lon <- seq(-4, 4, length.out=50)
lat <- seq(-2, 2, length.out=50)
simData <- expand.grid(lon=lon, lat=lat, W=NA, k=NA, expectedPrev=NA)
L <- nrow(simData) # number of locations
Sigma <- diag(2)
Sigma[1,2] <- Sigma[2,1] <- 0.7
for(l in 1:L){
pos <- as.numeric(simData[l,c("lon","lat")])
W <- 5*exp(-0.5*c(t(pos)%*%Sigma%*%pos))
k <- 0.4
simData$W[l] <- W
simData$k[l] <- k
simData$expectedPrev[l] <- 1 - (1 + W/k)^(-k)
}
Below we can see how the mean number of worms and the corresponding prevalence (at the equilibrium distribution) vary over space.
simData_sf <- sf::st_as_sf(simData, coords=c("lon","lat"))
th <- theme(axis.text=element_text(size=10),
legend.text=element_text(size=10))
plots_true_data <- vector(mode = "list", length = 2)
plots_true_data[[1]] <- ggplot(data=simData_sf) +
geom_sf(aes(colour=W),shape=15,size=5) + xlab("Lon") + ylab("Lat") + th +
scale_colour_gradientn(colours=rev(viridis::magma(6)), name="W", na.value="grey100")
plots_true_data[[2]] <- ggplot(data=simData_sf) +
geom_sf(aes(colour=expectedPrev),shape=15,size=5) +
xlab("Lon") + ylab("Lat") + th +
scale_colour_gradientn(colours=rev(viridis::magma(6)),
name="E[P]", na.value="grey100",
limits=c(0,1))
wrap_plots(plots_true_data, guides = 'keep', ncol = 2)
set.seed(1)
n_hosts <- 500 # number of hosts per location
M <- 100 # number of statistical map samples per location
n_tims <- 3 # number of time points
prevalence_map <- vector(mode = "list", length = n_tims)
for(t in 1:n_tims){
prevs_t <- matrix(NA,L,M)
for (l in 1:L) {
for(m in 1:M){
W <- simData$W[l]
k <- simData$k[l]
num_worms_l <- rnbinom(n=n_hosts, mu=W, size=k)
prevs_t[l,m] <- sum(num_worms_l>0)/n_hosts
}
}
prevalence_map[[t]]$data <- prevs_t
}
# Prior distribution for the model parameters
rprior <- function(n) {
params <- matrix(NA, n, 2)
colnames(params) <- c("W", "k")
params[,1] <- rexp(n=n, rate=lambda_W)
params[,2] <- rexp(n=n, rate=lambda_k)
return(params)
}
dprior <- function(x, log=FALSE) {
if (log) {
return(dexp(x=x[1], rate=lambda_W, log=TRUE) + dexp(x=x[2], rate=lambda_k, log=TRUE))
} else {
return(dexp(x=x[1], rate=lambda_W) * dexp(x=x[2], rate=lambda_k))
}
}
prior <- list(rprior=rprior,dprior=dprior)
amis_params <- AMISforInfectiousDiseases:::default_amis_params()
amis_params$n_samples <- 500
amis_params$target_ess <- 10000
amis_params$max_iters <- 10
amis_params$delta <- 0.1
In the lines below, we fit AMIS to the statistical maps by using different prior specifications for k, namely Exp(λk), λk ∈ {0.1, 0.3, 1}.
We can look at the posterior distribution for k at some particular location, e.g. the location with the largest mean number of worms:
loc <- which.max(simData$W)
xlimW <- c(0,10)
Wvals <- seq(xlimW[1],xlimW[2],len=100)
W_prior_dens <- dexp(Wvals, rate=lambda_W)
xlimk <- c(0,10)
kvals <- seq(xlimk[1],xlimk[2],len=100)
i <- 1
plots_k <- vector(mode = "list", length = num_lambda_k)
plots_W <- vector(mode = "list", length = num_lambda_k)
for(lambda_k in lambda_k_values){
k_prior_dens <- dexp(kvals, rate=lambda_k)
out <- outList[[i]]
# plotting W
plot(out, what = "W", type="hist", breaks=50, xlim=xlimW,
locations=loc, main=NA)
lines(Wvals, W_prior_dens, col="blue", lwd=2, lty=2)
abline(v = simData$W[loc], col="red", lwd=2, lty=2)
plots_W[[i]] <- recordPlot()
# plotting k
plot(out, what = "k", type="hist", breaks=100, xlim=xlimk,
locations=loc, main=bquote(lambda[k]*"="*.(lambda_k)))
lines(kvals, k_prior_dens, col="blue", lwd=2, lty=2)
abline(v = simData$k[loc], col="red", lwd=2, lty=2)
plots_k[[i]] <- recordPlot()
i <- i + 1
}
We can clearly see that the posterior distribution of k is largely determined by the prior, and that the existence of multiple time points did not help the recovery of the true parameter values.
Now, we assume that an intervention (reduction of the mean number of worms by 70%) occurs at time t = 2 and still has effect at t = 3.
for(t in 2:3){
prevs_t <- matrix(NA,L,M)
for (l in 1:L) {
for(m in 1:M){
W <- simData$W[l] * 0.3
k <- simData$k[l]
num_worms_l <- rnbinom(n=n_hosts, mu=W, size=k)
prevs_t[l,m] <- sum(num_worms_l>0)/n_hosts
}
}
prevalence_map[[t]]$data <- prevs_t
}
The intervention is known and is also taken into account in the transmission model:
transmission_model <- function(seeds, params, n_tims){
n_samples <- nrow(params)
p <- matrix(NA, nrow=n_samples, ncol=n_tims)
for(t in 1:n_tims){
for (i in 1:n_samples){
if(t==1){
W <- params[i,1]
}else{
W <- params[i,1] * 0.3
}
k <- params[i,2]
p[i,t] <- 1-(1+W/k)^(-k)
}
}
return(p)
}
We then fit AMIS using different priors for k as before.
outList <- vector("list", num_lambda_k)
i <- 1
for(lambda_k in lambda_k_values){
outList[[i]] <- amis(prevalence_map, transmission_model, prior, amis_params, seed=1)
i <- i + 1
}
Now, after introducing intervention, the posterior distribution for k is no longer determined by the prior:
xlimk <- c(0,3)
kvals <- seq(xlimk[1],xlimk[2],len=100)
i <- 1
for(lambda_k in lambda_k_values){
k_prior_dens <- dexp(kvals, rate=lambda_k)
out <- outList[[i]]
# plotting W
plot(out, what = "W", type="hist", breaks=50, xlim=xlimW,
locations=loc, main=NA)
lines(Wvals, W_prior_dens, col="blue", lwd=2, lty=2)
abline(v = simData$W[loc], col="red", lwd=2, lty=2)
plots_W[[i]] <- recordPlot()
# plotting k
plot(out, what = "k", type="hist", breaks=100/lambda_k, xlim=xlimk,
locations=loc, main=bquote(lambda[k]*"="*.(lambda_k)))
lines(kvals, k_prior_dens, col="blue", lwd=2, lty=2)
abline(v = simData$k[loc], col="red", lwd=2, lty=2)
plots_k[[i]] <- recordPlot()
i <- i + 1
}
In what follows, using the model with Exp(0.1) prior on k, we look at the posterior median for W over space at time t = 1.
out <- outList[[3]]
th <- theme(plot.title = element_text(size = 16, hjust = 0.5))
limits <- c(0,5)
W_post_medians <- sapply(1:L, function(l) calculate_summaries(out, what="W", locations=l)$median)
simData_sf$W_post_medians <- W_post_medians
plot_W_post_median <- ggplot(data=simData_sf) +
geom_sf(aes(colour=W_post_medians),shape=15,size=5) +
scale_colour_gradientn(colours=viridis::turbo(10),
name="Value",
na.value = "grey100",
limits = limits) +
xlab("Lon") + ylab("Lat") + th +
ggtitle("Posterior median of W")
pWtrue <- ggplot(data=simData_sf) +
geom_sf(aes(colour=W),shape=15,size=5) +
scale_colour_gradientn(colours=viridis::turbo(10),
name="Value",
na.value = "grey100",
limits = limits) + xlab("Lon") + ylab("Lat") + th +
ggtitle("True W")
wrap_plots(list(pWtrue, plot_W_post_median), guides = 'collect', ncol = 2)
Note that we assumed a constant true parameter value k = 0.4 for all locations, but obtained posterior samples for each location. To see how much the estimates for k varied over space, we can look at the posterior median of each location: