---
title: "Double Bayesian Predictive Stacking for (univariate) Spatial Analysis - Tutotial"
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{Double Bayesian Predictive Stacking for (univariate) Spatial Analysis - Tutotial}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
bibliography: references.bib
---

```{r, include = FALSE}
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
```

We provide a brief tutorial of the `spBPS` package. Here we shows the implementation of the Double Bayesian Predictive Stacking on synthetically univariate generated data. In particular, we focus on parallel computing using the packages `parallel`, `doParallel`; but it is not mandatory: it suffices to make it sequential. For any further details please refer to [@presicce2024]. More examples, for multivariate data, are available in documentation, and functions help.

```{r setup}
library(spBPS)
```

### Working packages
```{r, warning=FALSE, message=F}
library(foreach)
library(parallel)
library(doParallel)
library(tictoc)
library(MBA)
library(classInt)
library(RColorBrewer)
library(sp)
library(fields)
library(mvnfast)
```


### Data generation
We generate data from the model detailed in Equation (2.4) [@presicce2024], over a unit square.
```{r, results=F}
# dimensions
n <- 1000
u <- 250
p <- 2

# parameters
B <- c(-0.75, 1.85)
tau2 <- 0.25
sigma2 <- 1
delta <- tau2/sigma2
phi <- 4

set.seed(4-8-15-16-23-42)
# generate sintethic data
crd <- matrix(runif((n+u) * 2), ncol = 2)
X_or <- cbind(rep(1, n+u), matrix(runif((p-1)*(n+u)), ncol = (p-1)))
D <- arma_dist(crd)
Rphi <- exp(-phi * D)
W_or <- matrix(0, n+u) + mniw::rmNorm(1, rep(0, n+u), sigma2*Rphi)
Y_or <- X_or %*% B + W_or + mniw::rmNorm(1, rep(0, n+u), diag(delta*sigma2, n+u))

# train data
crd_s <- crd[1:n, ]
X <- X_or[1:n, ]
W <- W_or[1:n, ]
Y <- Y_or[1:n, ]

# prediction data
crd_u <- crd[-(1:n), ]
X_u <- X_or[-(1:n), ]
W_u <- W_or[-(1:n), ]
Y_u <- Y_or[-(1:n), ]
```


### Subset posterior models
We opt to divide the original data into `K=2`, such that each subsets results in 500 locations.
```{r, results=F}
# hyperparameters values
delta_seq <- c(0.2, 0.25, 0.3)
phi_seq <- c(3, 4, 5)

# function for the fit loop
fit_loop <- function(i) {

  Yi <- data_part$Y_list[[i]]
  Xi <- data_part$X_list[[i]]
  crd_i <- data_part$crd_list[[i]]
  p <- ncol(Xi)
  bps <- spBPS::BPS_weights(data = list(Y = Yi, X = Xi),
                           priors = list(mu_b = matrix(rep(0, p)),
                                         V_b = diag(10, p),
                                         a = 2,
                                         b = 2), coords = crd_i,
                           hyperpar = list(delta = delta_seq, phi = phi_seq), K = 5)
  w_hat <- bps$W
  epd <- bps$epd

  result <- list(epd, w_hat)
  return(result)

}

# function for the pred loop
pred_loop <- function(r) {

  ind_s <- subset_ind[r]
  Ys <- matrix(data_part$Y_list[[ind_s]])
  Xs <- data_part$X_list[[ind_s]]
  crds <- data_part$crd_list[[ind_s]]
  Ws <- W_list[[ind_s]]
  result <- spBPS::BPS_post(data = list(Y = Ys, X = Xs), coords = crds,
                           X_u = X_u, crd_u = crd_u,
                           priors = list(mu_b = matrix(rep(0, p)),
                                         V_b = diag(10, p),
                                         a = 2,
                                         b = 2),
                           hyperpar = list(delta = delta_seq, phi = phi_seq),
                           W = Ws, R = 1)

  return(result)
}


# subsetting data
subset_size <- 500
K <- n/subset_size
data_part <- subset_data(data = list(Y = matrix(Y), X = X, crd = crd_s), K = K)
```


### Double BPS parallel fit
Parallel implementation, exploiting 2 computing core.
```{r, results=F}
# number of clusters for parallel implementation
n.core <- 2

# list of function
funs_fit <- lsf.str()[which(lsf.str() != "fit_loop")]

# list of function
funs_pred <- lsf.str()[which(lsf.str() != "pred_loop")]

# starting cluster
cl <- makeCluster(n.core)
registerDoParallel(cl)

# timing
tic("total")

# parallelized subset computation of GP in different cores
tic("fit")
obj_fit <- foreach(i = 1:K, .noexport = funs_fit) %dopar% { fit_loop(i) }
fit_time <- toc()

gc(verbose = F)
# Combination using double BPS
tic("comb")
comb_bps <- BPS_combine(obj_fit, K, 1)
comb_time <- toc()
Wbps <- comb_bps$W
W_list <- comb_bps$W_list

gc(verbose = F)
# parallelized subset computation of GP in different cores
R <- 250
subset_ind <- sample(1:K, R, T, Wbps)
tic("prediction")
predictions <- foreach(r = 1:R, .noexport = funs_pred) %dopar% { pred_loop(r) }
prd_time <- toc()

# timing
tot_time <- toc()

# closing cluster
stopCluster(cl)
gc(verbose = F)
```


### Results collection
```{r}
# statistics computations W
pred_mat_W <- sapply(1:R, function(r){predictions[[r]][[1]]})
post_mean_W <- rowMeans(pred_mat_W)
post_var_W <- apply(pred_mat_W, 1, sd)
post_qnt_W <- apply(pred_mat_W, 1, quantile, c(0.025, 0.975))

# Empirical coverage for W
coverage_W <- mean(W_u >= post_qnt_W[1,] & W_u <= post_qnt_W[2,])
cat("Empirical coverage for Spatial process:", round(coverage_W, 3),"\n")

# statistics computations Y
pred_mat_Y <- sapply(1:R, function(r){predictions[[r]][[2]]})
post_mean_Y <- rowMeans(pred_mat_Y)
post_var_Y <- apply(pred_mat_Y, 1, sd)
post_qnt_Y <- apply(pred_mat_Y, 1, quantile, c(0.025, 0.975))

# Empirical coverage for Y
coverage_Y <- mean(Y_u >= post_qnt_Y[1,] & Y_u <= post_qnt_Y[2,])
cat("Empirical coverage for Response:", round(coverage_Y, 3),"\n")

# Root Mean Square Prediction Error
rmspe_W <- sqrt( mean( (W_u - post_mean_W)^2 ) )
rmspe_Y <- sqrt( mean( (Y_u - post_mean_Y)^2 ) )
cat("RMSPE for Spatial process:", round(rmspe_W, 3), "\n")
cat("RMSPE for Response:", round(rmspe_Y, 3), "\n")
```


### Plot results
```{r, echo=F, fig.dim = c(7.25, 4)}
# True spatial process surface interpolation
h <- 12
surf.W <- MBA::mba.surf(cbind(crd_s, W), no.X = 500, no.Y = 500,
                        exten = TRUE, sp = TRUE, h = h)$xyz.est
surf.brks <- classIntervals(surf.W$z, 100, 'pretty')$brks
col.pal <- colorRampPalette(brewer.pal(11,'RdBu')[11:1])
xlim <- c(0, 1)
zlim <- range(surf.W$z)

# image for plot
iw <- as.image.SpatialGridDataFrame(surf.W)

# BPS surfaces interpolation
h <- 12
surf.Wp <- MBA::mba.surf(cbind(crd_u, post_mean_W), no.X = 500, no.Y = 500,
                         exten = TRUE, sp = TRUE, h = h)$xyz.est
zlimp <- range(surf.Wp$z)

# image for plot
iwp <- as.image.SpatialGridDataFrame(surf.Wp)

# Plotting
oldpar <- par(no.readonly = TRUE)
par(mfrow = c(1, 2))

plot(crd, type="n", cex=0.5, xlim=xlim, axes=FALSE, ylab="Northing", xlab="Easting",
     main="Spatial process")
axis(2, las=1)
axis(1)
image.plot(iw, add=TRUE, col=rev(col.pal(length(surf.brks)-1)), zlim=zlim)

plot(crd, type="n", cex=0.5, xlim=xlim, axes=F, ylab="Northing", xlab="Easting")
title(main="Spatial process (Prediction)")
mtext(side = 3, paste("RMSPE :", round(rmspe_W, 3)))
axis(2, las=1)
axis(1)
image.plot(iwp, add=TRUE, col=rev(col.pal(length(surf.brks)-1)), zlim=zlimp)

par(oldpar)
```


```{r, echo=F, fig.dim = c(7.25, 4)}
# True response surface interpolation
h <- 12
surf.Y <- MBA::mba.surf(cbind(crd_s, Y), no.X = 500, no.Y = 500,
                        exten = TRUE, sp = TRUE, h = h)$xyz.est
surf.brks <- classIntervals(surf.Y$z, 100, 'pretty')$brks
col.pal <- colorRampPalette(brewer.pal(11,'RdBu')[11:1])
xlim <- c(0, 1)
zlim <- range(surf.Y$z)

# image for plot
iy <- as.image.SpatialGridDataFrame(surf.Y)

# BPS surfaces interpolation
h <- 12
surf.Yp <- MBA::mba.surf(cbind(crd_u, post_mean_Y), no.X = 500, no.Y = 500,
                         exten = TRUE, sp = TRUE, h = h)$xyz.est
zlimp <- range(surf.Yp$z)

# image for plot
iyp <- as.image.SpatialGridDataFrame(surf.Yp)

# Plotting
oldpar <- par(no.readonly = TRUE)
par(mfrow = c(1, 2))

plot(crd, type="n", cex=0.5, xlim=xlim, axes=FALSE, ylab="Northing", xlab="Easting",
     main="Response")
axis(2, las=1)
axis(1)
image.plot(iy, add=TRUE, col=rev(col.pal(length(surf.brks)-1)), zlim=zlim)

plot(crd, type="n", cex=0.5, xlim=xlim, axes=F, ylab="Northing", xlab="Easting")
title(main="Response (Prediction)")
mtext(side = 3, paste("RMSPE :", round(rmspe_Y, 3)))
axis(2, las=1)
axis(1)
image.plot(iyp, add=TRUE, col=rev(col.pal(length(surf.brks)-1)), zlim=zlimp)

par(oldpar)
```
------