--- title: "Simultaneous autoregressive process" author: "James T. Thorson" output: rmarkdown::html_vignette #output: rmarkdown::pdf_document vignette: > %\VignetteIndexEntry{Simultaneous autoregressive process} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} %\VignetteDepends{rnaturalearth} --- ```{r, include = FALSE} has_ggplot = requireNamespace("ggplot2", quietly = TRUE) EVAL <- has_ggplot knitr::opts_chunk$set( collapse = TRUE, comment = "#>", eval = EVAL, purl = EVAL ) # Install locally # devtools::install_local( R'(C:\Users\James.Thorson\Desktop\Git\tinyVAST)', force=TRUE ) # Build # setwd(R'(C:\Users\James.Thorson\Desktop\Git\tinyVAST)'); devtools::build_rmd("vignettes/simultaneous_autoregressive_process.Rmd"); rmarkdown::render( "vignettes/simultaneous_autoregressive_process.Rmd", rmarkdown::pdf_document()) ``` ```{r setup, echo=TRUE, warning=FALSE, message=FALSE} library(tinyVAST) library(igraph) library(rnaturalearth) library(sf) options("tinyVAST.verbose" = FALSE) ``` `tinyVAST` is an R package for fitting vector autoregressive spatio-temporal (VAST) models using a minimal and user-friendly interface. We here show how it can fit a multivariate second-order autoregressive (AR2) model including spatial correlations using a simultaneous autoregressive (SAR) process specified using _igraph_. To do so, we first load salmong returns, and remove 0s to allow comparison between Tweedie and lognormal distributions. ```{r, eval=TRUE, echo=TRUE, message=FALSE, fig.width=6, fig.height=6} data( salmon_returns ) # Transform data salmon_returns$Biomass_nozeros = ifelse( salmon_returns$Biomass==0, NA, salmon_returns$Biomass ) Data = na.omit(salmon_returns) ``` We first explore an AR2 process, with independent variation among regions. This model shows a substantial first-order autocorrelation for sockeye and chum, and substantial second-order autocorrelation for pink salmon. An AR(2) process is stationary if $\phi_1 + \phi_2 < 1$ and $\phi_2 - \phi_1 < 1$, and this stationarity criterion suggests that each time-series is close to (but not quite) nonstationary. ```{r, eval=TRUE, echo=TRUE, message=FALSE, fig.width=6, fig.height=6} # Define graph for SAR process unconnected_graph = make_empty_graph( nlevels(Data$Region) ) V(unconnected_graph)$name = levels(Data$Region) plot(unconnected_graph) # Define SEM for AR2 process dsem = " sockeye -> sockeye, -1, lag1_sockeye sockeye -> sockeye, -2, lag2_sockeye pink -> pink, -1, lag1_pink pink -> pink, -2, lag2_pink chum -> chum, -1, lag1_chum chum -> chum, -2, lag2_chum " # Fit tinyVAST model mytiny0 = tinyVAST( formula = Biomass_nozeros ~ 0 + Species + Region, data = Data, spacetime_term = dsem, variable_column = "Species", time_column = "Year", space_column = "Region", distribution_column = "Species", family = list( "chum" = lognormal(), "pink" = lognormal(), "sockeye" = lognormal() ), spatial_domain = unconnected_graph, control = tinyVASTcontrol( profile="alpha_j" ) ) # Summarize output Summary = summary(mytiny0, what="spacetime_term") knitr::kable( Summary, digits=3) ``` We also explore an SAR process for adjacency among regions ```{r, eval=TRUE, echo=TRUE, message=FALSE, fig.width=6, fig.height=6} # Define graph for SAR process adjacency_graph = make_graph( ~ Korea - Japan - M.I - WKam - EKam - WAK - SPen - Kod - CI - PWS - SEAK - NBC - SBC - WA ) ``` We can plot this adjacency on a map to emphasize that it is a simple way to encode information about spatial proximity: ```{r, eval=TRUE, echo=TRUE, message=FALSE, fig.width=7, fig.height=5} #maps = ne_countries( country = c("united states of america","russia","canada","south korea","north korea","japan") ) maps = ne_countries( continent = c("north america","asia","europe") ) maps = st_combine( maps ) maps = st_transform( maps, crs=st_crs(3832) ) #maps = st_crop( maps, xmin = -5*1e5, xmax = 12*1e5, # ymin = 0 * 1e6, ymax=10 * 1e6 ) # Format inputs loc_xy = cbind( x = c(129,143,140,156,163,-163,-161,-154,-154,-147,-138,-129,-126,-125), y = c(36,40,57,53,57,60,55,56,59,61,57,54,50,45) ) loc_xy = sf_project( loc_xy, from=st_crs(4326), to=st_crs(3832) ) # Plot xlim = c(-4,10) * 1e6 ylim = c(3,10) * 1e6 plot( maps, xlim = xlim, ylim = ylim, col = "grey", asp = FALSE, add = FALSE ) plot( adjacency_graph, layout = loc_xy, add = TRUE, rescale = FALSE, vertex.label.color = "red", xlim = xlim, ylim = ylim, edge.width = 2, edge.color = "red" ) ``` We can then pass this adjacency graph to `tinyVAST` during fitting: ```{r, eval=TRUE, echo=TRUE, message=FALSE, fig.width=6, fig.height=6, warning=FALSE} # Fit tinyVAST model mytiny = tinyVAST( formula = Biomass_nozeros ~ 0 + Species + Region, data = Data, spacetime_term = dsem, variable_column = "Species", time_column = "Year", space_column = "Region", distribution_column = "Species", family = list( "chum" = lognormal(), "pink" = lognormal(), "sockeye" = lognormal() ), spatial_domain = adjacency_graph, control = tinyVASTcontrol( profile="alpha_j" ) ) # Summarize output Summary = summary(mytiny, what="spacetime_term") knitr::kable( Summary, digits=3) ``` We can use AIC to compare these two models. This comparison suggests that spatial adjancency is not a parsimonious way to describe correlations among time-series. ```{r, eval=TRUE, echo=TRUE, message=FALSE, fig.width=6, fig.height=6} # AIC for unconnected time-series AIC(mytiny0) # AIC for SAR spatial variation AIC(mytiny) ``` Finally, we can plot observations and predictions for the selected model ```{r, eval=TRUE, echo=TRUE, message=FALSE, fig.width=6, fig.height=6} # Compile long-form dataframe of observations and predictions Resid = rbind( cbind(Data[,c('Species','Year','Region','Biomass_nozeros')], "Which"="Obs"), cbind(Data[,c('Species','Year','Region')], "Biomass_nozeros"=predict(mytiny0,Data), "Which"="Pred") ) # plot using ggplot library(ggplot2) ggplot( data=Resid, aes(x=Year, y=Biomass_nozeros, col=Which) ) + # , group=yhat.id geom_line() + facet_grid( rows=vars(Region), cols=vars(Species), scales="free" ) + scale_y_continuous(trans='log') # ```