Predictor Identifier: Nonparametric PREDiction

library(NPRED)
require(zoo)
#> Loading required package: zoo
#> 
#> Attaching package: 'zoo'
#> The following objects are masked from 'package:base':
#> 
#>     as.Date, as.Date.numeric
require(ggplot2)
#> Loading required package: ggplot2

Synthetic data generation

# Other statistical models for generating synthetic data see Package - synthesis
require(synthesis)
#> Loading required package: synthesis
#> 
#> Attaching package: 'synthesis'
#> The following objects are masked from 'package:NPRED':
#> 
#>     data.gen.ar1, data.gen.ar4, data.gen.ar9

set.seed(2020) # set seed for reproducible results
# AR1 model from paper with 9 dummy variables
data.ar1 <- synthesis::data.gen.ar1(500)

# AR4 model from paper with total 9 dimensions
data.ar4 <- synthesis::data.gen.ar4(500)

# AR9 model from paper with total 9 dimensions
data.ar9 <- synthesis::data.gen.ar9(500)

# plot.zoo(cbind(data.ar1$x, data.ar4$x, data.ar9$x),
#   xlab = NA, main = "Example of AR models",
#   ylab = c("AR1", "AR4", "AR9"))

Partial informational correlation (PIC)

# calculate PIC
pic.calc(data.ar1$x, data.ar1$dp)
#> $pmi
#> [1] 0.69319346 0.41783003 0.26170451 0.17923658 0.12102203 0.09080493 0.05591261
#> [8] 0.02951112 0.01654911
#> 
#> $pic
#> [1] 0.8660388 0.7526034 0.6383594 0.5488694 0.4636577 0.4075210 0.3252683
#> [8] 0.2394038 0.1804341

pic.calc(data.ar4$x, data.ar4$dp)
#> $pmi
#> [1]  0.273343774  0.031705618  0.001377018  0.151620432  0.235788221
#> [6]  0.126943971  0.021324429 -0.008302517  0.030173933
#> 
#> $pic
#> [1] 0.6489498 0.2478761 0.0524428 0.5114477 0.6131739 0.4735201 0.2043335
#> [8] 0.0000000 0.2419980

pic.calc(data.ar9$x, data.ar9$dp)
#> $pmi
#> [1]  0.034701149 -0.008256610  0.073552041  0.353385264 -0.004119804
#> [6]  0.027551717  0.088435930  0.091603445  0.049201990
#> 
#> $pic
#> [1] 0.2589377 0.0000000 0.3698593 0.7118746 0.0000000 0.2315443 0.4026324
#> [8] 0.4091505 0.3061328

Predictor identifier: stepwise.PIC

pic1 <- stepwise.PIC(data.ar1$x, data.ar1$dp)

pic4 <- stepwise.PIC(data.ar4$x, data.ar4$dp)

pic9 <- stepwise.PIC(data.ar9$x, data.ar9$dp)

Partial weights (PW)

# calculate partial weights
pw.calc(data.ar1$x, data.ar1$dp, pic1$cpy, pic1$cpyPIC)
#> $pw
#> [1] 1

pw.calc(data.ar4$x, data.ar4$dp, pic4$cpy, pic4$cpyPIC)
#> $pw
#> [1] 0.5405296 0.4594704

pw.calc(data.ar9$x, data.ar9$dp, pic9$cpy, pic9$cpyPIC)
#> $pw
#> [1] 0.46352053 0.32817626 0.16792648 0.04037673

Nonparameteric prediction: knn

data("data3")
x <- ts(data3[, 1]) # response
z <- ts(data3[, -1]) # possible predictors
zout <- ts(data.gen.ar1(500, ndim = 15)$dp) # new input

xhat1 <- xhat2 <- x
# xhat1 <- NPRED::knn(x,z,zout,k=5,reg=T,extrap=F)
# xhat2 <- NPRED::knn(x,z,zout,k=5,reg=T,extrap=T)

for (i in 1:500) {
  xhat1[i] <- NPRED::knn(x[-i], z[-i, ], z[i, ], extrap = F)
  xhat2[i] <- NPRED::knn(x[-i], z[-i, ], z[i, ], extrap = T)
}

if (TRUE) {
  par(mfrow = c(1, 1),
      mar=c(3,2,1,1), mgp=c(1.5,0.5,0),
      pty = c("m"))

  ts.plot(x, xhat1, xhat2, col = c("black","red","blue"), ylim = c(-10, 10), lwd = c(1, 1, 1))
  legend("topleft",
    bty = "n", lwd = 3, cex = 1, lty = 1,
    # inset = c(-0.5, 0),
    legend = c("OBS", "Pred", "Pred(extrap=T)"),
    x.intersp = 0, xjust = 0, yjust = 0, text.width = c(0, 50, 50), horiz = T,
    col = c("black","red","blue"))

  par(mfrow = c(1, 1), pty = c("s"))
  plot(xhat1, xhat2, xlim = c(-10, 10), ylim = c(-10, 10))
  abline(coef = c(0, 1), lwd = 1, col = 2)

}
Example of KNN implemented in NPRED

Example of KNN implemented in NPRED

Example of KNN implemented in NPRED

Example of KNN implemented in NPRED

Illustration of the usage of partial weights

sample <- 500
k <- 0
u <- runif(sample, 0, 5 * pi)
z <- sin(u) + rnorm(sample, sd = 0.2)

u1 <- cbind(u, runif(sample, 0, 5 * pi), runif(sample, 0, 5 * pi), runif(sample, 0, 5 * pi))
# zhat1 <- knnregl1cv(x=z, z=u1, k=k)
zhat1 <- sapply(1:sample, function(i) knn(x = z[-i], z = u1[-i, ], zout = u1[i, ], k = k))

sel <- stepwise.PIC(x = z, py = u1)
sel
#> $cpy
#> [1] 1
#> 
#> $cpyPIC
#> [1] 0.1606175
#> 
#> $wt
#> [1] 1
#> 
#> $lstwet
#>   Intercept           X 
#> 0.141643996 0.002144423 
#> 
#> $icpy
#> [1] 1
# zhat2 <- knnregl1cv(x=z, z=u1[,sel$cpy], k=k)
zhat2 <- sapply(1:sample, function(i) knn(x = z[-i], z = u1[-i, sel$cpy], zout = u1[i, sel$cpy], k = k))

if (TRUE) {
  par(mfrow = c(1, 1), pty = c("m"))
  
  plot(u, z, pch = 16)
  lines(sort(u), zhat1[order(u)], col = "green")
  lines(sort(u), zhat2[order(u)], col = "red")
  abline(a = 0, b = 0)
}
Illustration of the usage of partial weights

Illustration of the usage of partial weights

Application to predicting drought anomalies

# An demo example used in Jiang, Z., Rashid, M. M., Johnson, F., & Sharma, A. (2020)
# Jiang, Z., Rashid, M. M., Johnson, F., & Sharma, A. (2021). A wavelet-based tool to modulate variance in predictors: An application to predicting drought anomalies. Environmental Modelling and Software, 135, 104907.

require(WASP)
#> Loading required package: WASP
#> 
#> Attaching package: 'WASP'
#> The following objects are masked from 'package:synthesis':
#> 
#>     data.gen.HL, data.gen.Rossler, data.gen.SW, data.gen.ar1,
#>     data.gen.ar4, data.gen.ar9, data.gen.tar1, data.gen.tar2
#> The following objects are masked from 'package:NPRED':
#> 
#>     data.gen.ar1, data.gen.ar4, data.gen.ar9, knn, knnregl1cv, pic.calc
#load response and predictor variables
data("SPI.12", package="WASP"); 
data("data.CI", package="WASP")
data("Ind_AWAP.2.5", package="WASP")

#study grids and period
Grid <- sample(Ind_AWAP.2.5,1)
Grid = 149 #Grid used in the SI
SPI.12.ts <- window(SPI.12, start=c(1910,1),end=c(2009,12))
data.CI.ts <- window(data.CI, start=c(1910,1),end=c(2009,12))
#partition into two folds
folds <- cut(seq(1,nrow(SPI.12.ts)),breaks=2,labels=FALSE)
sub.cali <- which(folds==1, arr.ind=TRUE); sub.vali <- which(folds==2, arr.ind=TRUE)
#-------------------------------------------------------------------------------------
###calibration and selection
data <- list(x=SPI.12.ts[sub.cali,Grid],dp=data.CI.ts[sub.cali,])

#variance transformation - calibration
modwt <- modwt.vt(data, wf="haar", J=8, boundary="periodic")

#stepwise PIC selection
sel <- NPRED::stepwise.PIC(modwt$x, modwt$dp.n)
#-------------------------------------------------------------------------------------
###validation and prediction
data.val <- list(x=SPI.12.ts[sub.vali,Grid],dp=data.CI.ts[sub.vali,])

#variance transformation - validation
modwt.val <- modwt.vt.val(data.val, J=8, modwt)

#knn prediction
cpy <- sel$cpy; wt <- sel$wt
x=data$x; z=modwt$dp.n[,cpy]; zout=modwt.val$dp.n[,cpy]
mod <- knn(x, z, zout, k=5, pw=wt, extrap=T)

#-------------------------------------------------------------------------------------
###plot
start.cal <- c(1910,1); start.val <- c(1960,1)
ndim = ncol(data.CI.ts); CI.names = colnames(data.CI.ts)
par(mfcol=c(ndim+1,2),mar=c(2,4,2,2),mgp=c(1.5,0.5,0),
    bg = "white",pty="m",cex.lab=1.5,ps=8)
#----------------------------------------------
#plot before and after vt - calibration
if(TRUE){

  x <- ts(modwt$x, start=start.cal, frequency = 12)
  dp <- ts(modwt$dp, start=start.cal, frequency = 12)
  dp.n <- ts(modwt$dp.n, start=start.cal, frequency = 12)

  ts.plot(x,xlab=NA, main=paste0("Sampled Grid: ", Grid), ylab=paste0("SPI",12), col=c("black"),lwd=c(1))
  for(nc in 1:ndim)
    ts.plot(dp[,nc],dp.n[,nc],xlab=NA,ylab=paste0(CI.names[nc]),
            col=c("red","blue"),lwd=c(1,1),lty=c(1,2))

}
#----------------------------------------------
#plot before and after vt - validation
if(TRUE){

  x <- ts(modwt.val$x, start=start.val, frequency = 12)
  dp <- ts(modwt.val$dp, start=start.val, frequency = 12)
  dp.n <- ts(modwt.val$dp.n, start=start.val, frequency = 12)

  ts.plot(x, xlab=NA, main=paste0("Sampled Grid: ",Grid), ylab=paste0("SPI",12), col=c("black"),lwd=c(1))
  for(nc in 1:ndim)
    ts.plot(dp[,nc],dp.n[,nc],xlab=NA,ylab=paste0(CI.names[nc]),
            col=c("red","blue"),lwd=c(1,1),lty=c(1,2))

}
Application to predicting drought anomalies

Application to predicting drought anomalies