GPR - example 2

library(GPFDA)
require(MASS)
# packages required for visualisation:
require(interp)
require(fields)

Simulating data from a GP with 2-dimensional input

We simulate 10 independent realisations (surfaces) from a zero-mean GP with a Matern (ν = 3/2) covariance function. Each observed surface has a sample size of 30 × 30 = 900 points on [0, 1]2.

set.seed(123)
nrep <- 10
n1 <- 30
n2 <- 30
n <- n1*n2
input1 <- seq(0,1,len=n1)
input2 <- seq(0,1,len=n2)
input <- as.matrix(expand.grid(input1=input1, input2=input2))
hp <- list('matern.v'=log(2),'matern.w'=c(log(20), log(25)),'vv'=log(0.2))
nu <- 1.5
Sigma <- cov.matern(hyper = hp, input = input, nu = nu) + diag(exp(hp$vv), n)
Y <- t(mvrnorm(n=nrep, mu=rep(0,n), Sigma=Sigma))

We now split the dataset into training and test sets, leaving about 80% of the observations for the test set.

idx <- expand.grid(1:n1, 1:n2)
n1test <- floor(n1*0.8)
n2test <- floor(n2*0.8)
idx1 <- sort(sample(1:n1, n1test))
idx2 <- sort(sample(1:n2, n2test))
whichTest <- idx[,1]%in%idx1 & idx[,2]%in%idx2

inputTest <- input[whichTest, ]
Ytest <- Y[whichTest, ]
inputTrain <- input[!whichTest, ]
Ytrain <- Y[!whichTest, ]

Estimation

Estimation of the GPR model is done by:

fit <- gpr(input=inputTrain, response=Ytrain, Cov='matern', trace=4, useGradient=T,
            iter.max=50, nu=nu, nInitCandidates=50)
#> 
#>  --------- Initialising ---------- 
#> iter:  -loglik:     matern.v     matern.w1     matern.w2     vv     
#>   0:     5358.2889:  4.36887  8.17782 0.587245 -0.792392
#>   4:     3914.3805: -0.658306  6.85254  4.96447 -2.12831
#>   8:     3074.1300: 0.507486  3.95414  3.45436 -1.72688
#>  12:     3040.0058: 0.342478  3.47543  3.51811 -1.62289
#>  16:     3036.1037: 0.539588  3.16273  3.30576 -1.62608
#>  20:     3035.9589: 0.553369  3.17162  3.26757 -1.63124
#> 
#>      optimization finished.

The hyperparameter estimates are:

sapply(fit$hyper, exp)
#> $matern.v
#> [1] 1.739
#> 
#> $matern.w
#> [1] 23.84 26.25
#> 
#> $vv
#> [1] 0.1957

Prediction

Predictions for the test set can then be found:

pred <- gprPredict(train=fit, inputNew=inputTest, noiseFreePred=T)
zlim <- range(c(pred$pred.mean, Ytest))

plotImage(response = Ytest, input = inputTest, realisation = 1, 
            n1 = n1test, n2 = n2test,
            zlim = zlim, main = "observed")


plotImage(response = pred$pred.mean, input = inputTest, realisation = 1, 
            n1 = n1test, n2 = n2test,
            zlim = zlim, main = "prediction")