To run this vignette, the next R packages should be installed and loaded:
Library CSTools, should be installed from CRAN and loaded:
In this example, the seasonal temperature and precipitation forecasts, initialized in november, will be used to assess the glosea5 seasonal forecasting system from the Met Office, by computing the multivariate RMSE for both temperature and precipitation.
The parameters defined are the initializing month and the variables:
The simulations available for this model cover the period 1993-2012. So, the starting and ending dates can be defined by running the following lines:
ini <- 1993
fin <- 2012
start <- as.Date(paste(ini, mth, "01", sep = ""), "%Y%m%d")
end <- as.Date(paste(fin, mth, "01", sep = ""), "%Y%m%d")
dateseq <- format(seq(start, end, by = "year"), "%Y%m%d")
The grid in which all data will be interpolated should be also specified. The observational dataset used in this example is the EraInterim.
Using the CST_Start
function from CSTool
package, the data available in our data store can be loaded.
The following lines show how this function can be used. Here, the data
is loaded from a previous saved .RData
file: Ask
nuria.perez at bsc.es for the data to run the recipe.
latmin = 25
latmax = 75
lonmin = -20
lonmax = 70
dateseq <- format(seq(start, end, by = "year"), "%Y%m")
repos1 <- "/esarchive/exp/glosea5/glosea5c3s/monthly_mean/$var$_f6h/$var$_$sdate$.nc"
exp_T <- CST_Start(dataset = list(list(name = 'glosea5c3s', path = repos1)),
var = temp,
member = startR::indices(1:9),
sdate = dateseq,
ftime = startR::indices(2:4),
lat = startR::values(list(latmin, latmax)),
lat_reorder = startR::Sort(decreasing = TRUE),
lon = startR::values(list(lonmin, lonmax)),
lon_reorder = startR::CircularSort(0, 360),
synonims = list(lon = c('lon', 'longitude'),
lat = c('lat', 'latitude'),
member = c('member', 'ensemble'),
ftime = c('ftime', 'time')),
transform = startR::CDORemapper,
transform_extra_cells = 2,
transform_params = list(grid = 'r256x128',
method = 'bilinear'),
transform_vars = c('lat', 'lon'),
return_vars = list(lat = NULL,
lon = NULL, ftime = 'sdate'),
retrieve = TRUE)
dates_exp <- exp_T$attrs$Dates
repos2 <- "/esarchive/recon/ecmwf/erainterim/monthly_mean/$var$/$var$_$date$.nc"
obs_T <- CST_Start(dataset = list(list(name = 'erainterim', path = repos2)),
var = temp,
date = unique(format(dates_exp, '%Y%m')),
ftime = startR::values(dates_exp),
ftime_across = 'date',
ftime_var = 'ftime',
merge_across_dims = TRUE,
split_multiselected_dims = TRUE,
lat = startR::values(list(latmin, latmax)),
lat_reorder = startR::Sort(decreasing = TRUE),
lon = startR::values(list(lonmin, lonmax)),
lon_reorder = startR::CircularSort(0, 360),
synonims = list(lon = c('lon', 'longitude'),
lat = c('lat', 'latitude'),
ftime = c('ftime', 'time')),
transform = startR::CDORemapper,
transform_extra_cells = 2,
transform_params = list(grid = 'r256x128',
method = 'bilinear'),
transform_vars = c('lat', 'lon'),
return_vars = list(lon = NULL,
lat = NULL,
ftime = 'date'),
retrieve = TRUE)
repos3 <- "/esarchive/exp/glosea5/glosea5c3s/monthly_mean/$var$_f24h/$var$_$sdate$.nc"
exp_P <- CST_Start(dataset = list(list(name = 'glosea5c3s', path = repos3)),
var = precip,
member = startR::indices(1:9),
sdate = dateseq,
ftime = startR::indices(2:4),
lat = startR::values(list(latmin, latmax)),
lat_reorder = startR::Sort(decreasing = TRUE),
lon = startR::values(list(lonmin, lonmax)),
lon_reorder = startR::CircularSort(0, 360),
synonims = list(lon = c('lon', 'longitude'),
lat = c('lat', 'latitude'),
member = c('member', 'ensemble'),
ftime = c('ftime', 'time')),
transform = startR::CDORemapper,
transform_extra_cells = 2,
transform_params = list(grid = 'r256x128',
method = 'bilinear'),
transform_vars = c('lat', 'lon'),
return_vars = list(lat = NULL,
lon = NULL, ftime = 'sdate'),
retrieve = TRUE)
dates_exp <- exp_P$attrs$Dates
obs_P <- CST_Start(dataset = list(list(name = 'erainterim', path = repos2)),
var = precip,
date = unique(format(dates_exp, '%Y%m')),
ftime = startR::values(dates_exp),
ftime_across = 'date',
ftime_var = 'ftime',
merge_across_dims = TRUE,
split_multiselected_dims = TRUE,
lat = startR::values(list(latmin, latmax)),
lat_reorder = startR::Sort(decreasing = TRUE),
lon = startR::values(list(lonmin, lonmax)),
lon_reorder = startR::CircularSort(0, 360),
synonims = list(lon = c('lon', 'longitude'),
lat = c('lat', 'latitude'),
ftime = c('ftime', 'time')),
transform = startR::CDORemapper,
transform_extra_cells = 2,
transform_params = list(grid = 'r256x128',
method = 'bilinear'),
transform_vars = c('lat', 'lon'),
return_vars = list(lon = NULL,
lat = NULL,
ftime = 'date'),
retrieve = TRUE)
# save(exp_T, obs_T, exp_P, obs_P, file = "./tas_prlr_toydata.RData")
# Or use the following line to load the file provided in .RData format:
# load(file = "./tas_prlr_toydata.RData")
There should be four new elements loaded in the R working
environment: exp_T
, obs_T
, exp_P
and obs_P
. The first two elements correspond to the
experimental and observed data for temperature and the other are the
equivalent for the precipitation data. It is possible to check that they
are of class sd2v_cube
by running:
class(exp_T)
class(obs_T)
class(exp_P)
class(obs_P)
Loading the data using CST_Load
allows to obtain two
lists, one for the experimental data and another for the observe data,
with the same elements and compatible dimensions of the data
element:
> dim(exp_T$data)
dataset var member sdate ftime lat lon
1 1 9 20 3 35 64
> dim(obs_T$data)
dataset var sdate ftime lat lon
1 1 20 3 35 64
Latitudes and longitudes of the common grid can be saved:
The next step is to compute the anomalies of the experimental and
observational data using CST_Anomaly
function, which could
be applied over data from each variable, and in this case it’s compute
applying cross validation technique over individual members:
c(ano_exp_T, ano_obs_T) %<-% CST_Anomaly(exp = exp_T, obs = obs_T, cross = TRUE, memb = TRUE)
c(ano_exp_P, ano_obs_P) %<-% CST_Anomaly(exp = exp_P, obs = obs_P, cross = TRUE, memb = TRUE)
The original dimensions are preserved and the anomalies are stored in
the data
element of the correspondent object:
> str(ano_exp_T$data)
num [1:20, 1, 1:9, 1, 1:3, 1:35, 1:64] -1.399 -0.046 -0.133 0.361 -5.696 ...
> str(ano_obs_T$data)
num [1:20, 1, 1, 1, 1:3, 1:35, 1:64] 1.556 1.397 -0.346 -5.99 -0.273 ...
Two lists containing the experiment ,ano_exp
, and the
observation, ano_obs
, lists should be put together to serve
as input of the function to compute multivariate RMSEs.
Furthermore, some weights can be applied to the difference variables based on their relative importance (if no weights are given, a value of 1 is automatically assigned to each variable). For this example, we’ll give a weight of 2 to the temperature dataset and a weight of 1 to the precipitation dataset:
The multivariate RMSE gives an indication of the forecast performance
(RMSE) for multiple variables simultaneously. Variables can be weighted
based on their relative importance. It is obtained by running the
CST_MultivarRMSE
function:
ano_obs[[1]] <- CST_InsertDim(ano_obs[[1]], posdim = 3, lendim = 1, name = "member")
ano_obs[[2]] <- CST_InsertDim(ano_obs[[2]], posdim = 3, lendim = 1, name = "member")
mvrmse <- CST_MultivarRMSE(exp = ano_exp, obs = ano_obs, weight)
The function CST_MultivarRMSE
returns the multivariate
RMSE value for 2 or more variables. The output is a CSTool object
containing the RMSE values in the data
element and other
relevant information:
> class(mvrmse)
> str(mvrmse$data)
num [1, 1, 1, 1:35, 1:64] 806916 832753 838254 833206 996828 ...
> str(mvrmse$attrs$Variable)
List of 2
$ varName : chr [1:2] "tas" "prlr"
$ metadata:List of 8
..$ lat : num [1:35(1d)] 73.8 72.4 71 69.6 68.2 ...
..$ lon : num [1:64(1d)] 0 1.41 2.81 4.22 5.62 ...
..$ ftime: POSIXct[1:60], format: "1993-12-16 00:00:00" "1994-12-16 00:00:00" ...
..$ tas :List of 11
.. ..$ prec : chr "float"
.. ..$ units : chr "K"
.. ..$ dim :List of 4
.. .. ..$ :List of 10
.. .. .. ..$ name : chr "lon"
.. .. .. ..$ len : int 64
.. .. .. ..$ unlim : logi FALSE
.. .. .. ..$ group_index : int 1
.. .. .. ..$ group_id : int 65536
.. .. .. ..$ id : int 1
.. .. .. ..$ dimvarid :List of 5
.. .. .. .. ..$ id : int 1
.. .. .. .. ..$ group_index: int 1
.. .. .. .. ..$ group_id : int 65536
.. .. .. .. ..$ list_index : num -1
.. .. .. .. ..$ isdimvar : logi TRUE
.. .. .. .. ..- attr(*, "class")= chr "ncid4"
.. .. .. ..$ units : chr "degrees_east"
.. .. .. ..$ vals : num [1:64(1d)] -19.7 -18.3 -16.9 -15.5 -14.1 ...
.. .. .. ..$ create_dimvar: logi TRUE
.. .. .. ..- attr(*, "class")= chr "ncdim4"
.. .. ..$ :List of 10
.. .. .. ..$ name : chr "lat"
.. .. .. ..$ len : int 35
.. .. .. ..$ unlim : logi FALSE
.. .. .. ..$ group_index : int 1
.. .. .. ..$ group_id : int 65536
.. .. .. ..$ id : int 0
.. .. .. ..$ dimvarid :List of 5
.. .. .. .. ..$ id : int 0
.. .. .. .. ..$ group_index: int 1
.. .. .. .. ..$ group_id : int 65536
.. .. .. .. ..$ list_index : num -1
.. .. .. .. ..$ isdimvar : logi TRUE
.. .. .. .. ..- attr(*, "class")= chr "ncid4"
.. .. .. ..$ units : chr "degrees_north"
.. .. .. ..$ vals : num [1:35(1d)] 26 27.4 28.8 30.2 31.6 ...
.. .. .. ..$ create_dimvar: logi TRUE
.. .. .. ..- attr(*, "class")= chr "ncdim4"
.. .. ..$ :List of 10
.. .. .. ..$ name : chr "ensemble"
.. .. .. ..$ len : int 14
.. .. .. ..$ unlim : logi FALSE
.. .. .. ..$ group_index : int 1
.. .. .. ..$ group_id : int 65536
.. .. .. ..$ id : int 3
.. .. .. ..$ dimvarid :List of 5
.. .. .. .. ..$ id : int -1
.. .. .. .. ..$ group_index: int 1
.. .. .. .. ..$ group_id : int 65536
.. .. .. .. ..$ list_index : num -1
.. .. .. .. ..$ isdimvar : logi TRUE
.. .. .. .. ..- attr(*, "class")= chr "ncid4"
.. .. .. ..$ vals : int [1:14] 1 2 3 4 5 6 7 8 9 10 ...
.. .. .. ..$ units : chr ""
.. .. .. ..$ create_dimvar: logi FALSE
.. .. .. ..- attr(*, "class")= chr "ncdim4"
.. .. ..$ :List of 11
.. .. .. ..$ name : chr "time"
.. .. .. ..$ len : int 7
.. .. .. ..$ unlim : logi TRUE
.. .. .. ..$ group_index : int 1
.. .. .. ..$ group_id : int 65536
.. .. .. ..$ id : int 2
.. .. .. ..$ dimvarid :List of 5
.. .. .. .. ..$ id : int 2
.. .. .. .. ..$ group_index: int 1
.. .. .. .. ..$ group_id : int 65536
.. .. .. .. ..$ list_index : num -1
.. .. .. .. ..$ isdimvar : logi TRUE
.. .. .. .. ..- attr(*, "class")= chr "ncid4"
.. .. .. ..$ units : chr "months since 1993-11-15 12:00:00"
.. .. .. ..$ calendar : chr "proleptic_gregorian"
.. .. .. ..$ vals : num [1:7(1d)] 0 1 2 3 4 5 6
.. .. .. ..$ create_dimvar: logi TRUE
.. .. .. ..- attr(*, "class")= chr "ncdim4"
.. ..$ unlim : logi TRUE
.. ..$ make_missing_value: logi TRUE
.. ..$ missval : num -9e+33
.. ..$ hasAddOffset : logi FALSE
.. ..$ hasScaleFact : logi FALSE
.. ..$ table : int 128
.. ..$ _FillValue : num -9e+33
.. ..$ missing_value : num -9e+33
..$ lat : num [1:35(1d)] 73.8 72.4 71 69.6 68.2 ...
..$ lon : num [1:64(1d)] 0 1.41 2.81 4.22 5.62 ...
..$ ftime: POSIXct[1:60], format: "1993-12-16 00:00:00" "1994-12-16 00:00:00" ...
..$ prlr :List of 9
.. ..$ prec : chr "float"
.. ..$ units : chr "m s-1"
.. ..$ dim :List of 4
.. .. ..$ :List of 10
.. .. .. ..$ name : chr "lon"
.. .. .. ..$ len : int 64
.. .. .. ..$ unlim : logi FALSE
.. .. .. ..$ group_index : int 1
.. .. .. ..$ group_id : int 65536
.. .. .. ..$ id : int 1
.. .. .. ..$ dimvarid :List of 5
.. .. .. .. ..$ id : int 1
.. .. .. .. ..$ group_index: int 1
.. .. .. .. ..$ group_id : int 65536
.. .. .. .. ..$ list_index : num -1
.. .. .. .. ..$ isdimvar : logi TRUE
.. .. .. .. ..- attr(*, "class")= chr "ncid4"
.. .. .. ..$ units : chr "degrees_east"
.. .. .. ..$ vals : num [1:64(1d)] -19.7 -18.3 -16.9 -15.5 -14.1 ...
.. .. .. ..$ create_dimvar: logi TRUE
.. .. .. ..- attr(*, "class")= chr "ncdim4"
.. .. ..$ :List of 10
.. .. .. ..$ name : chr "lat"
.. .. .. ..$ len : int 35
.. .. .. ..$ unlim : logi FALSE
.. .. .. ..$ group_index : int 1
.. .. .. ..$ group_id : int 65536
.. .. .. ..$ id : int 0
.. .. .. ..$ dimvarid :List of 5
.. .. .. .. ..$ id : int 0
.. .. .. .. ..$ group_index: int 1
.. .. .. .. ..$ group_id : int 65536
.. .. .. .. ..$ list_index : num -1
.. .. .. .. ..$ isdimvar : logi TRUE
.. .. .. .. ..- attr(*, "class")= chr "ncid4"
.. .. .. ..$ units : chr "degrees_north"
.. .. .. ..$ vals : num [1:35(1d)] 26 27.4 28.8 30.2 31.6 ...
.. .. .. ..$ create_dimvar: logi TRUE
.. .. .. ..- attr(*, "class")= chr "ncdim4"
.. .. ..$ :List of 10
.. .. .. ..$ name : chr "ensemble"
.. .. .. ..$ len : int 14
.. .. .. ..$ unlim : logi FALSE
.. .. .. ..$ group_index : int 1
.. .. .. ..$ group_id : int 65536
.. .. .. ..$ id : int 3
.. .. .. ..$ dimvarid :List of 5
.. .. .. .. ..$ id : int -1
.. .. .. .. ..$ group_index: int 1
.. .. .. .. ..$ group_id : int 65536
.. .. .. .. ..$ list_index : num -1
.. .. .. .. ..$ isdimvar : logi TRUE
.. .. .. .. ..- attr(*, "class")= chr "ncid4"
.. .. .. ..$ vals : int [1:14] 1 2 3 4 5 6 7 8 9 10 ...
.. .. .. ..$ units : chr ""
.. .. .. ..$ create_dimvar: logi FALSE
.. .. .. ..- attr(*, "class")= chr "ncdim4"
.. .. ..$ :List of 11
.. .. .. ..$ name : chr "time"
.. .. .. ..$ len : int 7
.. .. .. ..$ unlim : logi TRUE
.. .. .. ..$ group_index : int 1
.. .. .. ..$ group_id : int 65536
.. .. .. ..$ id : int 2
.. .. .. ..$ dimvarid :List of 5
.. .. .. .. ..$ id : int 2
.. .. .. .. ..$ group_index: int 1
.. .. .. .. ..$ group_id : int 65536
.. .. .. .. ..$ list_index : num -1
.. .. .. .. ..$ isdimvar : logi TRUE
.. .. .. .. ..- attr(*, "class")= chr "ncid4"
.. .. .. ..$ units : chr "months since 1993-11-15 12:00:00"
.. .. .. ..$ calendar : chr "proleptic_gregorian"
.. .. .. ..$ vals : num [1:7(1d)] 0 1 2 3 4 5 6
.. .. .. ..$ create_dimvar: logi TRUE
.. .. .. ..- attr(*, "class")= chr "ncdim4"
.. ..$ unlim : logi TRUE
.. ..$ make_missing_value: logi FALSE
.. ..$ missval : num 1e+30
.. ..$ hasAddOffset : logi FALSE
.. ..$ hasScaleFact : logi FALSE
.. ..$ table : int 128
The following lines plot the multivariate RMSE
PlotEquiMap(mvrmse$data, lon = Lon, lat = Lat, filled.continents = FALSE,
toptitle = "Multivariate RMSE tas, prlr 1993 - 2012", colNA = "white")