This vignette covers advanced workflows built on top of the core NRMstatsML modules, including:
stat_fn closuresThe package ships a plain-text version of nrm_example
under inst/extdata for users who want to demonstrate a full
import-to-analysis pipeline:
library(NRMstatsML)
csv_path <- system.file("extdata", "nrm_example.csv",
package = "NRMstatsML")
d <- read.csv(csv_path, stringsAsFactors = FALSE)
nrm_data_check(d, time_var = "year", verbose = TRUE)
#> === NRMstatsML: Data Check ===
#> Rows : 20
#> Columns : 10
#> Numeric : year, crop_yield, soil_OC, N, P, K, rainfall, runoff, biomass
#> Status : OKA common question in long-term NRM studies is: is the observed trend in yield real, or could it arise by chance? Bootstrap sampling answers this by resampling the dataset many times and computing Sen’s slope on each resample.
# Define a statistic function: Sen's slope on crop_yield
sens_stat <- function(df) {
nrm_sens_slope(df, time_var = "year", value_var = "crop_yield")$slope
}
# Bootstrap the slope — 500 replicates for speed
bs_slope <- nrm_bootstrap(d, stat_fn = sens_stat, n_iter = 500)
print(bs_slope)
#> -- Bootstrap --
#> Mean : -0.0009 SD : 0.0249 CI : [-0.0548, 0.0521]If the 95% CI for the slope excludes zero, the trend is significant. This approach is more robust than a single-pass Mann-Kendall test because it directly quantifies slope uncertainty.
When two inputs interact (e.g. N × rainfall), a full response surface
reveals the joint optimum. We build this by iterating
nrm_response_curve() across a grid and collecting predicted
yields.
# Fit a quadratic surface: yield ~ N + rainfall (manual grid approach)
mv_surface <- nrm_multivariate(d,
formula = crop_yield ~ N + I(N^2) + rainfall + I(rainfall^2) + N:rainfall,
scale = FALSE)
# Prediction grid
N_seq <- seq(60, 120, length.out = 30)
R_seq <- seq(610, 720, length.out = 30)
grid <- expand.grid(N = N_seq, rainfall = R_seq)
grid$predicted_yield <- stats::predict(mv_surface$model, newdata = grid)
# Simple contour summary (base graphics — no extra dependencies)
with(
reshape(grid, idvar = "N", timevar = "rainfall",
direction = "wide")[, 1:10], # abbreviated for display
message(sprintf("Predicted yield range: %.2f to %.2f t/ha",
min(grid$predicted_yield),
max(grid$predicted_yield)))
)# ggplot2 visualisation of the response surface
library(ggplot2)
ggplot(grid, aes(x = N, y = rainfall, fill = predicted_yield)) +
geom_tile() +
scale_fill_viridis_c(name = "Yield\n(t/ha)", option = "C") +
geom_contour(aes(z = predicted_yield), colour = "white",
alpha = 0.5, linewidth = 0.4) +
labs(
title = "Predicted Yield Response Surface",
subtitle = "Quadratic model: crop_yield ~ N × rainfall",
x = "Nitrogen applied (kg/ha)",
y = "Rainfall (mm)"
) +
theme_minimal(base_size = 13)Instead of a single ARIMA forecast, we propagate parametric
uncertainty by running nrm_forecast() on bootstrap
resamples of the historical record.
# Collect horizon-1 point forecasts from 200 bootstrap resamples
set.seed(42)
boot_forecasts <- replicate(200, {
idx <- sample(nrow(d), replace = TRUE)
boot_d <- d[sort(idx), ] # keep time order approximately
boot_d$year <- d$year # restore exact year sequence
fc <- nrm_forecast(boot_d, value_var = "crop_yield",
horizon = 1, method = "auto_arima")
as.numeric(fc$forecast$mean)
}, simplify = TRUE)
cat(sprintf(
"Bootstrap forecast (h=1):\n Mean = %.3f t/ha\n 95%% CI: [%.3f, %.3f]\n",
mean(boot_forecasts),
quantile(boot_forecasts, 0.025),
quantile(boot_forecasts, 0.975)
))
#> Bootstrap forecast (h=1):
#> Mean = 5.448 t/ha
#> 95% CI: [5.157, 5.548]stat_fn Closuresnrm_uncertainty() and nrm_bootstrap()
accept any function of the form f(data) → scalar. This
makes them highly extensible.
opt_N_fn <- function(df) {
rc <- nrm_response_curve(df, input_var = "N",
response_var = "crop_yield",
type = "quadratic")
opt <- nrm_optimize_input(rc, price_ratio = 0.6)
opt$economic_optimum
}
bs_optN <- nrm_bootstrap(d, stat_fn = opt_N_fn, n_iter = 300)
cat(sprintf(
"Bootstrapped economic optimum N:\n Mean = %.1f kg/ha\n 95%% CI: [%.1f, %.1f]\n",
bs_optN$mean, bs_optN$ci[1], bs_optN$ci[2]
))
#> Bootstrapped economic optimum N:
#> Mean = -98660.8 kg/ha
#> 95% CI: [-279854.7, -11328.2]Compare OLS, quadratic OLS, and PLS side-by-side using
nrm_benchmark() on a simple hold-out split.
set.seed(123)
n_d <- nrow(d)
idx <- sample(n_d, floor(0.75 * n_d))
train <- d[ idx, ]
test <- d[-idx, ]
# Fit three candidate models
m_ols <- lm(crop_yield ~ N + P + K + rainfall, data = train)
m_quad <- lm(crop_yield ~ N + I(N^2) + rainfall, data = train)
m_full <- lm(crop_yield ~ N + P + K + rainfall + soil_OC, data = train)
bm <- nrm_benchmark(
models = list(ols = m_ols, quadratic = m_quad, full_ols = m_full),
test_data = test,
response_var = "crop_yield"
)
print(bm)
#> === NRMstatsML: Model Benchmark ===
#>
#> model RMSE MAE Rsq
#> full_ols 0.0279 0.0200 0.9974
#> ols 0.0285 0.0191 0.9973
#> quadratic 0.0435 0.0380 0.9937Putting it all together — a single self-contained script suitable for a supplementary materials section:
library(NRMstatsML)
# 1. Import
d <- read.csv(system.file("extdata", "nrm_example.csv",
package = "NRMstatsML"))
# 2. Validate
nrm_data_check(d)
# 3. Trend
tr <- nrm_trend(d, time_var = "year", value_var = "crop_yield")
nrm_plot(tr)
# 4. Multivariate
mv <- nrm_multivariate(d, crop_yield ~ N + P + K + rainfall)
nrm_summary(mv)
# 5. Response curve and optimum
rc <- nrm_response_curve(d, "N", "crop_yield", type = "quadratic")
opt <- nrm_optimize_input(rc, price_ratio = 0.6)
nrm_plot(rc)
# 6. Forecast
fc <- nrm_forecast(d, "crop_yield", horizon = 5)
nrm_plot(fc)
# 7. Uncertainty
unc <- nrm_uncertainty(d,
stat_fn = function(x) mean(x$crop_yield),
method = "bootstrap", n_iter = 1000)
print(unc)sessionInfo()
#> R version 4.6.0 (2026-04-24)
#> Platform: x86_64-pc-linux-gnu
#> Running under: Ubuntu 24.04.4 LTS
#>
#> Matrix products: default
#> BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
#> LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.26.so; LAPACK version 3.12.0
#>
#> locale:
#> [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
#> [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
#> [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
#> [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
#> [9] LC_ADDRESS=C LC_TELEPHONE=C
#> [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
#>
#> time zone: Etc/UTC
#> tzcode source: system (glibc)
#>
#> attached base packages:
#> [1] stats graphics grDevices utils datasets methods base
#>
#> other attached packages:
#> [1] ggplot2_4.0.3 NRMstatsML_0.1.4 rmarkdown_2.31
#>
#> loaded via a namespace (and not attached):
#> [1] sass_0.4.10 generics_0.1.4 lattice_0.22-9
#> [4] digest_0.6.39 magrittr_2.0.5 evaluate_1.0.5
#> [7] grid_4.6.0 RColorBrewer_1.1-3 fastmap_1.2.0
#> [10] jsonlite_2.0.0 trend_1.1.6 forecast_9.0.2
#> [13] extraDistr_1.10.0.4 viridisLite_0.4.3 scales_1.4.0
#> [16] isoband_0.3.0 jquerylib_0.1.4 cli_3.6.6
#> [19] rlang_1.2.0 Kendall_2.2.2 withr_3.0.2
#> [22] cachem_1.1.0 yaml_2.3.12 otel_0.2.0
#> [25] tools_4.6.0 parallel_4.6.0 dplyr_1.2.1
#> [28] colorspace_2.1-2 boot_1.3-32 buildtools_1.0.0
#> [31] vctrs_0.7.3 R6_2.6.1 zoo_1.8-15
#> [34] lifecycle_1.0.5 pkgconfig_2.0.3 urca_1.3-4
#> [37] pillar_1.11.1 bslib_0.11.0 gtable_0.3.6
#> [40] glue_1.8.1 Rcpp_1.1.1-1.1 xfun_0.58
#> [43] tibble_3.3.1 tidyselect_1.2.1 sys_3.4.3
#> [46] knitr_1.51 farver_2.1.2 htmltools_0.5.9
#> [49] nlme_3.1-169 maketools_1.3.2 labeling_0.4.3
#> [52] timeDate_4052.112 fracdiff_1.5-4 compiler_4.6.0
#> [55] S7_0.2.2