Comparison with other R packages

Data setup

Univariate mean change

# Univariate mean change
set.seed(1)
p <- 1
mean_data_1 <- rbind(
  mvtnorm::rmvnorm(300, mean = rep(0, p), sigma = diag(100, p)),
  mvtnorm::rmvnorm(400, mean = rep(50, p), sigma = diag(100, p)),
  mvtnorm::rmvnorm(300, mean = rep(2, p), sigma = diag(100, p))
)

plot.ts(mean_data_1)
plot of chunk data-setup-univariate-mean-change
plot of chunk data-setup-univariate-mean-change

Univariate mean and/or variance change

# Univariate mean and/or variance change
set.seed(1)
p <- 1
mv_data_1 <- rbind(
  mvtnorm::rmvnorm(300, mean = rep(0, p), sigma = diag(1, p)),
  mvtnorm::rmvnorm(400, mean = rep(10, p), sigma = diag(1, p)),
  mvtnorm::rmvnorm(300, mean = rep(0, p), sigma = diag(100, p)),
  mvtnorm::rmvnorm(300, mean = rep(0, p), sigma = diag(1, p)),
  mvtnorm::rmvnorm(400, mean = rep(10, p), sigma = diag(1, p)),
  mvtnorm::rmvnorm(300, mean = rep(10, p), sigma = diag(100, p))
)

plot.ts(mv_data_1)
plot of chunk data-setup-univariate-mean-and-or-variance-change
plot of chunk data-setup-univariate-mean-and-or-variance-change

Multivariate mean change

# Multivariate mean change
set.seed(1)
p <- 3
mean_data_3 <- rbind(
  mvtnorm::rmvnorm(300, mean = rep(0, p), sigma = diag(100, p)),
  mvtnorm::rmvnorm(400, mean = rep(50, p), sigma = diag(100, p)),
  mvtnorm::rmvnorm(300, mean = rep(2, p), sigma = diag(100, p))
)

plot.ts(mean_data_3)
plot of chunk data-setup-multivariate-mean-change
plot of chunk data-setup-multivariate-mean-change

Multivariate mean and/or variance change

# Multivariate mean and/or variance change
set.seed(1)
p <- 4
mv_data_3 <- rbind(
  mvtnorm::rmvnorm(300, mean = rep(0, p), sigma = diag(1, p)),
  mvtnorm::rmvnorm(400, mean = rep(10, p), sigma = diag(1, p)),
  mvtnorm::rmvnorm(300, mean = rep(0, p), sigma = diag(100, p)),
  mvtnorm::rmvnorm(300, mean = rep(0, p), sigma = diag(1, p)),
  mvtnorm::rmvnorm(400, mean = rep(10, p), sigma = diag(1, p)),
  mvtnorm::rmvnorm(300, mean = rep(10, p), sigma = diag(100, p))
)

plot.ts(mv_data_3)
plot of chunk data-setup-multivariate-mean-and-or-variance-change
plot of chunk data-setup-multivariate-mean-and-or-variance-change

Linear regression

# Linear regression
set.seed(1)
n <- 300
p <- 4
x <- mvtnorm::rmvnorm(n, rep(0, p), diag(p))
theta_0 <- rbind(c(1, 3.2, -1, 0), c(-1, -0.5, 2.5, -2), c(0.8, 0, 1, 2))
y <- c(
  x[1:100, ] %*% theta_0[1, ] + rnorm(100, 0, 3),
  x[101:200, ] %*% theta_0[2, ] + rnorm(100, 0, 3),
  x[201:n, ] %*% theta_0[3, ] + rnorm(100, 0, 3)
)
lm_data <- data.frame(y = y, x = x)

plot.ts(lm_data)
plot of chunk data-setup-linear-regression
plot of chunk data-setup-linear-regression

Logistic regression

# Logistic regression
set.seed(1)
n <- 500
p <- 4
x <- mvtnorm::rmvnorm(n, rep(0, p), diag(p))
theta <- rbind(rnorm(p, 0, 1), rnorm(p, 2, 1))
y <- c(
  rbinom(300, 1, 1 / (1 + exp(-x[1:300, ] %*% theta[1, ]))),
  rbinom(200, 1, 1 / (1 + exp(-x[301:n, ] %*% theta[2, ])))
)
binomial_data <- data.frame(y = y, x = x)

plot.ts(binomial_data)
plot of chunk data-setup-logistic-regression
plot of chunk data-setup-logistic-regression

Poisson regression

# Poisson regression
set.seed(1)
n <- 1100
p <- 3
x <- mvtnorm::rmvnorm(n, rep(0, p), diag(p))
delta <- rnorm(p)
theta_0 <- c(1, 0.3, -1)
y <- c(
  rpois(500, exp(x[1:500, ] %*% theta_0)),
  rpois(300, exp(x[501:800, ] %*% (theta_0 + delta))),
  rpois(200, exp(x[801:1000, ] %*% theta_0)),
  rpois(100, exp(x[1001:1100, ] %*% (theta_0 - delta)))
)
poisson_data <- data.frame(y = y, x = x)

plot.ts(log(poisson_data$y))
plot of chunk data-setup-poisson-regression
plot of chunk data-setup-poisson-regression
plot.ts(poisson_data[, -1])
plot of chunk data-setup-poisson-regression
plot of chunk data-setup-poisson-regression

Lasso

# Lasso
set.seed(1)
n <- 480
p_true <- 6
p <- 50
x <- mvtnorm::rmvnorm(n, rep(0, p), diag(p))
theta_0 <- rbind(
  runif(p_true, -5, -2),
  runif(p_true, -3, 3),
  runif(p_true, 2, 5),
  runif(p_true, -5, 5)
)
theta_0 <- cbind(theta_0, matrix(0, ncol = p - p_true, nrow = 4))
y <- c(
  x[1:80, ] %*% theta_0[1, ] + rnorm(80, 0, 1),
  x[81:200, ] %*% theta_0[2, ] + rnorm(120, 0, 1),
  x[201:320, ] %*% theta_0[3, ] + rnorm(120, 0, 1),
  x[321:n, ] %*% theta_0[4, ] + rnorm(160, 0, 1)
)
lasso_data <- data.frame(y = y, x = x)

plot.ts(lasso_data[, seq_len(p_true + 1)])
plot of chunk data-setup-lasso
plot of chunk data-setup-lasso

AR(3)

# AR(3)
set.seed(1)
n <- 1000
x <- rep(0, n + 3)
for (i in 1:600) {
  x[i + 3] <- 0.6 * x[i + 2] - 0.2 * x[i + 1] + 0.1 * x[i] + rnorm(1, 0, 3)
}
for (i in 601:1000) {
  x[i + 3] <- 0.3 * x[i + 2] + 0.4 * x[i + 1] + 0.2 * x[i] + rnorm(1, 0, 3)
}
ar_data <- x[-seq_len(3)]

plot.ts(ar_data)
plot of chunk data-setup-ar3
plot of chunk data-setup-ar3

GARCH(1, 1)

# GARCH(1, 1)
set.seed(1)
n <- 400
sigma_2 <- rep(1, n + 1)
x <- rep(0, n + 1)
for (i in seq_len(200)) {
  sigma_2[i + 1] <- 20 + 0.5 * x[i]^2 + 0.1 * sigma_2[i]
  x[i + 1] <- rnorm(1, 0, sqrt(sigma_2[i + 1]))
}
for (i in 201:400) {
  sigma_2[i + 1] <- 1 + 0.1 * x[i]^2 + 0.5 * sigma_2[i]
  x[i + 1] <- rnorm(1, 0, sqrt(sigma_2[i + 1]))
}
garch_data <- x[-1]

plot.ts(garch_data)
plot of chunk data-setup-garch11
plot of chunk data-setup-garch11

VAR(2)

# VAR(2)
set.seed(1)
n <- 800
p <- 2
theta_1 <- matrix(c(-0.3, 0.6, -0.5, 0.4, 0.2, 0.2, 0.2, -0.2), nrow = p)
theta_2 <- matrix(c(0.3, -0.4, 0.1, -0.5, -0.5, -0.2, -0.5, 0.2), nrow = p)
x <- matrix(0, n + 2, p)
for (i in 1:500) {
  x[i + 2, ] <- theta_1 %*% c(x[i + 1, ], x[i, ]) + rnorm(p, 0, 1)
}
for (i in 501:n) {
  x[i + 2, ] <- theta_2 %*% c(x[i + 1, ], x[i, ]) + rnorm(p, 0, 1)
}
var_data <- x[-seq_len(2), ]

plot.ts(var_data)
plot of chunk data-setup-var2
plot of chunk data-setup-var2

Univariate mean change

The true change points are 300 and 700. Some methods are plotted due to the un-retrievable change points.

results[["mean_data_1"]][["fastcpd"]] <-
  fastcpd::fastcpd.mean(mean_data_1, r.progress = FALSE)@cp_set
results[["mean_data_1"]][["fastcpd"]]
#> [1] 300 700
results[["mean_data_1"]][["CptNonPar"]] <-
  CptNonPar::np.mojo(mean_data_1, G = floor(length(mean_data_1) / 6))$cpts
results[["mean_data_1"]][["CptNonPar"]]
#> [1] 300 700
results[["mean_data_1"]][["strucchange"]] <-
  strucchange::breakpoints(y ~ 1, data = data.frame(y = mean_data_1))$breakpoints
results[["mean_data_1"]][["strucchange"]]
#> [1] 300 700
results[["mean_data_1"]][["ecp"]] <- ecp::e.divisive(mean_data_1)$estimates
results[["mean_data_1"]][["ecp"]]
#> [1]    1  301  701 1001
results[["mean_data_1"]][["changepoint"]] <-
  changepoint::cpts(changepoint::cpt.mean(c(mean_data_1)/mad(mean_data_1), method = "PELT"))
results[["mean_data_1"]][["changepoint"]]
#> [1] 300 700
results[["mean_data_1"]][["breakfast"]] <-
  breakfast::breakfast(mean_data_1)$cptmodel.list[[6]]$cpts
results[["mean_data_1"]][["breakfast"]]
#> [1] 300 700
results[["mean_data_1"]][["wbs"]] <-
  wbs::wbs(mean_data_1)$cpt$cpt.ic$mbic.penalty
results[["mean_data_1"]][["wbs"]]
#> [1] 300 700
results[["mean_data_1"]][["mosum"]] <-
  mosum::mosum(c(mean_data_1), G = 40)$cpts.info$cpts
results[["mean_data_1"]][["mosum"]]
#> [1] 300 700
results[["mean_data_1"]][["fpop"]] <-
  fpop::Fpop(mean_data_1, nrow(mean_data_1))$t.est
results[["mean_data_1"]][["fpop"]]
#> [1]  300  700 1000
results[["mean_data_1"]][["gfpop"]] <-
  gfpop::gfpop(
    data = mean_data_1,
    mygraph = gfpop::graph(
      penalty = 2 * log(nrow(mean_data_1)) * gfpop::sdDiff(mean_data_1) ^ 2,
      type = "updown"
    ),
    type = "mean"
  )$changepoints
results[["mean_data_1"]][["gfpop"]]
#> [1]  300  700 1000
results[["mean_data_1"]][["jointseg"]] <-
  jointseg::jointSeg(mean_data_1, K = 2)$bestBkp
results[["mean_data_1"]][["jointseg"]]
#> [1] 300 700
results[["mean_data_1"]][["stepR"]] <-
  stepR::stepFit(mean_data_1, alpha = 0.5)$rightEnd
results[["mean_data_1"]][["stepR"]]
#> [1]  300  700 1000
results[["mean_data_1"]][["cpm"]] <-
  cpm::processStream(mean_data_1, cpmType = "Student")$changePoints
results[["mean_data_1"]][["cpm"]]
#> [1] 299 699
results[["mean_data_1"]][["segmented"]] <-
  segmented::stepmented(
    as.numeric(mean_data_1), npsi = 2
  )$psi[, "Est."]
results[["mean_data_1"]][["segmented"]]
#> psi1.index psi2.index 
#>   300.0813   700.1513
results[["mean_data_1"]][["mcp"]] <- mcp::mcp(
  list(y ~ 1, ~ 1, ~ 1),
  data = data.frame(y = mean_data_1, x = seq_len(nrow(mean_data_1))),
  par_x = "x"
)
#> Compiling model graph
#>    Resolving undeclared variables
#>    Allocating nodes
#> Graph information:
#>    Observed stochastic nodes: 1000
#>    Unobserved stochastic nodes: 6
#>    Total graph size: 17028
#> 
#> Initializing model
#> Finished sampling in 19.3 seconds
if (requireNamespace("mcp", quietly = TRUE)) {
  plot(results[["mean_data_1"]][["mcp"]])
}
plot of chunk univariate-mean-change-mcp-result
plot of chunk univariate-mean-change-mcp-result
results[["mean_data_1"]][["not"]] <-
  not::not(mean_data_1, contrast = "pcwsConstMean")
if (requireNamespace("not", quietly = TRUE)) {
  plot(results[["mean_data_1"]][["not"]])
}
plot of chunk univariate-mean-change-not-result
plot of chunk univariate-mean-change-not-result
results[["mean_data_1"]][["bcp"]] <- bcp::bcp(mean_data_1)
#> Loading required package: bcp
#> Loading required package: grid
if (requireNamespace("bcp", quietly = TRUE)) {
  plot(results[["mean_data_1"]][["bcp"]])
}
plot of chunk univariate-mean-change-bcp-result
plot of chunk univariate-mean-change-bcp-result

Univariate mean and/or variance change

The true change points are 300, 700, 1000, 1300 and 1700. Some methods are plotted due to the un-retrievable change points.

results[["mv_data_1"]][["fastcpd"]] <-
  fastcpd::fastcpd.mv(mv_data_1, r.progress = FALSE)@cp_set
results[["mv_data_1"]][["fastcpd"]]
#> [1]  300  700 1001 1300 1700
results[["mv_data_1"]][["ecp"]] <- ecp::e.divisive(mv_data_1)$estimates
results[["mv_data_1"]][["ecp"]]
#> [1]    1  301  701 1001 1301 1701 2001
results[["mv_data_1"]][["changepoint"]] <-
  changepoint::cpts(changepoint::cpt.meanvar(c(mv_data_1), method = "PELT"))
results[["mv_data_1"]][["changepoint"]]
#> [1]  300  700 1000 1300 1700
results[["mv_data_1"]][["CptNonPar"]] <-
  CptNonPar::np.mojo(mv_data_1, G = floor(length(mv_data_1) / 6))$cpts
results[["mv_data_1"]][["CptNonPar"]]
#> [1]  333  700 1300
results[["mv_data_1"]][["cpm"]] <-
  cpm::processStream(mv_data_1, cpmType = "GLR")$changePoints
results[["mv_data_1"]][["cpm"]]
#>  [1]  293  300  403  408  618  621  696 1000 1021 1024 1293 1300 1417 1693 1700
#> [16] 1981
results[["mv_data_1"]][["mcp"]] <- mcp::mcp(
  list(y ~ 1, ~ 1, ~ 1, ~ 1, ~ 1, ~ 1),
  data = data.frame(y = mv_data_1, x = seq_len(nrow(mv_data_1))),
  par_x = "x"
)
#> Compiling model graph
#>    Resolving undeclared variables
#>    Allocating nodes
#> Graph information:
#>    Observed stochastic nodes: 2000
#>    Unobserved stochastic nodes: 12
#>    Total graph size: 64034
#> 
#> Initializing model
#> Finished sampling in 150.3 seconds
if (requireNamespace("mcp", quietly = TRUE)) {
  plot(results[["mv_data_1"]][["mcp"]])
}
plot of chunk univariate-mean-and-or-variance-change-mcp-result
plot of chunk univariate-mean-and-or-variance-change-mcp-result
results[["mv_data_1"]][["not"]] <-
  not::not(mv_data_1, contrast = "pcwsConstMeanVar")
if (requireNamespace("not", quietly = TRUE)) {
  plot(results[["mv_data_1"]][["not"]])
}
plot of chunk univariate-mean-and-or-variance-change-not-result
plot of chunk univariate-mean-and-or-variance-change-not-result
#> Press [enter] to continue
plot of chunk univariate-mean-and-or-variance-change-not-result
plot of chunk univariate-mean-and-or-variance-change-not-result

Multivariate mean change

The true change points are 300 and 700. Some methods are plotted due to the un-retrievable change points.

results[["mean_data_3"]][["fastcpd"]] <-
  fastcpd::fastcpd.mean(mean_data_3, r.progress = FALSE)@cp_set
results[["mean_data_3"]][["fastcpd"]]
#> [1] 300 700
results[["mean_data_3"]][["CptNonPar"]] <-
  CptNonPar::np.mojo(mean_data_3, G = floor(nrow(mean_data_3) / 6))$cpts
results[["mean_data_3"]][["CptNonPar"]]
#> [1] 300 700
results[["mean_data_3"]][["jointseg"]] <-
  jointseg::jointSeg(mean_data_3, K = 2)$bestBkp
results[["mean_data_3"]][["jointseg"]]
#> [1] 300 700
results[["mean_data_3"]][["strucchange"]] <-
  strucchange::breakpoints(
    cbind(y.1, y.2, y.3) ~ 1, data = data.frame(y = mean_data_3)
  )$breakpoints
results[["mean_data_3"]][["strucchange"]]
#> [1] 300 700
results[["mean_data_3"]][["ecp"]] <- ecp::e.divisive(mean_data_3)$estimates
results[["mean_data_3"]][["ecp"]]
#> [1]    1  301  701 1001
results[["mean_data_3"]][["bcp"]] <- bcp::bcp(mean_data_3)
if (requireNamespace("bcp", quietly = TRUE)) {
  plot(results[["mean_data_3"]][["bcp"]])
}
plot of chunk multivariate-mean-change-bcp-result
plot of chunk multivariate-mean-change-bcp-result

Multivariate mean and/or variance change

The true change points are 300, 700, 1000, 1300 and 1700. Some methods are plotted due to the un-retrievable change points.

results[["mv_data_3"]][["fastcpd"]] <-
  fastcpd::fastcpd.mv(mv_data_3, r.progress = FALSE)@cp_set
results[["mv_data_3"]][["fastcpd"]]
#> [1]  300  700 1000 1300 1700
results[["mv_data_3"]][["ecp"]] <- ecp::e.divisive(mv_data_3)$estimates
results[["mv_data_3"]][["ecp"]]
#> [1]    1  301  701 1001 1301 1701 2001

Linear regression

The true change points are 100 and 200.

results[["lm_data"]][["fastcpd"]] <-
  fastcpd::fastcpd.lm(lm_data, r.progress = FALSE)@cp_set
results[["lm_data"]][["fastcpd"]]
#> [1]  97 201
results[["lm_data"]][["strucchange"]] <-
  strucchange::breakpoints(y ~ . - 1, data = lm_data)$breakpoints
results[["lm_data"]][["strucchange"]]
#> [1] 100 201
results[["lm_data"]][["segmented"]] <-
  segmented::segmented(
    lm(
      y ~ . - 1, data.frame(y = lm_data$y, x = lm_data[, -1], index = seq_len(nrow(lm_data)))
    ),
    seg.Z = ~ index
  )$psi[, "Est."]
results[["lm_data"]][["segmented"]]
#> [1] 233

Logistic regression

The true change point is 300.

results[["binomial_data"]][["fastcpd"]] <-
  fastcpd::fastcpd.binomial(binomial_data, r.progress = FALSE)@cp_set
results[["binomial_data"]][["fastcpd"]]
#> [1] 302
results[["binomial_data"]][["strucchange"]] <-
  strucchange::breakpoints(y ~ . - 1, data = binomial_data)$breakpoints
results[["binomial_data"]][["strucchange"]]
#> [1] 297

Poisson regression

The true change points are 500, 800 and 1000.

results[["poisson_data"]][["fastcpd"]] <-
  fastcpd::fastcpd.poisson(poisson_data, r.progress = FALSE)@cp_set
results[["poisson_data"]][["fastcpd"]]
#> [1]  498  805 1003
results[["poisson_data"]][["strucchange"]] <-
  strucchange::breakpoints(y ~ . - 1, data = poisson_data)$breakpoints
results[["poisson_data"]][["strucchange"]]
#> [1] 935

Lasso

The true change points are 80, 200 and 320.

results[["lasso_data"]][["fastcpd"]] <-
  fastcpd::fastcpd.lasso(lasso_data, r.progress = FALSE)@cp_set
results[["lasso_data"]][["fastcpd"]]
#> [1]  79 199 321
results[["lasso_data"]][["strucchange"]] <-
  strucchange::breakpoints(y ~ . - 1, data = lasso_data)$breakpoints
results[["lasso_data"]][["strucchange"]]
#> [1]  80 200 321

AR(3)

The true change point is 600. Some methods are plotted due to the un-retrievable change points.

results[["ar_data"]][["fastcpd"]] <-
  fastcpd::fastcpd.ar(ar_data, 3, r.progress = FALSE)@cp_set
results[["ar_data"]][["fastcpd"]]
#> [1] 614
results[["ar_data"]][["CptNonPar"]] <-
  CptNonPar::np.mojo(ar_data, G = floor(length(ar_data) / 6))$cpts
results[["ar_data"]][["CptNonPar"]]
#> numeric(0)
results[["ar_data"]][["segmented"]] <-
  segmented::segmented(
    lm(
      y ~ x + 1, data.frame(y = ar_data, x = seq_along(ar_data))
    ),
    seg.Z = ~ x
  )$psi[, "Est."]
results[["ar_data"]][["segmented"]]
#> [1] 690
results[["ar_data"]][["mcp"]] <-
  mcp::mcp(
    list(y ~ 1 + ar(3), ~ 0 + ar(3)),
    data = data.frame(y = ar_data, x = seq_along(ar_data)),
    par_x = "x"
  )
#> Compiling model graph
#>    Resolving undeclared variables
#>    Allocating nodes
#> Graph information:
#>    Observed stochastic nodes: 1000
#>    Unobserved stochastic nodes: 9
#>    Total graph size: 24020
#> 
#> Initializing model
#> Finished sampling in 77.5 seconds
if (requireNamespace("mcp", quietly = TRUE)) {
  plot(results[["ar_data"]][["mcp"]])
}
plot of chunk ar3-mcp-result
plot of chunk ar3-mcp-result

GARCH(1, 1)

The true change point is 200.

results[["garch_data"]][["fastcpd"]] <-
  fastcpd::fastcpd.garch(garch_data, c(1, 1), r.progress = FALSE)@cp_set
#> Registered S3 method overwritten by 'quantmod':
#>   method            from
#>   as.zoo.data.frame zoo
results[["garch_data"]][["fastcpd"]]
#> [1] 205
results[["garch_data"]][["CptNonPar"]] <-
  CptNonPar::np.mojo(garch_data, G = floor(length(garch_data) / 6))$cpts
results[["garch_data"]][["CptNonPar"]]
#> [1] 206
results[["garch_data"]][["strucchange"]] <-
  strucchange::breakpoints(x ~ 1, data = data.frame(x = garch_data))$breakpoints
results[["garch_data"]][["strucchange"]]
#> [1] NA

VAR(2)

The true change points is 500.

results[["var_data"]][["fastcpd"]] <-
  fastcpd::fastcpd.var(var_data, 2, r.progress = FALSE)@cp_set
results[["var_data"]][["fastcpd"]]
#> [1] 500
results[["var_data"]][["VARDetect"]] <- VARDetect::tbss(var_data)$cp
results[["var_data"]][["VARDetect"]]
#> [1] 501

Detection comparison using well_log

well_log <- fastcpd::well_log
well_log <- well_log[well_log > 1e5]

results[["well_log"]] <- list(
  fastcpd = fastcpd::fastcpd.mean(well_log, trim = 0.003)@cp_set,
  changepoint = changepoint::cpts(changepoint::cpt.mean(well_log/mad(well_log), method = "PELT")),
  CptNonPar =
    CptNonPar::np.mojo(well_log, G = floor(length(well_log) / 6))$cpts,
  strucchange = strucchange::breakpoints(
    y ~ 1, data = data.frame(y = well_log)
  )$breakpoints,
  ecp = ecp::e.divisive(matrix(well_log))$estimates,
  breakfast = breakfast::breakfast(well_log)$cptmodel.list[[6]]$cpts,
  wbs = wbs::wbs(well_log)$cpt$cpt.ic$mbic.penalty,
  mosum = mosum::mosum(c(well_log), G = 40)$cpts.info$cpts,
  # fpop = fpop::Fpop(well_log, length(well_log))$t.est,  # meaningless
  gfpop = gfpop::gfpop(
    data = well_log,
    mygraph = gfpop::graph(
      penalty = 2 * log(length(well_log)) * gfpop::sdDiff(well_log) ^ 2,
      type = "updown"
    ),
    type = "mean"
  )$changepoints,
  jointseg = jointseg::jointSeg(well_log, K = 12)$bestBkp,
  stepR = stepR::stepFit(well_log, alpha = 0.5)$rightEnd
)
results[["well_log"]]
#> $fastcpd
#>  [1]   12  572  704  776 1021 1057 1198 1347 1406 1502 1660 1842 2023 2202 2384
#> [16] 2445 2507 2567 2749 2926 3094 3107 3509 3622 3709 3820 3976
#> 
#> $changepoint
#>  [1]    6 1021 1057 1502 1661 1842 2023 2385 2445 2507 2567 2745
#> 
#> $CptNonPar
#> [1] 1021 1681 2022 2738
#> 
#> $strucchange
#> [1] 1057 1660 2568 3283
#> 
#> $ecp
#>  [1]    1   33  315  435  567  705  803 1026 1058 1348 1503 1662 1843 2024 2203
#> [16] 2386 2446 2508 2569 2745 2780 2922 3073 3136 3252 3465 3500 3554 3623 3710
#> [31] 3821 3868 3990
#> 
#> $breakfast
#>  [1]   33  310  434  572  704  779 1021 1057 1347 1502 1659 1842 2021 2032 2202
#> [16] 2384 2446 2507 2567 2747 2779 2926 3094 3106 3125 3283 3464 3499 3622 3709
#> [31] 3806 3835 3848 3877 3896 3976
#> 
#> $wbs
#>  [1] 2568 1057 1661 1842 2385 2023 1502 2445 2744    6 2507 1021 3709 3820 1347
#> [16]  434 1200 3131 1197  776  704 3509 1402 3976 3622 3104 3094  314 2921 3251
#> [31] 3848 3464 3906 2202 2779   60 3904  566   12 3639 3636    7  706    8 1408
#> 
#> $mosum
#>  [1]    6  434  704 1017 1057 1325 1502 1661 1842 2023 2385 2445 2507 2567 2744
#> [16] 3060 3438 3509 3610 3697 3820 3867 3976
#> 
#> $gfpop
#>  [1]    6    7    8   12  314  434  556  560  704  776 1021 1057 1197 1200 1347
#> [16] 1364 1405 1407 1491 1502 1661 1842 2023 2385 2445 2507 2567 2664 2747 2752
#> [31] 2921 3094 3104 3125 3251 3464 3499 3622 3709 3820 3976 3989
#> 
#> $jointseg
#>  [1]    6 1021 1057 1502 1661 1842 2022 2384 2445 2507 2568 2738
#> 
#> $stepR
#>  [1]    7   14  314  434  566  704  776 1021 1057 1197 1200 1347 1405 1407 1502
#> [16] 1661 1694 1842 2023 2202 2385 2445 2507 2567 2747 2752 2921 3094 3104 3125
#> [31] 3251 3464 3499 3609 3658 3709 3820 3867 3905 3976 3989
package_list <- sort(names(results[["well_log"]]), decreasing = TRUE)
comparison_table <- NULL
for (package_index in seq_along(package_list)) {
  package <- package_list[[package_index]]
  comparison_table <- rbind(
    comparison_table,
    data.frame(
      change_point = results[["well_log"]][[package]],
      package = package,
      y_offset = (package_index - 1) * 1000
    )
  )
}

most_selected <- sort(table(comparison_table$change_point), decreasing = TRUE)
most_selected <- sort(as.numeric(names(most_selected[most_selected >= 4])))
for (i in seq_len(length(most_selected) - 1)) {
  if (most_selected[i + 1] - most_selected[i] < 2) {
    most_selected[i] <- NA
    most_selected[i + 1] <- most_selected[i + 1] - 0.5
  }
}
(most_selected <- most_selected[!is.na(most_selected)])
#>  [1]    6  434  704  776 1021 1057 1347 1502 1661 1842 2023 2202 2385 2445 2507
#> [16] 2567 3094 3464 3622 3709 3820 3976
if (requireNamespace("ggplot2", quietly = TRUE)) {
  ggplot2::ggplot() +
    ggplot2::geom_point(
      data = data.frame(x = seq_along(well_log), y = c(well_log)),
      ggplot2::aes(x = x, y = y)
    ) +
    ggplot2::geom_vline(
      xintercept = most_selected,
      color = "black",
      linetype = "dashed",
      alpha = 0.2
    ) +
    ggplot2::geom_point(
      data = comparison_table,
      ggplot2::aes(x = change_point, y = 50000 + y_offset, color = package),
      shape = 17,
      size = 1.9
    ) +
    ggplot2::geom_hline(
      data = comparison_table,
      ggplot2::aes(yintercept = 50000 + y_offset, color = package),
      linetype = "dashed",
      alpha = 0.1
    ) +
    ggplot2::coord_cartesian(
      ylim = c(50000 - 500, max(well_log) + 1000),
      xlim = c(-200, length(well_log) + 200),
      expand = FALSE
    ) +
    ggplot2::theme(
      panel.background = ggplot2::element_blank(),
      panel.border = ggplot2::element_rect(colour = "black", fill = NA),
      panel.grid.major = ggplot2::element_blank(),
      panel.grid.minor = ggplot2::element_blank()
    ) +
    ggplot2::xlab(NULL) + ggplot2::ylab(NULL)
}
plot of chunk detection-comparison-well-log-plot
plot of chunk detection-comparison-well-log-plot

Time comparison using well_log

results[["microbenchmark"]] <- microbenchmark::microbenchmark(
  fastcpd = fastcpd::fastcpd.mean(well_log, trim = 0.003, r.progress = FALSE),
  changepoint = changepoint::cpt.mean(well_log/mad(well_log), method = "PELT"),
  CptNonPar = CptNonPar::np.mojo(well_log, G = floor(length(well_log) / 6)),
  strucchange =
    strucchange::breakpoints(y ~ 1, data = data.frame(y = well_log)),
  ecp = ecp::e.divisive(matrix(well_log)),
  breakfast = breakfast::breakfast(well_log),
  wbs = wbs::wbs(well_log),
  mosum = mosum::mosum(c(well_log), G = 40),
  fpop = fpop::Fpop(well_log, nrow(well_log)),
  gfpop = gfpop::gfpop(
    data = well_log,
    mygraph = gfpop::graph(
      penalty = 2 * log(length(well_log)) * gfpop::sdDiff(well_log) ^ 2,
      type = "updown"
    ),
    type = "mean"
  ),
  jointseg = jointseg::jointSeg(well_log, K = 12),
  stepR = stepR::stepFit(well_log, alpha = 0.5),
  not = not::not(well_log, contrast = "pcwsConstMean"),
  times = 10
)
results[["microbenchmark"]]
#> Unit: microseconds
#>         expr          min           lq         mean       median           uq
#>      fastcpd 1.432566e+05 1.441787e+05 1.487573e+05 1.487723e+05 1.504056e+05
#>  changepoint 1.201330e+05 1.210716e+05 1.224048e+05 1.219767e+05 1.236914e+05
#>    CptNonPar 7.770534e+06 7.879313e+06 8.154598e+06 8.073969e+06 8.481170e+06
#>  strucchange 3.470085e+07 3.484380e+07 3.527593e+07 3.512008e+07 3.540097e+07
#>          ecp 2.781250e+08 2.822974e+08 2.953586e+08 2.952175e+08 3.042787e+08
#>    breakfast 4.747592e+06 4.783883e+06 4.877119e+06 4.839840e+06 4.971219e+06
#>          wbs 5.636799e+04 5.654363e+04 5.865315e+04 5.779885e+04 5.904718e+04
#>        mosum 8.551370e+02 9.224180e+02 1.420199e+03 9.635615e+02 2.363978e+03
#>         fpop 1.259028e+03 1.290721e+03 3.643412e+03 1.664846e+03 2.532652e+03
#>        gfpop 3.262276e+04 3.289951e+04 3.426555e+04 3.367465e+04 3.490031e+04
#>     jointseg 1.135598e+04 1.192050e+04 1.376823e+04 1.228512e+04 1.305715e+04
#>        stepR 1.755264e+05 1.763980e+05 1.805602e+05 1.778727e+05 1.788489e+05
#>          not 5.285913e+04 5.315892e+04 5.415099e+04 5.393064e+04 5.470171e+04
#>           max neval
#>  1.600519e+05    10
#>  1.254676e+05    10
#>  8.737743e+06    10
#>  3.704112e+07    10
#>  3.143164e+08    10
#>  5.092928e+06    10
#>  6.612705e+04    10
#>  2.852042e+03    10
#>  2.065100e+04    10
#>  3.829789e+04    10
#>  2.500828e+04    10
#>  2.072325e+05    10
#>  5.629927e+04    10
if (requireNamespace("ggplot2", quietly = TRUE) && requireNamespace("microbenchmark", quietly = TRUE)) {
  ggplot2::autoplot(results[["microbenchmark"]])
}
plot of chunk time-comparison-well-log-plot
plot of chunk time-comparison-well-log-plot

Notes

This document is generated by the following code:

R -e 'knitr::knit("vignettes/comparison-packages.Rmd.original", output = "vignettes/comparison-packages.Rmd")' && rm -rf vignettes/comparison-packages && mv -f comparison-packages vignettes

Acknowledgements

  • Dr. Vito Muggeo, author of the segmented package for the tips about the piece-wise constant function.
  • Dr. Rebecca Killick, author of the changepoint package for the tips about the package update.

Appendix: all code snippets

knitr::opts_chunk$set(
  collapse = TRUE, comment = "#>", eval = TRUE, cache = FALSE,
  warning = FALSE, fig.width = 8, fig.height = 5,
  fig.path="comparison-packages/"
)

# devtools::install_github(c("swang87/bcp", "veseshan/DNAcopy", "vrunge/gfpop"))
# install.packages(c("bcp", "changepoint", "cpm", "CptNonPar", "strucchange", "ecp", "breakfast", "wbs", "mcp", "mosum", "not", "fpop", "jointseg", "microbenchmark", "segmented", "stepR", "VARDetect"))

if (requireNamespace("microbenchmark", quietly = TRUE)) {
  library(microbenchmark)
}

if (file.exists("comparison-packages-results.RData")) {
  # Available at https://pcloud.xingchi.li/comparison-packages-results.RData
  load("comparison-packages-results.RData")
} else {
  results <- list()
}
# Univariate mean change
set.seed(1)
p <- 1
mean_data_1 <- rbind(
  mvtnorm::rmvnorm(300, mean = rep(0, p), sigma = diag(100, p)),
  mvtnorm::rmvnorm(400, mean = rep(50, p), sigma = diag(100, p)),
  mvtnorm::rmvnorm(300, mean = rep(2, p), sigma = diag(100, p))
)

plot.ts(mean_data_1)
# Univariate mean and/or variance change
set.seed(1)
p <- 1
mv_data_1 <- rbind(
  mvtnorm::rmvnorm(300, mean = rep(0, p), sigma = diag(1, p)),
  mvtnorm::rmvnorm(400, mean = rep(10, p), sigma = diag(1, p)),
  mvtnorm::rmvnorm(300, mean = rep(0, p), sigma = diag(100, p)),
  mvtnorm::rmvnorm(300, mean = rep(0, p), sigma = diag(1, p)),
  mvtnorm::rmvnorm(400, mean = rep(10, p), sigma = diag(1, p)),
  mvtnorm::rmvnorm(300, mean = rep(10, p), sigma = diag(100, p))
)

plot.ts(mv_data_1)
# Multivariate mean change
set.seed(1)
p <- 3
mean_data_3 <- rbind(
  mvtnorm::rmvnorm(300, mean = rep(0, p), sigma = diag(100, p)),
  mvtnorm::rmvnorm(400, mean = rep(50, p), sigma = diag(100, p)),
  mvtnorm::rmvnorm(300, mean = rep(2, p), sigma = diag(100, p))
)

plot.ts(mean_data_3)
# Multivariate mean and/or variance change
set.seed(1)
p <- 4
mv_data_3 <- rbind(
  mvtnorm::rmvnorm(300, mean = rep(0, p), sigma = diag(1, p)),
  mvtnorm::rmvnorm(400, mean = rep(10, p), sigma = diag(1, p)),
  mvtnorm::rmvnorm(300, mean = rep(0, p), sigma = diag(100, p)),
  mvtnorm::rmvnorm(300, mean = rep(0, p), sigma = diag(1, p)),
  mvtnorm::rmvnorm(400, mean = rep(10, p), sigma = diag(1, p)),
  mvtnorm::rmvnorm(300, mean = rep(10, p), sigma = diag(100, p))
)

plot.ts(mv_data_3)
# Linear regression
set.seed(1)
n <- 300
p <- 4
x <- mvtnorm::rmvnorm(n, rep(0, p), diag(p))
theta_0 <- rbind(c(1, 3.2, -1, 0), c(-1, -0.5, 2.5, -2), c(0.8, 0, 1, 2))
y <- c(
  x[1:100, ] %*% theta_0[1, ] + rnorm(100, 0, 3),
  x[101:200, ] %*% theta_0[2, ] + rnorm(100, 0, 3),
  x[201:n, ] %*% theta_0[3, ] + rnorm(100, 0, 3)
)
lm_data <- data.frame(y = y, x = x)

plot.ts(lm_data)
# Logistic regression
set.seed(1)
n <- 500
p <- 4
x <- mvtnorm::rmvnorm(n, rep(0, p), diag(p))
theta <- rbind(rnorm(p, 0, 1), rnorm(p, 2, 1))
y <- c(
  rbinom(300, 1, 1 / (1 + exp(-x[1:300, ] %*% theta[1, ]))),
  rbinom(200, 1, 1 / (1 + exp(-x[301:n, ] %*% theta[2, ])))
)
binomial_data <- data.frame(y = y, x = x)

plot.ts(binomial_data)
# Poisson regression
set.seed(1)
n <- 1100
p <- 3
x <- mvtnorm::rmvnorm(n, rep(0, p), diag(p))
delta <- rnorm(p)
theta_0 <- c(1, 0.3, -1)
y <- c(
  rpois(500, exp(x[1:500, ] %*% theta_0)),
  rpois(300, exp(x[501:800, ] %*% (theta_0 + delta))),
  rpois(200, exp(x[801:1000, ] %*% theta_0)),
  rpois(100, exp(x[1001:1100, ] %*% (theta_0 - delta)))
)
poisson_data <- data.frame(y = y, x = x)

plot.ts(log(poisson_data$y))
plot.ts(poisson_data[, -1])
# Lasso
set.seed(1)
n <- 480
p_true <- 6
p <- 50
x <- mvtnorm::rmvnorm(n, rep(0, p), diag(p))
theta_0 <- rbind(
  runif(p_true, -5, -2),
  runif(p_true, -3, 3),
  runif(p_true, 2, 5),
  runif(p_true, -5, 5)
)
theta_0 <- cbind(theta_0, matrix(0, ncol = p - p_true, nrow = 4))
y <- c(
  x[1:80, ] %*% theta_0[1, ] + rnorm(80, 0, 1),
  x[81:200, ] %*% theta_0[2, ] + rnorm(120, 0, 1),
  x[201:320, ] %*% theta_0[3, ] + rnorm(120, 0, 1),
  x[321:n, ] %*% theta_0[4, ] + rnorm(160, 0, 1)
)
lasso_data <- data.frame(y = y, x = x)

plot.ts(lasso_data[, seq_len(p_true + 1)])
# AR(3)
set.seed(1)
n <- 1000
x <- rep(0, n + 3)
for (i in 1:600) {
  x[i + 3] <- 0.6 * x[i + 2] - 0.2 * x[i + 1] + 0.1 * x[i] + rnorm(1, 0, 3)
}
for (i in 601:1000) {
  x[i + 3] <- 0.3 * x[i + 2] + 0.4 * x[i + 1] + 0.2 * x[i] + rnorm(1, 0, 3)
}
ar_data <- x[-seq_len(3)]

plot.ts(ar_data)
# GARCH(1, 1)
set.seed(1)
n <- 400
sigma_2 <- rep(1, n + 1)
x <- rep(0, n + 1)
for (i in seq_len(200)) {
  sigma_2[i + 1] <- 20 + 0.5 * x[i]^2 + 0.1 * sigma_2[i]
  x[i + 1] <- rnorm(1, 0, sqrt(sigma_2[i + 1]))
}
for (i in 201:400) {
  sigma_2[i + 1] <- 1 + 0.1 * x[i]^2 + 0.5 * sigma_2[i]
  x[i + 1] <- rnorm(1, 0, sqrt(sigma_2[i + 1]))
}
garch_data <- x[-1]

plot.ts(garch_data)
# VAR(2)
set.seed(1)
n <- 800
p <- 2
theta_1 <- matrix(c(-0.3, 0.6, -0.5, 0.4, 0.2, 0.2, 0.2, -0.2), nrow = p)
theta_2 <- matrix(c(0.3, -0.4, 0.1, -0.5, -0.5, -0.2, -0.5, 0.2), nrow = p)
x <- matrix(0, n + 2, p)
for (i in 1:500) {
  x[i + 2, ] <- theta_1 %*% c(x[i + 1, ], x[i, ]) + rnorm(p, 0, 1)
}
for (i in 501:n) {
  x[i + 2, ] <- theta_2 %*% c(x[i + 1, ], x[i, ]) + rnorm(p, 0, 1)
}
var_data <- x[-seq_len(2), ]

plot.ts(var_data)
results[["mean_data_1"]][["fastcpd"]] <-
  fastcpd::fastcpd.mean(mean_data_1, r.progress = FALSE)@cp_set
results[["mean_data_1"]][["fastcpd"]]
testthat::expect_equal(results[["mean_data_1"]][["fastcpd"]], c(300, 700), tolerance = 0.2)
results[["mean_data_1"]][["CptNonPar"]] <-
  CptNonPar::np.mojo(mean_data_1, G = floor(length(mean_data_1) / 6))$cpts
results[["mean_data_1"]][["CptNonPar"]]
testthat::expect_equal(results[["mean_data_1"]][["CptNonPar"]], c(300, 700), tolerance = 0.2)
results[["mean_data_1"]][["strucchange"]] <-
  strucchange::breakpoints(y ~ 1, data = data.frame(y = mean_data_1))$breakpoints
results[["mean_data_1"]][["strucchange"]]
testthat::expect_equal(results[["mean_data_1"]][["strucchange"]], c(300, 700), tolerance = 0.2)
results[["mean_data_1"]][["ecp"]] <- ecp::e.divisive(mean_data_1)$estimates
results[["mean_data_1"]][["ecp"]]
testthat::expect_equal(results[["mean_data_1"]][["ecp"]], c(1, 301, 701, 1001), tolerance = 0.2)
results[["mean_data_1"]][["changepoint"]] <-
  changepoint::cpts(changepoint::cpt.mean(c(mean_data_1)/mad(mean_data_1), method = "PELT"))
results[["mean_data_1"]][["changepoint"]]
testthat::expect_equal(results[["mean_data_1"]][["changepoint"]], c(300, 700), tolerance = 0.2)
results[["mean_data_1"]][["breakfast"]] <-
  breakfast::breakfast(mean_data_1)$cptmodel.list[[6]]$cpts
results[["mean_data_1"]][["breakfast"]]
testthat::expect_equal(results[["mean_data_1"]][["breakfast"]], c(300, 700), tolerance = 0.2)
results[["mean_data_1"]][["wbs"]] <-
  wbs::wbs(mean_data_1)$cpt$cpt.ic$mbic.penalty
results[["mean_data_1"]][["wbs"]]
testthat::expect_equal(results[["mean_data_1"]][["wbs"]], c(300, 700), tolerance = 0.2)
results[["mean_data_1"]][["mosum"]] <-
  mosum::mosum(c(mean_data_1), G = 40)$cpts.info$cpts
results[["mean_data_1"]][["mosum"]]
testthat::expect_equal(results[["mean_data_1"]][["mosum"]], c(300, 700), tolerance = 0.2)
results[["mean_data_1"]][["fpop"]] <-
  fpop::Fpop(mean_data_1, nrow(mean_data_1))$t.est
results[["mean_data_1"]][["fpop"]]
testthat::expect_equal(results[["mean_data_1"]][["fpop"]], c(300, 700, 1000), tolerance = 0.2)
results[["mean_data_1"]][["gfpop"]] <-
  gfpop::gfpop(
    data = mean_data_1,
    mygraph = gfpop::graph(
      penalty = 2 * log(nrow(mean_data_1)) * gfpop::sdDiff(mean_data_1) ^ 2,
      type = "updown"
    ),
    type = "mean"
  )$changepoints
results[["mean_data_1"]][["gfpop"]]
testthat::expect_equal(results[["mean_data_1"]][["gfpop"]], c(300, 700, 1000), tolerance = 0.2)
results[["mean_data_1"]][["jointseg"]] <-
  jointseg::jointSeg(mean_data_1, K = 2)$bestBkp
results[["mean_data_1"]][["jointseg"]]
testthat::expect_equal(results[["mean_data_1"]][["jointseg"]], c(300, 700), tolerance = 0.2)
results[["mean_data_1"]][["stepR"]] <-
  stepR::stepFit(mean_data_1, alpha = 0.5)$rightEnd
results[["mean_data_1"]][["stepR"]]
testthat::expect_equal(results[["mean_data_1"]][["stepR"]], c(300, 700, 1000), tolerance = 0.2)
results[["mean_data_1"]][["cpm"]] <-
  cpm::processStream(mean_data_1, cpmType = "Student")$changePoints
results[["mean_data_1"]][["cpm"]]
testthat::expect_equal(results[["mean_data_1"]][["cpm"]], c(299, 699), tolerance = 0.2)
results[["mean_data_1"]][["segmented"]] <-
  segmented::stepmented(
    as.numeric(mean_data_1), npsi = 2
  )$psi[, "Est."]
results[["mean_data_1"]][["segmented"]]
testthat::expect_equal(results[["mean_data_1"]][["segmented"]], c(298, 699), ignore_attr = TRUE, tolerance = 0.2)
results[["mean_data_1"]][["mcp"]] <- mcp::mcp(
  list(y ~ 1, ~ 1, ~ 1),
  data = data.frame(y = mean_data_1, x = seq_len(nrow(mean_data_1))),
  par_x = "x"
)
if (requireNamespace("mcp", quietly = TRUE)) {
  plot(results[["mean_data_1"]][["mcp"]])
}
results[["mean_data_1"]][["not"]] <-
  not::not(mean_data_1, contrast = "pcwsConstMean")
if (requireNamespace("not", quietly = TRUE)) {
  plot(results[["mean_data_1"]][["not"]])
}
results[["mean_data_1"]][["bcp"]] <- bcp::bcp(mean_data_1)
if (requireNamespace("bcp", quietly = TRUE)) {
  plot(results[["mean_data_1"]][["bcp"]])
}
results[["mv_data_1"]][["fastcpd"]] <-
  fastcpd::fastcpd.mv(mv_data_1, r.progress = FALSE)@cp_set
results[["mv_data_1"]][["fastcpd"]]
testthat::expect_equal(results[["mv_data_1"]][["fastcpd"]], c(300, 700, 1001, 1300, 1700), tolerance = 0.2)
results[["mv_data_1"]][["ecp"]] <- ecp::e.divisive(mv_data_1)$estimates
results[["mv_data_1"]][["ecp"]]
testthat::expect_equal(results[["mv_data_1"]][["ecp"]], c(1, 301, 701, 1001, 1301, 1701, 2001), tolerance = 0.2)
results[["mv_data_1"]][["changepoint"]] <-
  changepoint::cpts(changepoint::cpt.meanvar(c(mv_data_1), method = "PELT"))
results[["mv_data_1"]][["changepoint"]]
testthat::expect_equal(results[["mv_data_1"]][["changepoint"]], c(300, 700, 1000, 1300, 1700), tolerance = 0.2)
results[["mv_data_1"]][["CptNonPar"]] <-
  CptNonPar::np.mojo(mv_data_1, G = floor(length(mv_data_1) / 6))$cpts
results[["mv_data_1"]][["CptNonPar"]]
testthat::expect_equal(results[["mv_data_1"]][["CptNonPar"]], c(333, 700, 1300), tolerance = 0.2)
results[["mv_data_1"]][["cpm"]] <-
  cpm::processStream(mv_data_1, cpmType = "GLR")$changePoints
results[["mv_data_1"]][["cpm"]]
testthat::expect_equal(results[["mv_data_1"]][["cpm"]], c(293, 300, 403, 408, 618, 621, 696, 1000, 1021, 1024, 1293, 1300, 1417, 1693, 1700, 1981), tolerance = 0.2)
results[["mv_data_1"]][["mcp"]] <- mcp::mcp(
  list(y ~ 1, ~ 1, ~ 1, ~ 1, ~ 1, ~ 1),
  data = data.frame(y = mv_data_1, x = seq_len(nrow(mv_data_1))),
  par_x = "x"
)
if (requireNamespace("mcp", quietly = TRUE)) {
  plot(results[["mv_data_1"]][["mcp"]])
}
results[["mv_data_1"]][["not"]] <-
  not::not(mv_data_1, contrast = "pcwsConstMeanVar")
if (requireNamespace("not", quietly = TRUE)) {
  plot(results[["mv_data_1"]][["not"]])
}
results[["mean_data_3"]][["fastcpd"]] <-
  fastcpd::fastcpd.mean(mean_data_3, r.progress = FALSE)@cp_set
results[["mean_data_3"]][["fastcpd"]]
testthat::expect_equal(results[["mean_data_3"]][["fastcpd"]], c(300, 700), tolerance = 0.2)
results[["mean_data_3"]][["CptNonPar"]] <-
  CptNonPar::np.mojo(mean_data_3, G = floor(nrow(mean_data_3) / 6))$cpts
results[["mean_data_3"]][["CptNonPar"]]
testthat::expect_equal(results[["mean_data_3"]][["CptNonPar"]], c(300, 700), tolerance = 0.2)
results[["mean_data_3"]][["jointseg"]] <-
  jointseg::jointSeg(mean_data_3, K = 2)$bestBkp
results[["mean_data_3"]][["jointseg"]]
testthat::expect_equal(results[["mean_data_3"]][["jointseg"]], c(300, 700), tolerance = 0.2)
results[["mean_data_3"]][["strucchange"]] <-
  strucchange::breakpoints(
    cbind(y.1, y.2, y.3) ~ 1, data = data.frame(y = mean_data_3)
  )$breakpoints
results[["mean_data_3"]][["strucchange"]]
testthat::expect_equal(results[["mean_data_3"]][["strucchange"]], c(300, 700), tolerance = 0.2)
results[["mean_data_3"]][["ecp"]] <- ecp::e.divisive(mean_data_3)$estimates
results[["mean_data_3"]][["ecp"]]
testthat::expect_equal(results[["mean_data_3"]][["ecp"]], c(1, 301, 701, 1001), tolerance = 0.2)
results[["mean_data_3"]][["bcp"]] <- bcp::bcp(mean_data_3)
if (requireNamespace("bcp", quietly = TRUE)) {
  plot(results[["mean_data_3"]][["bcp"]])
}
results[["mv_data_3"]][["fastcpd"]] <-
  fastcpd::fastcpd.mv(mv_data_3, r.progress = FALSE)@cp_set
results[["mv_data_3"]][["fastcpd"]]
testthat::expect_equal(results[["mv_data_3"]][["fastcpd"]], c(300, 700, 1000, 1300, 1700), tolerance = 0.2)
results[["mv_data_3"]][["ecp"]] <- ecp::e.divisive(mv_data_3)$estimates
results[["mv_data_3"]][["ecp"]]
testthat::expect_equal(results[["mv_data_3"]][["ecp"]], c(1, 301, 701, 1001, 1301, 1701, 2001), tolerance = 0.2)
results[["lm_data"]][["fastcpd"]] <-
  fastcpd::fastcpd.lm(lm_data, r.progress = FALSE)@cp_set
results[["lm_data"]][["fastcpd"]]
testthat::expect_equal(results[["lm_data"]][["fastcpd"]], c(97, 201), tolerance = 0.2)
results[["lm_data"]][["strucchange"]] <-
  strucchange::breakpoints(y ~ . - 1, data = lm_data)$breakpoints
results[["lm_data"]][["strucchange"]]
testthat::expect_equal(results[["lm_data"]][["strucchange"]], c(100, 201), tolerance = 0.2)
results[["lm_data"]][["segmented"]] <-
  segmented::segmented(
    lm(
      y ~ . - 1, data.frame(y = lm_data$y, x = lm_data[, -1], index = seq_len(nrow(lm_data)))
    ),
    seg.Z = ~ index
  )$psi[, "Est."]
results[["lm_data"]][["segmented"]]
testthat::expect_equal(results[["lm_data"]][["segmented"]], c(233), ignore_attr = TRUE, tolerance = 0.2)
results[["binomial_data"]][["fastcpd"]] <-
  fastcpd::fastcpd.binomial(binomial_data, r.progress = FALSE)@cp_set
results[["binomial_data"]][["fastcpd"]]
testthat::expect_equal(results[["binomial_data"]][["fastcpd"]], 302, tolerance = 0.2)
results[["binomial_data"]][["strucchange"]] <-
  strucchange::breakpoints(y ~ . - 1, data = binomial_data)$breakpoints
results[["binomial_data"]][["strucchange"]]
testthat::expect_equal(results[["binomial_data"]][["strucchange"]], 297, tolerance = 0.2)
results[["poisson_data"]][["fastcpd"]] <-
  fastcpd::fastcpd.poisson(poisson_data, r.progress = FALSE)@cp_set
results[["poisson_data"]][["fastcpd"]]
testthat::expect_equal(results[["poisson_data"]][["fastcpd"]], c(498, 805, 1003), tolerance = 0.2)
results[["poisson_data"]][["strucchange"]] <-
  strucchange::breakpoints(y ~ . - 1, data = poisson_data)$breakpoints
results[["poisson_data"]][["strucchange"]]
testthat::expect_equal(results[["poisson_data"]][["strucchange"]], 935, tolerance = 0.2)
results[["lasso_data"]][["fastcpd"]] <-
  fastcpd::fastcpd.lasso(lasso_data, r.progress = FALSE)@cp_set
results[["lasso_data"]][["fastcpd"]]
testthat::expect_equal(results[["lasso_data"]][["fastcpd"]], c(79, 199, 320), tolerance = 0.2)
results[["lasso_data"]][["strucchange"]] <-
  strucchange::breakpoints(y ~ . - 1, data = lasso_data)$breakpoints
results[["lasso_data"]][["strucchange"]]
testthat::expect_equal(results[["lasso_data"]][["strucchange"]], c(80, 200, 321), tolerance = 0.2)
results[["ar_data"]][["fastcpd"]] <-
  fastcpd::fastcpd.ar(ar_data, 3, r.progress = FALSE)@cp_set
results[["ar_data"]][["fastcpd"]]
testthat::expect_equal(results[["ar_data"]][["fastcpd"]], c(614), tolerance = 0.2)
results[["ar_data"]][["CptNonPar"]] <-
  CptNonPar::np.mojo(ar_data, G = floor(length(ar_data) / 6))$cpts
results[["ar_data"]][["CptNonPar"]]
testthat::expect_equal(results[["ar_data"]][["CptNonPar"]], numeric(0), tolerance = 0.2)
results[["ar_data"]][["segmented"]] <-
  segmented::segmented(
    lm(
      y ~ x + 1, data.frame(y = ar_data, x = seq_along(ar_data))
    ),
    seg.Z = ~ x
  )$psi[, "Est."]
results[["ar_data"]][["segmented"]]
testthat::expect_equal(results[["ar_data"]][["segmented"]], c(690), ignore_attr = TRUE, tolerance = 0.2)
results[["ar_data"]][["mcp"]] <-
  mcp::mcp(
    list(y ~ 1 + ar(3), ~ 0 + ar(3)),
    data = data.frame(y = ar_data, x = seq_along(ar_data)),
    par_x = "x"
  )
if (requireNamespace("mcp", quietly = TRUE)) {
  plot(results[["ar_data"]][["mcp"]])
}
results[["garch_data"]][["fastcpd"]] <-
  fastcpd::fastcpd.garch(garch_data, c(1, 1), r.progress = FALSE)@cp_set
results[["garch_data"]][["fastcpd"]]
testthat::expect_equal(results[["garch_data"]][["fastcpd"]], c(205), tolerance = 0.2)
results[["garch_data"]][["CptNonPar"]] <-
  CptNonPar::np.mojo(garch_data, G = floor(length(garch_data) / 6))$cpts
results[["garch_data"]][["CptNonPar"]]
testthat::expect_equal(results[["garch_data"]][["CptNonPar"]], c(206), tolerance = 0.2)
results[["garch_data"]][["strucchange"]] <-
  strucchange::breakpoints(x ~ 1, data = data.frame(x = garch_data))$breakpoints
results[["garch_data"]][["strucchange"]]
testthat::expect_equal(results[["garch_data"]][["strucchange"]], NA, tolerance = 0.2)
results[["var_data"]][["fastcpd"]] <-
  fastcpd::fastcpd.var(var_data, 2, r.progress = FALSE)@cp_set
results[["var_data"]][["fastcpd"]]
testthat::expect_equal(results[["var_data"]][["fastcpd"]], c(500), tolerance = 0.2)
results[["var_data"]][["VARDetect"]] <- VARDetect::tbss(var_data)$cp
results[["var_data"]][["VARDetect"]]
testthat::expect_equal(results[["var_data"]][["VARDetect"]], c(501), tolerance = 0.2)
well_log <- fastcpd::well_log
well_log <- well_log[well_log > 1e5]

results[["well_log"]] <- list(
  fastcpd = fastcpd::fastcpd.mean(well_log, trim = 0.003)@cp_set,
  changepoint = changepoint::cpts(changepoint::cpt.mean(well_log/mad(well_log), method = "PELT")),
  CptNonPar =
    CptNonPar::np.mojo(well_log, G = floor(length(well_log) / 6))$cpts,
  strucchange = strucchange::breakpoints(
    y ~ 1, data = data.frame(y = well_log)
  )$breakpoints,
  ecp = ecp::e.divisive(matrix(well_log))$estimates,
  breakfast = breakfast::breakfast(well_log)$cptmodel.list[[6]]$cpts,
  wbs = wbs::wbs(well_log)$cpt$cpt.ic$mbic.penalty,
  mosum = mosum::mosum(c(well_log), G = 40)$cpts.info$cpts,
  # fpop = fpop::Fpop(well_log, length(well_log))$t.est,  # meaningless
  gfpop = gfpop::gfpop(
    data = well_log,
    mygraph = gfpop::graph(
      penalty = 2 * log(length(well_log)) * gfpop::sdDiff(well_log) ^ 2,
      type = "updown"
    ),
    type = "mean"
  )$changepoints,
  jointseg = jointseg::jointSeg(well_log, K = 12)$bestBkp,
  stepR = stepR::stepFit(well_log, alpha = 0.5)$rightEnd
)
results[["well_log"]]
package_list <- sort(names(results[["well_log"]]), decreasing = TRUE)
comparison_table <- NULL
for (package_index in seq_along(package_list)) {
  package <- package_list[[package_index]]
  comparison_table <- rbind(
    comparison_table,
    data.frame(
      change_point = results[["well_log"]][[package]],
      package = package,
      y_offset = (package_index - 1) * 1000
    )
  )
}

most_selected <- sort(table(comparison_table$change_point), decreasing = TRUE)
most_selected <- sort(as.numeric(names(most_selected[most_selected >= 4])))
for (i in seq_len(length(most_selected) - 1)) {
  if (most_selected[i + 1] - most_selected[i] < 2) {
    most_selected[i] <- NA
    most_selected[i + 1] <- most_selected[i + 1] - 0.5
  }
}
(most_selected <- most_selected[!is.na(most_selected)])
if (requireNamespace("ggplot2", quietly = TRUE)) {
  ggplot2::ggplot() +
    ggplot2::geom_point(
      data = data.frame(x = seq_along(well_log), y = c(well_log)),
      ggplot2::aes(x = x, y = y)
    ) +
    ggplot2::geom_vline(
      xintercept = most_selected,
      color = "black",
      linetype = "dashed",
      alpha = 0.2
    ) +
    ggplot2::geom_point(
      data = comparison_table,
      ggplot2::aes(x = change_point, y = 50000 + y_offset, color = package),
      shape = 17,
      size = 1.9
    ) +
    ggplot2::geom_hline(
      data = comparison_table,
      ggplot2::aes(yintercept = 50000 + y_offset, color = package),
      linetype = "dashed",
      alpha = 0.1
    ) +
    ggplot2::coord_cartesian(
      ylim = c(50000 - 500, max(well_log) + 1000),
      xlim = c(-200, length(well_log) + 200),
      expand = FALSE
    ) +
    ggplot2::theme(
      panel.background = ggplot2::element_blank(),
      panel.border = ggplot2::element_rect(colour = "black", fill = NA),
      panel.grid.major = ggplot2::element_blank(),
      panel.grid.minor = ggplot2::element_blank()
    ) +
    ggplot2::xlab(NULL) + ggplot2::ylab(NULL)
}
results[["microbenchmark"]] <- microbenchmark::microbenchmark(
  fastcpd = fastcpd::fastcpd.mean(well_log, trim = 0.003, r.progress = FALSE),
  changepoint = changepoint::cpt.mean(well_log/mad(well_log), method = "PELT"),
  CptNonPar = CptNonPar::np.mojo(well_log, G = floor(length(well_log) / 6)),
  strucchange =
    strucchange::breakpoints(y ~ 1, data = data.frame(y = well_log)),
  ecp = ecp::e.divisive(matrix(well_log)),
  breakfast = breakfast::breakfast(well_log),
  wbs = wbs::wbs(well_log),
  mosum = mosum::mosum(c(well_log), G = 40),
  fpop = fpop::Fpop(well_log, nrow(well_log)),
  gfpop = gfpop::gfpop(
    data = well_log,
    mygraph = gfpop::graph(
      penalty = 2 * log(length(well_log)) * gfpop::sdDiff(well_log) ^ 2,
      type = "updown"
    ),
    type = "mean"
  ),
  jointseg = jointseg::jointSeg(well_log, K = 12),
  stepR = stepR::stepFit(well_log, alpha = 0.5),
  not = not::not(well_log, contrast = "pcwsConstMean"),
  times = 10
)
results[["microbenchmark"]]
if (requireNamespace("ggplot2", quietly = TRUE) && requireNamespace("microbenchmark", quietly = TRUE)) {
  ggplot2::autoplot(results[["microbenchmark"]])
}
if (!file.exists("comparison-packages-results.RData")) {
  save(results, file = "comparison-packages-results.RData")
}