This vignette describes how to use GSCUSUM charts, an extension to standard CUSUM charts for binary performance data grouped in samples of unequal size.
Following information have to be available:
dplyr::group_indices()
)These information are collected in a numeric matrix.
Non-risk-adjusted example data:
head(gscusum_example_data)
#> # A tibble: 6 × 4
#> t y year block_identifier
#> <int> <lgl> <dbl> <int>
#> 1 1 FALSE 2016 1
#> 2 2 FALSE 2016 1
#> 3 3 FALSE 2016 1
#> 4 4 FALSE 2016 1
#> 5 5 FALSE 2016 1
#> 6 6 FALSE 2016 1
Risk-adjusted example data:
Like in the standard CUSUM chart (see vignette for CUSUM charts), parameters have to be estimated in order to set up the charts,
failure_probability <- mean(gscusum_example_data$y[gscusum_example_data$year == 2016])
n_patients <- nrow(gscusum_example_data[gscusum_example_data$year == 2016,])
and control limits have to be estimated:
cusum_limit <- cusum_limit_sim(failure_probability,
n_patients,
odds_multiplier = 2,
n_simulation = 1000,
alpha = 0.05,
seed = 2046)
print(cusum_limit)
#> [1] 4.91128
GSCUSUM charts are constructed on performance data from 2017.
gscusum_data <- gscusum_example_data[gscusum_example_data$year == 2017,]
input_outcomes <- matrix(c(gscusum_data$y, gscusum_data$block_identifier), ncol = 2)
gcs <- gscusum(input_outcomes = input_outcomes,
failure_probability = failure_probability,
odds_multiplier = 2,
limit = cusum_limit,
max_num_shuffles = 1000,
quantiles = c(0.,0.05,0.25,0.5,0.75,.95,1))
This function returns the signal probability, average CUSUM values and quantiles of the CUSUM distribution specified in the function call.
gcs <- as.data.frame(gcs)
names(gcs) <- c("sig_prob", "avg", "min", "q05", "q25", "median","q75","q95","max")
head(gcs)
#> sig_prob avg min q05 q25 median q75 q95 max
#> 1 0 0.08347473 0 0 0 0.00000000 0.0000000 0.4910278 0.4910278
#> 2 0 0.12666736 0 0 0 0.00000000 0.2889099 0.4910278 0.9820557
#> 3 0 0.14930341 0 0 0 0.00000000 0.2889099 0.4910278 0.9820557
#> 4 0 0.16452646 0 0 0 0.00000000 0.2889099 0.5778198 0.9820557
#> 5 0 0.17577017 0 0 0 0.00000000 0.3757019 0.5778198 0.9820557
#> 6 0 0.19900305 0 0 0 0.08679199 0.3757019 0.7799377 0.9820557
gcs$block_identifier <- input_outcomes[,2]
gcs$t <- seq(1,nrow(gcs))
col1 <- "#f7ba02"
col2 <- "#4063bc"
palette <- rep(c(col1, col2), 300)
ggplot() +
geom_line(data = gcs, aes(x = t, y = sig_prob)) +
geom_point(data = gcs, aes(x = t, y = sig_prob, col = as.factor(block_identifier) )) +
scale_color_manual(guide=FALSE, values = palette) +
scale_y_continuous(name = "Signal Probability", limits = c(0,1))+
theme_bw()
#> Warning: The `guide` argument in `scale_*()` cannot be `FALSE`. This was deprecated in
#> ggplot2 3.3.4.
#> ℹ Please use "none" instead.
#> This warning is displayed once every 8 hours.
#> Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
#> generated.
The complete run can be plotted with:
nblock <- max(gcs$block_identifier)
p <- ggplot(gcs)
for ( i in 1: nblock){
dblock <- gcs[gcs$block_identifier == i,]
col <- ifelse(i %% 2 == 0,col2,col1)
dblock_before <- dblock[1,]
dblock_before$t <- dblock_before$t - .5
dblock_after <- dblock[nrow(dblock),]
dblock_after$t <- dblock_after$t + .5
dblock_n <- rbind(dblock, dblock_before, dblock_after)
p <- p +
geom_ribbon(data = dblock_n, aes(x = t, ymin = min, ymax = max), fill = col, alpha = 0.2) +
geom_ribbon(data = dblock_n, aes(x = t, ymin = q05, ymax = q95), fill = col, alpha = 0.2) +
geom_ribbon(data = dblock_n, aes(x = t, ymin = q25, ymax = q75), fill = col, alpha = 0.2)
}
p <- p +
geom_line(data = gcs, aes(x = t, y = median)) +
geom_point(data = gcs, aes( x = t, y = median, fill = as.factor(block_identifier)), size=2, pch = 21)+
geom_hline(aes(yintercept = cusum_limit), linetype = 2) +
theme_bw() +
scale_y_continuous(name = "CUSUM Distribution") +
scale_x_continuous(name = "Sequence of Observations") +
scale_fill_manual(values = palette, guide = FALSE) +
labs(subtitle = "GSCUSUM")
p
Like in the standard RA-CUSUM chart (see vignette for CUSUM charts), parameters are estimated in order to set up the charts,
and control limits are set:
racusum_limit <- racusum_limit_sim(patient_risks = ragscusum_example_data$score[ragscusum_example_data$year == 2016],
odds_multiplier = 2,
n_simulation = 1000,
alpha = 0.05,
seed = 2046)
print(racusum_limit)
#> [1] 2.403465
GSCUSUM charts are constructed on performance data from 2017.
ragscusum_data <- ragscusum_example_data[ragscusum_example_data$year == 2017,]
input_outcomes <- matrix(c(gscusum_data$y, gscusum_data$block_identifier), ncol = 2)
gcs <- gscusum(input_outcomes = input_outcomes,
failure_probability = failure_probability,
odds_multiplier = 2,
limit = cusum_limit,
max_num_shuffles = 1000,
quantiles = c(0.,0.05,0.25,0.5,0.75,.95,1))
This function returns the signal probability, average CUSUM values and quantiles of the CUSUM distribution specified in the function call.
gcs <- as.data.frame(gcs)
names(gcs) <- c("sig_prob", "avg", "min", "q05", "q25", "median","q75","q95","max")
head(gcs)
#> sig_prob avg min q05 q25 median q75 q95 max
#> 1 0 0.08003754 0 0 0 0.00000000 0.0000000 0.4910278 0.4910278
#> 2 0 0.13302100 0 0 0 0.00000000 0.2889099 0.4910278 0.9820557
#> 3 0 0.15557025 0 0 0 0.00000000 0.2889099 0.4910278 0.9820557
#> 4 0 0.17836322 0 0 0 0.00000000 0.2889099 0.7799377 0.9820557
#> 5 0 0.18437802 0 0 0 0.00000000 0.3757019 0.7799377 0.9820557
#> 6 0 0.19227609 0 0 0 0.08679199 0.3757019 0.5778198 0.9820557
gcs$block_identifier <- input_outcomes[,2]
gcs$t <- seq(1,nrow(gcs))
col1 <- "#f7ba02"
col2 <- "#4063bc"
palette <- rep(c(col1, col2), 300)
ggplot() +
geom_line(data = gcs, aes(x = t, y = sig_prob)) +
geom_point(data = gcs, aes(x = t, y = sig_prob, col = as.factor(block_identifier) )) +
scale_color_manual(guide=FALSE, values = palette) +
scale_y_continuous(name = "Signal Probability", limit = c(0,1))+
theme_bw()
The complete run can be plotted with:
nblock <- max(gcs$block_identifier)
p <- ggplot(gcs)
for ( i in 1: nblock){
dblock <- gcs[gcs$block_identifier == i,]
col <- ifelse(i %% 2 == 0,col2,col1)
dblock_before <- dblock[1,]
dblock_before$t <- dblock_before$t - .5
dblock_after <- dblock[nrow(dblock),]
dblock_after$t <- dblock_after$t + .5
dblock_n <- rbind(dblock, dblock_before, dblock_after)
p <- p +
geom_ribbon(data = dblock_n, aes(x = t, ymin = min, ymax = max), fill = col, alpha = 0.2) +
geom_ribbon(data = dblock_n, aes(x = t, ymin = q05, ymax = q95), fill = col, alpha = 0.2) +
geom_ribbon(data = dblock_n, aes(x = t, ymin = q25, ymax = q75), fill = col, alpha = 0.2)
}
p <- p +
geom_line(data = gcs, aes(x = t, y = median)) +
geom_point(data = gcs, aes( x = t, y = median, fill = as.factor(block_identifier)), size=2, pch = 21)+
geom_hline(aes(yintercept = cusum_limit), linetype = 2) +
theme_bw() +
scale_y_continuous(name = "RACUSUM Distribution") +
scale_x_continuous(name = "Sequence of Observations") +
scale_fill_manual(values = palette, guide = FALSE) +
labs(subtitle = "RA-GSCUSUM")
p