GSCUSUM charts

Lena Hubig

2019-10-02

Overview

This vignette describes how to use GSCUSUM charts, an extension to standard CUSUM charts for binary performance data grouped in samples of unequal size.

Data preparation

Following information have to be available:

These information are collected in a numeric matrix.

Non-risk-adjusted example data:

head(gscusum_example_data)
#> # A tibble: 6 x 4
#>       t y      year block_identifier
#>   <int> <lgl> <dbl>            <int>
#> 1     1 FALSE  2016                1
#> 2     2 FALSE  2016                1
#> 3     3 FALSE  2016                1
#> 4     4 FALSE  2016                1
#> 5     5 FALSE  2016                1
#> 6     6 FALSE  2016                1

Risk-adjusted example data:

head(ragscusum_example_data)
#> # A tibble: 6 x 5
#>       t y       score  year block_identifier
#>   <int> <lgl>   <dbl> <dbl>            <int>
#> 1     1 FALSE 0.00829  2016                1
#> 2     2 FALSE 0.00237  2016                1
#> 3     3 FALSE 0.00926  2016                1
#> 4     4 FALSE 0.00394  2016                1
#> 5     5 FALSE 0.0241   2016                1
#> 6     6 FALSE 0.00557  2016                1

Non-risk-adjusted GSCUSUM chart

Like in the standard CUSUM chart (see vignette for CUSUM charts), parameters have to be estimated in order to set up the charts,

failure_probability <- mean(gscusum_example_data$y[gscusum_example_data$year == 2016])

n_patients <- nrow(gscusum_example_data[gscusum_example_data$year == 2016,])

and control limits have to be estimated:


cusum_limit <- cusum_limit_sim(failure_probability,
                            n_patients,
                            odds_multiplier = 2,
                            n_simulation = 1000,
                            alpha = 0.05,
                            seed = 2046)


print(cusum_limit)
#> [1] 4.91128

GSCUSUM charts are constructed on performance data from 2017.


gscusum_data <- gscusum_example_data[gscusum_example_data$year == 2017,]

input_outcomes <- matrix(c(gscusum_data$y, gscusum_data$block_identifier), ncol = 2)


gcs <- gscusum(input_outcomes = input_outcomes,
              failure_probability = failure_probability,
              odds_multiplier = 2,
              limit = cusum_limit,
              max_num_shuffles = 1000,
              quantiles = c(0.,0.05,0.25,0.5,0.75,.95,1))

This function returns the signal probability, average CUSUM values and quantiles of the CUSUM distribution specified in the function call.

gcs <- as.data.frame(gcs)
names(gcs) <- c("sig_prob", "avg", "min", "q05", "q25", "median","q75","q95","max")
head(gcs)
#>   sig_prob        avg min q05 q25     median       q75       q95       max
#> 1        0 0.08151062   0   0   0 0.00000000 0.0000000 0.4910278 0.4910278
#> 2        0 0.13900012   0   0   0 0.00000000 0.2889099 0.4910278 0.9820557
#> 3        0 0.15609100   0   0   0 0.00000000 0.2889099 0.7799377 0.9820557
#> 4        0 0.17359918   0   0   0 0.00000000 0.2889099 0.7799377 0.9820557
#> 5        0 0.19099323   0   0   0 0.00000000 0.3757019 0.7799377 0.9820557
#> 6        0 0.19285629   0   0   0 0.08679199 0.3757019 0.5778198 0.9820557
gcs$block_identifier <- input_outcomes[,2]
gcs$t <- seq(1,nrow(gcs))

col1 <- "#f7ba02"
col2 <- "#4063bc"
palette <- rep(c(col1, col2), 300)

ggplot() +
  geom_line(data = gcs, aes(x = t, y = sig_prob)) +
  geom_point(data = gcs, aes(x = t, y = sig_prob, col = as.factor(block_identifier) )) +
  scale_color_manual(guide=FALSE, values = palette) +
  scale_y_continuous(name = "Signal Probability", limits = c(0,1))+
  theme_bw()

The complete run can be plotted with:

nblock <- max(gcs$block_identifier)

p <- ggplot(gcs)

for ( i in 1: nblock){
  dblock <- gcs[gcs$block_identifier == i,]
  col <- ifelse(i %% 2 == 0,col2,col1)
  dblock_before <- dblock[1,]
  dblock_before$t <- dblock_before$t - .5
  dblock_after <- dblock[nrow(dblock),]
  dblock_after$t <- dblock_after$t + .5
  dblock_n <- rbind(dblock, dblock_before, dblock_after)

  p <- p +
    geom_ribbon(data = dblock_n, aes(x = t, ymin = min, ymax = max), fill = col, alpha = 0.2) +
    geom_ribbon(data = dblock_n, aes(x = t, ymin = q05, ymax = q95), fill = col, alpha = 0.2) +
    geom_ribbon(data = dblock_n, aes(x = t, ymin = q25, ymax = q75), fill = col, alpha = 0.2)

}

p <- p +
  geom_line(data = gcs, aes(x = t, y = median)) +
  geom_point(data = gcs, aes( x = t, y = median, fill = as.factor(block_identifier)), size=2, pch = 21)+
  geom_hline(aes(yintercept = cusum_limit), linetype = 2) +
  theme_bw() +
  scale_y_continuous(name = "CUSUM Distribution") +
  scale_x_continuous(name = "Sequence of Observations") +
  scale_fill_manual(values = palette, guide = FALSE) +
  labs(subtitle = "GSCUSUM")
p

Risk-adjusted GSCUSUM chart

Like in the standard RA-CUSUM chart (see vignette for CUSUM charts), parameters are estimated in order to set up the charts,

n_patients <- nrow(ragscusum_example_data[ragscusum_example_data$year == 2016,])

and control limits are set:


racusum_limit <- racusum_limit_sim(patient_risks = ragscusum_example_data$score[ragscusum_example_data$year == 2016],
                            odds_multiplier = 2,
                            n_simulation = 1000,
                            alpha = 0.05,
                            seed = 2046)


print(racusum_limit)
#> [1] 2.403465

GSCUSUM charts are constructed on performance data from 2017.


ragscusum_data <- ragscusum_example_data[ragscusum_example_data$year == 2017,]

input_outcomes <- matrix(c(gscusum_data$y, gscusum_data$block_identifier), ncol = 2)


gcs <- gscusum(input_outcomes = input_outcomes,
              failure_probability = failure_probability,
              odds_multiplier = 2,
              limit = cusum_limit,
              max_num_shuffles = 1000,
              quantiles = c(0.,0.05,0.25,0.5,0.75,.95,1))

This function returns the signal probability, average CUSUM values and quantiles of the CUSUM distribution specified in the function call.

gcs <- as.data.frame(gcs)
names(gcs) <- c("sig_prob", "avg", "min", "q05", "q25", "median","q75","q95","max")
head(gcs)
#>   sig_prob        avg min q05 q25     median       q75       q95       max
#> 1        0 0.07905548   0   0   0 0.00000000 0.0000000 0.4910278 0.4910278
#> 2        0 0.12831284   0   0   0 0.00000000 0.2889099 0.4910278 0.9820557
#> 3        0 0.16331018   0   0   0 0.00000000 0.2889099 0.7799377 0.9820557
#> 4        0 0.17408902   0   0   0 0.00000000 0.2889099 0.5778198 0.9820557
#> 5        0 0.19183142   0   0   0 0.00000000 0.3757019 0.7799377 0.9820557
#> 6        0 0.19487628   0   0   0 0.08679199 0.3757019 0.5778198 0.9820557
gcs$block_identifier <- input_outcomes[,2]
gcs$t <- seq(1,nrow(gcs))

col1 <- "#f7ba02"
col2 <- "#4063bc"
palette <- rep(c(col1, col2), 300)

ggplot() +
  geom_line(data = gcs, aes(x = t, y = sig_prob)) +
  geom_point(data = gcs, aes(x = t, y = sig_prob, col = as.factor(block_identifier) )) +
  scale_color_manual(guide=FALSE, values = palette) +
  scale_y_continuous(name = "Signal Probability", limit = c(0,1))+
  theme_bw()

The complete run can be plotted with:

nblock <- max(gcs$block_identifier)

p <- ggplot(gcs)

for ( i in 1: nblock){
  dblock <- gcs[gcs$block_identifier == i,]
  col <- ifelse(i %% 2 == 0,col2,col1)
  dblock_before <- dblock[1,]
  dblock_before$t <- dblock_before$t - .5
  dblock_after <- dblock[nrow(dblock),]
  dblock_after$t <- dblock_after$t + .5
  dblock_n <- rbind(dblock, dblock_before, dblock_after)

  p <- p +
    geom_ribbon(data = dblock_n, aes(x = t, ymin = min, ymax = max), fill = col, alpha = 0.2) +
    geom_ribbon(data = dblock_n, aes(x = t, ymin = q05, ymax = q95), fill = col, alpha = 0.2) +
    geom_ribbon(data = dblock_n, aes(x = t, ymin = q25, ymax = q75), fill = col, alpha = 0.2)

}

p <- p +
  geom_line(data = gcs, aes(x = t, y = median)) +
  geom_point(data = gcs, aes( x = t, y = median, fill = as.factor(block_identifier)), size=2, pch = 21)+
  geom_hline(aes(yintercept = cusum_limit), linetype = 2) +
  theme_bw() +
  scale_y_continuous(name = "RACUSUM Distribution") +
  scale_x_continuous(name = "Sequence of Observations") +
  scale_fill_manual(values = palette, guide = FALSE) +
  labs(subtitle = "RA-GSCUSUM")
p