Adding Analyses to a Plan

Richard Aubrey White

2022-06-02

Broad technical terms
Object Description
argset A named list containing a set of arguments.
analysis

These are the fundamental units that are scheduled in plnr:

  • 1 argset
  • 1 (action) function that takes two arguments
    1. data (named list)
    2. argset (named list)
plan

This is the overarching “scheduler”:

  • 1 data pull
  • 1 list of analyses
Different types of plans
Plan Type Description
Single-function plan The same action function applied multiple times, each time with a different argset, to the same datasets.
Multi-function plan Different action functions applied to the same datasets.
Plan Examples
Plan Type Example
Single-function plan Multiple strata (e.g. locations, age groups) that you need to apply the same function to (e.g. outbreak detection, trend detection, graphing).
Single-function plan Multiple variables (e.g. multiple outcomes, multiple exposures) that you need to apply the same statistical methods to (e.g. regression models, correlation plots).
Multi-function plan Creating the output for a report (e.g. multiple different tables and graphs).

Single-function plan

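This approach is generally used when you need to apply the same function to multiple strata (e.g. locations, age groups) or to multiple variables (e.g. multiple outcomes, multiple exposures).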

When we apply the same function multiple times, it is preferable to add the argsets first, and then apply the action function to all of the argsets just before running the analyses.

Multiple strata

In this example, we loop through multiple geographical locations and apply a graphing function to the data from each of these geographical locations.

library(ggplot2)
library(data.table)
library(magrittr)

# Start from an empty plan
p <- plnr::Plan$new()

# Data function: hands back the Covid-19 case dataset exported by plnr
data_fn <- function() {
  plnr::nor_covid19_cases_by_time_location
}

# Register a data source under the name "covid19_cases".
# `fn_name` refers to the data function above by its name (a string).
p$add_data(name = "covid19_cases", fn_name = "data_fn")

# Inspect the data held by the plan
p$get_data()
## $covid19_cases
##        granularity_time granularity_geo country_iso3 location_code border   age
##     1:              day          county          nor  county_nor03   2020 total
##     2:              day          county          nor  county_nor03   2020 total
##     3:              day          county          nor  county_nor03   2020 total
##     4:              day          county          nor  county_nor03   2020 total
##     5:              day          county          nor  county_nor03   2020 total
##    ---                                                                         
## 11024:          isoweek          nation          nor    nation_nor   2020 total
## 11025:          isoweek          nation          nor    nation_nor   2020 total
## 11026:          isoweek          nation          nor    nation_nor   2020 total
## 11027:          isoweek          nation          nor    nation_nor   2020 total
## 11028:          isoweek          nation          nor    nation_nor   2020 total
##          sex isoyear isoweek isoyearweek    season seasonweek calyear calmonth
##     1: total    2020       8     2020-08 2019/2020         31    2020        2
##     2: total    2020       8     2020-08 2019/2020         31    2020        2
##     3: total    2020       8     2020-08 2019/2020         31    2020        2
##     4: total    2020       9     2020-09 2019/2020         32    2020        2
##     5: total    2020       9     2020-09 2019/2020         32    2020        2
##    ---                                                                        
## 11024: total    2022      14     2022-14 2021/2022         37      NA       NA
## 11025: total    2022      15     2022-15 2021/2022         38      NA       NA
## 11026: total    2022      16     2022-16 2021/2022         39      NA       NA
## 11027: total    2022      17     2022-17 2021/2022         40      NA       NA
## 11028: total    2022      18     2022-18 2021/2022         41      NA       NA
##        calyearmonth       date covid19_cases_testdate_n
##     1:     2020-M02 2020-02-21                        0
##     2:     2020-M02 2020-02-22                        0
##     3:     2020-M02 2020-02-23                        0
##     4:     2020-M02 2020-02-24                        0
##     5:     2020-M02 2020-02-25                        0
##    ---                                                 
## 11024:         <NA> 2022-04-10                     6888
## 11025:         <NA> 2022-04-17                     3635
## 11026:         <NA> 2022-04-24                     3764
## 11027:         <NA> 2022-05-01                     2243
## 11028:         <NA> 2022-05-08                      502
##        covid19_cases_testdate_pr100000
##     1:                        0.000000
##     2:                        0.000000
##     3:                        0.000000
##     4:                        0.000000
##     5:                        0.000000
##    ---                                
## 11024:                      126.961423
## 11025:                       67.001274
## 11026:                       69.379036
## 11027:                       41.343564
## 11028:                        9.252996
## 
## $hash
## $hash$current
## [1] "cbb5d442160f26df4c2d9a4fec794fd7"
## 
## $hash$current_elements
## $hash$current_elements$covid19_cases
## [1] "7f1b0a581386e75e907bffd94938a3a7"
# Collect the distinct location codes present in the data, then print them
location_codes <- unique(p$get_data()$covid19_cases$location_code)
print(location_codes)
##  [1] "county_nor03" "county_nor11" "county_nor15" "county_nor18" "county_nor30"
##  [6] "county_nor34" "county_nor38" "county_nor42" "county_nor46" "county_nor50"
## [11] "county_nor54" "nation_nor"
# Build the argsets and add them to the plan.
# plnr::expand_list() is given the values for each argument; as the table
# printed below shows, this yields one argset per location code, each with
# granularity_time = "isoweek".
p$add_argset_from_list(
  plnr::expand_list(
    location_code = location_codes,
    granularity_time = "isoweek"
  )
)
# List the argsets now held by the plan (one row per analysis)
p$get_argsets_as_dt()
##                            name_analysis index_analysis location_code
##  1: 66e8e88b-4bf4-4158-a43c-ba894a7f36bd              1  county_nor03
##  2: e755cfa7-7f80-4a6a-b418-f1810d556711              2  county_nor11
##  3: 98b1e86c-08a8-4e3e-a9dc-05ed0932ac11              3  county_nor15
##  4: 5315e66d-d229-4d0e-963b-7c2ec9263ca2              4  county_nor18
##  5: d4fb2115-095f-4346-8113-6bd4a807cbff              5  county_nor30
##  6: fe2cf3bf-f0e2-45bb-ab29-060f151ca647              6  county_nor34
##  7: 654741ea-3192-4617-b71f-d94f95c9ec6f              7  county_nor38
##  8: 871f9501-c9c4-4004-96e7-c7e4d1fc534b              8  county_nor42
##  9: 0938f565-1a4a-46c3-9ab9-34cbfb0bc66e              9  county_nor46
## 10: ca15c576-e187-4297-a64d-a5fffb48c30e             10  county_nor50
## 11: fea0c004-e483-4834-b7f7-1efa6c5362c1             11  county_nor54
## 12: f85ac0c1-baa2-4df3-bb52-b89350388925             12    nation_nor
##     granularity_time
##  1:          isoweek
##  2:          isoweek
##  3:          isoweek
##  4:          isoweek
##  5:          isoweek
##  6:          isoweek
##  7:          isoweek
##  8:          isoweek
##  9:          isoweek
## 10:          isoweek
## 11:          isoweek
## 12:          isoweek
# We can then add a simple analysis that returns a figure:

# The action function is what gets applied to each argset.
# It always takes two arguments:
#   data   - named list of datasets (here it reads data$covid19_cases)
#   argset - named list of arguments (here: location_code, granularity_time)
# It returns a ggplot line chart of the number of cases over time,
# titled with the location code.
action_fn <- function(data, argset) {
  # Debugging aid: presumably TRUE when the body is executed by hand rather
  # than through the plan; in that case use the plan's data and first argset.
  if (plnr::is_run_directly()) {
    data <- p$get_data()
    argset <- p$get_argset(1)
  }

  # Keep only the rows for this argset's time granularity and location
  plot_data <- data$covid19_cases[
    granularity_time == argset$granularity_time &
      location_code == argset$location_code
  ]

  ggplot(plot_data, aes(x = date, y = covid19_cases_testdate_n)) +
    geom_line() +
    labs(title = argset$location_code)
}

# Attach the action function (referenced by name) to all argsets in the plan
p$apply_action_fn_to_all_argsets(fn_name = "action_fn")

# Run a single analysis, selected here by its index
p$run_one(1)

# Run all analyses and keep the results; individual results are then
# retrieved by position
q <- p$run_all()
q[[1]]

q[[2]]

Multiple variables

In this example, we loop through multiple variable combinations (1. raw numbers of Covid-19 cases vs Covid-19 cases per 100 000 population, and 2. aggregating over isoweek vs day) and apply a graphing function to the data according to each of these variable combinations.

library(ggplot2)
library(data.table)
library(magrittr)

# Create a fresh, empty plan
p <- plnr::Plan$new()

# Data function: the Covid-19 case dataset exported by plnr,
# restricted to the rows with location_code "nation_nor"
data_fn <- function() {
  plnr::nor_covid19_cases_by_time_location[location_code == "nation_nor"]
}

# Register a data source under the name "covid19_cases".
# `fn_name` refers to the data function above by its name (a string).
p$add_data(name = "covid19_cases", fn_name = "data_fn")

# Inspect the data held by the plan
p$get_data()
## $covid19_cases
##      granularity_time granularity_geo country_iso3 location_code border   age
##   1:              day          nation          nor    nation_nor   2020 total
##   2:              day          nation          nor    nation_nor   2020 total
##   3:              day          nation          nor    nation_nor   2020 total
##   4:              day          nation          nor    nation_nor   2020 total
##   5:              day          nation          nor    nation_nor   2020 total
##  ---                                                                         
## 915:          isoweek          nation          nor    nation_nor   2020 total
## 916:          isoweek          nation          nor    nation_nor   2020 total
## 917:          isoweek          nation          nor    nation_nor   2020 total
## 918:          isoweek          nation          nor    nation_nor   2020 total
## 919:          isoweek          nation          nor    nation_nor   2020 total
##        sex isoyear isoweek isoyearweek    season seasonweek calyear calmonth
##   1: total    2020       8     2020-08 2019/2020         31    2020        2
##   2: total    2020       8     2020-08 2019/2020         31    2020        2
##   3: total    2020       8     2020-08 2019/2020         31    2020        2
##   4: total    2020       9     2020-09 2019/2020         32    2020        2
##   5: total    2020       9     2020-09 2019/2020         32    2020        2
##  ---                                                                        
## 915: total    2022      14     2022-14 2021/2022         37      NA       NA
## 916: total    2022      15     2022-15 2021/2022         38      NA       NA
## 917: total    2022      16     2022-16 2021/2022         39      NA       NA
## 918: total    2022      17     2022-17 2021/2022         40      NA       NA
## 919: total    2022      18     2022-18 2021/2022         41      NA       NA
##      calyearmonth       date covid19_cases_testdate_n
##   1:     2020-M02 2020-02-21                        1
##   2:     2020-M02 2020-02-22                        0
##   3:     2020-M02 2020-02-23                        0
##   4:     2020-M02 2020-02-24                        0
##   5:     2020-M02 2020-02-25                        0
##  ---                                                 
## 915:         <NA> 2022-04-10                     6888
## 916:         <NA> 2022-04-17                     3635
## 917:         <NA> 2022-04-24                     3764
## 918:         <NA> 2022-05-01                     2243
## 919:         <NA> 2022-05-08                      502
##      covid19_cases_testdate_pr100000
##   1:                      0.01863037
##   2:                      0.00000000
##   3:                      0.00000000
##   4:                      0.00000000
##   5:                      0.00000000
##  ---                                
## 915:                    126.96142312
## 916:                     67.00127367
## 917:                     69.37903551
## 918:                     41.34356447
## 919:                      9.25299570
## 
## $hash
## $hash$current
## [1] "0ad573d37712f0a8ab666846d1b721a1"
## 
## $hash$current_elements
## $hash$current_elements$covid19_cases
## [1] "07cc51795bccaf2afebe48619ce87227"
# Build the argsets and add them to the plan.
# Two variables and two time granularities are supplied; as the table printed
# below shows, this yields one argset per combination (2 x 2 = 4 argsets).
p$add_argset_from_list(
  plnr::expand_list(
    variable = c("covid19_cases_testdate_n", "covid19_cases_testdate_pr100000"),
    granularity_time = c("isoweek","day")
  )
)
# List the argsets now held by the plan (one row per analysis)
p$get_argsets_as_dt()
##                           name_analysis index_analysis
## 1: 8e6c0d10-356c-456b-9c58-6e28f47164fc              1
## 2: f521b0a2-b352-45e6-8d5a-d7ee34a88bc4              2
## 3: f06a6b06-29a9-4d05-b66e-772709a4eaa0              3
## 4: 46ed0a2f-e549-4ade-aaa7-77ab12344dc1              4
##                           variable granularity_time
## 1:        covid19_cases_testdate_n          isoweek
## 2: covid19_cases_testdate_pr100000          isoweek
## 3:        covid19_cases_testdate_n              day
## 4: covid19_cases_testdate_pr100000              day
# We can then add a simple analysis that returns a figure:

# The action function is what gets applied to each argset.
# It always takes two arguments:
#   data   - named list of datasets (here it reads data$covid19_cases)
#   argset - named list of arguments (here: variable, granularity_time)
# It returns a ggplot line chart of the column named by argset$variable
# over time, titled with the time granularity.
action_fn <- function(data, argset) {
  # Debugging aid: presumably TRUE when the body is executed by hand rather
  # than through the plan; in that case use the plan's data and first argset.
  if (plnr::is_run_directly()) {
    data <- p$get_data()
    argset <- p$get_argset(1)
  }

  # Keep only the rows for this argset's time granularity
  pd <- data$covid19_cases[
    granularity_time == argset$granularity_time
  ]

  # aes_string() is deprecated in ggplot2; `.data[[name]]` is the supported
  # way to map a column whose name is held in a string.
  q <- ggplot(pd, aes(x = date, y = .data[[argset$variable]]))
  q <- q + geom_line()
  # Set the y label explicitly so the axis title remains the column name
  q <- q + labs(title = argset$granularity_time, y = argset$variable)
  q
}

# Attach the action function (referenced by name) to all argsets in the plan
p$apply_action_fn_to_all_argsets(fn_name = "action_fn")

# Run each of the four analyses in turn, selected here by index
# (the index_analysis column of the argset table above runs from 1 to 4)
p$run_one(1)

p$run_one(2)

p$run_one(3)

p$run_one(4)

Multi-function plan

This approach is generally used when you are creating the output for a report, and you need multiple different tables and graphs.

library(ggplot2)
library(data.table)
library(magrittr)

# Initialise a new, empty plan
p <- plnr::Plan$new()

# Data function: returns the full Covid-19 case dataset exported by plnr
data_fn <- function() {
  plnr::nor_covid19_cases_by_time_location
}

# Register a data source under the name "covid19_cases".
# `fn_name` refers to the data function above by its name (a string).
p$add_data(name = "covid19_cases", fn_name = "data_fn")

# Inspect the data held by the plan
p$get_data()
## $covid19_cases
##        granularity_time granularity_geo country_iso3 location_code border   age
##     1:              day          county          nor  county_nor03   2020 total
##     2:              day          county          nor  county_nor03   2020 total
##     3:              day          county          nor  county_nor03   2020 total
##     4:              day          county          nor  county_nor03   2020 total
##     5:              day          county          nor  county_nor03   2020 total
##    ---                                                                         
## 11024:          isoweek          nation          nor    nation_nor   2020 total
## 11025:          isoweek          nation          nor    nation_nor   2020 total
## 11026:          isoweek          nation          nor    nation_nor   2020 total
## 11027:          isoweek          nation          nor    nation_nor   2020 total
## 11028:          isoweek          nation          nor    nation_nor   2020 total
##          sex isoyear isoweek isoyearweek    season seasonweek calyear calmonth
##     1: total    2020       8     2020-08 2019/2020         31    2020        2
##     2: total    2020       8     2020-08 2019/2020         31    2020        2
##     3: total    2020       8     2020-08 2019/2020         31    2020        2
##     4: total    2020       9     2020-09 2019/2020         32    2020        2
##     5: total    2020       9     2020-09 2019/2020         32    2020        2
##    ---                                                                        
## 11024: total    2022      14     2022-14 2021/2022         37      NA       NA
## 11025: total    2022      15     2022-15 2021/2022         38      NA       NA
## 11026: total    2022      16     2022-16 2021/2022         39      NA       NA
## 11027: total    2022      17     2022-17 2021/2022         40      NA       NA
## 11028: total    2022      18     2022-18 2021/2022         41      NA       NA
##        calyearmonth       date covid19_cases_testdate_n
##     1:     2020-M02 2020-02-21                        0
##     2:     2020-M02 2020-02-22                        0
##     3:     2020-M02 2020-02-23                        0
##     4:     2020-M02 2020-02-24                        0
##     5:     2020-M02 2020-02-25                        0
##    ---                                                 
## 11024:         <NA> 2022-04-10                     6888
## 11025:         <NA> 2022-04-17                     3635
## 11026:         <NA> 2022-04-24                     3764
## 11027:         <NA> 2022-05-01                     2243
## 11028:         <NA> 2022-05-08                      502
##        covid19_cases_testdate_pr100000
##     1:                        0.000000
##     2:                        0.000000
##     3:                        0.000000
##     4:                        0.000000
##     5:                        0.000000
##    ---                                
## 11024:                      126.961423
## 11025:                       67.001274
## 11026:                       69.379036
## 11027:                       41.343564
## 11028:                        9.252996
## 
## $hash
## $hash$current
## [1] "0306cac791d5f990073167e17ed15f9b"
## 
## $hash$current_elements
## $hash$current_elements$covid19_cases
## [1] "bad75e8e213b3de3eee2b4ecbf157f46"
# Figure 1 gets its own dedicated action function.
# The function is referenced by name (a string); it is defined further down
# in this script, after this call.
p$add_analysis(
  name = "figure_1",
  fn_name = "figure_1"
)

# Action function for figure 1.
#   data   - named list of datasets (here it reads data$covid19_cases)
#   argset - named list of arguments (not read by the plotting code below)
# Returns a ggplot line chart of weekly cases per 100 000 population,
# with one panel per location code.
figure_1 <- function(data, argset) {
  # Debugging aid: presumably TRUE when the body is executed by hand rather
  # than through the plan; in that case use the plan's data and this
  # analysis' argset.
  if (plnr::is_run_directly()) {
    data <- p$get_data()
    argset <- p$get_argset("figure_1")
  }

  # Weekly rows only
  pd <- data$covid19_cases[
    granularity_time == "isoweek"
  ]

  # Column names are fixed here, so plain aes() replaces the deprecated
  # aes_string(); this also matches the style of the single-function example.
  q <- ggplot(pd, aes(x = date, y = covid19_cases_testdate_pr100000))
  q <- q + geom_line()
  q <- q + facet_wrap(~location_code)
  q <- q + labs(title = "Weekly covid-19 cases per 100 000 population")
  q
}

# Figures 2 and 3 share one action function; only the location differs.
# The extra named argument (location_code) is presumably what the action
# function reads as argset$location_code.
p$add_analysis(
  name = "figure_2",
  fn_name = "plot_epicurve_by_location",
  location_code = "nation_nor"
)

# Same action function as figure 2, applied to a different location
p$add_analysis(
  name = "figure_3",
  fn_name = "plot_epicurve_by_location",
  location_code = "county_nor03"
)

# Shared action function for figures 2 and 3.
#   data   - named list of datasets (here it reads data$covid19_cases)
#   argset - named list of arguments (here: location_code)
# Returns a ggplot line chart of weekly case numbers for the location in
# argset$location_code, titled with that location code.
plot_epicurve_by_location <- function(data, argset) {
  # Debugging aid: presumably TRUE when the body is executed by hand rather
  # than through the plan. Exactly one argset is loaded; the original loaded
  # "figure_2" and immediately overwrote it with "figure_3", so "figure_3"
  # is kept as the effective choice. Swap the lines to debug figure 2.
  if (plnr::is_run_directly()) {
    data <- p$get_data()
    # argset <- p$get_argset("figure_2")
    argset <- p$get_argset("figure_3")
  }

  # Weekly rows for the requested location only
  pd <- data$covid19_cases[
    granularity_time == "isoweek" &
      location_code == argset$location_code
  ]

  # Column names are fixed here, so plain aes() replaces the deprecated
  # aes_string().
  q <- ggplot(pd, aes(x = date, y = covid19_cases_testdate_n))
  q <- q + geom_line()
  q <- q + labs(title = argset$location_code)
  q
}

# Run each analysis individually, selected by the name it was added under
p$run_one("figure_1")

p$run_one("figure_2")

p$run_one("figure_3")