#Introduction The data used in the following examples comes from the heart disease dataset found at the UCI Machine Learning Repository.

#Load packages
require(tidyverse); require(cheese)
## Loading required package: tidyverse
## ── Attaching packages ────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.1.0     ✔ purrr   0.2.5
## ✔ tibble  1.4.2     ✔ dplyr   0.7.8
## ✔ tidyr   0.8.2     ✔ stringr 1.3.1
## ✔ readr   1.1.1     ✔ forcats 0.3.0
## ── Conflicts ───────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## Loading required package: cheese
#Print the dataset; it is a tibble, so only the first ten rows are displayed
heart_disease
## # A tibble: 303 x 9
##      Age Sex   ChestPain    BP Cholesterol BloodSugar MaximumHR
##    <dbl> <fct> <fct>     <dbl>       <dbl> <lgl>          <dbl>
##  1    63 Male  Typical …   145         233 TRUE             150
##  2    67 Male  Asymptom…   160         286 FALSE            108
##  3    67 Male  Asymptom…   120         229 FALSE            129
##  4    37 Male  Non-angi…   130         250 FALSE            187
##  5    41 Fema… Atypical…   130         204 FALSE            172
##  6    56 Male  Atypical…   120         236 FALSE            178
##  7    62 Fema… Asymptom…   140         268 FALSE            160
##  8    57 Fema… Asymptom…   120         354 FALSE            163
##  9    63 Male  Asymptom…   130         254 FALSE            147
## 10    53 Male  Asymptom…   140         203 TRUE             155
## # ... with 293 more rows, and 2 more variables:
## #   ExerciseInducedAngina <fct>, HeartDisease <fct>

#Creating a univariate table The function univariate_table allows flexible summarization and presentation of variables in a dataset. Arguments are available to customize the statistics that are computed, association metrics, stratification variables, variable labels, etc. The format argument allows the user to render any table as “html”, “latex”, “markdown”, “pandoc”, or “none” (i.e., return a data.frame). The following examples are rendered in “html” (default):

##Default By default, the median (iqr), count (%), and the number of distinct values are displayed for numeric, categorical, and 'other' data types, respectively.

#Summarize every variable using the default settings
univariate_table(heart_disease)
Variable Level Summary
Age 56 (13)
Sex Female 97 (32.01%)
Male 206 (67.99%)
ChestPain Typical angina 23 (7.59%)
Atypical angina 50 (16.5%)
Non-anginal pain 86 (28.38%)
Asymptomatic 144 (47.52%)
BP 130 (20)
Cholesterol 241 (64)
BloodSugar 2
MaximumHR 153 (32.5)
ExerciseInducedAngina No 204 (67.33%)
Yes 99 (32.67%)
HeartDisease No 164 (54.13%)
Yes 139 (45.87%)

##Stratification variables Any number of stratification variables can be added to the table, in the rows and/or the columns, using an intuitive formula interface in which the left-hand side gives the row strata and the right-hand side gives the column strata. The argument add_n can be set to TRUE if row and/or column strata are present, in which case the sample size of each stratum will be displayed.

#Stratify the columns by a single variable
univariate_table(heart_disease, strata = ~HeartDisease)
Variable Level No Yes
Age 52 (14.25) 58 (10)
Sex Female 72 (43.9%) 25 (17.99%)
Male 92 (56.1%) 114 (82.01%)
ChestPain Typical angina 16 (9.76%) 7 (5.04%)
Atypical angina 41 (25%) 9 (6.47%)
Non-anginal pain 68 (41.46%) 18 (12.95%)
Asymptomatic 39 (23.78%) 105 (75.54%)
BP 130 (20) 130 (25)
Cholesterol 234.5 (58.5) 249 (66)
BloodSugar 2 2
MaximumHR 161 (23.25) 142 (31.5)
ExerciseInducedAngina No 141 (85.98%) 63 (45.32%)
Yes 23 (14.02%) 76 (54.68%)
#Same column strata, with the sample size of each stratum added to its header
univariate_table(heart_disease, strata = ~HeartDisease, add_n = TRUE)
Variable Level No (N=164) Yes (N=139)
Age 52 (14.25) 58 (10)
Sex Female 72 (43.9%) 25 (17.99%)
Male 92 (56.1%) 114 (82.01%)
ChestPain Typical angina 16 (9.76%) 7 (5.04%)
Atypical angina 41 (25%) 9 (6.47%)
Non-anginal pain 68 (41.46%) 18 (12.95%)
Asymptomatic 39 (23.78%) 105 (75.54%)
BP 130 (20) 130 (25)
Cholesterol 234.5 (58.5) 249 (66)
BloodSugar 2 2
MaximumHR 161 (23.25) 142 (31.5)
ExerciseInducedAngina No 141 (85.98%) 63 (45.32%)
Yes 23 (14.02%) 76 (54.68%)
#Stratify the columns by every combination of two variables
univariate_table(heart_disease, strata = ~Sex + HeartDisease)
Variable Level Female/No Male/No Female/Yes Male/Yes
Age 54 (17.25) 52 (13) 60 (5) 57.5 (10)
ChestPain Typical angina 4 (5.56%) 12 (13.04%) 0 (0%) 7 (6.14%)
Atypical angina 16 (22.22%) 25 (27.17%) 2 (8%) 7 (6.14%)
Non-anginal pain 34 (47.22%) 34 (36.96%) 1 (4%) 17 (14.91%)
Asymptomatic 18 (25%) 21 (22.83%) 22 (88%) 83 (72.81%)
BP 130 (20.5) 130 (20) 140 (28) 130 (20)
Cholesterol 249 (78.75) 229.5 (44.25) 268 (71) 247.5 (70)
BloodSugar 2 2 2 2
MaximumHR 159 (20.5) 163 (25.75) 146 (24) 141 (31)
ExerciseInducedAngina No 64 (88.89%) 77 (83.7%) 11 (44%) 52 (45.61%)
Yes 8 (11.11%) 15 (16.3%) 14 (56%) 62 (54.39%)
#Same two-variable column strata, with sample sizes and a custom separator
#between the strata levels in the column headers
univariate_table(
    heart_disease,
    strata = ~Sex + HeartDisease,
    add_n = TRUE,
    strata_sep = "|"
)
Variable Level Female|No (N=72) Male|No (N=92) Female|Yes (N=25) Male|Yes (N=114)
Age 54 (17.25) 52 (13) 60 (5) 57.5 (10)
ChestPain Typical angina 4 (5.56%) 12 (13.04%) 0 (0%) 7 (6.14%)
Atypical angina 16 (22.22%) 25 (27.17%) 2 (8%) 7 (6.14%)
Non-anginal pain 34 (47.22%) 34 (36.96%) 1 (4%) 17 (14.91%)
Asymptomatic 18 (25%) 21 (22.83%) 22 (88%) 83 (72.81%)
BP 130 (20.5) 130 (20) 140 (28) 130 (20)
Cholesterol 249 (78.75) 229.5 (44.25) 268 (71) 247.5 (70)
BloodSugar 2 2 2 2
MaximumHR 159 (20.5) 163 (25.75) 146 (24) 141 (31)
ExerciseInducedAngina No 64 (88.89%) 77 (83.7%) 11 (44%) 52 (45.61%)
Yes 8 (11.11%) 15 (16.3%) 14 (56%) 62 (54.39%)
#Stratify the rows by a single variable (left-hand side of the formula)
univariate_table(heart_disease, strata = Sex ~ 1)
Variable Level Summary
Female
Age 57 (13)
ChestPain Typical angina 4 (4.12%)
Atypical angina 18 (18.56%)
Non-anginal pain 35 (36.08%)
Asymptomatic 40 (41.24%)
BP 132 (20)
Cholesterol 254 (87)
BloodSugar 2
MaximumHR 157 (23)
ExerciseInducedAngina No 75 (77.32%)
Yes 22 (22.68%)
HeartDisease No 72 (74.23%)
Yes 25 (25.77%)
Male
Age 54.5 (12.75)
ChestPain Typical angina 19 (9.22%)
Atypical angina 32 (15.53%)
Non-anginal pain 51 (24.76%)
Asymptomatic 104 (50.49%)
BP 130 (20)
Cholesterol 235 (59.75)
BloodSugar 2
MaximumHR 150.5 (35.5)
ExerciseInducedAngina No 129 (62.62%)
Yes 77 (37.38%)
HeartDisease No 92 (44.66%)
Yes 114 (55.34%)
#Same row strata, with the sample size of each stratum added to its label
univariate_table(heart_disease, strata = Sex ~ 1, add_n = TRUE)
Variable Level Summary
Female (N=97)
Age 57 (13)
ChestPain Typical angina 4 (4.12%)
Atypical angina 18 (18.56%)
Non-anginal pain 35 (36.08%)
Asymptomatic 40 (41.24%)
BP 132 (20)
Cholesterol 254 (87)
BloodSugar 2
MaximumHR 157 (23)
ExerciseInducedAngina No 75 (77.32%)
Yes 22 (22.68%)
HeartDisease No 72 (74.23%)
Yes 25 (25.77%)
Male (N=206)
Age 54.5 (12.75)
ChestPain Typical angina 19 (9.22%)
Atypical angina 32 (15.53%)
Non-anginal pain 51 (24.76%)
Asymptomatic 104 (50.49%)
BP 130 (20)
Cholesterol 235 (59.75)
BloodSugar 2
MaximumHR 150.5 (35.5)
ExerciseInducedAngina No 129 (62.62%)
Yes 77 (37.38%)
HeartDisease No 92 (44.66%)
Yes 114 (55.34%)
#Row strata (Sex) and column strata (HeartDisease) in the same table
univariate_table(heart_disease, strata = Sex ~ HeartDisease)
Variable Level No Yes
Female
Age 54 (17.25) 60 (5)
ChestPain Typical angina 4 (5.56%) 0 (0%)
Atypical angina 16 (22.22%) 2 (8%)
Non-anginal pain 34 (47.22%) 1 (4%)
Asymptomatic 18 (25%) 22 (88%)
BP 130 (20.5) 140 (28)
Cholesterol 249 (78.75) 268 (71)
BloodSugar 2 2
MaximumHR 159 (20.5) 146 (24)
ExerciseInducedAngina No 64 (88.89%) 11 (44%)
Yes 8 (11.11%) 14 (56%)
Male
Age 52 (13) 57.5 (10)
ChestPain Typical angina 12 (13.04%) 7 (6.14%)
Atypical angina 25 (27.17%) 7 (6.14%)
Non-anginal pain 34 (36.96%) 17 (14.91%)
Asymptomatic 21 (22.83%) 83 (72.81%)
BP 130 (20) 130 (20)
Cholesterol 229.5 (44.25) 247.5 (70)
BloodSugar 2 2
MaximumHR 163 (25.75) 141 (31)
ExerciseInducedAngina No 77 (83.7%) 52 (45.61%)
Yes 15 (16.3%) 62 (54.39%)
#Same row and column strata, with sample sizes shown for both
univariate_table(heart_disease, strata = Sex ~ HeartDisease, add_n = TRUE)
Variable Level No (N=164) Yes (N=139)
Female (N=97)
Age 54 (17.25) 60 (5)
ChestPain Typical angina 4 (5.56%) 0 (0%)
Atypical angina 16 (22.22%) 2 (8%)
Non-anginal pain 34 (47.22%) 1 (4%)
Asymptomatic 18 (25%) 22 (88%)
BP 130 (20.5) 140 (28)
Cholesterol 249 (78.75) 268 (71)
BloodSugar 2 2
MaximumHR 159 (20.5) 146 (24)
ExerciseInducedAngina No 64 (88.89%) 11 (44%)
Yes 8 (11.11%) 14 (56%)
Male (N=206)
Age 52 (13) 57.5 (10)
ChestPain Typical angina 12 (13.04%) 7 (6.14%)
Atypical angina 25 (27.17%) 7 (6.14%)
Non-anginal pain 34 (36.96%) 17 (14.91%)
Asymptomatic 21 (22.83%) 83 (72.81%)
BP 130 (20) 130 (20)
Cholesterol 229.5 (44.25) 247.5 (70)
BloodSugar 2 2
MaximumHR 163 (25.75) 141 (31)
ExerciseInducedAngina No 77 (83.7%) 52 (45.61%)
Yes 15 (16.3%) 62 (54.39%)

##Association statistics A typical practice is to add association metrics to a summary table, such as p-values, intended to evaluate the strength of relationship between a stratification column and the variables. The argument associations takes a list of functions, evaluates them for each variable/strata combination, and places the result in the table. Metrics are only computed if column strata are present. If there are also row strata present, the metrics will be computed within each group.

#Association function to add to the table: a p-value comparing one variable
#across the levels of the strata variable
pvalues <- function(y, x) {

    #y is the strata variable; x is each other variable in turn

    #Anything that is not numeric gets Fisher's exact test on the cross-tabulation
    if(!is(x, "numeric")) {
        return(fisher.test(table(x, y))$p.value)
    }

    #Numeric variables get a Wilcoxon test of x between the groups of y
    wilcox.test(x~y)$p.value

}

#Pass the function to the table; it is evaluated for each variable against
#the column strata and the result is placed in an extra column
univariate_table(
    heart_disease,
    strata = ~HeartDisease,
    add_n = TRUE,
    associations = pvalues
)
Variable Level No (N=164) Yes (N=139) 1
Age 52 (14.25) 58 (10) 3.91727270669452e-05
Sex Female 72 (43.9%) 25 (17.99%) 1.25894390015198e-06
Male 92 (56.1%) 114 (82.01%)
ChestPain Typical angina 16 (9.76%) 7 (5.04%) 1.08625481176028e-18
Atypical angina 41 (25%) 9 (6.47%)
Non-anginal pain 68 (41.46%) 18 (12.95%)
Asymptomatic 39 (23.78%) 105 (75.54%)
BP 130 (20) 130 (25) 0.0259721071731445
Cholesterol 234.5 (58.5) 249 (66) 0.0353592013733774
BloodSugar 2 2 0.746257200983864
MaximumHR 161 (23.25) 142 (31.5) 1.86101461129144e-13
ExerciseInducedAngina No 141 (85.98%) 63 (45.32%) 4.02917658499919e-14
Yes 23 (14.02%) 76 (54.68%)
#Wrap the function in a named list; the list name becomes the column header
metrics <- list("P-value" = pvalues)
univariate_table(
    heart_disease,
    strata = ~HeartDisease,
    add_n = TRUE,
    associations = metrics
)
Variable Level No (N=164) Yes (N=139) P-value
Age 52 (14.25) 58 (10) 3.91727270669452e-05
Sex Female 72 (43.9%) 25 (17.99%) 1.25894390015198e-06
Male 92 (56.1%) 114 (82.01%)
ChestPain Typical angina 16 (9.76%) 7 (5.04%) 1.08625481176028e-18
Atypical angina 41 (25%) 9 (6.47%)
Non-anginal pain 68 (41.46%) 18 (12.95%)
Asymptomatic 39 (23.78%) 105 (75.54%)
BP 130 (20) 130 (25) 0.0259721071731445
Cholesterol 234.5 (58.5) 249 (66) 0.0353592013733774
BloodSugar 2 2 0.746257200983864
MaximumHR 161 (23.25) 142 (31.5) 1.86101461129144e-13
ExerciseInducedAngina No 141 (85.98%) 63 (45.32%) 4.02917658499919e-14
Yes 23 (14.02%) 76 (54.68%)
#Append a second metric to the list: the AIC of a logistic regression of the
#strata variable (y) on the variable being summarized (x)
metrics[["AIC"]] <- function(y, x) {
    fit <- glm(factor(y)~x, family = "binomial")
    AIC(fit)
}
univariate_table(
    heart_disease,
    strata = ~HeartDisease,
    add_n = TRUE,
    associations = metrics
)
Variable Level No (N=164) Yes (N=139) P-value AIC
Age 52 (14.25) 58 (10) 3.91727270669452e-05 406.53555135163
Sex Female 72 (43.9%) 25 (17.99%) 1.25894390015198e-06 397.932854578273
Male 92 (56.1%) 114 (82.01%)
ChestPain Typical angina 16 (9.76%) 7 (5.04%) 1.08625481176028e-18 339.86455352939
Atypical angina 41 (25%) 9 (6.47%)
Non-anginal pain 68 (41.46%) 18 (12.95%)
Asymptomatic 39 (23.78%) 105 (75.54%)
BP 130 (20) 130 (25) 0.0259721071731445 415.027967902517
Cholesterol 234.5 (58.5) 249 (66) 0.0353592013733774 419.776573596341
BloodSugar 2 2 0.746257200983864 421.789178189058
MaximumHR 161 (23.25) 142 (31.5) 1.86101461129144e-13 364.902105031567
ExerciseInducedAngina No 141 (85.98%) 63 (45.32%) 4.02917658499919e-14 363.537221546849
Yes 23 (14.02%) 76 (54.68%)
#Compute the metrics across heart disease status separately within each sex
univariate_table(
    heart_disease,
    strata = Sex ~ HeartDisease,
    add_n = TRUE,
    associations = metrics
)
Variable Level No (N=164) Yes (N=139) P-value AIC
Female (N=97)
Age 54 (17.25) 60 (5) 0.0398442516740459 110.131195630795
ChestPain Typical angina 4 (5.56%) 0 (0%) 3.06061547795029e-07 84.6909093658332
Atypical angina 16 (22.22%) 2 (8%)
Non-anginal pain 34 (47.22%) 1 (4%)
Asymptomatic 18 (25%) 22 (88%)
BP 130 (20.5) 140 (28) 0.000395706006314433 98.4612833810574
Cholesterol 249 (78.75) 268 (71) 0.103293588251319 113.103340772899
BloodSugar 2 2 0.0715461243557009 110.96313878098
MaximumHR 159 (20.5) 146 (24) 0.012407587301334 109.340618580244
ExerciseInducedAngina No 64 (88.89%) 11 (44%) 1.69588923622621e-05 95.3736844105057
Yes 8 (11.11%) 14 (56%)
Male (N=206)
Age 52 (13) 57.5 (10) 2.35520048183124e-05 270.025672435557
ChestPain Typical angina 12 (13.04%) 7 (6.14%) 4.84907141302052e-12 236.189012884494
Atypical angina 25 (27.17%) 7 (6.14%)
Non-anginal pain 34 (36.96%) 17 (14.91%)
Asymptomatic 21 (22.83%) 83 (72.81%)
BP 130 (20) 130 (20) 0.431611379617407 286.264205286785
Cholesterol 229.5 (44.25) 247.5 (70) 0.0124440883774021 281.249277382165
BloodSugar 2 2 0.446533653014522 286.479455093556
MaximumHR 163 (25.75) 141 (31) 1.61928796484918e-12 231.906181238564
ExerciseInducedAngina No 77 (83.7%) 52 (45.61%) 1.20005825650629e-08 253.896083814185
Yes 15 (16.3%) 62 (54.39%)

##Custom string templates for summary statistics It may be of interest to present summary statistics other than the default, and in a different format. The numeric_summary, categorical_summary, and other_summary arguments take character vectors which allow any number of summaries to be added to the table in any format. Values are simply requested verbatim by name (i.e. “median (iqr) | mean (sd)” will provide a column in the table where the median, iqr, mean, and standard deviation are populated with the result).

#Request separate, named summary columns for the numeric variables
univariate_table(
    heart_disease,
    numeric_summary = c(Median = "median", Mean = "mean")
)
Variable Level Median Mean Summary
Age 56 54.44
Sex Female 97 (32.01%)
Male 206 (67.99%)
ChestPain Typical angina 23 (7.59%)
Atypical angina 50 (16.5%)
Non-anginal pain 86 (28.38%)
Asymptomatic 144 (47.52%)
BP 130 131.69
Cholesterol 241 246.69
BloodSugar 2
MaximumHR 153 149.61
ExerciseInducedAngina No 204 (67.33%)
Yes 99 (32.67%)
HeartDisease No 164 (54.13%)
Yes 139 (45.87%)
#Same custom numeric summaries, repeated under each level of a column stratum
univariate_table(
    heart_disease,
    numeric_summary = c(Median = "median", Mean = "mean"),
    strata = ~HeartDisease
)
No
Yes
Variable Level Median Mean Summary Median Mean Summary
Age 52 52.59 58 56.63
Sex Female 72 (43.9%) 25 (17.99%)
Male 92 (56.1%) 114 (82.01%)
ChestPain Typical angina 16 (9.76%) 7 (5.04%)
Atypical angina 41 (25%) 9 (6.47%)
Non-anginal pain 68 (41.46%) 18 (12.95%)
Asymptomatic 39 (23.78%) 105 (75.54%)
BP 130 129.25 130 134.57
Cholesterol 234.5 242.64 249 251.47
BloodSugar 2 2
MaximumHR 161 158.38 142 139.26
ExerciseInducedAngina No 141 (85.98%) 63 (45.32%)
Yes 23 (14.02%) 76 (54.68%)

The following strings are available by default:

These can be placed in a string template in any format, and will be replaced with the actual value when the function is called.

##Miscellaneous features Numerous other features are available to further customize the table:

See ?univariate_table for details.

#Core functions The functions used to implement different pieces of the specialized functions above were intentionally written to be generalizable and useful in other contexts.

#Split the data into a list of data frames, one per level of Sex
divide(heart_disease, by = "Sex")
## $Female
## # A tibble: 97 x 8
##      Age ChestPain    BP Cholesterol BloodSugar MaximumHR ExerciseInduced…
##    <dbl> <fct>     <dbl>       <dbl> <lgl>          <dbl> <fct>           
##  1    41 Atypical…   130         204 FALSE            172 No              
##  2    62 Asymptom…   140         268 FALSE            160 No              
##  3    57 Asymptom…   120         354 FALSE            163 Yes             
##  4    56 Atypical…   140         294 FALSE            153 No              
##  5    48 Non-angi…   130         275 FALSE            139 No              
##  6    58 Typical …   150         283 TRUE             162 No              
##  7    50 Non-angi…   120         219 FALSE            158 No              
##  8    58 Non-angi…   120         340 FALSE            172 No              
##  9    66 Typical …   150         226 FALSE            114 No              
## 10    69 Typical …   140         239 FALSE            151 No              
## # ... with 87 more rows, and 1 more variable: HeartDisease <fct>
## 
## $Male
## # A tibble: 206 x 8
##      Age ChestPain    BP Cholesterol BloodSugar MaximumHR ExerciseInduced…
##    <dbl> <fct>     <dbl>       <dbl> <lgl>          <dbl> <fct>           
##  1    63 Typical …   145         233 TRUE             150 No              
##  2    67 Asymptom…   160         286 FALSE            108 Yes             
##  3    67 Asymptom…   120         229 FALSE            129 Yes             
##  4    37 Non-angi…   130         250 FALSE            187 No              
##  5    56 Atypical…   120         236 FALSE            178 No              
##  6    63 Asymptom…   130         254 FALSE            147 No              
##  7    53 Asymptom…   140         203 TRUE             155 Yes             
##  8    57 Asymptom…   140         192 FALSE            148 No              
##  9    56 Non-angi…   130         256 TRUE             142 Yes             
## 10    44 Atypical…   120         263 FALSE            173 No              
## # ... with 196 more rows, and 1 more variable: HeartDisease <fct>
#Apply a function within each Sex/HeartDisease combination: here, the mean
#of every numeric column, with the results returned as a single data frame
stratiply(
    heart_disease,
    strata = c("Sex", "HeartDisease"),
    f = function(x) map(select_if(x, is.numeric), mean, na.rm = TRUE),
    bind = TRUE,
    separate = TRUE
)
## # A tibble: 4 x 6
##   Sex    HeartDisease   Age    BP Cholesterol MaximumHR
##   <chr>  <chr>        <dbl> <dbl>       <dbl>     <dbl>
## 1 Female No            54.6  129.        257.      154.
## 2 Male   No            51.0  130.        232.      162.
## 3 Female Yes           59.1  147.        276.      143.
## 4 Male   Yes           56.1  132.        246.      138.
#Build a frame of Age summaries (mean, SD, median) for every combination of
#Sex, HeartDisease and BloodSugar, dropping the grouping afterwards
temp_summary <-
    ungroup(
        summarise(
            group_by(heart_disease, Sex, HeartDisease, BloodSugar),
            Mean = mean(Age, na.rm = TRUE),
            SD = sd(Age, na.rm = TRUE),
            Median = median(Age, na.rm = TRUE)
        )
    )

#Spread the summaries into one column per Sex/BloodSugar combination,
#keeping HeartDisease as the row identifier
stretch(
    temp_summary,
    keys = c("Sex", "BloodSugar"),
    keep = "HeartDisease"
)
## # A tibble: 2 x 13
##   HeartDisease Mean_Female_FAL… SD_Female_FALSE Median_Female_F…
##   <fct>                   <dbl>           <dbl>            <dbl>
## 1 No                       54.0           10.4              53.5
## 2 Yes                      59.7            3.61             61  
## # ... with 9 more variables: Mean_Male_FALSE <dbl>, SD_Male_FALSE <dbl>,
## #   Median_Male_FALSE <dbl>, Mean_Female_TRUE <dbl>, SD_Female_TRUE <dbl>,
## #   Median_Female_TRUE <dbl>, Mean_Male_TRUE <dbl>, SD_Male_TRUE <dbl>,
## #   Median_Male_TRUE <dbl>
#Stretch again, this time pulling the key combinations out into a separate
#header element so they can span the summary columns
result <-
    stretch(
        temp_summary,
        keys = c("Sex", "BloodSugar"),
        keep = "HeartDisease",
        extract_keys_as_header = TRUE,
        keep_keys_in_header = FALSE
    )

#Render the stretched data as a styled HTML table with the keys as a
#spanning header row
kableExtra::add_header_above(
    kableExtra::kable_styling(
        knitr::kable(result$.result, format = "html")
    ),
    kableExtra::auto_index(result$.header)
)
Female_FALSE
Male_FALSE
Female_TRUE
Male_TRUE
HeartDisease Mean SD Median Mean SD Median Mean SD Median Mean SD Median
No 54.03030 10.408532 53.5 50.21333 8.819961 51.0 60.33333 6.653320 59 54.70588 6.761700 53
Yes 59.68421 3.606362 61.0 55.75510 8.560351 57.5 57.16667 7.808115 58 58.12500 7.116881 58
#Fit a linear model of each "left" variable (Age, BP) on each of the other
#variables in turn and stack the coefficient estimates into one data frame
dish(
    heart_disease,
    f = function(y, x) {
        estimates <- lm(y ~ x)$coef
        tibble(
            Parameter = names(estimates),
            Estimate = estimates
        )
    },
    left = c("Age", "BP"),
    bind = TRUE
)
## # A tibble: 32 x 4
##    .left .right      Parameter         Estimate
##    <chr> <chr>       <chr>                <dbl>
##  1 Age   Sex         (Intercept)        55.7   
##  2 Age   Sex         xMale              -1.89  
##  3 Age   ChestPain   (Intercept)        55.9   
##  4 Age   ChestPain   xAtypical angina   -4.51  
##  5 Age   ChestPain   xNon-anginal pain  -2.17  
##  6 Age   ChestPain   xAsymptomatic      -0.147 
##  7 Age   Cholesterol (Intercept)        45.4   
##  8 Age   Cholesterol x                   0.0365
##  9 Age   BloodSugar  (Intercept)        54.0   
## 10 Age   BloodSugar  xTRUE               3.01  
## # ... with 22 more rows
#Fill string templates: every occurrence of a key inside the text is replaced
#by the matching value; names on the text vector are carried to the result
absorb(
    key = c("mean", "sd", "var"),
    value = c("10", "2", "4"),
    text = c("MEAN: mean, SD: sd", "VAR: var = sd^2", MEAN = "mean")
)
##                                                  MEAN 
## "MEAN: 10, SD: 2"    "VAR: 4 = 2^2"              "10"
#Apply functions by column type in two passes. keep = TRUE is passed in both
#passes so columns that do not match the requested types stay in the result
typly(

    #First pass: mean and median of the numeric and logical columns
    typly(
        heart_disease,
        c("numeric", "logical"),
        list(
            mean = mean,
            median = median
        ),
        keep = TRUE,
        na.rm = TRUE
    ),

    #Second pass: frequency table of the factor columns
    "factor",
    table,
    keep = TRUE
)
## $Age
## $Age$mean
## [1] 54.43894
## 
## $Age$median
## [1] 56
## 
## 
## $Sex
## .x
## Female   Male 
##     97    206 
## 
## $ChestPain
## .x
##   Typical angina  Atypical angina Non-anginal pain     Asymptomatic 
##               23               50               86              144 
## 
## $BP
## $BP$mean
## [1] 131.6898
## 
## $BP$median
## [1] 130
## 
## 
## $Cholesterol
## $Cholesterol$mean
## [1] 246.6931
## 
## $Cholesterol$median
## [1] 241
## 
## 
## $BloodSugar
## $BloodSugar$mean
## [1] 0.1485149
## 
## $BloodSugar$median
## [1] FALSE
## 
## 
## $MaximumHR
## $MaximumHR$mean
## [1] 149.6073
## 
## $MaximumHR$median
## [1] 153
## 
## 
## $ExerciseInducedAngina
## .x
##  No Yes 
## 204  99 
## 
## $HeartDisease
## .x
##  No Yes 
## 164 139
#Compute descriptive statistics for every column, supplying a coefficient of
#variation (sd / mean) as a custom function for the numeric columns
descriptives(
    heart_disease,
    f_numeric = list(
        cv = function(x, na.rm) sd(x, na.rm = na.rm)/mean(x, na.rm = na.rm)
    )
)
## # A tibble: 102 x 7
##    .variable .key      .value .label     .level .order .combo    
##    <chr>     <chr>      <dbl> <chr>      <chr>   <int> <chr>     
##  1 Age       length    303    <NA>       <NA>       NA 303       
##  2 Age       missing     0    <NA>       <NA>       NA 0         
##  3 Age       available 303    <NA>       <NA>       NA 303       
##  4 Age       class      NA    numeric    <NA>       NA numeric   
##  5 Age       unique     41    <NA>       <NA>       NA 41        
##  6 Age       evaluated  NA    continuous <NA>       NA continuous
##  7 Age       mean       54.4  <NA>       <NA>       NA 54.44     
##  8 Age       sd          9.04 <NA>       <NA>       NA 9.04      
##  9 Age       min        29    <NA>       <NA>       NA 29        
## 10 Age       median     56    <NA>       <NA>       NA 56        
## # ... with 92 more rows
#Named list of association functions; each takes a response (x) and a
#predictor (y), and the list names label the resulting columns
f <-
    list(

        #Univariate p-value, returned as formatted text
        `P-value` =
            function(x, y) {

                #Factor/character predictors use Fisher's exact test with a
                #simulated p-value; all others use a Kruskal-Wallis test of
                #the predictor across the levels of the response
                is_categorical <- type_match(y, c("factor", "character"))
                p <-
                    if(is_categorical) {
                        fisher.test(factor(x), factor(y), simulate.p.value = TRUE)$p.value
                    } else {
                        kruskal.test(y, factor(x))$p.value
                    }

                #Show very small p-values as a threshold, otherwise round to 2 digits
                if_else(
                    p < 0.001, "<0.001", as.character(round(p, 2))
                )

            },

        #AIC of the intercept-only logistic model minus the AIC of the
        #single-predictor logistic model
        `AIC Difference` =
            function(x, y) {

                null_fit <- glm(factor(x)~1, family = "binomial")
                predictor_fit <- glm(factor(x)~y, family = "binomial")

                null_fit$aic - predictor_fit$aic

            }
    )

#1) Evaluate each function in f for Sex and for HeartDisease (the responses)
#against every other variable in the data
univariate_associations(
    heart_disease,
    f = f,
    responses = c("Sex", "HeartDisease")
)
## # A tibble: 14 x 4
##    .left        .variable             `P-value` `AIC Difference`
##    <fct>        <chr>                 <chr>                <dbl>
##  1 Sex          Age                   0.08                 0.910
##  2 Sex          ChestPain             0.08                 0.958
##  3 Sex          BP                    0.29                -0.752
##  4 Sex          Cholesterol           0.01                10.0  
##  5 Sex          BloodSugar            0.41                -1.29 
##  6 Sex          MaximumHR             0.43                -1.28 
##  7 Sex          ExerciseInducedAngina 0.01                 4.72 
##  8 HeartDisease Age                   <0.001              13.4  
##  9 HeartDisease ChestPain             <0.001              80.1  
## 10 HeartDisease BP                    0.03                 4.95 
## 11 HeartDisease Cholesterol           0.04                 0.206
## 12 HeartDisease BloodSugar            0.66                -1.81 
## 13 HeartDisease MaximumHR             <0.001              55.1  
## 14 HeartDisease ExerciseInducedAngina <0.001              56.4