fakir examples

library(fakir)
library(dplyr)
library(ggplot2)
library(sf)

Fake client database

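This dataset simulates the after-sales client database of a phone company. It is made of a table of clients and a table of tickets, linked by `num_client`; they are returned as one joined table by default, or as two separate tables with `split = TRUE`: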

# Simulate 10 tickets. Without `split`, ticket and client columns come back
# together in a single table (10 rows in the output below).
fake_ticket_client(vol = 10)
#> old-style crs object detected; please recreate object with a recent sf::st_crs()
#> # A tibble: 10 × 25
#>    ref           num_client first    last  job     age region id_dpt departement
#>    <chr>         <chr>      <chr>    <chr> <chr> <dbl> <chr>  <chr>  <chr>      
#>  1 DOSS-AMQN-002 79         Jovan    O'Ke… Gene…    22 Île-d… 77     <NA>       
#>  2 DOSS-NCKJ-010 69         Miss     Lean… Emer…    68 <NA>   25     Doubs      
#>  3 DOSS-GPBE-009 120        Odell    Stok… Engi…    24 <NA>   17     Charente-M…
#>  4 DOSS-GRLN-001 31         Loren    Lars… <NA>     NA <NA>   33     <NA>       
#>  5 DOSS-LEPJ-004 59         Maybelle Maye… Furt…    18 <NA>   33     <NA>       
#>  6 DOSS-DUCL-005 118        Jamarion Ober… Engi…    18 Langu… 48     <NA>       
#>  7 DOSS-OCED-003 77         Lee      Scha… Admi…    NA Poito… 17     Charente-M…
#>  8 DOSS-KXSJ-007 65         Demetric Auer  Cont…    21 Pays … 49     <NA>       
#>  9 DOSS-UITD-006 141        Wilfrid  Harv… Educ…    53 <NA>   81     Tarn       
#> 10 DOSS-SHKL-008 182        Addyson  Nien… Earl…    65 Poito… 17     Charente-M…
#> # ℹ 16 more variables: cb_provider <chr>, name <chr>, entry_date <dttm>,
#> #   fidelity_points <dbl>, priority_encoded <dbl>, priority <fct>,
#> #   timestamp <date>, year <dbl>, month <dbl>, day <int>, supported <chr>,
#> #   supported_encoded <int>, type <chr>, type_encoded <int>, state <fct>,
#> #   source_call <fct>
# With `split = TRUE` the result is a list holding two separate tables,
# `clients` and `tickets`; it is kept in `tickets_db` for the plots below.
tickets_db <- fake_ticket_client(vol = 100, split = TRUE)
#> old-style crs object detected; please recreate object with a recent sf::st_crs()
# Print both tables.
tickets_db
#> $clients
#> # A tibble: 200 × 14
#>    num_client first   last     job     age region id_dpt departement cb_provider
#>  * <chr>      <chr>   <chr>    <chr> <dbl> <chr>  <chr>  <chr>       <chr>      
#>  1 1          Solomon Heaney   Civi…    53 Champ… 51     Marne       Diners Clu…
#>  2 2          Karma   William… Scie…    81 Auver… 63     Puy-de-Dôme VISA 13 di…
#>  3 3          Press   Kulas    Anim…    NA Prove… 06     Alpes-Mari… <NA>       
#>  4 4          Laken   McDermo… <NA>     NA Breta… 56     Morbihan    <NA>       
#>  5 5          Sydnie  Jaskols… Hort…    30 Centre 36     <NA>        <NA>       
#>  6 6          Clayton Runolfs… Comm…    NA Prove… 04     <NA>        Diners Clu…
#>  7 7          Roberta Purdy-W… Fina…    60 Île-d… 91     Essonne     <NA>       
#>  8 8          Dr.     RonaldM… Astr…    30 Rhône… 42     Loire       <NA>       
#>  9 9          Miss    Alondra… Occu…    18 Aquit… 24     Dordogne    Diners Clu…
#> 10 10         Vernice Ondrick… Clin…    19 Limou… 87     Haute-Vien… <NA>       
#> # ℹ 190 more rows
#> # ℹ 5 more variables: name <chr>, entry_date <dttm>, fidelity_points <dbl>,
#> #   priority_encoded <dbl>, priority <fct>
#> 
#> $tickets
#> # A tibble: 100 × 10
#>    ref            num_client  year month   day timestamp  supported type   state
#>    <chr>          <chr>      <dbl> <dbl> <int> <date>     <chr>     <chr>  <fct>
#>  1 DOSS-GFEL-0028 1           2016    12    21 2016-12-21 Non       Insta… Term…
#>  2 DOSS-UWYV-0016 22          2020    10    12 2020-10-12 Non       Insta… Atte…
#>  3 DOSS-DKFC-0073 9           2020    11    16 2020-11-16 Non       Insta… Term…
#>  4 DOSS-SAYJ-0047 8           2020    12     1 2020-12-01 Non       Box    Atte…
#>  5 DOSS-GSMZ-0080 30          2020    12    18 2020-12-18 Oui       Insta… Inte…
#>  6 DOSS-UIOZ-0085 10          2020    12    30 2020-12-30 Oui       Insta… Atte…
#>  7 DOSS-DSMI-0065 37          2021     1    27 2021-01-27 Non       Ligne  Atte…
#>  8 DOSS-JOYV-0029 37          2021     3    19 2021-03-19 Non       Box    Atte…
#>  9 DOSS-WPSG-0013 24          2021     3    26 2021-03-26 Non       <NA>   En c…
#> 10 DOSS-NHFG-0036 12          2021     4    12 2021-04-12 Non       Insta… Atte…
#> # ℹ 90 more rows
#> # ℹ 1 more variable: source_call <fct>
# Fidelity points against client entry date, with a smoothed trend on top.
ggplot(
  data = tickets_db$clients,
  mapping = aes(x = entry_date, y = fidelity_points)
) +
  geom_point() +
  geom_smooth()
#> `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

# Number of tickets for each value of `type`.
ggplot(data = tickets_db$tickets, mapping = aes(x = type)) +
  geom_bar()

# Number of tickets for each value of `state`.
ggplot(data = tickets_db$tickets, mapping = aes(x = state)) +
  geom_bar()

# Summarise clients per department (`id_dpt`): how many there are and their
# mean fidelity points (ignoring missing values). The summary is then joined
# to `fra_sf` on `id_dpt`, keeping unmatched rows from both sides, and the
# joined table is converted to an sf object for mapping.
clients_map <- st_sf(
  tickets_db$clients %>%
    group_by(id_dpt) %>%
    summarise(
      number_of_clients = n(),
      average_fidelity = mean(fidelity_points, na.rm = TRUE)
    ) %>%
    full_join(fra_sf, by = "id_dpt")
)
#> old-style crs object detected; please recreate object with a recent sf::st_crs()

# Map filled by mean fidelity points, drawn in CRS 2154 with graticules
# taken from CRS 4326.
ggplot(data = clients_map, mapping = aes(fill = average_fidelity)) +
  geom_sf() +
  scale_fill_viridis_c() +
  coord_sf(crs = 2154, datum = 4326)

Fake products

# Generate 10 fake products and tally them by category.
fake_products(10) %>%
  count(category)
#> # A tibble: 7 × 2
#>   category             n
#>   <chr>            <int>
#> 1 Awesome              1
#> 2 Entertainment        1
#> 3 Fitness              1
#> 4 Industrial           1
#> 5 Lifestyle            3
#> 6 Medical              2
#> 7 Pets and Animals     1

Fake website visits

# One row per day from 2017-01-01 to 2017-01-31, with a count column per page.
fake_visits(from = "2017-01-01", to = "2017-01-31")
#> # A tibble: 31 × 8
#>    timestamp   year month   day  home about  blog contact
#>  * <date>     <dbl> <dbl> <int> <int> <int> <int>   <int>
#>  1 2017-01-01  2017     1     1   369   220   404     210
#>  2 2017-01-02  2017     1     2   159   250   414     490
#>  3 2017-01-03  2017     1     3   436   170   498     456
#>  4 2017-01-04  2017     1     4    NA   258   526     392
#>  5 2017-01-05  2017     1     5   362    NA   407     291
#>  6 2017-01-06  2017     1     6   245   145   576      90
#>  7 2017-01-07  2017     1     7    NA    NA   484     167
#>  8 2017-01-08  2017     1     8   461   103   441      NA
#>  9 2017-01-09  2017     1     9   337   113   673     379
#> 10 2017-01-10  2017     1    10    NA   169   308     139
#> # ℹ 21 more rows

Fake questionnaire on means of transport / goal

# Simulate survey answers for 10 individuals. Without `split`, individual
# and answer columns are returned in one table (30 rows in the output
# below: each individual appears once per `type`).
fake_survey_answers(n = 10)
#> old-style crs object detected; please recreate object with a recent sf::st_crs()
#> # A tibble: 30 × 12
#>    id_individu   age sexe  region        id_departement nom_departement  
#>    <chr>       <int> <chr> <chr>         <chr>          <chr>            
#>  1 ID-NYDZ-010    NA <NA>  <NA>          55             <NA>             
#>  2 ID-NYDZ-010    NA <NA>  <NA>          55             <NA>             
#>  3 ID-NYDZ-010    NA <NA>  <NA>          55             <NA>             
#>  4 ID-PWLB-009    71 F     Rhône-Alpes   38             Isère            
#>  5 ID-PWLB-009    71 F     Rhône-Alpes   38             Isère            
#>  6 ID-PWLB-009    71 F     Rhône-Alpes   38             Isère            
#>  7 ID-NMQG-001    42 M     Midi-Pyrénées 82             Tarn-et-Garonne  
#>  8 ID-NMQG-001    42 M     Midi-Pyrénées 82             Tarn-et-Garonne  
#>  9 ID-NMQG-001    42 M     Midi-Pyrénées 82             Tarn-et-Garonne  
#> 10 ID-RJXN-002    71 O     <NA>          17             Charente-Maritime
#> # ℹ 20 more rows
#> # ℹ 6 more variables: question_date <dttm>, year <dbl>, type <chr>,
#> #   distance_km <dbl>, transport <fct>, temps_trajet_en_heures <dbl>
# With `split = TRUE` the result is a list holding two separate tables,
# `individus` and `answers`, which share the `id_individu` column.
fake_survey_answers(n = 10, split = TRUE)
#> old-style crs object detected; please recreate object with a recent sf::st_crs()
#> $individus
#> # A tibble: 10 × 8
#>    id_individu   age sexe  region                 id_departement nom_departement
#>    <chr>       <int> <chr> <chr>                  <chr>          <chr>          
#>  1 ID-NYDZ-010    NA <NA>  Basse-Normandie        14             Calvados       
#>  2 ID-PWLB-009    71 F     Corse                  2A             Corse-du-Sud   
#>  3 ID-NMQG-001    42 M     <NA>                   68             Haut-Rhin      
#>  4 ID-RJXN-002    71 O     Rhône-Alpes            01             Ain            
#>  5 ID-MROK-007    41 M     Basse-Normandie        14             Calvados       
#>  6 ID-VMKS-004    33 O     Lorraine               54             Meurthe-et-Mos…
#>  7 ID-XEMZ-003    81 O     Provence-Alpes-Côte d… 84             Vaucluse       
#>  8 ID-EUDQ-005    44 M     Champagne-Ardenne      10             <NA>           
#>  9 ID-DCIZ-008    92 O     Aquitaine              64             Pyrénées-Atlan…
#> 10 ID-KPUS-006    57 O     <NA>                   54             Meurthe-et-Mos…
#> # ℹ 2 more variables: question_date <dttm>, year <dbl>
#> 
#> $answers
#> # A tibble: 30 × 5
#>    id_individu type      distance_km transport temps_trajet_en_heures
#>    <chr>       <chr>           <dbl> <fct>                      <dbl>
#>  1 ID-NYDZ-010 travail         12.2  voiture                     0.15
#>  2 ID-NYDZ-010 commerces        9.61 bus                         1.01
#>  3 ID-NYDZ-010 loisirs        549.   avion                       0.27
#>  4 ID-PWLB-009 travail         11.9  voiture                     0.14
#>  5 ID-PWLB-009 commerces       27.4  voiture                     0.34
#>  6 ID-PWLB-009 loisirs        210.   train                       0.42
#>  7 ID-NMQG-001 travail          2.38 velo                        0.43
#>  8 ID-NMQG-001 commerces       14.9  voiture                     0.18
#>  9 ID-NMQG-001 loisirs        446.   train                       0.89
#> 10 ID-RJXN-002 travail          6.18 mobylette                   0.75
#> # ℹ 20 more rows

Fake transport use

# Simulate answers for 30 individuals (joined table) and keep them in
# `answers` for the plot below.
answers <- fake_survey_answers(n = 30)
#> old-style crs object detected; please recreate object with a recent sf::st_crs()
# Print the table.
answers
#> # A tibble: 90 × 12
#>    id_individu   age sexe  region             id_departement nom_departement
#>    <chr>       <int> <chr> <chr>              <chr>          <chr>          
#>  1 ID-MROK-007    NA M     Nord-Pas-de-Calais 62             Pas-de-Calais  
#>  2 ID-MROK-007    NA M     Nord-Pas-de-Calais 62             Pas-de-Calais  
#>  3 ID-MROK-007    NA M     Nord-Pas-de-Calais 62             Pas-de-Calais  
#>  4 ID-NYDZ-010    49 M     Midi-Pyrénées      82             Tarn-et-Garonne
#>  5 ID-NYDZ-010    49 M     Midi-Pyrénées      82             Tarn-et-Garonne
#>  6 ID-NYDZ-010    49 M     Midi-Pyrénées      82             Tarn-et-Garonne
#>  7 ID-HXOG-015    50 M     Bourgogne          71             <NA>           
#>  8 ID-HXOG-015    50 M     Bourgogne          71             <NA>           
#>  9 ID-HXOG-015    50 M     Bourgogne          71             <NA>           
#> 10 ID-MZNB-024    70 F     Aquitaine          47             Lot-et-Garonne 
#> # ℹ 80 more rows
#> # ℹ 6 more variables: question_date <dttm>, year <dbl>, type <chr>,
#> #   distance_km <dbl>, transport <fct>, temps_trajet_en_heures <dbl>

# Log of distance against age, coloured and faceted by `type`; each panel
# gets its own y scale.
ggplot(answers, aes(x = age, y = log(distance_km), colour = type)) +
  geom_point() +
  geom_smooth() +
  facet_wrap(vars(type), scales = "free_y")
#> `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
#> Warning: Removed 6 rows containing non-finite values (`stat_smooth()`).
#> Warning: Removed 6 rows containing missing values (`geom_point()`).