Review of the third day’s material

Luke Johnston

João Santiago

1 / 12

Reproducible documents

2 / 12

Markdown syntax

---
title: "Document title"
author: Your Name
---
# Header 1
## Header 2
### Header 3
Text **bold**, *italics*
- list 1
- list 2

3 / 12

Markdown syntax

---
title: "Document title"
author: Your Name
---
# Header 1
## Header 2
### Header 3
Text **bold**, *italics*
- list 1
- list 2

1. number 1
2. number 2
Footnote[^1]
[^1]: Footnote content
![image caption](path/to/image.png)
[Link](https://google.com)
|   | Column 1 | Column 2 |
|:--|----:|--------:|
| Row 1 | Cell | Cell |
| Row 2 | Cell | Cell |

3 / 12

R Markdown code chunks

---
title: "My report"
author: "Me!"
bibliography: my_references.bib
output:
    html_document:
        theme: sandstone
---
Cite: [@Hoejsgaard2006a]
Code chunk.
```{r chunk-label, chunk.options}
# R Code here
plot(iris)
library(knitr)
# Table
kable(iris)
```

4 / 12

Efficient coding

5 / 12

read_csv repeats twice, wrangling four times

# Read the nhanes-2009_10 CSV file from the project's data folder.
nhanes_2009 <- read_csv(here::here("data/nhanes-2009_10.csv"))
# Flag BMIs outside 18.5-40, reshape the selected measurements to long
# format, and plot each measurement's values split by that flag.
nhanes_2009 %>%
    mutate(ProblemBMIs = !between(BMI, 18.5, 40)) %>%
    # between() gives NA when BMI is missing; drop those rows.
    filter(!is.na(ProblemBMIs)) %>%
    select(Age, Poverty, Pulse, BPSysAve, BPDiaAve, TotChol,
           SleepHrsNight, PhysActiveDays, ProblemBMIs) %>%
    gather(Measurement, Value, -ProblemBMIs) %>%
    na.omit() %>%
    ggplot(aes(y = Value, x = Measurement, colour = ProblemBMIs)) +
    # position_jitterdodge() separates the two ProblemBMIs groups AND jitters
    # the points. Passing position_dodge() here would replace geom_jitter()'s
    # default jitter position, leaving the points dodged but not jittered.
    geom_jitter(position = position_jitterdodge(dodge.width = 0.6)) +
    scale_color_viridis_d(end = 0.8) +
    labs(y = "", x = "") +
    theme_minimal() +
    theme(legend.position = c(0.85, 0.85)) +
    coord_flip()

6 / 12

jitter repeats twice

# Read the nhanes-2009_10 CSV file from the project's data folder.
nhanes_2009 <- read_csv(here::here("data/nhanes-2009_10.csv"))
# Flag BMIs outside 18.5-40, reshape the selected measurements to long
# format, and plot each measurement's values split by that flag.
nhanes_2009 %>%
    mutate(ProblemBMIs = !between(BMI, 18.5, 40)) %>%
    # between() gives NA when BMI is missing; drop those rows.
    filter(!is.na(ProblemBMIs)) %>%
    select(Age, Poverty, Pulse, BPSysAve, BPDiaAve, TotChol,
           SleepHrsNight, PhysActiveDays, ProblemBMIs) %>%
    gather(Measurement, Value, -ProblemBMIs) %>%
    na.omit() %>%
    ggplot(aes(y = Value, x = Measurement, colour = ProblemBMIs)) +
    # position_jitterdodge() separates the two ProblemBMIs groups AND jitters
    # the points. Passing position_dodge() here would replace geom_jitter()'s
    # default jitter position, leaving the points dodged but not jittered.
    geom_jitter(position = position_jitterdodge(dodge.width = 0.6)) +
    scale_color_viridis_d(end = 0.8) +
    labs(y = "", x = "") +
    theme_minimal() +
    theme(legend.position = c(0.85, 0.85)) +
    coord_flip()

7 / 12

Move code into functions

#' Read a CSV file, flag problem BMIs, and reshape to long format
#'
#' @param .file_path Path to a CSV file that contains the columns `BMI`,
#'   `Age`, `Poverty`, `Pulse`, `BPSysAve`, `BPDiaAve`, `TotChol`,
#'   `SleepHrsNight`, and `PhysActiveDays`.
#' @return A data frame with the columns `ProblemBMIs`, `Measurement`, and
#'   `Value`, with rows containing missing values removed.
read_mutate_gather <- function(.file_path) {
    raw_data <- read_csv(.file_path)

    # TRUE when BMI lies outside 18.5-40; rows where the flag is NA
    # (missing BMI) are dropped.
    flagged <- mutate(raw_data, ProblemBMIs = !between(BMI, 18.5, 40))
    flagged <- filter(flagged, !is.na(ProblemBMIs))

    measurements <- select(
        flagged,
        Age, Poverty, Pulse, BPSysAve, BPDiaAve, TotChol,
        SleepHrsNight, PhysActiveDays, ProblemBMIs
    )

    # One row per (measurement, value) pair, keeping the flag alongside.
    long_data <- gather(measurements, Measurement, Value, -ProblemBMIs)
    na.omit(long_data)
}
#' Jitter plot of every measurement, split by the problem-BMI flag
#'
#' @param .dataset A data frame with `Value`, `Measurement`, and
#'   `ProblemBMIs` columns (e.g. the output of `read_mutate_gather()`).
#' @return A ggplot object.
plot_jitter <- function(.dataset) {
    .dataset %>%
        ggplot(aes(y = Value, x = Measurement, colour = ProblemBMIs)) +
        # position_jitterdodge() separates the two ProblemBMIs groups AND
        # jitters the points. Passing position_dodge() here would replace
        # geom_jitter()'s default jitter position, leaving the points dodged
        # but not jittered.
        geom_jitter(position = position_jitterdodge(dodge.width = 0.6)) +
        scale_color_viridis_d(end = 0.8) +
        labs(y = "", x = "") +
        theme_minimal() +
        theme(legend.position = c(0.85, 0.85)) +
        # Put the measurement names on the vertical axis.
        coord_flip()
}

8 / 12

density repeats twice

# Same wrangling as for the jitter plot: flag BMIs outside 18.5-40, keep the
# selected measurements, and reshape to long format.
nhanes_2009 %>% 
    mutate(ProblemBMIs = !between(BMI, 18.5, 40)) %>% 
    # between() gives NA when BMI is missing; drop those rows.
    filter(!is.na(ProblemBMIs)) %>% 
    select(Age, Poverty, Pulse, BPSysAve, BPDiaAve, TotChol,
           SleepHrsNight, PhysActiveDays, ProblemBMIs) %>% 
    gather(Measurement, Value, -ProblemBMIs) %>% 
    na.omit() %>% 
    # One density panel per measurement, filled by the ProblemBMIs flag.
    ggplot(aes(x = Value, fill = ProblemBMIs)) +
    geom_density(alpha = 0.35) +
    # Free scales: each panel gets its own axis ranges.
    facet_wrap(~Measurement, scales = "free") +
    scale_fill_viridis_d(end = 0.8) +
    labs(y = "", x = "") +
    theme_minimal() +
    theme(legend.position = c(0.85, 0.15),
          strip.text = element_text(face = "bold"))

9 / 12

Move code into functions

#' Density plot of every measurement, split by the problem-BMI flag
#'
#' @param .dataset A data frame with `Value`, `Measurement`, and
#'   `ProblemBMIs` columns (e.g. the output of `read_mutate_gather()`).
#' @return A ggplot object with one panel per `Measurement`.
plot_density <- function(.dataset) {
    # Legend placement and bold panel titles, applied after theme_minimal().
    legend_and_strips <- theme(
        legend.position = c(0.85, 0.15),
        strip.text = element_text(face = "bold")
    )

    ggplot(.dataset, aes(x = Value, fill = ProblemBMIs)) +
        geom_density(alpha = 0.35) +
        # Free scales: each panel gets its own axis ranges.
        facet_wrap(~Measurement, scales = "free") +
        scale_fill_viridis_d(end = 0.8) +
        labs(y = "", x = "") +
        theme_minimal() +
        legend_and_strips
}

10 / 12

Two dataframes, two figures each.

# Collect the paths of the two data files:
files <- c(
    here::here("data/nhanes-2009_10.csv"),
    here::here("data/nhanes-2011_12.csv")
)

11 / 12

Two dataframes, two figures each.

# Collect the paths of the two data files:
files <- c(
    here::here("data/nhanes-2009_10.csv"),
    here::here("data/nhanes-2011_12.csv")
)

# Read and wrangle each file; the result has one element per file:
data_list <- map(files, read_mutate_gather)

11 / 12

Two dataframes, two figures each.

# Collect the paths of the two data files:
files <- c(
    here::here("data/nhanes-2009_10.csv"),
    here::here("data/nhanes-2011_12.csv")
)

# Read and wrangle each file; the result has one element per file:
data_list <- map(files, read_mutate_gather)

# Draw one figure per data frame, for each kind of plot:
# the jitter plots
data_list %>%
    map(plot_jitter)
# the density plots
data_list %>%
    map(plot_density)

11 / 12

Parallel processing

library(furrr)
# Resolve futures in parallel background R sessions. plan(multiprocess) is
# deprecated in the future package; multisession is its replacement.
plan(multisession)

# Start with file paths:
files <- 
    c(here::here("data/nhanes-2009_10.csv"),
      here::here("data/nhanes-2011_12.csv"))

# Apply wrangling to each data file:
data_list <- 
    files %>% 
    future_map(read_mutate_gather)

# Apply figure to each data file:
# Plot the jitters
future_map(data_list, plot_jitter)
# Plot the density
future_map(data_list, plot_density)

↑, ←, Pg Up, k	Go to previous slide
↓, →, Pg Dn, Space, j	Go to next slide
Home	Go to first slide
End	Go to last slide
Number + Return	Go to specific slide
b / m / f	Toggle blackout / mirrored / fullscreen mode
c	Clone slideshow
p	Toggle presenter mode
t	Restart the presentation timer
?, h	Toggle this help

Review of the third day’s material

Luke Johnston

João Santiago

Reproducible documents

Markdown syntax

Markdown syntax

R Markdown code chunks

Efficient coding

read_csv repeats twice, wrangling four times

jitter repeats twice

Move code into functions

density repeats twice

Move code into functions

Two dataframes, two figures each.

Two dataframes, two figures each.

Two dataframes, two figures each.

Parallel processing

Reproducible documents

Help