+ - 0:00:00
Notes for current slide
Notes for next slide

Review of the third day’s material

Luke Johnston

João Santiago

1 / 12

Reproducible documents

2 / 12

Markdown syntax

---
title: "Document title"
author: Your Name
---
# Header 1
## Header 2
### Header 3
Text **bold**, *italics*
- list 1
- list 2
3 / 12

Markdown syntax

---
title: "Document title"
author: Your Name
---
# Header 1
## Header 2
### Header 3
Text **bold**, *italics*
- list 1
- list 2
1. number 1
2. number 2
Footnote[^1]
[^1]: Footnote content
![image caption](path/to/image.png)
[Link](https://google.com)
| | Column 1 | Column 2 |
|:--|----:|--------:|
| Row 1 | Cell | Cell |
| Row 2 | Cell | Cell |
3 / 12

R Markdown code chunks

---
title: "My report"
author: "Me!"
bibliography: my_references.bib
output:
  html_document:
    theme: sandstone
---
Cite: [@Hoejsgaard2006a]
Code chunk.
```{r chunk-label, chunk.options}
# R Code here
plot(iris)
library(knitr)
# Table
kable(iris)
```
4 / 12

Efficient coding

5 / 12

read_csv repeats twice, wrangling four times

# Import the 2009-10 NHANES file, located relative to the project root.
nhanes_2009 <- read_csv(here::here("data/nhanes-2009_10.csv"))

# Wrangle to long format, then plot every measurement split by the BMI flag.
nhanes_2009 %>%
  # Flag BMI values that fall outside the 18.5-40 interval.
  mutate(ProblemBMIs = !between(BMI, 18.5, 40)) %>%
  # Rows with a missing BMI give a missing flag; drop them.
  filter(!is.na(ProblemBMIs)) %>%
  select(
    Age, Poverty, Pulse, BPSysAve, BPDiaAve, TotChol,
    SleepHrsNight, PhysActiveDays, ProblemBMIs
  ) %>%
  # One row per (measurement, value) pair; the flag stays as its own column.
  gather(Measurement, Value, -ProblemBMIs) %>%
  na.omit() %>%
  ggplot(aes(y = Value, x = Measurement, colour = ProblemBMIs)) +
  geom_jitter(position = position_dodge(width = 0.6)) +
  scale_color_viridis_d(end = 0.8) +
  labs(y = "", x = "") +
  theme_minimal() +
  theme(legend.position = c(0.85, 0.85)) +
  # Measurements end up on the vertical axis.
  coord_flip()
6 / 12

jitter repeats twice

# Import the 2009-10 NHANES file, located relative to the project root.
nhanes_2009 <- read_csv(here::here("data/nhanes-2009_10.csv"))

# Wrangling steps first, then the plotting layers.
nhanes_2009 %>%
  # Flag BMI values that fall outside the 18.5-40 interval.
  mutate(ProblemBMIs = !between(BMI, 18.5, 40)) %>%
  # Rows with a missing BMI give a missing flag; drop them.
  filter(!is.na(ProblemBMIs)) %>%
  select(
    Age, Poverty, Pulse, BPSysAve, BPDiaAve, TotChol,
    SleepHrsNight, PhysActiveDays, ProblemBMIs
  ) %>%
  # One row per (measurement, value) pair; the flag stays as its own column.
  gather(Measurement, Value, -ProblemBMIs) %>%
  na.omit() %>%
  # Plotting layers start here.
  ggplot(aes(y = Value, x = Measurement, colour = ProblemBMIs)) +
  geom_jitter(position = position_dodge(width = 0.6)) +
  scale_color_viridis_d(end = 0.8) +
  labs(y = "", x = "") +
  theme_minimal() +
  theme(legend.position = c(0.85, 0.85)) +
  # Measurements end up on the vertical axis.
  coord_flip()
7 / 12

Move code into functions

# Read one NHANES CSV file and reshape it into long format for plotting.
#
# .file_path: path to the CSV file to read.
#
# Returns a data frame with the columns ProblemBMIs, Measurement and Value,
# with rows containing missing values removed.
read_mutate_gather <- function(.file_path) {
  nhanes <- read_csv(.file_path)

  # Flag BMI values outside the 18.5-40 interval; a missing BMI gives a
  # missing flag, and those rows are dropped.
  flagged <- nhanes %>%
    mutate(ProblemBMIs = !between(BMI, 18.5, 40)) %>%
    filter(!is.na(ProblemBMIs))

  # Keep the measurements of interest and stack them into
  # Measurement/Value pairs, leaving the flag as its own column.
  flagged %>%
    select(
      Age, Poverty, Pulse, BPSysAve, BPDiaAve, TotChol,
      SleepHrsNight, PhysActiveDays, ProblemBMIs
    ) %>%
    gather(Measurement, Value, -ProblemBMIs) %>%
    na.omit()
}
# Plot each measurement's values as points, coloured by the BMI flag.
#
# .dataset: long-format data with the columns Measurement, Value and
#   ProblemBMIs.
#
# Returns the ggplot object.
plot_jitter <- function(.dataset) {
  canvas <- ggplot(
    .dataset,
    aes(x = Measurement, y = Value, colour = ProblemBMIs)
  )

  canvas +
    # NOTE(review): position_dodge() replaces geom_jitter()'s default jitter
    # position -- confirm that dodged (not randomly jittered) points are
    # what is intended.
    geom_jitter(position = position_dodge(width = 0.6)) +
    scale_color_viridis_d(end = 0.8) +
    labs(y = "", x = "") +
    theme_minimal() +
    theme(legend.position = c(0.85, 0.85)) +
    # Measurements end up on the vertical axis.
    coord_flip()
}
8 / 12

density repeats twice

# Same wrangling as for the jitter plot, this time feeding a density plot.
nhanes_2009 %>%
  # Flag BMI values that fall outside the 18.5-40 interval.
  mutate(ProblemBMIs = !between(BMI, 18.5, 40)) %>%
  # Rows with a missing BMI give a missing flag; drop them.
  filter(!is.na(ProblemBMIs)) %>%
  select(
    Age, Poverty, Pulse, BPSysAve, BPDiaAve, TotChol,
    SleepHrsNight, PhysActiveDays, ProblemBMIs
  ) %>%
  # One row per (measurement, value) pair; the flag stays as its own column.
  gather(Measurement, Value, -ProblemBMIs) %>%
  na.omit() %>%
  ggplot(aes(x = Value, fill = ProblemBMIs)) +
  geom_density(alpha = 0.35) +
  # One panel per measurement, each with its own axis ranges.
  facet_wrap(~Measurement, scales = "free") +
  scale_fill_viridis_d(end = 0.8) +
  labs(y = "", x = "") +
  theme_minimal() +
  theme(
    legend.position = c(0.85, 0.15),
    strip.text = element_text(face = "bold")
  )
9 / 12

Move code into functions

# Plot the density of each measurement's values, filled by the BMI flag.
#
# .dataset: long-format data with the columns Measurement, Value and
#   ProblemBMIs.
#
# Returns the ggplot object, with one panel per measurement.
plot_density <- function(.dataset) {
  canvas <- ggplot(.dataset, aes(x = Value, fill = ProblemBMIs))

  canvas +
    geom_density(alpha = 0.35) +
    # Each panel gets its own axis ranges.
    facet_wrap(~Measurement, scales = "free") +
    scale_fill_viridis_d(end = 0.8) +
    labs(y = "", x = "") +
    theme_minimal() +
    theme(
      legend.position = c(0.85, 0.15),
      strip.text = element_text(face = "bold")
    )
}
10 / 12

Two dataframes, two figures each.

# Start with file paths (one per NHANES survey cycle):
files <- c(
  here::here("data/nhanes-2009_10.csv"),
  here::here("data/nhanes-2011_12.csv")
)
11 / 12

Two dataframes, two figures each.

# Start with file paths (one per NHANES survey cycle):
files <- c(
  here::here("data/nhanes-2009_10.csv"),
  here::here("data/nhanes-2011_12.csv")
)

# Apply wrangling to each data file (gives one data frame per file):
data_list <- files %>%
  map(read_mutate_gather)
11 / 12

Two dataframes, two figures each.

# Start with file paths (one per NHANES survey cycle):
files <- c(
  here::here("data/nhanes-2009_10.csv"),
  here::here("data/nhanes-2011_12.csv")
)

# Apply wrangling to each data file (gives one data frame per file):
data_list <- files %>%
  map(read_mutate_gather)

# Apply figure to each data file:
# Plot the jitters
map(data_list, plot_jitter)

# Plot the density
map(data_list, plot_density)
11 / 12

Parallel processing

# Start with file paths (one per NHANES survey cycle):
files <-
  c(here::here("data/nhanes-2009_10.csv"),
    here::here("data/nhanes-2011_12.csv"))

library(furrr)
# Run the future_map() calls below in parallel background R sessions.
# `multisession` replaces the `multiprocess` strategy, which is deprecated
# in the future package (since version 1.20.0).
plan(multisession)

# Apply wrangling to each data file (gives one data frame per file):
data_list <-
  files %>%
  future_map(read_mutate_gather)

# Apply figure to each data file:
# Plot the jitters
future_map(data_list, plot_jitter)

# Plot the density
future_map(data_list, plot_density)
12 / 12

Reproducible documents

2 / 12
Paused

Help

Keyboard shortcuts

↑, ←, Pg Up, k Go to previous slide
↓, →, Pg Dn, Space, j Go to next slide
Home Go to first slide
End Go to last slide
Number + Return Go to specific slide
b / m / f Toggle blackout / mirrored / fullscreen mode
c Clone slideshow
p Toggle presenter mode
t Restart the presentation timer
?, h Toggle this help
Esc Back to slideshow