Lecture 10
library(patchwork)
# Four plots of penguin body mass from palmerpenguins::penguins, stored as
# p1-p4. Aesthetics are mapped inside each geom layer, not in ggplot().

# Body mass by island (boxplots).
p1 <- ggplot(data = palmerpenguins::penguins) +
  geom_boxplot(mapping = aes(x = island, y = body_mass_g))

# Body mass by species (boxplots).
p2 <- ggplot(data = palmerpenguins::penguins) +
  geom_boxplot(mapping = aes(x = species, y = body_mass_g))

# Body mass against flipper length, coloured by sex.
p3 <- ggplot(data = palmerpenguins::penguins) +
  geom_point(
    mapping = aes(x = flipper_length_mm, y = body_mass_g, color = sex)
  )

# Body mass against bill length, coloured by sex.
p4 <- ggplot(data = palmerpenguins::penguins) +
  geom_point(
    mapping = aes(x = bill_length_mm, y = body_mass_g, color = sex)
  )
# Copy the built-in airquality data and replace the numeric month with its
# English name so it can be used directly as a text label.
airq <- airquality
airq[["Month"]] <- month.name[airq[["Month"]]]

# Animated line chart of daily temperature, one line per month, revealed
# progressively along Day.
ggplot(data = airq, mapping = aes(x = Day, y = Temp, group = Month)) +
  geom_line() +
  # Dashed grey guide running from each point out to x = 31.
  geom_segment(
    mapping = aes(xend = 31, yend = Temp),
    colour = "grey",
    linetype = "dashed"
  ) +
  geom_point(size = 2) +
  # Month name drawn just past x = 31, left-aligned.
  geom_text(
    mapping = aes(x = 31.1, label = Month),
    hjust = 0
  ) +
  gganimate::transition_reveal(Day) +
  # Clipping is turned off and the right margin widened (below) so the month
  # labels can extend beyond the panel.
  coord_cartesian(clip = "off") +
  labs(
    title = "Temperature in New York",
    y = "Temperature (°F)"
  ) +
  theme_minimal() +
  theme(plot.margin = margin(t = 5.5, r = 40, b = 5.5, l = 5.5))
# A tibble: 11 × 8
x1 x2 x3 x4 y1 y2 y3 y4
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 10 10 10 8 8.04 9.14 7.46 6.58
2 8 8 8 8 6.95 8.14 6.77 5.76
3 13 13 13 8 7.58 8.74 12.7 7.71
4 9 9 9 8 8.81 8.77 7.11 8.84
5 11 11 11 8 8.33 9.26 7.81 8.47
6 14 14 14 8 9.96 8.1 8.84 7.04
7 6 6 6 8 7.24 6.13 6.08 5.25
8 4 4 4 19 4.26 3.1 5.39 12.5
9 12 12 12 8 10.8 9.13 8.15 5.56
10 7 7 7 8 4.82 7.26 6.42 7.91
11 5 5 5 8 5.68 4.74 5.73 6.89
# Reshape Anscombe's quartet from wide form (x1..x4, y1..y4) to long form
# with one row per observation: `group` (the set number, as character),
# `x`, and `y`.
#
# Each column name is split after its first character (names_sep = 1). The
# ".value" sentinel turns the first piece ("x" / "y") into the output value
# columns and the second piece becomes `group`, so no pivot_wider() /
# unnest() round trip through list-columns is needed.
# cols_vary = "slowest" keeps all rows of a group together
# (1, 1, ..., 2, 2, ...); it requires tidyr >= 1.3.0.
tidy_anscombe <- datasets::anscombe %>%
  pivot_longer(
    cols = everything(),
    names_to = c(".value", "group"),
    names_sep = 1,
    cols_vary = "slowest"
  )

# Print the reshaped tibble.
tidy_anscombe
# A tibble: 44 × 3
group x y
<chr> <dbl> <dbl>
1 1 10 8.04
2 1 8 6.95
3 1 13 7.58
4 1 9 8.81
5 1 11 8.33
6 1 14 9.96
7 1 6 7.24
8 1 4 4.26
9 1 12 10.8
10 1 7 4.82
# ℹ 34 more rows
# Summary statistics for each of the four Anscombe groups: means and
# standard deviations of x and y, plus the x-y correlation. The result is
# returned ungrouped.
tidy_anscombe %>%
  group_by(group) %>%
  summarize(
    mean_x = mean(x),
    mean_y = mean(y),
    sd_x = sd(x),
    sd_y = sd(y),
    cor = cor(x = x, y = y),
    .groups = "drop"
  )
# A tibble: 4 × 6
group mean_x mean_y sd_x sd_y cor
<chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 9 7.50 3.32 2.03 0.816
2 2 9 7.50 3.32 2.03 0.816
3 3 9 7.5 3.32 2.03 0.816
4 4 9 7.50 3.32 2.03 0.817
# The same summary statistics (means, standard deviations, x-y correlation)
# computed for every dataset in the datasaurus dozen. The result is returned
# ungrouped.
datasauRus::datasaurus_dozen %>%
  group_by(dataset) %>%
  summarize(
    mean_x = mean(x),
    mean_y = mean(y),
    sd_x = sd(x),
    sd_y = sd(y),
    cor = cor(x = x, y = y),
    .groups = "drop"
  )
# A tibble: 13 × 6
dataset mean_x mean_y sd_x sd_y cor
<chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 away 54.3 47.8 16.8 26.9 -0.0641
2 bullseye 54.3 47.8 16.8 26.9 -0.0686
3 circle 54.3 47.8 16.8 26.9 -0.0683
4 dino 54.3 47.8 16.8 26.9 -0.0645
5 dots 54.3 47.8 16.8 26.9 -0.0603
6 h_lines 54.3 47.8 16.8 26.9 -0.0617
7 high_lines 54.3 47.8 16.8 26.9 -0.0685
8 slant_down 54.3 47.8 16.8 26.9 -0.0690
9 slant_up 54.3 47.8 16.8 26.9 -0.0686
10 star 54.3 47.8 16.8 26.9 -0.0630
11 v_lines 54.3 47.8 16.8 26.9 -0.0694
12 wide_lines 54.3 47.8 16.8 26.9 -0.0666
13 x_shape 54.3 47.8 16.8 26.9 -0.0656
Duke Library - Center for Data and Visualization Sciences - https://library.duke.edu/data/
Tidy tuesday - https://github.com/rfordatascience/tidytuesday
Flowing data - https://flowingdata.com/
Twitter - #dataviz, #tidytuesday
Books:
Above materials are derived in part from the following sources:
Sta 523 - Fall 2023