##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
## setup ----
##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# attach the tidyverse packages used throughout this script ----
library(tidyverse)

# read the raw drought data (TidyTuesday, 2021-07-20) straight from GitHub ----
drought <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2021/2021-07-20/drought.csv"
)
##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
## wrangle drought data ----
##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Build `drought_clean` from the raw `drought` table ----
# Resulting cols (in order): date, year, month, day, state_abb, drought_lvl,
# drought_lvl_long, area_pct
drought_clean <- drought |>
  # select cols of interest & update names for clarity (`valid_start` becomes `date`) ----
  select(date = valid_start, state_abb, drought_lvl, area_pct) |>
  # add year, month & day cols using {lubridate} fxns ----
  # NOTE: this step isn't necessary for our plot, but it's included as an example of how to
  # extract different date elements from an object of class Date using {lubridate} ----
  # The fxns are namespace-qualified because `library(tidyverse)` only attaches {lubridate}
  # from tidyverse 2.0.0 onward; qualifying them keeps this step working on older installs ----
  mutate(
    year = lubridate::year(date),
    month = lubridate::month(date, label = TRUE, abbr = TRUE),
    day = lubridate::day(date)
  ) |>
  # add drought level condition names ----
  # levels are listed from most severe (D4) to "None"; the factor order drives the legend
  # & stacking order in the plot; any `drought_lvl` value not listed here becomes NA ----
  mutate(
    drought_lvl_long = factor(
      drought_lvl,
      levels = c("D4", "D3", "D2", "D1", "D0", "None"),
      labels = c(
        "(D4) Exceptional",
        "(D3) Extreme",
        "(D2) Severe",
        "(D1) Moderate",
        "(D0) Abnormally Dry",
        "No Drought"
      )
    )
  ) |>
  # reorder cols ----
  relocate(date, year, month, day, state_abb, drought_lvl, drought_lvl_long, area_pct)
##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
## create stacked area plot of CA drought conditions through time ----
##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Stacked area plot of `area_pct` through time for California, filled by drought level ----
drought_clean |>
  # remove drought_lvl "None" & filter for just CA ----
  filter(
    drought_lvl != "None",
    state_abb == "CA"
  ) |>
  # initialize ggplot ----
  ggplot(mapping = aes(x = date, y = area_pct, fill = drought_lvl_long)) +
  # reverse order of groups so level D4 is closest to x-axis ----
  geom_area(position = position_stack(reverse = TRUE)) +
  # update colors to match US Drought Monitor ----
  # (colors identified using ColorPick Eyedropper extension on the original USDM data viz)
  # colors are named by factor label so each one stays attached to its drought level,
  # regardless of which levels are present or how they are ordered ----
  scale_fill_manual(
    values = c(
      "(D4) Exceptional" = "#853904",
      "(D3) Extreme" = "#FF0000",
      "(D2) Severe" = "#FFC100",
      "(D1) Moderate" = "#FFD965",
      "(D0) Abnormally Dry" = "#FFFF00"
    )
  ) +
  # set x-axis breaks & remove padding between data and x-axis ----
  scale_x_date(
    breaks = scales::breaks_pretty(n = 10),
    expand = c(0, 0)
  ) +
  # set y-axis breaks & remove padding between data and y-axis & convert values to percentages ----
  # (`scale = 1` because the y-axis breaks run 0-100, i.e. values are not rescaled by 100)
  scale_y_continuous(
    breaks = seq(0, 100, by = 10),
    expand = c(0, 0),
    labels = scales::label_percent(scale = 1)
  ) +
  # add title ----
  labs(title = "Drought area in California")

# It’s up to you to organize your own week4-discussion.qmd file (i.e. there is no template).
# You may (should) discuss and work through today’s exercise with a partner (or two!).
- Begin by copying the following setup, data wrangling, and ggplot code from the week 2 discussion section into your `week4-discussion.qmd` file.
- Iterate on this ggplot until it closely resembles the original USDM plot. Tip: begin with a complete theme, then use `theme()` to tweak plot elements from there.
Your plot will appear slightly different in the RStudio plot window vs. a zoom window vs. rendered in a Quarto doc – I recommend checking it out in the zoom window while you iterate.
Importantly, how you theme a data visualization depends on how you plan to package / present it. For example, your plot margin sizes may differ when you prepare a standalone plot vs. one that you plan to embed in a rendered Quarto doc. We’ll explore this idea more during week 6 discussion!