##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
## setup ----
##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#..........................load packages.........................
library(tidyverse)
#..........................import data...........................
# read the TidyTuesday (2021-07-20) drought CSV directly from GitHub ----
drought <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2021/2021-07-20/drought.csv"
)
##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
## wrangle drought data ----
##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
drought_clean <- drought |>
  # select cols of interest; `valid_start` is renamed to `date` for clarity ----
  select(date = valid_start, state_abb, drought_lvl, area_pct) |>
  # add year, month & day cols using {lubridate} fxns ----
  # NOTE: this step isn't necessary for our plot, but it's included as an
  # example of how to extract different date elements from an object of class
  # Date using {lubridate}. The fxns are namespaced explicitly because
  # `library(tidyverse)` only attaches {lubridate} from tidyverse 2.0.0 onward ----
  mutate(
    year = lubridate::year(date),
    month = lubridate::month(date, label = TRUE, abbr = TRUE),
    day = lubridate::day(date)
  ) |>
  # add drought level condition names as a factor; the level order set here
  # (D4 -> None) determines the order of the groups when plotting ----
  mutate(
    drought_lvl_long = factor(
      drought_lvl,
      levels = c("D4", "D3", "D2", "D1", "D0", "None"),
      labels = c(
        "(D4) Exceptional",
        "(D3) Extreme",
        "(D2) Severe",
        "(D1) Moderate",
        "(D0) Abnormally Dry",
        "No Drought"
      )
    )
  ) |>
  # reorder cols ----
  relocate(
    date, year, month, day,
    state_abb, drought_lvl, drought_lvl_long, area_pct
  )
##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
## create stacked area plot of CA drought conditions through time ----
##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
drought_clean |>
  # keep only CA observations & drop the "None" drought level ----
  filter(state_abb == "CA", drought_lvl != "None") |>
  # initialize ggplot: date on x, percent area on y, drought level as fill ----
  ggplot(aes(x = date, y = area_pct, fill = drought_lvl_long)) +
  # stack areas in reverse group order so level D4 sits closest to the x-axis ----
  geom_area(position = position_stack(reverse = TRUE)) +
  # update colors to match US Drought Monitor ----
  # (colors identified using ColorPick Eyedropper extension on the original USDM data viz)
  # values are unnamed, so they are matched in order to the fill levels present ----
  scale_fill_manual(
    values = c(
      "#853904", # (D4) Exceptional
      "#FF0000", # (D3) Extreme
      "#FFC100", # (D2) Severe
      "#FFD965", # (D1) Moderate
      "#FFFF00"  # (D0) Abnormally Dry
    )
  ) +
  # x-axis: set breaks & remove padding between data and axis ----
  scale_x_date(
    breaks = scales::breaks_pretty(n = 10),
    expand = c(0, 0)
  ) +
  # y-axis: breaks every 10, no padding & values shown as percentages ----
  # (`scale = 1` because area_pct is already on a 0-100 scale)
  scale_y_continuous(
    breaks = seq(from = 0, to = 100, by = 10),
    expand = c(0, 0),
    labels = scales::label_percent(scale = 1)
  ) +
  # add title ----
  labs(title = "Drought area in California")
Note
It’s up to you to organize your own `week4-discussion.qmd` file (i.e. there is no template). You may (should) discuss and work through today’s exercise with a partner (or two!).
- Begin by copying the following setup, data wrangling, and ggplot code from the week 2 discussion section into your `week4-discussion.qmd` file.
- Iterate on this ggplot until it closely resembles the original USDM plot. Tip: begin with a complete theme, then use `theme()` to tweak plot elements from there.
Use the Zoom button to check out your plot as you iterate
Your plot will appear slightly different in the RStudio plot window vs. a zoom window vs. rendered in a Quarto doc – I recommend checking it out in the zoom window while you iterate.
Importantly, how you theme a data visualization depends on how you plan to package / present it. For example, your plot margin sizes may differ when you prepare a standalone plot vs. one that you plan to embed in a rendered Quarto doc. We’ll explore this idea more during week 6 discussion!