Eva’s #30DayChartChallenge Gallery

#30DayChartChallenge

#30DayChartChallenge is a global, community-driven data visualization initiative taking place annually in April. Everyone is invited to create and share one chart per day for 30 days, each based on a different theme and prompt. (Official repo)

2026

repository

Comparisons

# Set-up -----------------------------------------------------------------------
# ---------------------------------------------------------------------------- #

# Load packages
library(NHANES)

library(dplyr)

library(ggplot2)
library(ggmosaic)
library(paletteer)
library(showtext)
library(ggtext)

# Access to author info
source("commons/CreateSocialCaption.R")

# Explore data and survey years
glimpse(NHANES)
unique(NHANES$SurveyYr)

target_survey <- "2011_12"

# LOAD DATA as a data frame

# It's a big dataset, so table() is your friend first
plot_dat <- data.frame(NHANES$SurveyYr, NHANES$Education, NHANES$HealthGen)
colnames(plot_dat) <- c("survey_year", "education", "health")

# Subset for selected year and keep only complete cases
plot_dat <- plot_dat |>
  filter(survey_year == target_survey)

plot_dat <- plot_dat[rowSums(is.na(plot_dat)) == 0, ]

# Make health column a factor with foxed order -> order in the plot
# Before that, control how "very good" is spelled
plot_dat$health <- case_when(
  plot_dat$health == "Vgood" ~ "Very good",
  TRUE ~ paste(plot_dat$health)
)

health_levels <- c(
  "Excellent",
  "Very good",
  "Good",
  "Fair",
  "Poor"
)

plot_dat$health <- factor(plot_dat$health, levels = rev(health_levels))


# Plot -------------------------------------------------------------------------
# ---------------------------------------------------------------------------- #

p_mosaic <- ggplot(plot_dat) +
  geom_mosaic(aes(x = product(education), fill = health)) +
  scale_fill_manual(values = rev(paletteer::paletteer_d("nord::silver_mine"))) +
  coord_cartesian(expand = F) +
  labs(
    title = paste("Education level vs self-reported health status \nacross",
                  format(nrow(plot_dat), big.mark = " "), "US respondents (Dec 2011)"),
    x = "",
    y = ""
  ) +
  theme_mosaic() +
  theme(
    plot.title = element_text(size = 14, face = "bold", lineheight = 0.35),
    axis.title = element_blank(),
    axis.text.x = element_text(
      size = 14, colour = "black", face = "bold",
      angle = 90, hjust = 1, vjust = 0.5
    ),
    axis.text.y = element_text(size = 14, colour = "black", face = "bold"),
    axis.ticks = element_blank(),
    aspect.ratio = 1,
    legend.position = "none"
  )

p_mosaic

# Include author details in the graphic
author_cap <- CreateSocialCaption()

font_add(family = "monospace", regular = "cour.ttf")

p_mosaic <- p_mosaic +
  labs(caption = paste(
    "**Data source**: <span style='font-family:monospace;'>{NHANES}</span> package ",
    "&nbsp;&nbsp;&nbsp;&nbsp; **|**",
    author_cap
  )) +
  theme(plot.caption = ggtext::element_markdown(size = 13, lineheight = 1.5, vjust = -2.5))

p_mosaic


# Export
# ---- #
ggsave(plot = p_mosaic, filename = "2026/plots/03-Mosaic.png", width = 1200, height = 1200, units = "px", scale = 0.65)

# SETUP ------------------------------------------------------------------------
# ---------------------------------------------------------------------------- #

# Preprocessed dataset - loaded as 'prepro_pop'
load("2026/data/preprocessed/SYB61_253_Population_Growth_Rates.RData")

# Packages
library(dplyr)
library(rnaturalearth)

library(ggplot2)
library(ggrepel)
library(scales)
library(cowplot)
library(gridtext)

# Author details fxn
source("commons/CreateSocialCaption.R")


# SUBSET DATA: European countries ----------------------------------------------
# ---------------------------------------------------------------------------- #

# World data will be source of all European countries
world_map <- ne_countries(scale = 50, returnclass = "sf")
glimpse(world_map)

ecountries <- world_map |>
  filter(continent == "Europe") |>
  pull(sovereignt)

acountries <- world_map |>
  filter(continent == "Africa") |>
  pull(sovereignt)

# How many are present in the population data?
length(ecountries)

length(intersect(ecountries, unique(prepro_pop$region)))
setdiff(ecountries, unique(prepro_pop$region))

# This is a manual step for as complete dataset as possible
# Kosovo is missing in population growth data
# Moldova is Republic of Moldova, Republic of Serbia is Serbia, North Macedonia is TFYR of Macedonia
# Vatican City is a capital of Holy See - since it has only a single observation, it will not be included

# Update countries vector for filtering
ecountries <- c(ecountries, "Moldova", "Republic of Serbia", "TFYR of Macedonia")

length(acountries)

length(intersect(acountries, unique(prepro_pop$region)))
setdiff(acountries, unique(prepro_pop$region))

acountries <- c(
  acountries,
  "United Rep. of Tanzania", "Eswatini", "Somalia", "Sao Tome and Principe",
  "Dem. Rep. of the Congo", "Congo"
)

# Filtering for both continents
eur_pop <- prepro_pop |>
  filter(region %in% ecountries)

n_distinct(eur_pop$region)

urban_pop <- eur_pop |>
  filter(
    pop_type == "Urban population",
    pop_unit == "percent"
  ) |>
  # keep entries with min. 2 time points
  group_by(region) |>
  mutate(n_records = n()) |>
  filter(n_records >= 2) |>
  select(-n_records)

afr_pop <- prepro_pop |>
  filter(region %in% acountries)

n_distinct(afr_pop$region)

urban_pop_a <- afr_pop |>
  filter(
    pop_type == "Urban population",
    pop_unit == "percent"
  ) |>
  # keep entries with min. 2 time points
  group_by(region) |>
  mutate(n_records = n()) |>
  filter(n_records >= 2) |>
  select(-n_records)


# PLOT changes Europe vs Africa ------------------------------------------------
# ---------------------------------------------------------------------------- #

# --- Europe ---

# countires with change of min. 10%
e_tops <- urban_pop |>
  tidyr::pivot_wider(id_cols = "region", values_from = "value", names_from = "year") |>
  mutate(growth = `2018` - `2005`) |>
  filter(growth >= 10) |>
  pull(region)

# Isolate top growers and top in 2018 into a separate data frame -> access to labels
df_e_tops <- urban_pop |>
  filter(region %in% e_tops, year == 2018)

# Continent shape
# Including some filtering and cropping to keep "known" shape
bg_eur <- world_map |>
  filter(continent == "Europe", sovereignt != "Russia") |>
  ggplot() +
  geom_sf(fill = "#fcbf49", colour = NA, alpha = 0.25) +
  # labs(caption = paste("**Data source:** unstats.un.org", "**|**", CreateSocialCaption())) +
  scale_x_continuous(limits = c(-10, 40)) +
  scale_y_continuous(limits = c(35, 80)) +
  theme_void() +
  theme(plot.caption = ggtext::element_markdown(),
        aspect.ratio = 0.75)

p_eur <- ggplot(urban_pop, aes(x = year, y = value / 100, group = region)) +
  geom_line(colour = "grey80", alpha = 0.6, linewidth = 0.35) +
  # max and min
  geom_line(data = filter(urban_pop, region == "Liechtenstein"), colour = "#003DA5", linewidth = 0.5) +
  geom_text(
    data = filter(urban_pop, region == "Liechtenstein", year == 2018),
    label = "Liechtenstein", colour = "#003DA5",
    nudge_x = 0.2, hjust = 0, size = 3
  ) +
  geom_line(data = filter(urban_pop, region == "Belgium"), colour = "#C8102E", linewidth = 0.5) +
  geom_text(
    data = filter(urban_pop, region == "Belgium", year == 2018),
    label = "Belgium", colour = "#C8102E",
    nudge_x = 0.2, hjust = 0, size = 3
  ) +
  # growth above 10%
  geom_line(data = filter(urban_pop, region %in% e_tops), colour = "black", linewidth = 0.5) +
  geom_text(
    data = filter(urban_pop, region %in% e_tops, year == 2018),
    aes(label = region), colour = "black",
    nudge_x = 0.2, hjust = 0, size = 3
  ) +
  coord_cartesian(xlim = c(2005, 2018), ylim = c(0.1, 1), expand = F, clip = "off") +
  scale_x_continuous(breaks = unique(urban_pop$year)) +
  scale_y_continuous(labels = scales::percent_format()) +
  labs(
    x = "year",
    y = "urban population"
  ) +
  theme_minimal(base_size = 13) +
  theme(
    plot.margin = margin(0, 80, 0, 0),
    plot.background = element_rect(fill = NA),
    panel.background = element_rect(fill = NA),
    panel.grid = element_blank(),
    plot.caption = ggtext::element_markdown(),
    axis.title = element_text(colour = "black"),
    axis.text = element_text(colour = "black"),
    axis.line = element_line(colour = "black"),
    axis.ticks = element_line(colour = "black", linewidth = 0.5),
    axis.ticks.length = unit(-5, "pt"),
    aspect.ratio = 0.75
  )

# Align plots
aligned_plots <- align_patches(bg_eur, p_eur)

# Draw them in order and fine tune alignment
p <- cowplot::ggdraw(aligned_plots[[1]])
p_eur_assembled <- p + cowplot::draw_plot(aligned_plots[[2]], valign = 0.2, scale = 1)

p_title <- ggdraw() +
  draw_label(
    stringr::str_wrap(
      "A single European country experienced >10 percentage points increase in urban population between 2005 and 2018",
      width = 60
      ),
    fontface = 'bold',
    size = 11,
    x = 0.5
  )

# Include the title to arranged plot assemblies
p_final <- plot_grid(p_title, p_eur_assembled, ncol = 1, rel_heights = c(0.05, 0.9))
# And a caption-like data & author note
p_final_png <- ggdraw(p_final) +
  draw_grob(
    gridtext::richtext_grob(
      paste("**Data source:** unstats.un.org", "<br>", CreateSocialCaption()),
      gp = grid::gpar(fontsize = 10),
      x = 0.5,
      hjust = 0.5,
      vjust = 7, use_markdown = T
    )
  )

print(p_final_png)

# https://stackoverflow.com/questions/75020376/save-plot-exactly-as-previewed-in-the-plots-panel
# Create a temporary file
tmp <- tempfile()

# Put the current plot into the tempfile in the svg format
dev.print(svg,tmp)

# Convert the svg temp file to png and store it in a png file
rsvg::rsvg_png(tmp, "2026/plots/04-Slope.png", height = 800, width = 900)


# # --- Africa ---
#
# a_tops <- urban_pop_a |>
#   tidyr::pivot_wider(id_cols = "region", values_from = "value", names_from = "year") |>
#   mutate(growth = `2018` - `2005`) |>
#   filter(growth >= 10) |>
#   pull(region)
#
# df_a_tops <- urban_pop |>
#   filter(region %in% a_tops, year == 2018)
#
# # Continent shape
# bg_afr <- world_map |>
#   filter(continent == "Africa") |>
#   ggplot() +
#   geom_sf(fill = "grey90", colour = "grey90") +
#   theme_void()
#
# p_afr <- ggplot(data = urban_pop_a, aes(x = year, y = value / 100, group = region)) +
#   geom_line(colour = "grey80", alpha = 0.6, linewidth = 0.65) +
#   # top two in 2018
#   geom_line(data = filter(urban_pop_a, region == "Western Sahara"), colour = "#007A3D", linewidth = 0.5) +
#   geom_text(
#     data = filter(urban_pop_a, region == "Western Sahara", year == 2018),
#     label = "Western Sahara", colour = "#007A3D",
#     nudge_x = 0.2, hjust = 0
#   ) +
#   geom_line(data = filter(urban_pop_a, region == "Gabon"), colour = "#4664B2", linewidth = 0.5) +
#   geom_text(
#     data = filter(urban_pop_a, region == "Gabon", year == 2018),
#     label = "Gabon", colour = "#4664B2",
#     nudge_x = 0.2, hjust = 0
#   ) +
#   # min urban population
#   geom_line(data = filter(urban_pop_a, region == "Burundi"), colour = "#1EB53A", linewidth = 0.5) +
#   geom_text(
#     data = filter(urban_pop_a, region == "Burundi", year == 2018),
#     label = "Burudni", colour = "#1EB53A",
#     nudge_x = 0.2, hjust = 0
#   ) +
#   # growth above 10%
#   geom_line(data = filter(urban_pop_a, region %in% a_tops), colour = "black", linewidth = 0.5) +
#   coord_cartesian(xlim = c(2005, 2018), ylim = c(0.1, 1), expand = F, clip = "off") +
#   scale_x_continuous(breaks = unique(urban_pop_a$year)) +
#   scale_y_continuous(labels = scales::percent_format()) +
#   labs(
#     x = "year",
#     y = ""
#   ) +
#   theme_minimal(base_size = 14) +
#   theme(
#     plot.margin = margin(0, 50, 0, 0),
#     plot.background = element_rect(fill = NA),
#     panel.background = element_rect(fill = NA),
#     panel.grid = element_blank(),
#     axis.title = element_text(size = 14, colour = "black"),
#     axis.text = element_text(size = 14, colour = "black"),
#     axis.line = element_line(colour = "black"),
#     axis.ticks = element_line(colour = "black", linewidth = 0.5),
#     axis.ticks.length = unit(-5, "pt"),
#     aspect.ratio = 0.75
#   )
#
# # Align plots
# aligned_plots <- align_patches(bg_afr, p_afr)
#
# # Draw them in order and fine tune alignment
# p <- cowplot::ggdraw(aligned_plots[[1]])
# p_afr_assembled <- p + cowplot::draw_plot(aligned_plots[[2]], valign = 0.05, scale = 1.15)
#
#
# # Export both continents
# # -------------------- #
#
# # Prepare a joint title
# p_title <- ggdraw() +
#   draw_label(
#     "Urban population in course of 13 years in Europe and Africa",
#     fontface = 'bold',
#     x = 0,
#     hjust = 0,
#     vjust = 2
#   ) +
#   theme(
#     # title is aligned with left edge of first plot
#     plot.margin = margin(0, 0, 0, 7),
#     plot.background = element_rect(fill = "white", colour = "white"),
#     panel.background = element_rect(fill = "white", colour = "white")
#   )
#
# # Include arranged plot assemblies
# p_final <- plot_grid(p_title, plot_grid(p_eur_assembled + p_afr_assembled, align = "hv"), ncol = 1, rel_heights = c(-1, -1))
#
# ggsave(
#   plot = p_final,
#   filename = "2026/plots/04-Slope.png", width = 1200, height = 600, units = "px", scale = 2
# )

Distributions

Day 12: Data day: reporters without borders

# Set-up -----------------------------------------------------------------------
# ---------------------------------------------------------------------------- #

library(janitor)
library(dplyr)
library(tidyr)
library(countrycode)

library(ggflags)
library(ggplot2)
library(ggtext)

library(showtext)
library(glue)

source("commons/CreateSocialCaption.R")

# Get & pre-process data--------------------------------------------------------
# ---------------------------------------------------------------------------- #

rep_dat <- read.delim("2026/data/reporters_without_borders_2025.csv", sep = ";", dec = ",", header = T)
rep_dat <- janitor::clean_names(rep_dat)

score_dat <- rep_dat |> 
  filter(zone == "UE Balkans") |> 
  select(iso, country_en, score_2025, score_n_1) |> 
  pivot_longer(cols = contains("score"), names_to = "year", values_to = "score") |> 
  mutate(year = case_when(
    grepl("2025", year) ~ 2025,
    grepl("n_1", year) ~ 2024,
    TRUE ~ NA
  ))

score_dat$iso2 <- countrycode(score_dat$country_en, "country.name", "iso2c")
score_dat$iso2 <- case_when(
  score_dat$country_en == "Kosovo" ~ "XK",
  score_dat$country_en == "Northern Cyprus" ~ NA,
  TRUE ~ score_dat$iso2
  )
score_dat$iso2 <- tolower(score_dat$iso2)

# For segment lengths in the dumbbell plot
df_scores1 <- filter(score_dat, year == 2024)
df_scores2 <- filter(score_dat, year == 2025)

# Sort countries by the latest result
countries_order <- df_scores2 |> 
  arrange(desc(score)) |> 
  pull(country_en)

df_scores1 <- rep_dat |> 
  filter(zone == "UE Balkans") |> 
  select(country_en, score_evolution) |> 
  left_join(df_scores1, by = "country_en") |> 
  mutate(score_evo_dir = case_when(
    score_evolution > 0 ~ "gain",
    score_evolution < 0 ~ "loss",
    TRUE ~ NA
  ))

score_dat <- left_join(score_dat, select(df_scores1, country_en, score_evo_dir))
score_dat$country_en <- factor(score_dat$country_en, levels = rev(countries_order))
df_scores1$country_en <- factor(df_scores1$country_en, levels = rev(countries_order))

# Plot -------------------------------------------------------------------------
# ---------------------------------------------------------------------------- #

font_add_google(name = "Nunito", family = "nunito")
showtext_auto()

p_title <- "World Press Freedom Index of European countries in 2024 and 2025"

n_gain <- sum(df_scores1$score_evo_dir == "gain")
n_loss <- sum(df_scores1$score_evo_dir == "loss")

p_subtitle <- glue(
  "The purpose of the index is to compare the level of freedom enjoyed by journalists and media in individual countries.\n
  In {n_gain} countries, the index <span style='color:#a7c957;'>**improved**</span>, but {n_loss} countries experienced <span style='color:#d4a373;'>**a decline**</span>.
  \n(Index values from 2024 are shown as dots, 2025 values are represented by country flags.)"
  )

p <- ggplot() +
  geom_rect(aes(xmin = 85, xmax = 100, ymin = -Inf, ymax = Inf), fill = "grey80") +
  geom_rect(aes(xmin = 70, xmax = 85, ymin = -Inf, ymax = Inf), fill = "grey60") +
  geom_rect(aes(xmin = 55, xmax = 70, ymin = -Inf, ymax = Inf), fill = "grey40") +
  geom_rect(aes(xmin = 40, xmax = 55, ymin = -Inf, ymax = Inf), fill = "grey20") +
  geom_rect(aes(xmin = 30, xmax = 40, ymin = -Inf, ymax = Inf), fill = "grey5") +
  geom_vline(xintercept = c(85, 70, 55, 40), linetype = "dashed", colour = "grey80") +
  annotate(geom = "text", x = c(40, 55, 70, 85, 100), y = "Norway", hjust = 1.5,
           label = c("very serious", "difficult", "problematic", "satisfactory", "good"), colour = "white", fontface = "bold", size = 8.5) +
  geom_segment(data = df_scores1,
               aes(x = score, y = country_en,
                   xend = df_scores2$score, yend = df_scores2$country_en,
                   colour = score_evo_dir),
               linewidth = 3.5,
               alpha = 0.5) +
  scale_colour_manual(values = c("loss" = "#d4a373", "gain" = "#a7c957")) +
  geom_point(data = score_dat,
             aes(x = score, y = country_en, group = country_en, colour = score_evo_dir),
             size = 4) +
  geom_flag(data = filter(score_dat, year == 2025),
            aes(x = score, y = country_en, country = iso2), size = 4) +
  scale_y_discrete(sec.axis = dup_axis(name = "")) +
  labs(title = p_title,
       subtitle = p_subtitle,
       caption = paste0(
         "**Data source:** Reporters without borders",
         "**Design:**", CreateSocialCaption())) +
  scale_x_continuous(breaks = c(40, 55, 70, 85)) +
  theme_void() +
  theme(plot.margin = margin(2, 2, 2, 2),
        text = element_text(family = "nunito", color = "#4a4e4d"),
        plot.title = element_text(face = "bold", size = 32, margin = margin(0, 0, 5, 25)),
        plot.title.position = "panel",
        plot.subtitle = element_markdown(size = 26, lineheight = 0.85, margin = margin(0, 0, 5, 25)),
        plot.caption = element_markdown(size = 22, lineheight = 1),
        axis.text.x = element_text(colour = "black", size = 22, hjust = 0.5),
        axis.text.y = element_text(colour = "black", size = 22, hjust = 1),
        axis.text.y.right = element_text(colour = "black", size = 22, hjust = 0),
        aspect.ratio = 0.65,
        legend.position = "none")

print(p)

ggsave(plot = p, filename = "2026/plots/12-DataDay.png", width = 1200, height = 800, units = "px", bg = "white", scale = 2.5)

Relationships

# Set-up -----------------------------------------------------------------------
# ---------------------------------------------------------------------------- #

library(dplyr)
library(tidyr)

library(ggplot2)
library(ggblur)
library(ggtext)
library(showtext)
library(glue)
library(cowplot)

source("commons/CreateSocialCaption.R")

# Create input data ------------------------------------------------------------
# ---------------------------------------------------------------------------- #

# Suitable sources would have been secondhandsongs.org and/or musicbrainz.com
# But APIs were difficult, therefore a compilation by  ChatGPT
# which seems to kind of reflect the true state of the databases

elvis_covers <- data.frame(
  song = c(
    # Early rock & breakthrough (1950s)
    "That's All Right",
    "Blue Suede Shoes",
    "Hound Dog",
    "Heartbreak Hotel",
    "Don't Be Cruel",
    "All Shook Up",
    "Love Me Tender",
    "Jailhouse Rock",
    "Too Much",
    "Teddy Bear",
    
    # Late 1950s / early 1960s transition
    "Wear My Ring Around Your Neck",
    "It's Now or Never",
    "Are You Lonesome Tonight?",
    "Stuck on You",
    "A Big Hunk o' Love",
    
    # Early–mid 1960s film era
    "Can't Help Falling in Love",
    "Return to Sender",
    "She's Not You",
    "Viva Las Vegas",
    "Kiss Me Quick",
    
    # Late 1960s comeback era
    "Suspicious Minds",
    "In the Ghetto",
    "Kentucky Rain",
    
    # 1970s ballads & Vegas era
    "The Wonder of You",
    "Always on My Mind",
    "Burning Love",
    "An American Trilogy",
    "My Way",
    
    # Lesser-covered / deeper cuts
    "I Want You, I Need You, I Love You",
    "One Night",
    "Little Sister",
    "If I Can Dream",
    "Polk Salad Annie"
  ),
  
  release_year = c(
    1954, 1956, 1956, 1956, 1956, 1957, 1956, 1957, 1957, 1957,
    1958, 1960, 1960, 1960, 1961,
    1961, 1962, 1962, 1964, 1963,
    1969, 1969, 1969,
    1970, 1972, 1972, 1972, 1973,
    1956, 1958, 1961, 1968, 1969
  ),
  
  estimated_covers = c(
    # early rock (widely covered standards)
    120, 300, 500, 200, 250, 175, 250, 200, 140, 160,
    
    # transitional hits
    80, 200, 150, 120, 90,
    
    # film era staples
    300, 130, 110, 200, 90,
    
    # comeback era
    150, 180, 100,
    
    # 1970s
    150, 400, 220, 140, 300,
    
    # deep cuts (less covered)
    90, 110, 95, 160, 80
  ),
  
  genre = c(
    # early rock
    "Rockabilly", "Rockabilly", "Rock & Roll", "Rock & Roll", "Rock & Roll",
    "Rock & Roll", "Ballad", "Rock & Roll", "Rock & Roll", "Rock & Roll",
    
    # transition
    "Rock & Roll", "Ballad", "Ballad", "Rock & Roll", "Rock & Roll",
    
    # film era
    "Ballad", "Pop", "Pop", "Rock & Roll", "Pop",
    
    # comeback
    "Rock Ballad", "Soul", "Country Ballad",
    
    # 1970s
    "Ballad", "Ballad", "Rock", "Gospel/Orchestral", "Ballad",
    
    # deep cuts
    "Ballad", "Rock & Roll", "Rock & Roll", "Gospel", "Blues Rock"
  )
)



# Plot -------------------------------------------------------------------------
# ---------------------------------------------------------------------------- #

# Setup: custom fonts, sourced from Google
font_add_google(name = c("Lobster", "League Spartan"), family = c("lobster", "spartan"))
showtext_auto()

# Plot background: an LP in the background & title with caption

circ_sequence <- seq(1, 150, by = 5)

circles <- data.frame(
  size = circ_sequence,
  x = rep(1, length(circ_sequence)),
  y = rep(1, length(circ_sequence))
)

plot_cap <- paste0(
  "<span style='color:white;'>**Data source:** SecondHandSongs & MusicBrainz, compilation by ChatGPT</span>",
  "<br>",
  CreateSocialCaption(github_icon_color = "#F2D649", linkedin_icon_color = "#F2D649", text_color = "white")
  )

p_bg <- ggplot(data = circles, aes(x = 1, y = 1, size = size)) +
  geom_point(shape = 21, fill = NA, colour = "white") +
  scale_size_continuous(range = c(10, 100)) +
  scale_x_continuous(limits = c(-2, 5)) +
  scale_y_continuous(limits = c(-151, 152)) +
  labs(title = "Most Covered Songs of the King",
       subtitle = "Elvis Presley’s songs from the mid-1950s and early 1970s are his most covered works",
       caption = plot_cap) +
  theme_void() +
  theme(panel.background = element_rect(fill = "black"),
        plot.background = element_rect(fill = "black"),
        plot.margin = margin(10, 0, 0, 0),
        plot.title = element_text(family = "lobster", size = 30, colour = "white", hjust = 0.5),
        plot.subtitle = element_text(family = "spartan", size = 14, colour = "white",
                                     hjust = 0.5, margin = margin(10, 0, 10, 0)),
        plot.caption = element_markdown(family = "spartan", lineheight = 1.15,
                                        padding = unit(10, "pt")),
        aspect.ratio = 0.5,
        legend.position = "none")

# arrange data by year 
# > fix what row 1 references to & ultimately text placement in the plot
elvis_covers <- elvis_covers |> 
  arrange(-estimated_covers)

# colours from palette https://color.adobe.com/s-Presley-color-theme-1560137/

p_covers <- ggplot(data = elvis_covers, aes(x = release_year, y = estimated_covers)) +
  geom_point_blur(aes(size = estimated_covers), colour = "#F2D649", blur_size = 10) +
  geom_point(aes(size = estimated_covers), colour = "#F2C84B") +
  annotate(geom = "text", x = elvis_covers[1, ]$release_year, y = elvis_covers[1, ]$estimated_covers,
           label = glue("Covered {elvis_covers[1, ]$estimated_covers}x"),
           size = 3, colour = "white", family = "spartan",
           hjust = -0.5, vjust = -0.5) +
  ggrepel::geom_text_repel(data = elvis_covers[2:nrow(elvis_covers), ],
            aes(x = release_year, y = estimated_covers, label = paste0(estimated_covers, "x")),
            size = 3, colour = "white", family = "spartan", nudge_y = 1.5) +
  ggrepel::geom_text_repel(data = elvis_covers[elvis_covers$estimated_covers >= 400, ],
                           aes(x = release_year, y = estimated_covers, label = song),
                           size = 4, colour = "#F2D649", family = "spartan",
                           nudge_y = -30) +
  scale_size_continuous(range = c(1, 10)) +
  scale_x_continuous(limits = c(1950, 1980), expand = expansion(add = c(0, 0))) +
  scale_y_continuous(expand = expansion(add = c(35, 25))) +
  theme_void() +
  theme(plot.background = element_rect(fill = fill_alpha("black", 0.75)),
        axis.line = element_line(colour = "white", linewidth = 1),
        axis.text.x = element_text(family = "spartan", colour = "white", size = 14),
        axis.ticks.x = element_line(colour = "white", linewidth = 1),
        axis.ticks.length.x = unit(-10, "pt"),
        aspect.ratio = 0.65,
        legend.position = "none")

p_covers

# Data as insert
# Freeze the background as grob before to avoid unexpected shifting of spacing of elements
p_bg_fixed <- ggplotGrob(p_bg)
p_assembled <- ggdraw() + draw_grob(p_bg_fixed) +
  draw_plot(p_covers, x = -0.05, y = -0.025, scale = 0.65)

p_assembled

# Export
# ggsave(plot = p_assembled,
#        filename = "2026/plots/18-Remake.png",
#        width = 6,
#        height = 3.6,
#        dpi = 300,
#        bg = "black",
#        device = ragg::agg_png)

# Saved from Plots pane this time.

Day 18: Theme day: south china morning post

# SETUP ------------------------------------------------------------------------
# ---------------------------------------------------------------------------- #

# Packages
library(dplyr)
library(tidyr)
library(rnaturalearth)
library(countrycode)

library(ggplot2)
library(showtext)

# Author details fxn
source("commons/CreateSocialCaption.R")

rep_dat <- read.delim("2026/data/reporters_without_borders_2025.csv", sep = ";", dec = ",", header = T)
rep_dat <- janitor::clean_names(rep_dat)

score_dat <- rep_dat |> 
  filter(zone == "Asie-Pacifique") |> 
  select(iso, country_en, score_2025, score_n_1) |> 
  pivot_longer(cols = contains("score"), names_to = "year", values_to = "score") |> 
  mutate(year = case_when(
    grepl("2025", year) ~ 2025,
    grepl("n_1", year) ~ 2024,
    TRUE ~ NA
  ))

score_dat$iso2 <- countrycode(score_dat$country_en, "country.name", "iso2c")

# PLOT -------------------------------------------------------------------------
# ---------------------------------------------------------------------------- #

# South China Morning Post colours
scmp_blue <- "#001246"
scmp_yellow <- "#FFCA05"
scmp_gold <- "#c79a00"

# and fonts

sysfonts::font_add_google("Merriweather", "merriweather")
sysfonts::font_add_google("Roboto", "roboto")

# top 5% by increase
top_incs <- rep_dat |> 
  filter(zone == "Asie-Pacifique") |> 
  select(country_en, score_evolution) |> 
  distinct() |> 
  slice_max(n = 5, order_by = score_evolution) |> 
  pull(country_en)

# and top 5% by decrease
top_decs <- rep_dat |> 
  filter(zone == "Asie-Pacifique") |> 
  select(country_en, score_evolution) |> 
  distinct() |> 
  slice_min(n = 5, order_by = score_evolution) |> 
  pull(country_en)

# categorize
score_dat <- score_dat |> 
  mutate(category = case_when(
    country_en %in% top_incs ~ "top",
    country_en %in% top_decs ~ "bottom",
    TRUE ~ "others"
  ),
  # and a numerical helper for plotting
  num_category = case_when(
    country_en %in% top_incs ~ 2,
    country_en %in% top_decs ~ 2,
    TRUE ~ 1
  ))



p_subt <- glue(
  "**The purpose of the index is to compare the level of freedom <br>enjoyed by journalists and media in individual countries.** <br> The five countries with biggest year-to-year <span style=color:{scmp_yellow};>**IMPROVEMENT**</span> are problematic countries. <br> In contrary, countries with largest <span style=color:{scmp_blue};>**SCORE DECLINE**</span> are a mixture of well and poorly scoring countries."
)

p_cap <- paste0(
  "**Data source:** Reporters without borders <br>",
  "**Design:**", CreateSocialCaption(github_icon_color = scmp_gold, linkedin_icon_color = scmp_gold)
  )

p_asia <- ggplot(data = score_dat, aes(x = as.factor(year), y = score)) +
  geom_path(aes(colour = category, linewidth = num_category, group = country_en)) +
  geom_point(aes(colour = category, size = score)) +
  ggrepel::geom_text_repel(data = filter(score_dat, country_en %in% c(top_incs, top_decs), year == 2025),
            aes(label = country_en), size = 4, nudge_x = 0.35, segment.linetype = "dashed",
            family = "roboto") +
  scale_colour_manual(values = c("bottom" = scmp_blue, "top" = scmp_yellow, "others" = "grey80")) +
  scale_size_continuous(range = c(2, 6)) +
  scale_linewidth_continuous(range = c(1, 2)) +
  scale_x_discrete(expand = expansion(add = c(0.02, 0.05))) +
  labs(
    title = "World Press Freedom Index of Asian countries in 2024 vs 2025",
    subtitle = p_subt,
    caption = p_cap,
    x = " ",
    y = "score"
  ) +
  theme_void() +
  theme(
    plot.margin = margin(0, 50, 0, 0),
    plot.background = element_rect(fill = NA),
    panel.background = element_rect(fill = NA),
    panel.grid = element_blank(),
    plot.title = element_text(family = "merriweather", color = scmp_gold, margin = margin(2.5, 0, 5, 0)),
    plot.subtitle = ggtext::element_markdown(family = "roboto", lineheight = 1.15, size = 10,
                                             margin = margin(2.5, 0, 2.5, 0)),
    plot.caption = ggtext::element_markdown(family = "roboto", size = 10),
    axis.title.y = element_text(colour = "black", angle = 90, family = "roboto"),
    axis.text = element_text(colour = "black", family = "roboto"),
    axis.line.x.bottom = element_line(colour = "black"),
    axis.line.y.left  = element_line(colour = "black"),
    axis.ticks = element_line(colour = "black", linewidth = 0.5),
    axis.ticks.length = unit(5, "pt"),
    aspect.ratio = 1,
    legend.position = "none"
  )

p_asia

Uncertainties

# SETUP ------------------------------------------------------------------------
# ---------------------------------------------------------------------------- #

# Packages
library(Kendall)

library(dplyr)
library(ggplot2)
library(ggtext)
library(showtext)

# Custom author/data details function
source("commons/CreateSocialCaption.R")

# Load and explore data
data(PrecipGL)
str(PrecipGL)

# Preprocess for plotting
dat_precip <- data.frame(
  year = 1900:1986,
  "precip_in" = PrecipGL[1:length(PrecipGL)]
  )

glimpse(dat_precip)

# PLOT -------------------------------------------------------------------------
# ---------------------------------------------------------------------------- #

# Custom fonts
# fontawesome icons
font_add(family = "fa-solid", regular = "commons/fonts/Font Awesome 7 Free-Solid-900.otf")
# title and plain text
font_add_google(name = "Source Code Pro", family = "monospace")
font_add_google(name = "Winky Rough", family = "winky")
# this is so that fonts are accessible to ggplot2
showtext_auto()

# Title
p_title <- "Great Lakes Annual Precipitation (1900–1986)"
# Caption is combination of data source and author details
p_caption = paste0(
  "**Data source:**", " PrecipGL dataset from the {Kendall} R package",
  "**Visualization:**", CreateSocialCaption(github_icon_color = "#A89797FF", linkedin_icon_color = "#007FFFFF")
  )

# Actual plot is created here
ggplot(data = dat_precip, aes(x = year, y = precip_in)) +
  geom_path(linetype = "dotted", linewidth = 0.85, colour = "#007FFFFF", alpha = 0.5) +
  # display waterdrop at place of individual data points
  geom_richtext(aes(label = "<span style='font-family:fa-solid'>&#xf043;</span>", size = precip_in),
                label.colour = NA,
                label.padding = unit(0, "pt"),
                label.margin = unit(0, "pt"),
                show.legend = F,
                col = "#007FFFFF",
                fill = NA) +
  # label every fifth year - should be easier to navigate across years than usual axis breaks
  geom_text(data = filter(dat_precip, year %in% seq(1905, 1990, by = 5)) |> 
              mutate(nudge = case_when(
                precip_in <= 30 ~ -1.15,
                TRUE ~ 1.15
              )),
            aes(x = year, y = precip_in, label = year, nudge_y = nudge),
            angle = 90, size = 3.5, family = "winky", fontface = "bold", colour = "#A89797FF") +
  labs(title = p_title,
       y = "precipitation [in]",
       caption = p_caption) +
  scale_size_continuous(range = c(1.5, 5)) +
  scale_x_continuous(limits = c(1900, 1986), expand = expansion(add = c(0.5, 1))) +
  scale_y_continuous(limits = c(22, max(dat_precip$precip_in) + 2.55),
                     breaks = seq(20, max(dat_precip$precip_in), by = 5),
                     expand = F,
                     # secondary axis is to include metric units in the same plot
                     sec.axis = sec_axis(transform = ~ . * 25.4, name = "precipitation [mm]")) +
  theme_void() +
  theme(plot.margin = margin(5, 5, 5, 5),
        plot.background = element_rect(fill = alpha("#FFEFB2FF", 0.35)),
        plot.title = element_text(colour = "#A89797FF", family = "winky",
                                  size = 22, hjust = 0.5),
        plot.caption = element_markdown(family = "winky"),
        axis.title.y.left = element_text(colour = "#A89797FF", family = "winky",
                                         angle = 90, margin = margin(0, 10, 0, 0)),
        axis.title.y.right = element_text(colour = "#A89797FF", family = "winky",
                                          angle = 90, margin = margin(0, 0, 0, 10)),
        axis.line.x = element_line(colour = "#A89797FF", linewidth = 1),
        axis.line.y = element_line(colour = "#A89797FF", linewidth = 1),
        axis.ticks.y = element_line(colour = "#A89797FF", linewidth = 1),
        axis.ticks.length.y = unit(5, "pt"),
        axis.text.y = element_text(colour = "#A89797FF", family = "winky"),
        aspect.ratio = 0.5)

# Note: colour choice is based on palette "tvthemes::Arryn"

# Set-up -----------------------------------------------------------------------
# ---------------------------------------------------------------------------- #

# Load packages
library(dplyr)

library(ggplot2)
library(showtext)
library(scales)
library(ggtext)
library(ungeviz) # data source

# Access to author info
source("commons/CreateSocialCaption.R")

# Data pre-processing ----------------------------------------------------------
# ---------------------------------------------------------------------------- #
cacao <- cacao |> 
  filter_out(is.na(location)| is.na(cocoa_percent)) |> 
  # mutate(bean_origin = case_when(
  #   bean_origin == "Domincan Republic" ~ "Dominican Republic",
  #   TRUE ~ bean_origin
  # )) |> 
  group_by(location) |> 
  mutate(n_records = n()) |> 
  filter(n_records >= 5) |> 
  mutate(cocoa_percent = as.numeric(gsub("%", "", cocoa_percent)))

locations_sorted <- cacao |> 
  group_by(location) |> 
  summarize(median_prcnt = median(cocoa_percent)) |> 
  arrange(median_prcnt) |> 
  pull(location)

cacao$location <- factor(cacao$location, levels = locations_sorted)


# Plot -------------------------------------------------------------------------
# ---------------------------------------------------------------------------- #

sysfonts::font_add_google(name = "Finger Paint", family = "fpaint")
showtext_auto()

p_cap <- paste("**Visualization:**", CreateSocialCaption(github_icon_color = "white", linkedin_icon_color = "white", text_color = "white"))

ggplot(cacao, aes(x = cocoa_percent / 100, y = location)) +
  geom_boxplot(colour = "white", linewidth = 0.55, outlier.shape = NA) +
  geom_jitter(colour = "white", size = 1.05, height = 0.35) +
  labs(title = stringr::str_wrap("Cocoa content in dark chocolate bars of the world", width = 30),
       subtitle = stringr::str_wrap("Globally, the median cocoa content in dark chocolate bars is 70%. No matter manufacturer's location, however, surprises can happen - pay attention to product labeling. Data source: cacao dataset from the {ungeviz} package (originally compiled and still updated by flavorsofcacao.com)", width = 80),
       caption = p_cap,
       x = "",
       y = "") +
  scale_x_continuous(labels = scales::percent_format()) +
  scale_y_discrete(expand = expansion(add = c(1.5, 1.5))) +
  coord_cartesian(clip = "off") +
  theme_void() +
  theme(panel.background = element_rect(fill = "#664436", colour = "#664436"),
        plot.background = element_rect(fill = "#664436", colour = "#664436"),
        plot.title.position = "plot",
        plot.title = element_text(colour = "white", hjust = 0.5,
                                  family = "fpaint", size = 24),
        plot.subtitle = element_text(colour = "white", hjust = 0.5,
                                     family = "sains", size = 11),
        plot.caption = element_markdown(margin = margin(2.5, 10, 2.5, 0), size = 10, colour = "white"),
        axis.text.x = element_text(colour = "white", hjust = 0.5,
                                 family = "fpaint", size = 10),
        axis.text.y = element_text(colour = "white", hjust = 1,
                                   family = "fpaint", size = 8.5, margin = margin(0, 0, 0, 10)),
        axis.line.x = element_line(colour = "white", linewidth = 1),
        axis.ticks.x = element_line(colour = "white", linewidth = 1),
        axis.ticks.length.x = unit(5, "pt"),
        aspect.ratio = 1.55)

Resources

data from {NHANES} package for R
PrecipGL data from the {Kendall} package for R
population growth data by [UN(]https://data.un.org/) (file SYB61_253_Population_Growth_Rates_in Urban areas and Capital cities.csv)
cacao dataset from the {ungeviz} package (original source: flavorsofcacao.com[https://flavorsofcacao.com/])

2025

repository

Comparisons

library(ggplot2)
library(cowplot)

# Load preprocessed data
load("data/preprocessed_internet_users.RData")

# Users by year
p_times <- ggplot(global_internet_users, aes(x = year, y = number_of_internet_users)) +
  geom_point(aes(size = number_of_internet_users),
             shape = 21, alpha = 0.75, colour = "black", fill = "#035AA6FF") +
  scale_x_continuous(breaks = seq(min(global_internet_users$year),
                                  max(global_internet_users$year), 10)) +
  scale_y_continuous(limits = c(0, 5000000000), expand = c(0, 0),
                     breaks = seq(1e9, 5e9, 1e9),
                     labels = scales::label_number(scale = 1e-9, suffix = " B")) +
  scale_size(range = c(1, 10)) +
  theme_classic() +
  theme(panel.grid.major.x = element_line(color = "grey", size = 0.2),
        axis.text.x = element_text(face = "bold"),
        axis.title.x = element_text(face = "bold"),
        axis.title.y = element_blank(),
        aspect.ratio = 0.65,
        legend.position = "none")

# Pie chart for 2021
world_pop_2021 <- 7954448391
users_2021 <- global_internet_users[global_internet_users$year == 2021, ]$number_of_internet_users

df_global_2021 <- data.frame(
  "internet_user" = c("users", "non-users"),
  "n_people" = c(users_2021,
                world_pop_2021 - users_2021)
)

# a label
df_global_2021$frx_people <- paste0(round(df_global_2021$n_people / world_pop_2021 * 100, 0), "%")

p_2021 <- ggplot(df_global_2021, aes(x = "", y = n_people, fill = internet_user)) +
  geom_bar(stat = "identity", width = 1) +
  scale_fill_manual(values = c("#F2F2F2FF", "#035AA6FF")) +
  geom_text(aes(y = n_people, label = paste(frx_people, "\n", internet_user)),
            position = position_stack(vjust = 0.5), size = 3) +
  scale_y_continuous(labels = scales::comma) +
  coord_polar(theta = "y") +
  labs(title = "Population with Access \n in 2021") + 
  theme_void() +
  theme(plot.title = element_text(size = 8, vjust = 0.5, hjust = 0.5),
        legend.position = "none")


# combined plot
p_combined <- ggdraw(p_times) +
  draw_plot(p_2021, x = 0.01, y = 0.5, width = 0.45, height = 0.45) +
  draw_label("Global Acess to the Internet between 1990 and 2021",
             x = 0.5, y = 1, size = 11, fontface = "bold", vjust = 1)

# export
ggsave(plot = p_combined, filename = "plots/03-Circles.png")

library(dplyr)

library(ggplot2)
library(scales)
library(gridExtra)

# Load preprocessed data
load("data/preprocessed_internet_users.RData")

# color scale by continent - as in the Internet data
continent_colors <- c(c("Africa" = "#524595", "Asia" = "#0392cf",
                        "Europe" = "#e86af0", "North America" = "#ffbf00",
                        "Oceania" = "#95CA3E", "South America" = "#F26923"))

# Population data has both Americas together
continent_colors_merged <- c(c("Africa" = "#524595", "Asia" = "#0392cf",
                        "Europe" = "#e86af0", "Americas" = "#a7882e",
                        "Oceania" = "#95CA3E"))

### USERS BY THE END OF EACH DECADE
# =================================

# For number of user at a given decade, take last year of the decade
decade_data <- internet_users_by_continent |>
  # first meaningful data start in 1990s -> keep only those and later
  filter(year %in% c(1999, 2009, 2019, 2021)) |>
  group_by(year) |> 
  arrange(number_of_internet_users) |> 
  mutate(decade = floor(year / 10) * 10)


### ABSOLUTE NUMBERS
# ------------------

p_ls <- list()

for (d in sort(unique(decade_data$decade))) {
  input <- filter(decade_data, decade == d)
  
  p <- ggplot(input, aes(x = reorder(entity, number_of_internet_users), y = number_of_internet_users, group = entity)) +
    geom_col(aes(fill = entity), position = "dodge") +
    scale_fill_manual(values = continent_colors) +
    scale_y_continuous(labels = ifelse(max(input$number_of_internet_users > 1e9), label_number(scale = 1e-9, suffix = " B"),
                                  label_number(scale = 1e-6, suffix = " M")),
                                expand = c(0, 0)) +
    labs(title = paste0(d, "s")) +
    theme_classic() +
    theme(plot.title = element_text(hjust = 0.5, vjust = 1, face = "bold"),
          axis.title = element_blank(),
          axis.ticks.x = element_blank(),
          axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
          aspect.ratio = 0.65,
          legend.position = "none")
  
  p_ls[[as.character(d)]] <- p
}

# export
ggsave(plot = grid.arrange(grobs = p_ls, nrow = 1, top = "Number of Internet Users \n at the End of Each Decade and in 2021"),
      filename = "plots/05-Ranking-Absolute.png", width = 7, height = 4)

### PERCENTAGE OF POPULATION
# --------------------------

# For population, take last year of the decade
load("data/preprocessed_population.RData")

population_decades <- population_by_continent |>
  filter(year %in% c(1999, 2009, 2019, 2021)) |>
  mutate(decade = floor(year / 10) * 10)

decade_data <- decade_data |> 
  mutate(entity = case_when(
    grepl("America", entity) ~ "Americas",
    TRUE ~ entity)) |> 
  # Americas must be summer up
  group_by(entity, decade) |>
  summarise(number_of_internet_users = sum(number_of_internet_users))

decade_data_pop <- merge.data.frame(decade_data, population_decades, by = c("decade", "entity"))
# percentage of continent's population with internet users
decade_data_pop$prcnt <- decade_data_pop$number_of_internet_users / decade_data_pop$all_years

p_ls <- list()

for (d in sort(unique(decade_data_pop$decade))) {
  input <- filter(decade_data_pop, decade == d)
  
  p <- ggplot(input, aes(x = reorder(entity, prcnt), y = prcnt, group = entity)) +
    geom_col(aes(fill = entity), position = "dodge") +
    scale_fill_manual(values = continent_colors_merged) +
    geom_text(aes(x = entity, y = ifelse(prcnt > 0.05, prcnt + 0.05, 0.05), label = paste0(round(prcnt * 100, 0), "%")),
                  position = position_dodge(width = 0.5), size = 3) +
    scale_y_continuous(expand = c(0, 0), limits = c(0, max(input$prcnt) * 1.25)) +
    labs(title = paste0(d, "s")) +
    theme_classic() +
    theme(plot.title = element_text(hjust = 0.5, vjust = 1, face = "bold"),
          axis.title = element_blank(),
          axis.ticks.x = element_blank(),
          axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
          axis.ticks.y = element_blank(),
          axis.line.y = element_blank(),
          axis.text.y = element_blank(),
          aspect.ratio = 0.65,
          legend.position = "none")
  
  p_ls[[as.character(d)]] <- p
}

# export
ggsave(plot = grid.arrange(grobs = p_ls, nrow = 1, top = "Population with Internet Access \n at the End of Each Decade and in 2021"),
      filename = "plots/05-Ranking-Percentage.png", width = 7, height = 4)

Distributions

library(dplyr)
library(janitor)

library(ggplot2)
library(ggtext)
library(glue)
library(paletteer)
library(ggpubr)
library(gridExtra)

occurences <- read.delim("data/data-gov/ROMOFish_TroutData_Occurrence.csv", sep = ",", header = TRUE)
occurences <- clean_names(occurences)

### PREPROCESS DATA

# Assign year of observation
occurences$year <- case_when(
  grepl("-2021", occurences$event_id) ~ "2021",
  grepl("-2022", occurences$event_id) ~ "2022",
  TRUE ~ NA
)
# check
sum(is.na(occurences$year)) == 0

occurences[is.na(occurences$scientific_name), ] # all observation without species are in a single year but very few -> drop them
occurences <- occurences[!is.na(occurences$scientific_name), ]

### Polis scientific names
# remove the scitnitis, year part
occurences$scientific_name <- gsub("\\s*\\(.*?\\)", "", occurences$scientific_name)

# there is a type for S. fontinalis in year 2022 -> unify spelling
occurences$scientific_name[occurences$scientific_name == "Salvelinus fontilis"] <- "Salvelinus fontinalis"

### PLOTS

theme_set(
  theme_classic() +
  theme(
    text = element_text(size = 14),
    axis.text = element_text(size = 14)
  )
)

# 1. Observations by species and year
occurences_counts <- occurences |>
  group_by(scientific_name, year) |>
  summarise(n_individuals = n())

# create colour scales by species and year
col_species <- paletteer_d("Manu::Kereru")
names(col_species) <- unique(sort(occurences_counts$scientific_name))

col_years <- c("2021" = "#E35205FF", "2022" = "#5C88DAFF")

# Trout counts by species and year
p_species <- ggplot(occurences_counts, aes(x = year, y = n_individuals, colour = scientific_name)) +
  geom_jitter(aes(size = n_individuals),
    position = position_jitter(width = 0.15, seed = 123)
  ) +
  geom_line(aes(group = scientific_name, colour = scientific_name),
    position = position_jitter(width = 0.15, seed = 123)
  ) +
  scale_y_continuous(limits = c(0, 1050), expand = c(0, 0)) +
  scale_size(range = c(1, 5), guide = "none") +
  scale_colour_manual(values = col_species, name = "Species") +
  labs(
    x = "Year",
    y = "Observations"
  ) +
  guides(colour = guide_legend(ncol = 2)) +
  theme_classic() +
  theme(
    axis.title = element_text(face = "bold"),
    axis.ticks.x = element_blank(),
    legend.title = element_text(face = "bold"),
    legend.text = element_text(size = 10, face = "italic"),
    legend.position = "bottom",
    aspect.ratio = 1.5
  )

# Comparison between years makes sense only if there were spotting on both years
species_subset <- unique(occurences_counts[occurences_counts$year == "2022" & occurences_counts$n_individuals > 0, ]$scientific_name)

# A joint plot for both fish weight and length
plot_title <- glue('Weight and Length of Selected Trout Species In Years <span style = "color:{col_years["2021"]}"> 2021 </span>
  and <span style = "color:{col_years["2022"]}"> 2022 </span>')

p_title <- ggplot() +
  geom_blank() +
  labs(
    title = plot_title,
    subtitle = "(Rocky Mountain National Park, USA)"
  ) +
  theme(
    plot.background = element_rect(fill = "white"),
    plot.title = element_markdown(size = 16, face = "bold", hjust = 0.5),
    plot.subtitle = element_text(size = 14, face = "bold", hjust = 0.5),
    aspect.ratio = 0.15
  )

p_mass <- ggplot(
  filter(occurences, scientific_name %in% species_subset),
  aes(x = mass)
) +
  geom_density(aes(fill = year), alpha = 0.65) +
  facet_wrap(~scientific_name) +
  scale_fill_manual(values = col_years) +
  scale_x_continuous(expand = c(0, 0)) +
  scale_y_continuous(expand = c(0, 0)) +
  labs(
    x = "weight [g]"
  ) +
  theme_classic() +
  theme(
    panel.background = element_rect(fill = "white"),
    strip.background = element_blank(),
    strip.text = element_text(face = "bold.italic", size = 11),
    axis.title = element_text(face = "bold"),
    legend.position = "none"
  )

p_length <- ggplot(
  filter(occurences, scientific_name %in% species_subset),
  aes(x = length)
) +
  geom_density(aes(fill = year), alpha = 0.65) +
  facet_wrap(~scientific_name) +
  scale_fill_manual(values = col_years) +
  scale_x_continuous(expand = c(0, 0)) +
  scale_y_continuous(expand = c(0, 0)) +
  labs(x = "length [mm]") +
  theme_classic() +
  theme(
    panel.background = element_rect(fill = "white"),
    strip.background = element_blank(),
    strip.text = element_text(face = "bold.italic", size = 11),
    axis.title = element_text(face = "bold"),
    legend.position = "none"
  )

# combine for export
p_combined <- ggarrange(
  p_title,
  ggarrange(p_species, arrangeGrob(p_mass, p_length, ncol = 1), ncol = 2, heights = c(1, 1)),
  ncol = 1,
  heights = c(0.1, 0.9)
)

p_combined

ggsave(p_combined, filename = "plots/12-data-gov.png", width = 8, height = 7, scale = 1.15, bg = "white")

Timeseries

# Packages
library(dplyr)
library(janitor)

library(ggplot2)
library(ggrepel)
library(scales)
library(ggtext)
library(glue)
library(ggpubr)
library(gridExtra)

# Load data
all_data <- read.delim("data/urban-share-european-commission/urban-share-european-commission.csv", sep = ",", header = T)
all_data <- clean_names(all_data)
# make percentages fractions between 0 and 1 -> nice axis labels with {scales} package
all_data$share_of_population_living_in_urban_areas <- all_data$share_of_population_living_in_urban_areas / 100

# Keep only years with data
past_data <- all_data[all_data$year < 2025, ]

# Explore data
summary(past_data)

sort(unique(past_data$entity))

### PLOTS
continents <- c("Africa", "Asia", "Europe", "North America", "South America", "Oceania")

continent_colors <- c(c("Africa" = "#524595", "Asia" = "#0392cf",
                        "Europe" = "#e86af0", "North America" = "#ffbf00",
                        "Oceania" = "#95CA3E", "South America" = "#F26923"))

# World - continents
global_trend <- past_data |> 
  filter(entity %in% continents) |> 
  group_by(year) |> 
  summarise(global_urban = mean(share_of_population_living_in_urban_areas, na.rm = TRUE))
# for plot label later on
max_global <- max(global_trend$global_urban)

# Plot
p_world <- ggplot() +
  # individual continents
  geom_line(data = filter(past_data, entity %in% continents),
            aes(x = year, y = share_of_population_living_in_urban_areas, colour = entity), linewidth = 1) +
  scale_colour_manual(values = continent_colors) +
    # global trend line (smooth)
  geom_smooth(data = global_trend, aes(x = year, y = global_urban),
              color = "grey30", se = F, linewidth = 1.2) +
  # annotate - global
  geom_text_repel(data = global_trend[nrow(global_trend), ], aes(x = 2020, y = global_urban, label = "World"),
                  color = "grey30", size = 4, nudge_x = 8, nudge_y = 0.005,
                  xlim = c(2021, NA), segment.size = 0.75, segment.linetype = "dotted", fontface = "bold") +
  # annotate - continents
  geom_text_repel(data = filter(past_data, entity %in% continents, year == 2020),
                aes(x = 2020, y = share_of_population_living_in_urban_areas, label = entity,
                color = entity), size = 4, nudge_x = 10, nudge_y = -0.01,
                xlim = c(2024, NA), segment.size = 0.75, segment.linetype = "dotted",
                segment.ncp = 4,
                box.padding = 0.4,
                segment.curvature = -0.2,
                segment.angle = 45) +
  scale_x_continuous(expand = c(0, 0), limits = c(1975, 2040), breaks = seq(1970, 2020, 10)) +
  scale_y_continuous(labels = scales::label_percent(), expand = c(0, 0),
                    limits = c(0.55, 0.85), breaks = seq(0.55, 0.85, 0.05)) +
  labs(title = "Urbanization development across continents \nand the global trend",
       x = "Year",
       y = "Population in urban areas") +
  theme_classic() +
  theme(plot.title = element_text(size = 12, face = "bold", hjust = 0),
        legend.position = "none",
        aspect.ratio = 0.65)

# Identify least and fastest growing countries
# 1. Calculate slope for each entity
# Do not inlcude world and per-contine tdata
slopes <- past_data |> 
  filter(entity != "World", !entity %in% continents) |>
  group_by(entity) |> 
  summarise(slope = coef(lm(share_of_population_living_in_urban_areas ~ year))[2])

# 2. Identify least and fastest growing countries
fastest_growing <- slopes |> 
  top_n(3, slope) |>
  pull(entity)

# sorting will ensure consistenty of order/colours in title and geom_line()
fastest_growing <- sort(fastest_growing)

least_growing <- slopes |> 
  top_n(3, -slope) |>
  pull(entity)
least_growing <- sort(least_growing)

col_fastest <- c("#E07529FF", "#FAAE32FF", "#7F7991FF")
names(col_fastest) <- fastest_growing

fastest_title <- glue("<span style = 'color: {col_fastest[1]}'> Cayman Islands </span>,
  <span style = 'color: {col_fastest[2]}'> Saint Barthelemy </span> and <br>
  <span style = 'color: {col_fastest[3]}'> Turks and Caicos Islands </span> <br>
  have seen the fastest growth in urbanization")

p_growth <- ggplot() +
  geom_line(data = filter(past_data, entity %in% fastest_growing),
            aes(x = year, y = share_of_population_living_in_urban_areas, colour = entity), linewidth = 1.5) +
  scale_colour_manual(values = col_fastest) +
  scale_x_continuous(expand = c(0, 0), limits = c(1975, 2025), breaks = seq(1970, 2020, 10)) +
  scale_y_continuous(labels = scales::label_percent(), limits = c(0, 1),  expand = c(0, 0)) +
  labs(title = fastest_title,
    x = "Year",
    y = "Population in urban areas") +
  theme_classic() +
  theme(plot.title = element_markdown(size = 12, hjust = 0, face = "bold"),
        legend.position = "none",
        aspect.ratio = 0.65) +
    # add explanatory text
    geom_curve(aes(x = 2006, y = 0.75, xend = 2001, yend = 0.8),
              arrow = arrow(length = unit(0.2, "cm"), type = "open"), curvature = 0.3) +
    geom_curve(aes(x = 2006, y = 0.7, xend = 2003, yend = 0.3),
              arrow = arrow(length = unit(0.2, "cm"), type = "open"), curvature = 0.3) +
    annotate("text", x = 2010, y = 0.73, label = "boom of tourism")
  

col_least <- c("#A84A00FF", "#5D4F36FF", "#B39085FF")
names(col_least) <- least_growing

slowest_title <- glue("<span style = 'color: {col_least[1]}'> Montserrat </span>,
  <span style = 'color: {col_least[2]}'> Palau </span> and <br>
  <span style = 'color: {col_least[3]}'> Saint Pierre and Miquelon </span> <br>
  have seen marked drops in urbanization")

p_decline <- ggplot() +
  geom_line(data = filter(past_data, entity %in% least_growing),
            aes(x = year, y = share_of_population_living_in_urban_areas, color = entity), linewidth = 1.5) +
  scale_colour_manual(values = col_least) +
  scale_x_continuous(expand = c(0, 0), limits = c(1975, 2025), breaks = seq(1970, 2020, 10)) +
  scale_y_continuous(labels = scales::label_percent(), limits = c(0, 1),  expand = c(0, 0)) +
  labs(title = slowest_title,
    x = "Year",
    y = "Population in urban areas") +
  theme_classic() +
  theme(plot.title = element_markdown(size = 12, hjust = 0, face = "bold"),
        legend.position = "none",
        aspect.ratio = 0.65) +
  # add explanatory text
  geom_curve(aes(x = 2001, y = 0.9, xend = 1997, yend = 0.85),
            arrow = arrow(length = unit(0.2, "cm"), type = "open"), curvature = 0.3) +
  annotate("text", x = 1999, y = 0.78, colour = "#A84A00FF",
          label = "1997: Soufrière Hills volcano \n eruption destroyed \n the capital city Plymouth",
          hjust = 0)
  
# combine for export
p_combined <- ggarrange(
  ggarrange(p_world, arrangeGrob(p_growth, p_decline, ncol = 2), nrow = 2, widths = c(1, 1)),
  ncol = 2,
  widths = c(10, 2),
  heights = c(1, 2)
)

p_combined

ggsave(p_combined, filename = "plots/20-Urbanization.png",
       width = 8, height = 6, scale = 1.5, bg = "white")