Eva’s #30DayChartChallenge Gallery
Note#30DayChartChallenge
#30DayChartChallenge is a global, community-driven data visualization initiative taking place annually in April. Everyone is invited to create and share one chart per day for 30 days, each based on a different theme and prompt. (Official repo)
2026
Comparisons
# Set-up -----------------------------------------------------------------------
# ---------------------------------------------------------------------------- #
# Load packages
library(NHANES)
library(dplyr)
library(ggplot2)
library(ggmosaic)
library(paletteer)
library(showtext)
library(ggtext)
# Access to author info
source("commons/CreateSocialCaption.R")
# Explore data and survey years
glimpse(NHANES)
unique(NHANES$SurveyYr)
target_survey <- "2011_12"
# LOAD DATA as a data frame
# It's a big dataset, so table() is your friend first
plot_dat <- data.frame(NHANES$SurveyYr, NHANES$Education, NHANES$HealthGen)
colnames(plot_dat) <- c("survey_year", "education", "health")
# Subset for selected year and keep only complete cases
plot_dat <- plot_dat |>
filter(survey_year == target_survey)
plot_dat <- plot_dat[rowSums(is.na(plot_dat)) == 0, ]
# Make health column a factor with foxed order -> order in the plot
# Before that, control how "very good" is spelled
plot_dat$health <- case_when(
plot_dat$health == "Vgood" ~ "Very good",
TRUE ~ paste(plot_dat$health)
)
health_levels <- c(
"Excellent",
"Very good",
"Good",
"Fair",
"Poor"
)
plot_dat$health <- factor(plot_dat$health, levels = rev(health_levels))
# Plot -------------------------------------------------------------------------
# ---------------------------------------------------------------------------- #
p_mosaic <- ggplot(plot_dat) +
geom_mosaic(aes(x = product(education), fill = health)) +
scale_fill_manual(values = rev(paletteer::paletteer_d("nord::silver_mine"))) +
coord_cartesian(expand = F) +
labs(
title = paste("Education level vs self-reported health status \nacross",
format(nrow(plot_dat), big.mark = " "), "US respondents (Dec 2011)"),
x = "",
y = ""
) +
theme_mosaic() +
theme(
plot.title = element_text(size = 14, face = "bold", lineheight = 0.35),
axis.title = element_blank(),
axis.text.x = element_text(
size = 14, colour = "black", face = "bold",
angle = 90, hjust = 1, vjust = 0.5
),
axis.text.y = element_text(size = 14, colour = "black", face = "bold"),
axis.ticks = element_blank(),
aspect.ratio = 1,
legend.position = "none"
)
p_mosaic
# Include author details in the graphic
author_cap <- CreateSocialCaption()
font_add(family = "monospace", regular = "cour.ttf")
p_mosaic <- p_mosaic +
labs(caption = paste(
"**Data source**: <span style='font-family:monospace;'>{NHANES}</span> package ",
" **|**",
author_cap
)) +
theme(plot.caption = ggtext::element_markdown(size = 13, lineheight = 1.5, vjust = -2.5))
p_mosaic
# Export
# ---- #
ggsave(plot = p_mosaic, filename = "2026/plots/03-Mosaic.png", width = 1200, height = 1200, units = "px", scale = 0.65)# SETUP ------------------------------------------------------------------------
# ---------------------------------------------------------------------------- #
# Preprocessed dataset - loaded as 'prepro_pop'
load("2026/data/preprocessed/SYB61_253_Population_Growth_Rates.RData")
# Packages
library(dplyr)
library(rnaturalearth)
library(ggplot2)
library(ggrepel)
library(scales)
library(cowplot)
library(gridtext)
# Author details fxn
source("commons/CreateSocialCaption.R")
# SUBSET DATA: European countries ----------------------------------------------
# ---------------------------------------------------------------------------- #
# World data will be source of all European countries
world_map <- ne_countries(scale = 50, returnclass = "sf")
glimpse(world_map)
ecountries <- world_map |>
filter(continent == "Europe") |>
pull(sovereignt)
acountries <- world_map |>
filter(continent == "Africa") |>
pull(sovereignt)
# How many are present in the population data?
length(ecountries)
length(intersect(ecountries, unique(prepro_pop$region)))
setdiff(ecountries, unique(prepro_pop$region))
# This is a manual step for as complete dataset as possible
# Kosovo is missing in population growth data
# Moldova is Republic of Moldova, Republic of Serbia is Serbia, North Macedonia is TFYR of Macedonia
# Vatican City is a capital of Holy See - since it has only a single observation, it will not be included
# Update countries vector for filtering
ecountries <- c(ecountries, "Moldova", "Republic of Serbia", "TFYR of Macedonia")
length(acountries)
length(intersect(acountries, unique(prepro_pop$region)))
setdiff(acountries, unique(prepro_pop$region))
acountries <- c(
acountries,
"United Rep. of Tanzania", "Eswatini", "Somalia", "Sao Tome and Principe",
"Dem. Rep. of the Congo", "Congo"
)
# Filtering for both continents
eur_pop <- prepro_pop |>
filter(region %in% ecountries)
n_distinct(eur_pop$region)
urban_pop <- eur_pop |>
filter(
pop_type == "Urban population",
pop_unit == "percent"
) |>
# keep entries with min. 2 time points
group_by(region) |>
mutate(n_records = n()) |>
filter(n_records >= 2) |>
select(-n_records)
afr_pop <- prepro_pop |>
filter(region %in% acountries)
n_distinct(afr_pop$region)
urban_pop_a <- afr_pop |>
filter(
pop_type == "Urban population",
pop_unit == "percent"
) |>
# keep entries with min. 2 time points
group_by(region) |>
mutate(n_records = n()) |>
filter(n_records >= 2) |>
select(-n_records)
# PLOT changes Europe vs Africa ------------------------------------------------
# ---------------------------------------------------------------------------- #
# --- Europe ---
# countires with change of min. 10%
e_tops <- urban_pop |>
tidyr::pivot_wider(id_cols = "region", values_from = "value", names_from = "year") |>
mutate(growth = `2018` - `2005`) |>
filter(growth >= 10) |>
pull(region)
# Isolate top growers and top in 2018 into a separate data frame -> access to labels
df_e_tops <- urban_pop |>
filter(region %in% e_tops, year == 2018)
# Continent shape
# Including some filtering and cropping to keep "known" shape
bg_eur <- world_map |>
filter(continent == "Europe", sovereignt != "Russia") |>
ggplot() +
geom_sf(fill = "#fcbf49", colour = NA, alpha = 0.25) +
# labs(caption = paste("**Data source:** unstats.un.org", "**|**", CreateSocialCaption())) +
scale_x_continuous(limits = c(-10, 40)) +
scale_y_continuous(limits = c(35, 80)) +
theme_void() +
theme(plot.caption = ggtext::element_markdown(),
aspect.ratio = 0.75)
p_eur <- ggplot(urban_pop, aes(x = year, y = value / 100, group = region)) +
geom_line(colour = "grey80", alpha = 0.6, linewidth = 0.35) +
# max and min
geom_line(data = filter(urban_pop, region == "Liechtenstein"), colour = "#003DA5", linewidth = 0.5) +
geom_text(
data = filter(urban_pop, region == "Liechtenstein", year == 2018),
label = "Liechtenstein", colour = "#003DA5",
nudge_x = 0.2, hjust = 0, size = 3
) +
geom_line(data = filter(urban_pop, region == "Belgium"), colour = "#C8102E", linewidth = 0.5) +
geom_text(
data = filter(urban_pop, region == "Belgium", year == 2018),
label = "Belgium", colour = "#C8102E",
nudge_x = 0.2, hjust = 0, size = 3
) +
# growth above 10%
geom_line(data = filter(urban_pop, region %in% e_tops), colour = "black", linewidth = 0.5) +
geom_text(
data = filter(urban_pop, region %in% e_tops, year == 2018),
aes(label = region), colour = "black",
nudge_x = 0.2, hjust = 0, size = 3
) +
coord_cartesian(xlim = c(2005, 2018), ylim = c(0.1, 1), expand = F, clip = "off") +
scale_x_continuous(breaks = unique(urban_pop$year)) +
scale_y_continuous(labels = scales::percent_format()) +
labs(
x = "year",
y = "urban population"
) +
theme_minimal(base_size = 13) +
theme(
plot.margin = margin(0, 80, 0, 0),
plot.background = element_rect(fill = NA),
panel.background = element_rect(fill = NA),
panel.grid = element_blank(),
plot.caption = ggtext::element_markdown(),
axis.title = element_text(colour = "black"),
axis.text = element_text(colour = "black"),
axis.line = element_line(colour = "black"),
axis.ticks = element_line(colour = "black", linewidth = 0.5),
axis.ticks.length = unit(-5, "pt"),
aspect.ratio = 0.75
)
# Align plots
aligned_plots <- align_patches(bg_eur, p_eur)
# Draw them in order and fine tune alignment
p <- cowplot::ggdraw(aligned_plots[[1]])
p_eur_assembled <- p + cowplot::draw_plot(aligned_plots[[2]], valign = 0.2, scale = 1)
p_title <- ggdraw() +
draw_label(
stringr::str_wrap(
"A single European country experienced >10 percentage points increase in urban population between 2005 and 2018",
width = 60
),
fontface = 'bold',
size = 11,
x = 0.5
)
# Include the title to arranged plot assemblies
p_final <- plot_grid(p_title, p_eur_assembled, ncol = 1, rel_heights = c(0.05, 0.9))
# And a caption-like data & author note
p_final_png <- ggdraw(p_final) +
draw_grob(
gridtext::richtext_grob(
paste("**Data source:** unstats.un.org", "<br>", CreateSocialCaption()),
gp = grid::gpar(fontsize = 10),
x = 0.5,
hjust = 0.5,
vjust = 7, use_markdown = T
)
)
print(p_final_png)
# https://stackoverflow.com/questions/75020376/save-plot-exactly-as-previewed-in-the-plots-panel
# Create a temporary file
tmp <- tempfile()
# Put the current plot into the tempfile in the svg format
dev.print(svg,tmp)
# Convert the svg temp file to png and store it in a png file
rsvg::rsvg_png(tmp, "2026/plots/04-Slope.png", height = 800, width = 900)
# # --- Africa ---
#
# a_tops <- urban_pop_a |>
# tidyr::pivot_wider(id_cols = "region", values_from = "value", names_from = "year") |>
# mutate(growth = `2018` - `2005`) |>
# filter(growth >= 10) |>
# pull(region)
#
# df_a_tops <- urban_pop |>
# filter(region %in% a_tops, year == 2018)
#
# # Continent shape
# bg_afr <- world_map |>
# filter(continent == "Africa") |>
# ggplot() +
# geom_sf(fill = "grey90", colour = "grey90") +
# theme_void()
#
# p_afr <- ggplot(data = urban_pop_a, aes(x = year, y = value / 100, group = region)) +
# geom_line(colour = "grey80", alpha = 0.6, linewidth = 0.65) +
# # top two in 2018
# geom_line(data = filter(urban_pop_a, region == "Western Sahara"), colour = "#007A3D", linewidth = 0.5) +
# geom_text(
# data = filter(urban_pop_a, region == "Western Sahara", year == 2018),
# label = "Western Sahara", colour = "#007A3D",
# nudge_x = 0.2, hjust = 0
# ) +
# geom_line(data = filter(urban_pop_a, region == "Gabon"), colour = "#4664B2", linewidth = 0.5) +
# geom_text(
# data = filter(urban_pop_a, region == "Gabon", year == 2018),
# label = "Gabon", colour = "#4664B2",
# nudge_x = 0.2, hjust = 0
# ) +
# # min urban population
# geom_line(data = filter(urban_pop_a, region == "Burundi"), colour = "#1EB53A", linewidth = 0.5) +
# geom_text(
# data = filter(urban_pop_a, region == "Burundi", year == 2018),
# label = "Burudni", colour = "#1EB53A",
# nudge_x = 0.2, hjust = 0
# ) +
# # growth above 10%
# geom_line(data = filter(urban_pop_a, region %in% a_tops), colour = "black", linewidth = 0.5) +
# coord_cartesian(xlim = c(2005, 2018), ylim = c(0.1, 1), expand = F, clip = "off") +
# scale_x_continuous(breaks = unique(urban_pop_a$year)) +
# scale_y_continuous(labels = scales::percent_format()) +
# labs(
# x = "year",
# y = ""
# ) +
# theme_minimal(base_size = 14) +
# theme(
# plot.margin = margin(0, 50, 0, 0),
# plot.background = element_rect(fill = NA),
# panel.background = element_rect(fill = NA),
# panel.grid = element_blank(),
# axis.title = element_text(size = 14, colour = "black"),
# axis.text = element_text(size = 14, colour = "black"),
# axis.line = element_line(colour = "black"),
# axis.ticks = element_line(colour = "black", linewidth = 0.5),
# axis.ticks.length = unit(-5, "pt"),
# aspect.ratio = 0.75
# )
#
# # Align plots
# aligned_plots <- align_patches(bg_afr, p_afr)
#
# # Draw them in order and fine tune alignment
# p <- cowplot::ggdraw(aligned_plots[[1]])
# p_afr_assembled <- p + cowplot::draw_plot(aligned_plots[[2]], valign = 0.05, scale = 1.15)
#
#
# # Export both continents
# # -------------------- #
#
# # Prepare a joint title
# p_title <- ggdraw() +
# draw_label(
# "Urban population in course of 13 years in Europe and Africa",
# fontface = 'bold',
# x = 0,
# hjust = 0,
# vjust = 2
# ) +
# theme(
# # title is aligned with left edge of first plot
# plot.margin = margin(0, 0, 0, 7),
# plot.background = element_rect(fill = "white", colour = "white"),
# panel.background = element_rect(fill = "white", colour = "white")
# )
#
# # Include arranged plot assemblies
# p_final <- plot_grid(p_title, plot_grid(p_eur_assembled + p_afr_assembled, align = "hv"), ncol = 1, rel_heights = c(-1, -1))
#
# ggsave(
# plot = p_final,
# filename = "2026/plots/04-Slope.png", width = 1200, height = 600, units = "px", scale = 2
# )Distributions
# Set-up -----------------------------------------------------------------------
# ---------------------------------------------------------------------------- #
library(janitor)
library(dplyr)
library(tidyr)
library(countrycode)
library(ggflags)
library(ggplot2)
library(ggtext)
library(showtext)
library(glue)
source("commons/CreateSocialCaption.R")
# Get & pre-process data--------------------------------------------------------
# ---------------------------------------------------------------------------- #
rep_dat <- read.delim("2026/data/reporters_without_borders_2025.csv", sep = ";", dec = ",", header = T)
rep_dat <- janitor::clean_names(rep_dat)
score_dat <- rep_dat |>
filter(zone == "UE Balkans") |>
select(iso, country_en, score_2025, score_n_1) |>
pivot_longer(cols = contains("score"), names_to = "year", values_to = "score") |>
mutate(year = case_when(
grepl("2025", year) ~ 2025,
grepl("n_1", year) ~ 2024,
TRUE ~ NA
))
score_dat$iso2 <- countrycode(score_dat$country_en, "country.name", "iso2c")
score_dat$iso2 <- case_when(
score_dat$country_en == "Kosovo" ~ "XK",
score_dat$country_en == "Northern Cyprus" ~ NA,
TRUE ~ score_dat$iso2
)
score_dat$iso2 <- tolower(score_dat$iso2)
# For segment lengths in the dumbbell plot
df_scores1 <- filter(score_dat, year == 2024)
df_scores2 <- filter(score_dat, year == 2025)
# Sort countries by the latest result
countries_order <- df_scores2 |>
arrange(desc(score)) |>
pull(country_en)
df_scores1 <- rep_dat |>
filter(zone == "UE Balkans") |>
select(country_en, score_evolution) |>
left_join(df_scores1, by = "country_en") |>
mutate(score_evo_dir = case_when(
score_evolution > 0 ~ "gain",
score_evolution < 0 ~ "loss",
TRUE ~ NA
))
score_dat <- left_join(score_dat, select(df_scores1, country_en, score_evo_dir))
score_dat$country_en <- factor(score_dat$country_en, levels = rev(countries_order))
df_scores1$country_en <- factor(df_scores1$country_en, levels = rev(countries_order))
# Plot -------------------------------------------------------------------------
# ---------------------------------------------------------------------------- #
font_add_google(name = "Nunito", family = "nunito")
showtext_auto()
p_title <- "World Press Freedom Index of European countries in 2024 and 2025"
n_gain <- sum(df_scores1$score_evo_dir == "gain")
n_loss <- sum(df_scores1$score_evo_dir == "loss")
p_subtitle <- glue(
"The purpose of the index is to compare the level of freedom enjoyed by journalists and media in individual countries.\n
In {n_gain} countries, the index <span style='color:#a7c957;'>**improved**</span>, but {n_loss} countries experienced <span style='color:#d4a373;'>**a decline**</span>.
\n(Index values from 2024 are shown as dots, 2025 values are represented by country flags.)"
)
p <- ggplot() +
geom_rect(aes(xmin = 85, xmax = 100, ymin = -Inf, ymax = Inf), fill = "grey80") +
geom_rect(aes(xmin = 70, xmax = 85, ymin = -Inf, ymax = Inf), fill = "grey60") +
geom_rect(aes(xmin = 55, xmax = 70, ymin = -Inf, ymax = Inf), fill = "grey40") +
geom_rect(aes(xmin = 40, xmax = 55, ymin = -Inf, ymax = Inf), fill = "grey20") +
geom_rect(aes(xmin = 30, xmax = 40, ymin = -Inf, ymax = Inf), fill = "grey5") +
geom_vline(xintercept = c(85, 70, 55, 40), linetype = "dashed", colour = "grey80") +
annotate(geom = "text", x = c(40, 55, 70, 85, 100), y = "Norway", hjust = 1.5,
label = c("very serious", "difficult", "problematic", "satisfactory", "good"), colour = "white", fontface = "bold", size = 8.5) +
geom_segment(data = df_scores1,
aes(x = score, y = country_en,
xend = df_scores2$score, yend = df_scores2$country_en,
colour = score_evo_dir),
linewidth = 3.5,
alpha = 0.5) +
scale_colour_manual(values = c("loss" = "#d4a373", "gain" = "#a7c957")) +
geom_point(data = score_dat,
aes(x = score, y = country_en, group = country_en, colour = score_evo_dir),
size = 4) +
geom_flag(data = filter(score_dat, year == 2025),
aes(x = score, y = country_en, country = iso2), size = 4) +
scale_y_discrete(sec.axis = dup_axis(name = "")) +
labs(title = p_title,
subtitle = p_subtitle,
caption = paste0(
"**Data source:** Reporters without borders",
"**Design:**", CreateSocialCaption())) +
scale_x_continuous(breaks = c(40, 55, 70, 85)) +
theme_void() +
theme(plot.margin = margin(2, 2, 2, 2),
text = element_text(family = "nunito", color = "#4a4e4d"),
plot.title = element_text(face = "bold", size = 32, margin = margin(0, 0, 5, 25)),
plot.title.position = "panel",
plot.subtitle = element_markdown(size = 26, lineheight = 0.85, margin = margin(0, 0, 5, 25)),
plot.caption = element_markdown(size = 22, lineheight = 1),
axis.text.x = element_text(colour = "black", size = 22, hjust = 0.5),
axis.text.y = element_text(colour = "black", size = 22, hjust = 1),
axis.text.y.right = element_text(colour = "black", size = 22, hjust = 0),
aspect.ratio = 0.65,
legend.position = "none")
print(p)
ggsave(plot = p, filename = "2026/plots/12-DataDay.png", width = 1200, height = 800, units = "px", bg = "white", scale = 2.5)Relationships
# Set-up -----------------------------------------------------------------------
# ---------------------------------------------------------------------------- #
library(dplyr)
library(tidyr)
library(ggplot2)
library(ggblur)
library(ggtext)
library(showtext)
library(glue)
library(cowplot)
source("commons/CreateSocialCaption.R")
# Create input data ------------------------------------------------------------
# ---------------------------------------------------------------------------- #
# Suitable sources would have been secondhandsongs.org and/or musicbrainz.com
# But APIs were difficult, therefore a compilation by ChatGPT
# which seems to kind of reflect the true state of the databases
elvis_covers <- data.frame(
song = c(
# Early rock & breakthrough (1950s)
"That's All Right",
"Blue Suede Shoes",
"Hound Dog",
"Heartbreak Hotel",
"Don't Be Cruel",
"All Shook Up",
"Love Me Tender",
"Jailhouse Rock",
"Too Much",
"Teddy Bear",
# Late 1950s / early 1960s transition
"Wear My Ring Around Your Neck",
"It's Now or Never",
"Are You Lonesome Tonight?",
"Stuck on You",
"A Big Hunk o' Love",
# Early–mid 1960s film era
"Can't Help Falling in Love",
"Return to Sender",
"She's Not You",
"Viva Las Vegas",
"Kiss Me Quick",
# Late 1960s comeback era
"Suspicious Minds",
"In the Ghetto",
"Kentucky Rain",
# 1970s ballads & Vegas era
"The Wonder of You",
"Always on My Mind",
"Burning Love",
"An American Trilogy",
"My Way",
# Lesser-covered / deeper cuts
"I Want You, I Need You, I Love You",
"One Night",
"Little Sister",
"If I Can Dream",
"Polk Salad Annie"
),
release_year = c(
1954, 1956, 1956, 1956, 1956, 1957, 1956, 1957, 1957, 1957,
1958, 1960, 1960, 1960, 1961,
1961, 1962, 1962, 1964, 1963,
1969, 1969, 1969,
1970, 1972, 1972, 1972, 1973,
1956, 1958, 1961, 1968, 1969
),
estimated_covers = c(
# early rock (widely covered standards)
120, 300, 500, 200, 250, 175, 250, 200, 140, 160,
# transitional hits
80, 200, 150, 120, 90,
# film era staples
300, 130, 110, 200, 90,
# comeback era
150, 180, 100,
# 1970s
150, 400, 220, 140, 300,
# deep cuts (less covered)
90, 110, 95, 160, 80
),
genre = c(
# early rock
"Rockabilly", "Rockabilly", "Rock & Roll", "Rock & Roll", "Rock & Roll",
"Rock & Roll", "Ballad", "Rock & Roll", "Rock & Roll", "Rock & Roll",
# transition
"Rock & Roll", "Ballad", "Ballad", "Rock & Roll", "Rock & Roll",
# film era
"Ballad", "Pop", "Pop", "Rock & Roll", "Pop",
# comeback
"Rock Ballad", "Soul", "Country Ballad",
# 1970s
"Ballad", "Ballad", "Rock", "Gospel/Orchestral", "Ballad",
# deep cuts
"Ballad", "Rock & Roll", "Rock & Roll", "Gospel", "Blues Rock"
)
)
# Plot -------------------------------------------------------------------------
# ---------------------------------------------------------------------------- #
# Setup: custom fonts, sourced from Google
font_add_google(name = c("Lobster", "League Spartan"), family = c("lobster", "spartan"))
showtext_auto()
# Plot background: an LP in the background & title with caption
circ_sequence <- seq(1, 150, by = 5)
circles <- data.frame(
size = circ_sequence,
x = rep(1, length(circ_sequence)),
y = rep(1, length(circ_sequence))
)
plot_cap <- paste0(
"<span style='color:white;'>**Data source:** SecondHandSongs & MusicBrainz, compilation by ChatGPT</span>",
"<br>",
CreateSocialCaption(github_icon_color = "#F2D649", linkedin_icon_color = "#F2D649", text_color = "white")
)
p_bg <- ggplot(data = circles, aes(x = 1, y = 1, size = size)) +
geom_point(shape = 21, fill = NA, colour = "white") +
scale_size_continuous(range = c(10, 100)) +
scale_x_continuous(limits = c(-2, 5)) +
scale_y_continuous(limits = c(-151, 152)) +
labs(title = "Most Covered Songs of the King",
subtitle = "Elvis Presley’s songs from the mid-1950s and early 1970s are his most covered works",
caption = plot_cap) +
theme_void() +
theme(panel.background = element_rect(fill = "black"),
plot.background = element_rect(fill = "black"),
plot.margin = margin(10, 0, 0, 0),
plot.title = element_text(family = "lobster", size = 30, colour = "white", hjust = 0.5),
plot.subtitle = element_text(family = "spartan", size = 14, colour = "white",
hjust = 0.5, margin = margin(10, 0, 10, 0)),
plot.caption = element_markdown(family = "spartan", lineheight = 1.15,
padding = unit(10, "pt")),
aspect.ratio = 0.5,
legend.position = "none")
# arrange data by year
# > fix what row 1 references to & ultimately text placement in the plot
elvis_covers <- elvis_covers |>
arrange(-estimated_covers)
# colours from palette https://color.adobe.com/s-Presley-color-theme-1560137/
p_covers <- ggplot(data = elvis_covers, aes(x = release_year, y = estimated_covers)) +
geom_point_blur(aes(size = estimated_covers), colour = "#F2D649", blur_size = 10) +
geom_point(aes(size = estimated_covers), colour = "#F2C84B") +
annotate(geom = "text", x = elvis_covers[1, ]$release_year, y = elvis_covers[1, ]$estimated_covers,
label = glue("Covered {elvis_covers[1, ]$estimated_covers}x"),
size = 3, colour = "white", family = "spartan",
hjust = -0.5, vjust = -0.5) +
ggrepel::geom_text_repel(data = elvis_covers[2:nrow(elvis_covers), ],
aes(x = release_year, y = estimated_covers, label = paste0(estimated_covers, "x")),
size = 3, colour = "white", family = "spartan", nudge_y = 1.5) +
ggrepel::geom_text_repel(data = elvis_covers[elvis_covers$estimated_covers >= 400, ],
aes(x = release_year, y = estimated_covers, label = song),
size = 4, colour = "#F2D649", family = "spartan",
nudge_y = -30) +
scale_size_continuous(range = c(1, 10)) +
scale_x_continuous(limits = c(1950, 1980), expand = expansion(add = c(0, 0))) +
scale_y_continuous(expand = expansion(add = c(35, 25))) +
theme_void() +
theme(plot.background = element_rect(fill = fill_alpha("black", 0.75)),
axis.line = element_line(colour = "white", linewidth = 1),
axis.text.x = element_text(family = "spartan", colour = "white", size = 14),
axis.ticks.x = element_line(colour = "white", linewidth = 1),
axis.ticks.length.x = unit(-10, "pt"),
aspect.ratio = 0.65,
legend.position = "none")
p_covers
# Data as insert
# Freeze the background as grob before to avoid unexpected shifting of spacing of elements
p_bg_fixed <- ggplotGrob(p_bg)
p_assembled <- ggdraw() + draw_grob(p_bg_fixed) +
draw_plot(p_covers, x = -0.05, y = -0.025, scale = 0.65)
p_assembled
# Export
# ggsave(plot = p_assembled,
# filename = "2026/plots/18-Remake.png",
# width = 6,
# height = 3.6,
# dpi = 300,
# bg = "black",
# device = ragg::agg_png)
# Saved from Plots pane this time.# SETUP ------------------------------------------------------------------------
# ---------------------------------------------------------------------------- #
# Packages
library(dplyr)
library(tidyr)
library(rnaturalearth)
library(countrycode)
library(ggplot2)
library(showtext)
# Author details fxn
source("commons/CreateSocialCaption.R")
rep_dat <- read.delim("2026/data/reporters_without_borders_2025.csv", sep = ";", dec = ",", header = T)
rep_dat <- janitor::clean_names(rep_dat)
score_dat <- rep_dat |>
filter(zone == "Asie-Pacifique") |>
select(iso, country_en, score_2025, score_n_1) |>
pivot_longer(cols = contains("score"), names_to = "year", values_to = "score") |>
mutate(year = case_when(
grepl("2025", year) ~ 2025,
grepl("n_1", year) ~ 2024,
TRUE ~ NA
))
score_dat$iso2 <- countrycode(score_dat$country_en, "country.name", "iso2c")
# PLOT -------------------------------------------------------------------------
# ---------------------------------------------------------------------------- #
# South China Morning Post colours
scmp_blue <- "#001246"
scmp_yellow <- "#FFCA05"
scmp_gold <- "#c79a00"
# and fonts
sysfonts::font_add_google("Merriweather", "merriweather")
sysfonts::font_add_google("Roboto", "roboto")
# top 5% by increase
top_incs <- rep_dat |>
filter(zone == "Asie-Pacifique") |>
select(country_en, score_evolution) |>
distinct() |>
slice_max(n = 5, order_by = score_evolution) |>
pull(country_en)
# and top 5% by decrease
top_decs <- rep_dat |>
filter(zone == "Asie-Pacifique") |>
select(country_en, score_evolution) |>
distinct() |>
slice_min(n = 5, order_by = score_evolution) |>
pull(country_en)
# categorize
score_dat <- score_dat |>
mutate(category = case_when(
country_en %in% top_incs ~ "top",
country_en %in% top_decs ~ "bottom",
TRUE ~ "others"
),
# and a numerical helper for plotting
num_category = case_when(
country_en %in% top_incs ~ 2,
country_en %in% top_decs ~ 2,
TRUE ~ 1
))
p_subt <- glue(
"**The purpose of the index is to compare the level of freedom <br>enjoyed by journalists and media in individual countries.** <br> The five countries with biggest year-to-year <span style=color:{scmp_yellow};>**IMPROVEMENT**</span> are problematic countries. <br> In contrary, countries with largest <span style=color:{scmp_blue};>**SCORE DECLINE**</span> are a mixture of well and poorly scoring countries."
)
p_cap <- paste0(
"**Data source:** Reporters without borders <br>",
"**Design:**", CreateSocialCaption(github_icon_color = scmp_gold, linkedin_icon_color = scmp_gold)
)
p_asia <- ggplot(data = score_dat, aes(x = as.factor(year), y = score)) +
geom_path(aes(colour = category, linewidth = num_category, group = country_en)) +
geom_point(aes(colour = category, size = score)) +
ggrepel::geom_text_repel(data = filter(score_dat, country_en %in% c(top_incs, top_decs), year == 2025),
aes(label = country_en), size = 4, nudge_x = 0.35, segment.linetype = "dashed",
family = "roboto") +
scale_colour_manual(values = c("bottom" = scmp_blue, "top" = scmp_yellow, "others" = "grey80")) +
scale_size_continuous(range = c(2, 6)) +
scale_linewidth_continuous(range = c(1, 2)) +
scale_x_discrete(expand = expansion(add = c(0.02, 0.05))) +
labs(
title = "World Press Freedom Index of Asian countries in 2024 vs 2025",
subtitle = p_subt,
caption = p_cap,
x = " ",
y = "score"
) +
theme_void() +
theme(
plot.margin = margin(0, 50, 0, 0),
plot.background = element_rect(fill = NA),
panel.background = element_rect(fill = NA),
panel.grid = element_blank(),
plot.title = element_text(family = "merriweather", color = scmp_gold, margin = margin(2.5, 0, 5, 0)),
plot.subtitle = ggtext::element_markdown(family = "roboto", lineheight = 1.15, size = 10,
margin = margin(2.5, 0, 2.5, 0)),
plot.caption = ggtext::element_markdown(family = "roboto", size = 10),
axis.title.y = element_text(colour = "black", angle = 90, family = "roboto"),
axis.text = element_text(colour = "black", family = "roboto"),
axis.line.x.bottom = element_line(colour = "black"),
axis.line.y.left = element_line(colour = "black"),
axis.ticks = element_line(colour = "black", linewidth = 0.5),
axis.ticks.length = unit(5, "pt"),
aspect.ratio = 1,
legend.position = "none"
)
p_asiaUncertainties
# SETUP ------------------------------------------------------------------------
# ---------------------------------------------------------------------------- #
# Packages
library(Kendall)
library(dplyr)
library(ggplot2)
library(ggtext)
library(showtext)
# Custom author/data details function
source("commons/CreateSocialCaption.R")
# Load and explore data
data(PrecipGL)
str(PrecipGL)
# Preprocess for plotting
dat_precip <- data.frame(
year = 1900:1986,
"precip_in" = PrecipGL[1:length(PrecipGL)]
)
glimpse(dat_precip)
# PLOT -------------------------------------------------------------------------
# ---------------------------------------------------------------------------- #
# Custom fonts
# fontawesome icons
font_add(family = "fa-solid", regular = "commons/fonts/Font Awesome 7 Free-Solid-900.otf")
# title and plain text
font_add_google(name = "Source Code Pro", family = "monospace")
font_add_google(name = "Winky Rough", family = "winky")
# this is so that fonts are accessible to ggplot2
showtext_auto()
# Title
p_title <- "Great Lakes Annual Precipitation (1900–1986)"
# Caption is combination of data source and author details
p_caption = paste0(
"**Data source:**", " PrecipGL dataset from the {Kendall} R package",
"**Visualization:**", CreateSocialCaption(github_icon_color = "#A89797FF", linkedin_icon_color = "#007FFFFF")
)
# Actual plot is created here
ggplot(data = dat_precip, aes(x = year, y = precip_in)) +
geom_path(linetype = "dotted", linewidth = 0.85, colour = "#007FFFFF", alpha = 0.5) +
# display waterdrop at place of individual data points
geom_richtext(aes(label = "<span style='font-family:fa-solid'></span>", size = precip_in),
label.colour = NA,
label.padding = unit(0, "pt"),
label.margin = unit(0, "pt"),
show.legend = F,
col = "#007FFFFF",
fill = NA) +
# label every fifth year - should be easier to navigate across years than usual axis breaks
geom_text(data = filter(dat_precip, year %in% seq(1905, 1990, by = 5)) |>
mutate(nudge = case_when(
precip_in <= 30 ~ -1.15,
TRUE ~ 1.15
)),
aes(x = year, y = precip_in, label = year, nudge_y = nudge),
angle = 90, size = 3.5, family = "winky", fontface = "bold", colour = "#A89797FF") +
labs(title = p_title,
y = "precipitation [in]",
caption = p_caption) +
scale_size_continuous(range = c(1.5, 5)) +
scale_x_continuous(limits = c(1900, 1986), expand = expansion(add = c(0.5, 1))) +
scale_y_continuous(limits = c(22, max(dat_precip$precip_in) + 2.55),
breaks = seq(20, max(dat_precip$precip_in), by = 5),
expand = F,
# secondary axis is to include metric units in the same plot
sec.axis = sec_axis(transform = ~ . * 25.4, name = "precipitation [mm]")) +
theme_void() +
theme(plot.margin = margin(5, 5, 5, 5),
plot.background = element_rect(fill = alpha("#FFEFB2FF", 0.35)),
plot.title = element_text(colour = "#A89797FF", family = "winky",
size = 22, hjust = 0.5),
plot.caption = element_markdown(family = "winky"),
axis.title.y.left = element_text(colour = "#A89797FF", family = "winky",
angle = 90, margin = margin(0, 10, 0, 0)),
axis.title.y.right = element_text(colour = "#A89797FF", family = "winky",
angle = 90, margin = margin(0, 0, 0, 10)),
axis.line.x = element_line(colour = "#A89797FF", linewidth = 1),
axis.line.y = element_line(colour = "#A89797FF", linewidth = 1),
axis.ticks.y = element_line(colour = "#A89797FF", linewidth = 1),
axis.ticks.length.y = unit(5, "pt"),
axis.text.y = element_text(colour = "#A89797FF", family = "winky"),
aspect.ratio = 0.5)
# Note: colour choice is based on palette "tvthemes::Arryn"# Set-up -----------------------------------------------------------------------
# ---------------------------------------------------------------------------- #
# Load packages
library(dplyr)
library(ggplot2)
library(showtext)
library(scales)
library(ggtext)
library(ungeviz) # data source
# Access to author info
source("commons/CreateSocialCaption.R")
# Data pre-processing ----------------------------------------------------------
# ---------------------------------------------------------------------------- #
cacao <- cacao |>
filter_out(is.na(location)| is.na(cocoa_percent)) |>
# mutate(bean_origin = case_when(
# bean_origin == "Domincan Republic" ~ "Dominican Republic",
# TRUE ~ bean_origin
# )) |>
group_by(location) |>
mutate(n_records = n()) |>
filter(n_records >= 5) |>
mutate(cocoa_percent = as.numeric(gsub("%", "", cocoa_percent)))
locations_sorted <- cacao |>
group_by(location) |>
summarize(median_prcnt = median(cocoa_percent)) |>
arrange(median_prcnt) |>
pull(location)
cacao$location <- factor(cacao$location, levels = locations_sorted)
# Plot -------------------------------------------------------------------------
# ---------------------------------------------------------------------------- #
sysfonts::font_add_google(name = "Finger Paint", family = "fpaint")
showtext_auto()
p_cap <- paste("**Visualization:**", CreateSocialCaption(github_icon_color = "white", linkedin_icon_color = "white", text_color = "white"))
ggplot(cacao, aes(x = cocoa_percent / 100, y = location)) +
geom_boxplot(colour = "white", linewidth = 0.55, outlier.shape = NA) +
geom_jitter(colour = "white", size = 1.05, height = 0.35) +
labs(title = stringr::str_wrap("Cocoa content in dark chocolate bars of the world", width = 30),
subtitle = stringr::str_wrap("Globally, the median cocoa content in dark chocolate bars is 70%. No matter manufacturer's location, however, surprises can happen - pay attention to product labeling. Data source: cacao dataset from the {ungeviz} package (originally compiled and still updated by flavorsofcacao.com)", width = 80),
caption = p_cap,
x = "",
y = "") +
scale_x_continuous(labels = scales::percent_format()) +
scale_y_discrete(expand = expansion(add = c(1.5, 1.5))) +
coord_cartesian(clip = "off") +
theme_void() +
theme(panel.background = element_rect(fill = "#664436", colour = "#664436"),
plot.background = element_rect(fill = "#664436", colour = "#664436"),
plot.title.position = "plot",
plot.title = element_text(colour = "white", hjust = 0.5,
family = "fpaint", size = 24),
plot.subtitle = element_text(colour = "white", hjust = 0.5,
family = "sains", size = 11),
plot.caption = element_markdown(margin = margin(2.5, 10, 2.5, 0), size = 10, colour = "white"),
axis.text.x = element_text(colour = "white", hjust = 0.5,
family = "fpaint", size = 10),
axis.text.y = element_text(colour = "white", hjust = 1,
family = "fpaint", size = 8.5, margin = margin(0, 0, 0, 10)),
axis.line.x = element_line(colour = "white", linewidth = 1),
axis.ticks.x = element_line(colour = "white", linewidth = 1),
axis.ticks.length.x = unit(5, "pt"),
aspect.ratio = 1.55)Resources
- data from
{NHANES}package for R
PrecipGLdata from the{Kendall}package for R
- population growth data by [UN(]https://data.un.org/) (file
SYB61_253_Population_Growth_Rates_in Urban areas and Capital cities.csv)
cacaodataset from the{ungeviz}package (original source: flavorsofcacao.com[https://flavorsofcacao.com/])
2025
Comparisons
library(ggplot2)
library(cowplot)
# Load preprocessed data
load("data/preprocessed_internet_users.RData")
# Users by year
p_times <- ggplot(global_internet_users, aes(x = year, y = number_of_internet_users)) +
geom_point(aes(size = number_of_internet_users),
shape = 21, alpha = 0.75, colour = "black", fill = "#035AA6FF") +
scale_x_continuous(breaks = seq(min(global_internet_users$year),
max(global_internet_users$year), 10)) +
scale_y_continuous(limits = c(0, 5000000000), expand = c(0, 0),
breaks = seq(1e9, 5e9, 1e9),
labels = scales::label_number(scale = 1e-9, suffix = " B")) +
scale_size(range = c(1, 10)) +
theme_classic() +
theme(panel.grid.major.x = element_line(color = "grey", size = 0.2),
axis.text.x = element_text(face = "bold"),
axis.title.x = element_text(face = "bold"),
axis.title.y = element_blank(),
aspect.ratio = 0.65,
legend.position = "none")
# Pie chart for 2021
world_pop_2021 <- 7954448391
users_2021 <- global_internet_users[global_internet_users$year == 2021, ]$number_of_internet_users
df_global_2021 <- data.frame(
"internet_user" = c("users", "non-users"),
"n_people" = c(users_2021,
world_pop_2021 - users_2021)
)
# a label
df_global_2021$frx_people <- paste0(round(df_global_2021$n_people / world_pop_2021 * 100, 0), "%")
p_2021 <- ggplot(df_global_2021, aes(x = "", y = n_people, fill = internet_user)) +
geom_bar(stat = "identity", width = 1) +
scale_fill_manual(values = c("#F2F2F2FF", "#035AA6FF")) +
geom_text(aes(y = n_people, label = paste(frx_people, "\n", internet_user)),
position = position_stack(vjust = 0.5), size = 3) +
scale_y_continuous(labels = scales::comma) +
coord_polar(theta = "y") +
labs(title = "Population with Access \n in 2021") +
theme_void() +
theme(plot.title = element_text(size = 8, vjust = 0.5, hjust = 0.5),
legend.position = "none")
# combined plot
p_combined <- ggdraw(p_times) +
draw_plot(p_2021, x = 0.01, y = 0.5, width = 0.45, height = 0.45) +
draw_label("Global Acess to the Internet between 1990 and 2021",
x = 0.5, y = 1, size = 11, fontface = "bold", vjust = 1)
# export
ggsave(plot = p_combined, filename = "plots/03-Circles.png")library(dplyr)
library(ggplot2)
library(scales)
library(gridExtra)
# Load preprocessed data
load("data/preprocessed_internet_users.RData")
# color scale by continent - as in the Internet data
continent_colors <- c(c("Africa" = "#524595", "Asia" = "#0392cf",
"Europe" = "#e86af0", "North America" = "#ffbf00",
"Oceania" = "#95CA3E", "South America" = "#F26923"))
# Population data has both Americas together
continent_colors_merged <- c(c("Africa" = "#524595", "Asia" = "#0392cf",
"Europe" = "#e86af0", "Americas" = "#a7882e",
"Oceania" = "#95CA3E"))
### USERS BY THE END OF EACH DECADE
# =================================
# For number of user at a given decade, take last year of the decade
decade_data <- internet_users_by_continent |>
# first meaningful data start in 1990s -> keep only those and later
filter(year %in% c(1999, 2009, 2019, 2021)) |>
group_by(year) |>
arrange(number_of_internet_users) |>
mutate(decade = floor(year / 10) * 10)
### ABSOLUTE NUMBERS
# ------------------
p_ls <- list()
for (d in sort(unique(decade_data$decade))) {
input <- filter(decade_data, decade == d)
p <- ggplot(input, aes(x = reorder(entity, number_of_internet_users), y = number_of_internet_users, group = entity)) +
geom_col(aes(fill = entity), position = "dodge") +
scale_fill_manual(values = continent_colors) +
scale_y_continuous(labels = ifelse(max(input$number_of_internet_users > 1e9), label_number(scale = 1e-9, suffix = " B"),
label_number(scale = 1e-6, suffix = " M")),
expand = c(0, 0)) +
labs(title = paste0(d, "s")) +
theme_classic() +
theme(plot.title = element_text(hjust = 0.5, vjust = 1, face = "bold"),
axis.title = element_blank(),
axis.ticks.x = element_blank(),
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
aspect.ratio = 0.65,
legend.position = "none")
p_ls[[as.character(d)]] <- p
}
# export
ggsave(plot = grid.arrange(grobs = p_ls, nrow = 1, top = "Number of Internet Users \n at the End of Each Decade and in 2021"),
filename = "plots/05-Ranking-Absolute.png", width = 7, height = 4)
### PERCENTAGE OF POPULATION
# --------------------------
# For population, take last year of the decade
load("data/preprocessed_population.RData")
population_decades <- population_by_continent |>
filter(year %in% c(1999, 2009, 2019, 2021)) |>
mutate(decade = floor(year / 10) * 10)
decade_data <- decade_data |>
mutate(entity = case_when(
grepl("America", entity) ~ "Americas",
TRUE ~ entity)) |>
# Americas must be summer up
group_by(entity, decade) |>
summarise(number_of_internet_users = sum(number_of_internet_users))
decade_data_pop <- merge.data.frame(decade_data, population_decades, by = c("decade", "entity"))
# percentage of continent's population with internet users
decade_data_pop$prcnt <- decade_data_pop$number_of_internet_users / decade_data_pop$all_years
p_ls <- list()
for (d in sort(unique(decade_data_pop$decade))) {
input <- filter(decade_data_pop, decade == d)
p <- ggplot(input, aes(x = reorder(entity, prcnt), y = prcnt, group = entity)) +
geom_col(aes(fill = entity), position = "dodge") +
scale_fill_manual(values = continent_colors_merged) +
geom_text(aes(x = entity, y = ifelse(prcnt > 0.05, prcnt + 0.05, 0.05), label = paste0(round(prcnt * 100, 0), "%")),
position = position_dodge(width = 0.5), size = 3) +
scale_y_continuous(expand = c(0, 0), limits = c(0, max(input$prcnt) * 1.25)) +
labs(title = paste0(d, "s")) +
theme_classic() +
theme(plot.title = element_text(hjust = 0.5, vjust = 1, face = "bold"),
axis.title = element_blank(),
axis.ticks.x = element_blank(),
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
axis.ticks.y = element_blank(),
axis.line.y = element_blank(),
axis.text.y = element_blank(),
aspect.ratio = 0.65,
legend.position = "none")
p_ls[[as.character(d)]] <- p
}
# export
ggsave(plot = grid.arrange(grobs = p_ls, nrow = 1, top = "Population with Internet Access \n at the End of Each Decade and in 2021"),
filename = "plots/05-Ranking-Percentage.png", width = 7, height = 4)Distributions
library(dplyr)
library(janitor)
library(ggplot2)
library(ggtext)
library(glue)
library(paletteer)
library(ggpubr)
library(gridExtra)
occurences <- read.delim("data/data-gov/ROMOFish_TroutData_Occurrence.csv", sep = ",", header = TRUE)
occurences <- clean_names(occurences)
### PREPROCESS DATA
# Assign year of observation
occurences$year <- case_when(
grepl("-2021", occurences$event_id) ~ "2021",
grepl("-2022", occurences$event_id) ~ "2022",
TRUE ~ NA
)
# check
sum(is.na(occurences$year)) == 0
occurences[is.na(occurences$scientific_name), ] # all observation without species are in a single year but very few -> drop them
occurences <- occurences[!is.na(occurences$scientific_name), ]
### Polis scientific names
# remove the scitnitis, year part
occurences$scientific_name <- gsub("\\s*\\(.*?\\)", "", occurences$scientific_name)
# there is a type for S. fontinalis in year 2022 -> unify spelling
occurences$scientific_name[occurences$scientific_name == "Salvelinus fontilis"] <- "Salvelinus fontinalis"
### PLOTS
theme_set(
theme_classic() +
theme(
text = element_text(size = 14),
axis.text = element_text(size = 14)
)
)
# 1. Observations by species and year
occurences_counts <- occurences |>
group_by(scientific_name, year) |>
summarise(n_individuals = n())
# create colour scales by species and year
col_species <- paletteer_d("Manu::Kereru")
names(col_species) <- unique(sort(occurences_counts$scientific_name))
col_years <- c("2021" = "#E35205FF", "2022" = "#5C88DAFF")
# Trout counts by species and year
p_species <- ggplot(occurences_counts, aes(x = year, y = n_individuals, colour = scientific_name)) +
geom_jitter(aes(size = n_individuals),
position = position_jitter(width = 0.15, seed = 123)
) +
geom_line(aes(group = scientific_name, colour = scientific_name),
position = position_jitter(width = 0.15, seed = 123)
) +
scale_y_continuous(limits = c(0, 1050), expand = c(0, 0)) +
scale_size(range = c(1, 5), guide = "none") +
scale_colour_manual(values = col_species, name = "Species") +
labs(
x = "Year",
y = "Observations"
) +
guides(colour = guide_legend(ncol = 2)) +
theme_classic() +
theme(
axis.title = element_text(face = "bold"),
axis.ticks.x = element_blank(),
legend.title = element_text(face = "bold"),
legend.text = element_text(size = 10, face = "italic"),
legend.position = "bottom",
aspect.ratio = 1.5
)
# Comparison between years makes sense only if there were spotting on both years
species_subset <- unique(occurences_counts[occurences_counts$year == "2022" & occurences_counts$n_individuals > 0, ]$scientific_name)
# A joint plot for both fish weight and length
plot_title <- glue('Weight and Length of Selected Trout Species In Years <span style = "color:{col_years["2021"]}"> 2021 </span>
and <span style = "color:{col_years["2022"]}"> 2022 </span>')
p_title <- ggplot() +
geom_blank() +
labs(
title = plot_title,
subtitle = "(Rocky Mountain National Park, USA)"
) +
theme(
plot.background = element_rect(fill = "white"),
plot.title = element_markdown(size = 16, face = "bold", hjust = 0.5),
plot.subtitle = element_text(size = 14, face = "bold", hjust = 0.5),
aspect.ratio = 0.15
)
p_mass <- ggplot(
filter(occurences, scientific_name %in% species_subset),
aes(x = mass)
) +
geom_density(aes(fill = year), alpha = 0.65) +
facet_wrap(~scientific_name) +
scale_fill_manual(values = col_years) +
scale_x_continuous(expand = c(0, 0)) +
scale_y_continuous(expand = c(0, 0)) +
labs(
x = "weight [g]"
) +
theme_classic() +
theme(
panel.background = element_rect(fill = "white"),
strip.background = element_blank(),
strip.text = element_text(face = "bold.italic", size = 11),
axis.title = element_text(face = "bold"),
legend.position = "none"
)
p_length <- ggplot(
filter(occurences, scientific_name %in% species_subset),
aes(x = length)
) +
geom_density(aes(fill = year), alpha = 0.65) +
facet_wrap(~scientific_name) +
scale_fill_manual(values = col_years) +
scale_x_continuous(expand = c(0, 0)) +
scale_y_continuous(expand = c(0, 0)) +
labs(x = "length [mm]") +
theme_classic() +
theme(
panel.background = element_rect(fill = "white"),
strip.background = element_blank(),
strip.text = element_text(face = "bold.italic", size = 11),
axis.title = element_text(face = "bold"),
legend.position = "none"
)
# combine for export
p_combined <- ggarrange(
p_title,
ggarrange(p_species, arrangeGrob(p_mass, p_length, ncol = 1), ncol = 2, heights = c(1, 1)),
ncol = 1,
heights = c(0.1, 0.9)
)
p_combined
ggsave(p_combined, filename = "plots/12-data-gov.png", width = 8, height = 7, scale = 1.15, bg = "white")Timeseries
# Packages
library(dplyr)
library(janitor)
library(ggplot2)
library(ggrepel)
library(scales)
library(ggtext)
library(glue)
library(ggpubr)
library(gridExtra)
# Load data
all_data <- read.delim("data/urban-share-european-commission/urban-share-european-commission.csv", sep = ",", header = T)
all_data <- clean_names(all_data)
# make percentages fractions between 0 and 1 -> nice axis labels with {scales} package
all_data$share_of_population_living_in_urban_areas <- all_data$share_of_population_living_in_urban_areas / 100
# Keep only years with data
past_data <- all_data[all_data$year < 2025, ]
# Explore data
summary(past_data)
sort(unique(past_data$entity))
### PLOTS
continents <- c("Africa", "Asia", "Europe", "North America", "South America", "Oceania")
continent_colors <- c(c("Africa" = "#524595", "Asia" = "#0392cf",
"Europe" = "#e86af0", "North America" = "#ffbf00",
"Oceania" = "#95CA3E", "South America" = "#F26923"))
# World - continents
global_trend <- past_data |>
filter(entity %in% continents) |>
group_by(year) |>
summarise(global_urban = mean(share_of_population_living_in_urban_areas, na.rm = TRUE))
# for plot label later on
max_global <- max(global_trend$global_urban)
# Plot
p_world <- ggplot() +
# individual continents
geom_line(data = filter(past_data, entity %in% continents),
aes(x = year, y = share_of_population_living_in_urban_areas, colour = entity), linewidth = 1) +
scale_colour_manual(values = continent_colors) +
# global trend line (smooth)
geom_smooth(data = global_trend, aes(x = year, y = global_urban),
color = "grey30", se = F, linewidth = 1.2) +
# annotate - global
geom_text_repel(data = global_trend[nrow(global_trend), ], aes(x = 2020, y = global_urban, label = "World"),
color = "grey30", size = 4, nudge_x = 8, nudge_y = 0.005,
xlim = c(2021, NA), segment.size = 0.75, segment.linetype = "dotted", fontface = "bold") +
# annotate - continents
geom_text_repel(data = filter(past_data, entity %in% continents, year == 2020),
aes(x = 2020, y = share_of_population_living_in_urban_areas, label = entity,
color = entity), size = 4, nudge_x = 10, nudge_y = -0.01,
xlim = c(2024, NA), segment.size = 0.75, segment.linetype = "dotted",
segment.ncp = 4,
box.padding = 0.4,
segment.curvature = -0.2,
segment.angle = 45) +
scale_x_continuous(expand = c(0, 0), limits = c(1975, 2040), breaks = seq(1970, 2020, 10)) +
scale_y_continuous(labels = scales::label_percent(), expand = c(0, 0),
limits = c(0.55, 0.85), breaks = seq(0.55, 0.85, 0.05)) +
labs(title = "Urbanization development across continents \nand the global trend",
x = "Year",
y = "Population in urban areas") +
theme_classic() +
theme(plot.title = element_text(size = 12, face = "bold", hjust = 0),
legend.position = "none",
aspect.ratio = 0.65)
# Identify least and fastest growing countries
# 1. Calculate slope for each entity
# Do not inlcude world and per-contine tdata
slopes <- past_data |>
filter(entity != "World", !entity %in% continents) |>
group_by(entity) |>
summarise(slope = coef(lm(share_of_population_living_in_urban_areas ~ year))[2])
# 2. Identify least and fastest growing countries
fastest_growing <- slopes |>
top_n(3, slope) |>
pull(entity)
# sorting will ensure consistenty of order/colours in title and geom_line()
fastest_growing <- sort(fastest_growing)
least_growing <- slopes |>
top_n(3, -slope) |>
pull(entity)
least_growing <- sort(least_growing)
col_fastest <- c("#E07529FF", "#FAAE32FF", "#7F7991FF")
names(col_fastest) <- fastest_growing
fastest_title <- glue("<span style = 'color: {col_fastest[1]}'> Cayman Islands </span>,
<span style = 'color: {col_fastest[2]}'> Saint Barthelemy </span> and <br>
<span style = 'color: {col_fastest[3]}'> Turks and Caicos Islands </span> <br>
have seen the fastest growth in urbanization")
p_growth <- ggplot() +
geom_line(data = filter(past_data, entity %in% fastest_growing),
aes(x = year, y = share_of_population_living_in_urban_areas, colour = entity), linewidth = 1.5) +
scale_colour_manual(values = col_fastest) +
scale_x_continuous(expand = c(0, 0), limits = c(1975, 2025), breaks = seq(1970, 2020, 10)) +
scale_y_continuous(labels = scales::label_percent(), limits = c(0, 1), expand = c(0, 0)) +
labs(title = fastest_title,
x = "Year",
y = "Population in urban areas") +
theme_classic() +
theme(plot.title = element_markdown(size = 12, hjust = 0, face = "bold"),
legend.position = "none",
aspect.ratio = 0.65) +
# add explanatory text
geom_curve(aes(x = 2006, y = 0.75, xend = 2001, yend = 0.8),
arrow = arrow(length = unit(0.2, "cm"), type = "open"), curvature = 0.3) +
geom_curve(aes(x = 2006, y = 0.7, xend = 2003, yend = 0.3),
arrow = arrow(length = unit(0.2, "cm"), type = "open"), curvature = 0.3) +
annotate("text", x = 2010, y = 0.73, label = "boom of tourism")
col_least <- c("#A84A00FF", "#5D4F36FF", "#B39085FF")
names(col_least) <- least_growing
slowest_title <- glue("<span style = 'color: {col_least[1]}'> Montserrat </span>,
<span style = 'color: {col_least[2]}'> Palau </span> and <br>
<span style = 'color: {col_least[3]}'> Saint Pierre and Miquelon </span> <br>
have seen marked drops in urbanization")
p_decline <- ggplot() +
geom_line(data = filter(past_data, entity %in% least_growing),
aes(x = year, y = share_of_population_living_in_urban_areas, color = entity), linewidth = 1.5) +
scale_colour_manual(values = col_least) +
scale_x_continuous(expand = c(0, 0), limits = c(1975, 2025), breaks = seq(1970, 2020, 10)) +
scale_y_continuous(labels = scales::label_percent(), limits = c(0, 1), expand = c(0, 0)) +
labs(title = slowest_title,
x = "Year",
y = "Population in urban areas") +
theme_classic() +
theme(plot.title = element_markdown(size = 12, hjust = 0, face = "bold"),
legend.position = "none",
aspect.ratio = 0.65) +
# add explanatory text
geom_curve(aes(x = 2001, y = 0.9, xend = 1997, yend = 0.85),
arrow = arrow(length = unit(0.2, "cm"), type = "open"), curvature = 0.3) +
annotate("text", x = 1999, y = 0.78, colour = "#A84A00FF",
label = "1997: Soufrière Hills volcano \n eruption destroyed \n the capital city Plymouth",
hjust = 0)
# combine for export
p_combined <- ggarrange(
ggarrange(p_world, arrangeGrob(p_growth, p_decline, ncol = 2), nrow = 2, widths = c(1, 1)),
ncol = 2,
widths = c(10, 2),
heights = c(1, 2)
)
p_combined
ggsave(p_combined, filename = "plots/20-Urbanization.png",
width = 8, height = 6, scale = 1.5, bg = "white")Resources
- Our World in Data: https://ourworldindata.org/grapher/number-of-internet-users?tab=table
- Our World in Data: https://ourworldindata.org/how-urban-is-the-world
- Fisheries Inventory, Trout Data, at Rocky Mountain National Park 2021-2022 by data.gov
- GHS-COUNTRY-STATS R2024A - GHSL Country Statistics by Degree of Urbanization, multitemporal (1975-2030)