#| output: false
#| message: false
#| warning: false
rm(list = ls())
# set up
pacman::p_load(dplyr, haven, tidyverse, ggplot2, kableExtra, readxl, janitor, tools, purrr, sf, classInt, tmap, patchwork, scales, gtsummary, webshot2, patchwork, cowplot, lubridate, gt, kableExtra, margins, broom, modelsummary, fixest, marginaleffects, broom.mixed, lme4, tibble, DiagrammeR, DiagrammeRsvg, rsvg, flextable, officer, ggspatial, rnaturalearth, ggrepel, viridis)
# helper functions
# extract_round function
extract_round <- function(filename) {
str_extract(filename, regex("r[0-9]+", ignore_case = TRUE)) %>%
str_to_lower()
}
makeVlist <- function(x) {
labels <- sapply(x, function(col) {
lbl <- attr(col, "label")
if (is.null(lbl)) NA_character_ else as.character(lbl)
})
tibble(name = names(labels), label = labels)
}
# record_missing function
recode_missing <- function(x, extra = NULL) {
miss_vals <- c(-1, 98, 99, 998, 999)
if (!is.null(extra)) miss_vals <- c(miss_vals, extra)
ifelse(x %in% miss_vals, NA_real_, x)
}
# read data
data_dir <- "Data"
files <- list.files(data_dir, pattern = "\\.(sav|xlsx?)$", full.names = TRUE)
excel_files <- files[file_ext(files) %in% c("xls", "xlsx")]
sav_files <- files[file_ext(files) == "sav"]
excel_list <- map(excel_files, ~ {
read_excel(.x, col_types = "text") %>% # ← force everything to character
clean_names() %>%
mutate(source_file = basename(.x),
round = extract_round(basename(.x)))
})
sav_list <- map(sav_files, ~ {
haven::read_sav(.x) %>%
clean_names() %>%
mutate(source_file = basename(.x),
round = extract_round(basename(.x)))
})
# extract labels
Labels <- map_dfr(sav_list, makeVlist) %>%
distinct(name, .keep_all = TRUE) %>%
filter(!is.na(label))
# standardize
afro_list <- c(excel_list, sav_list) %>%
lapply(function(x) {
x[] <- lapply(x, as.character)
x
})
# combine
afro_combined <- bind_rows(afro_list)
# code book function
make_codebook <- function(df) {
map_dfr(names(df), function(var) {
col <- df[[var]]
var_label <- attr(col, "label")
value_labels <- attr(col, "labels")
if (!is.null(value_labels)) {
tibble(
variable = var,
question = if (is.null(var_label)) NA_character_ else as.character(var_label),
value = unname(value_labels),
value_label = names(value_labels)
)
} else {
tibble(
variable = var,
question = if (is.null(var_label)) NA_character_ else as.character(var_label),
value = NA_real_,
value_label = NA_character_
)
}
})
}
# build code book from all sav files and deduplicate
codebook <- map_dfr(sav_list, make_codebook) %>%
distinct(variable, value, .keep_all = TRUE)
# cleaning labels
coef_labels <- c(
"lpi_catLow lived poverty" = "Low Lived Poverty",
"lpi_catModerate lived poverty" = "Moderate Lived Poverty",
"lpi_catHigh lived poverty" = "High Lived Poverty",
"education_catMedium Education" = "Medium Education",
"education_catHigh Education" = "High Education",
"employment1" = "Employed",
"sexFemale" = "Female",
"urbanityRural" = "Rural (vs Urban)",
"roundr5" = "Round 5",
"roundr6" = "Round 6",
"roundr7" = "Round 7",
"roundr8" = "Round 8",
"roundr9" = "Round 9"
)
# set chord
UTM29N <- 32629
# set theme
theme_thesis <- function(base_size = 11) {
theme_minimal(base_size = base_size) +
theme(
plot.title = element_text(size = base_size + 2, face = "bold",
margin = margin(b = 4)),
plot.subtitle = element_text(size = base_size - 0.5, colour = "grey40",
margin = margin(b = 10)),
plot.caption = element_text(size = base_size - 2, colour = "grey50",
hjust = 0, margin = margin(t = 8)),
axis.title = element_text(size = base_size - 0.5, colour = "grey30"),
axis.text = element_text(size = base_size - 1.5, colour = "grey40"),
legend.title = element_text(size = base_size - 0.5, face = "bold"),
legend.text = element_text(size = base_size - 1),
panel.grid.major = element_line(colour = "grey92", linewidth = 0.4),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
legend.position = "bottom",
legend.key.width = unit(1.2, "cm"),
plot.margin = margin(12, 12, 8, 12)
)
}
# Colour palette for consistent across all figures
COL_TREAT <- "#1D6B8F" # treated / near grid — steel blue
COL_CONTROL <- "#B0B0B0" # control / far — grey
COL_POST <- "#C14B28" # post-treatment marker — rust
COL_ACCENT <- "#E8A838" # accent — amber
COL_CONTROL <- "#B0B0B0"
COL_ALT <- "#C0392B"
COL_MAIN <- "#4C7EAF"
COL_POST <- "#C14B28"
COL_PRE <- "#4C7EAF"
COL_REF <- "grey40"
COL_ROB <- "#4CAF82"
#bring in grid files
GRID_DIR <- "liberia_transmissiongridmapping"
lines <- st_read(file.path(GRID_DIR, "Liberia_Powerline.shp"), quiet = TRUE) |> st_transform(UTM29N)
towers <- st_read(file.path(GRID_DIR, "Liberia_Powertower_withDEM.shp"), quiet = TRUE) |> st_transform(UTM29N)
plants <- st_read(file.path(GRID_DIR, "Liberia_PowerPlant.shp"), quiet = TRUE) |> st_transform(UTM29N)
stations <- st_read(file.path(GRID_DIR, "Liberia_Substation.shp"), quiet = TRUE) |> st_transform(UTM29N)
# Filter to operational infrastructure only
# Drop 2 under-construction 225KV CLSG lines (regional interconnector)
lines_op <- lines |>
filter(is.na(Comment) |
!str_detect(Comment, regex("under construction", ignore_case = TRUE)))
stations_op <- stations |>
filter(!str_detect(Business, regex("under construction", ignore_case = TRUE)))
# bring shape boundaries shape file
liberia_adm1 <- st_read("Geo/liberia-county-boundary/", quiet = TRUE) |>
st_transform(UTM29N)
# data prep and cleaning
# filter by rounds
round_4 <- afro_combined |>
filter(round=="r4")
round_5 <- afro_combined |>
filter(round=="r5")
round_6 <- afro_combined |>
filter(round=="r6")
round_7 <- afro_combined |>
filter(round=="r7")
round_8 <- afro_combined |>
filter(round=="r8")
round_9 <- afro_combined |>
filter(round=="r9")
# cleaning
afro_combined <- afro_combined |>
mutate(latitude = coalesce(latitude, ea_gps_la)) |>
mutate(longitude = coalesce(longitude, ea_gps_lo)) |>
mutate(q94 = coalesce(q94, q97))
# export
# writexl::write_xlsx(afro_combined, "Clean/final_clean_data.xlsx")
# write_csv(afro_combined, "Clean/final_clean_data.csv")
# write_rds(afro_combined, "Clean/final_clean_data.rds")
# bring in clean data
# dependent variable
df <- afro_combined |>
mutate(
electricity_access = ifelse(ea_svc_a ==1, 1, 0),
urbanity = factor(urbrur, levels = 1:2, labels = c("Urban", "Rural")))
# education
df <- df %>%
mutate(
round = as.character(round),
edu_src = case_when(
round == "r4" ~ as.numeric(q89),
round == "r5" ~ as.numeric(q97),
round == "r6" ~ as.numeric(q97),
round == "r7" ~ as.numeric(q97),
round == "r8" ~ as.numeric(q97),
round == "r9" ~ as.numeric(q94),
TRUE ~ NA_real_
)
)
# recode missings
df <- df %>%
mutate(education = recode_missing(edu_src))
# Build Categorical Education Variable
df <- df |>
mutate(
education_cat = case_when(
education %in% 0:3 ~ 1, # low
education %in% 4:6 ~ 2, # medium
education %in% 7:9 ~ 3, # high
TRUE ~ NA_real_
),
education_cat = factor(
education_cat,
levels = c(1, 2, 3),
labels = c("Low Education", "Medium Education", "High Education")
)
)
# employment
df <- df %>%
mutate(
round = as.character(round),
employ = case_when(
round == "r4" ~ as.numeric(q94),
round == "r5" ~ as.numeric(q96),
round == "r6" ~ as.numeric(q95),
round == "r7" ~ as.numeric(q94),
round == "r8" ~ as.numeric(q95a),
round == "r9" ~ as.numeric(q93a),
TRUE ~ NA_real_
)
)
# recode missings and build employed variable
df <- df |>
mutate(
employment = recode_missing(employ),
employed = case_when(
employment %in% c(2, 3, 4, 5) ~ 1L, # employed (any)
employment %in% c(0, 1) ~ 0L, # not employed
TRUE ~ NA_integer_
)
)
# asset ownership
# TV
df <- df |>
mutate(
round = as.character(round),
tv = case_when(
round == "r4" ~ as.numeric(q92b),
round == "r5" ~ as.numeric(q90b),
round == "r6" ~ as.numeric(q91b),
round == "r7" ~ as.numeric(q89b),
round == "r8" ~ as.numeric(q92b),
round == "r9" ~ as.numeric(q90b),
TRUE ~ NA_real_
)
)
# recode missings and build TV variable
df <- df |>
mutate(
tv = recode_missing(tv, extra = c(9)),
tv_own = case_when(
tv %in% c(1, 2) ~ 1L, # own TV
tv %in% c(0) ~ 0L, # No TV
TRUE ~ NA_integer_
)
)
# gender
# TV
df <- df |>
mutate(
round = as.character(round),
gender = case_when(
round == "r4" ~ as.numeric(q101),
round == "r5" ~ as.numeric(q101),
round == "r6" ~ as.numeric(q101),
round == "r7" ~ as.numeric(q101),
round == "r8" ~ as.numeric(q101),
round == "r9" ~ as.numeric(q100),
TRUE ~ NA_real_
)
)
# recode missings and build gender variable
df <- df |>
mutate(
gender = recode_missing(gender),
sex = case_when(
gender == 1 ~ "Male",
gender == 2 ~ "Female",
TRUE ~ NA_character_
)
)
# build county variable
df <- df |>
mutate(
county = case_when(
region == 380 ~ "Bomi",
region == 381 ~ "Bong",
region == 382 ~ "Gbarpolu",
region == 383 ~ "Grand Bassa",
region == 384 ~ "Grand Cape Mount",
region == 385 ~ "Grand Gedeh",
region == 386 ~ "Grand Kru",
region == 387 ~ "Lofa",
region == 388 ~ "Margibi",
region == 389 ~ "Maryland",
region == 390 ~ "Montserrado",
region == 391 ~ "Nimba",
region == 392 ~ "Rivercess",
region == 393 ~ "River Gee",
region == 394 ~ "Sinoe",
TRUE ~ NA_character_
)
)
# poverty
## food
df <- df |>
mutate(
round = as.character(round),
food = case_when(
round == "r4" ~ as.numeric(q8a),
round == "r5" ~ as.numeric(q8a),
round == "r6" ~ as.numeric(q8a),
round == "r7" ~ as.numeric(q8a),
round == "r8" ~ as.numeric(q7a),
round == "r9" ~ as.numeric(q6a),
TRUE ~ NA_real_
)
)
## water
df <- df |>
mutate(
round = as.character(round),
water = case_when(
round == "r4" ~ as.numeric(q8b),
round == "r5" ~ as.numeric(q8b),
round == "r6" ~ as.numeric(q8b),
round == "r7" ~ as.numeric(q8b),
round == "r8" ~ as.numeric(q7b),
round == "r9" ~ as.numeric(q6b),
TRUE ~ NA_real_
)
)
## medicine
df <- df |>
mutate(
round = as.character(round),
medicine = case_when(
round == "r4" ~ as.numeric(q8c),
round == "r5" ~ as.numeric(q8c),
round == "r6" ~ as.numeric(q8c),
round == "r7" ~ as.numeric(q8c),
round == "r8" ~ as.numeric(q7c),
round == "r9" ~ as.numeric(q6c),
TRUE ~ NA_real_
)
)
## fuel
df <- df |>
mutate(
round = as.character(round),
fuel = case_when(
round == "r4" ~ as.numeric(q8d),
round == "r5" ~ as.numeric(q8d),
round == "r6" ~ as.numeric(q8d),
round == "r7" ~ as.numeric(q8d),
round == "r8" ~ as.numeric(q7d),
round == "r9" ~ as.numeric(q6d),
TRUE ~ NA_real_
)
)
## cash
df <- df |>
mutate(
round = as.character(round),
cash = case_when(
round == "r4" ~ as.numeric(q8e),
round == "r5" ~ as.numeric(q8e),
round == "r6" ~ as.numeric(q8e),
round == "r7" ~ as.numeric(q8e),
round == "r8" ~ as.numeric(q7e),
round == "r9" ~ as.numeric(q6e),
TRUE ~ NA_real_
)
)
## build LPI
build_lpi <- function(df) {
df %>%
mutate(
lpi_food = recode_missing(food, extra = c(8, 9)),
lpi_water = recode_missing(water, extra = c(8, 9)),
lpi_med = recode_missing(medicine, extra = c(8, 9)),
lpi_fuel = recode_missing(fuel, extra = c(8, 9)),
lpi_cash = recode_missing(cash, extra = c(8, 9)),
# ── Count of valid (non-missing) items per respondent ──
lpi_n_valid = rowSums(!is.na(cbind(
lpi_food, lpi_water, lpi_med, lpi_fuel, lpi_cash
))),
lpi = ifelse(
lpi_n_valid == 5,
rowMeans(cbind(lpi_food, lpi_water, lpi_med, lpi_fuel, lpi_cash),
na.rm = FALSE),
NA_real_
),
lpi_cat = case_when(
is.na(lpi) ~ NA_character_,
lpi == 0 ~ "No lived poverty",
lpi <= 1.0 ~ "Low lived poverty",
lpi <= 2.0 ~ "Moderate lived poverty",
lpi > 2.0 ~ "High lived poverty"
),
lpi_cat = factor(lpi_cat, levels = c(
"No lived poverty",
"Low lived poverty",
"Moderate lived poverty",
"High lived poverty"
)),
lpi_high = case_when(
is.na(lpi) ~ NA_real_,
lpi >= 2.20 ~ 1,
TRUE ~ 0
),
freq_food = if_else(lpi_food >= 3, 1L, 0L, missing = NA_integer_),
freq_water = if_else(lpi_water >= 3, 1L, 0L, missing = NA_integer_),
freq_med = if_else(lpi_med >= 3, 1L, 0L, missing = NA_integer_),
freq_fuel = if_else(lpi_fuel >= 3, 1L, 0L, missing = NA_integer_),
freq_cash = if_else(lpi_cash >= 3, 1L, 0L, missing = NA_integer_)
)
}
# add to dataframe
df <- df %>%
rename_with(tolower) %>%
build_lpi()
## merge
df <- df %>%
mutate(
# Merge the two weight column names into one
withinwt = coalesce(
suppressWarnings(as.numeric(withinwt)),
suppressWarnings(as.numeric(withinwt_hh))
),
wt_imputed = is.na(withinwt),
withinwt = if_else(is.na(withinwt) | withinwt <= 0, 1, withinwt)
)
# verify
df %>%
group_by(round) %>%
summarise(
wt_class = class(withinwt)[1],
wt_min = min(withinwt),
wt_max = max(withinwt),
pct_imputed = mean(wt_imputed) * 100,
.groups = "drop"
) %>%
kable(
digits = 2,
caption = "Round Weights Verification",
col.names = c("Round", "Weight Class", "Min Weight", "Max Weight", "% Imputed")
) %>%
kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
row_spec(0, bold = TRUE)