Bright Zone, Dark Zone: Spatial Electricity Inequality in Liberia

Replication Materials

Author
Affiliation

Vermon Washington

Hertie School

Published

April 28, 2026

Instructions and Overview

To complete this replication, you will need the following datasets: 1. Liberia’s Afrobarometer, Rounds 4-9 (geocoded version available upon request from Afrobarometer) 2. ESMAP files from the World Bank, and 3. Liberia’s shape files from HDX.

The code is structure according to the order that tables and figures appear in the thesis. If you have any questions, please do not hesitant to send an email. Thank you.

Code
#| output: false
#| message: false
#| warning: false

rm(list = ls()) 

# set up
pacman::p_load(dplyr, haven, tidyverse, ggplot2, kableExtra, readxl, janitor, tools, purrr, sf, classInt, tmap, patchwork, scales, gtsummary, webshot2, patchwork, cowplot, lubridate, gt, kableExtra, margins, broom, modelsummary, fixest, marginaleffects, broom.mixed, lme4, tibble, DiagrammeR, DiagrammeRsvg, rsvg, flextable, officer, ggspatial, rnaturalearth, ggrepel, viridis)

# helper functions 

# extract_round function 

extract_round <- function(filename) {
  str_extract(filename, regex("r[0-9]+", ignore_case = TRUE)) %>%
    str_to_lower()
}

makeVlist <- function(x) {
  labels <- sapply(x, function(col) {
    lbl <- attr(col, "label")
    if (is.null(lbl)) NA_character_ else as.character(lbl)
  })
  tibble(name = names(labels), label = labels)
}


# record_missing function  
recode_missing <- function(x, extra = NULL) {
  miss_vals <- c(-1, 98, 99, 998, 999)
  if (!is.null(extra)) miss_vals <- c(miss_vals, extra)
  ifelse(x %in% miss_vals, NA_real_, x)
}



# read data 

data_dir <- "Data"
files    <- list.files(data_dir, pattern = "\\.(sav|xlsx?)$", full.names = TRUE)

excel_files <- files[file_ext(files) %in% c("xls", "xlsx")]
sav_files   <- files[file_ext(files) == "sav"]

excel_list <- map(excel_files, ~ {
  read_excel(.x, col_types = "text") %>%   # ← force everything to character
    clean_names() %>%
    mutate(source_file = basename(.x),
           round       = extract_round(basename(.x)))
})

sav_list <- map(sav_files, ~ {
  haven::read_sav(.x) %>%
    clean_names() %>%
    mutate(source_file = basename(.x),
           round       = extract_round(basename(.x)))
})


# extract labels 

Labels <- map_dfr(sav_list, makeVlist) %>%
  distinct(name, .keep_all = TRUE) %>%
  filter(!is.na(label))

# standardize 

afro_list <- c(excel_list, sav_list) %>%
  lapply(function(x) {
    x[] <- lapply(x, as.character)
    x
  })

# combine
afro_combined <- bind_rows(afro_list)



# code book function 

make_codebook <- function(df) {
  map_dfr(names(df), function(var) {
    col <- df[[var]]
    
    var_label   <- attr(col, "label")       
    value_labels <- attr(col, "labels")   
    
    if (!is.null(value_labels)) {
      tibble(
        variable    = var,
        question    = if (is.null(var_label)) NA_character_ else as.character(var_label),
        value       = unname(value_labels),
        value_label = names(value_labels)
      )
    } else {
      tibble(
        variable    = var,
        question    = if (is.null(var_label)) NA_character_ else as.character(var_label),
        value       = NA_real_,
        value_label = NA_character_
      )
    }
  })
}

# build code book from all sav files and deduplicate
codebook <- map_dfr(sav_list, make_codebook) %>%
  distinct(variable, value, .keep_all = TRUE)


# cleaning labels 
coef_labels <- c(
  "lpi_catLow lived poverty" = "Low Lived Poverty",
  "lpi_catModerate lived poverty" = "Moderate Lived Poverty",
  "lpi_catHigh lived poverty" = "High Lived Poverty",
  "education_catMedium Education" = "Medium Education",
  "education_catHigh Education" = "High Education",
  "employment1" = "Employed",
  "sexFemale" = "Female",
  "urbanityRural" = "Rural (vs Urban)",
  "roundr5" = "Round 5",
  "roundr6" = "Round 6",
  "roundr7" = "Round 7",
  "roundr8" = "Round 8",
  "roundr9" = "Round 9"
)


# set chord 
UTM29N <- 32629


# set theme

theme_thesis <- function(base_size = 11) {
  theme_minimal(base_size = base_size) +
    theme(
      plot.title       = element_text(size = base_size + 2, face = "bold",
                                      margin = margin(b = 4)),
      plot.subtitle    = element_text(size = base_size - 0.5, colour = "grey40",
                                      margin = margin(b = 10)),
      plot.caption     = element_text(size = base_size - 2, colour = "grey50",
                                      hjust = 0, margin = margin(t = 8)),
      axis.title       = element_text(size = base_size - 0.5, colour = "grey30"),
      axis.text        = element_text(size = base_size - 1.5, colour = "grey40"),
      legend.title     = element_text(size = base_size - 0.5, face = "bold"),
      legend.text      = element_text(size = base_size - 1),
      panel.grid.major = element_line(colour = "grey92", linewidth = 0.4),
      panel.grid.minor = element_blank(),
      panel.border     = element_blank(),
      legend.position  = "bottom",
      legend.key.width = unit(1.2, "cm"),
      plot.margin      = margin(12, 12, 8, 12)
    )
}


# Colour palette  for consistent across all figures
COL_TREAT   <- "#1D6B8F"   # treated / near grid  — steel blue
COL_CONTROL <- "#B0B0B0"   # control / far         — grey
COL_POST    <- "#C14B28"   # post-treatment marker — rust
COL_ACCENT  <- "#E8A838"   # accent                — amber
COL_CONTROL <- "#B0B0B0"
COL_ALT <- "#C0392B"
COL_MAIN <- "#4C7EAF"
COL_POST <- "#C14B28"
COL_PRE <- "#4C7EAF"
COL_REF <- "grey40"
COL_ROB <- "#4CAF82"


#bring in grid files 
GRID_DIR <- "liberia_transmissiongridmapping"

lines    <- st_read(file.path(GRID_DIR, "Liberia_Powerline.shp"),          quiet = TRUE) |> st_transform(UTM29N)
towers   <- st_read(file.path(GRID_DIR, "Liberia_Powertower_withDEM.shp"), quiet = TRUE) |> st_transform(UTM29N)
plants   <- st_read(file.path(GRID_DIR, "Liberia_PowerPlant.shp"),         quiet = TRUE) |> st_transform(UTM29N)
stations <- st_read(file.path(GRID_DIR, "Liberia_Substation.shp"),         quiet = TRUE) |> st_transform(UTM29N)


# Filter to operational infrastructure only
# Drop 2 under-construction 225KV CLSG lines (regional interconnector)
lines_op <- lines |>
  filter(is.na(Comment) |
         !str_detect(Comment, regex("under construction", ignore_case = TRUE)))

stations_op <- stations |>
  filter(!str_detect(Business, regex("under construction", ignore_case = TRUE)))


# bring shape boundaries shape file 
liberia_adm1 <- st_read("Geo/liberia-county-boundary/", quiet = TRUE) |>
  st_transform(UTM29N)


# data prep and cleaning 

# filter by rounds 

round_4 <- afro_combined |> 
  filter(round=="r4")

round_5 <- afro_combined |> 
  filter(round=="r5")

round_6 <- afro_combined |> 
  filter(round=="r6")

round_7 <- afro_combined |> 
  filter(round=="r7")

round_8 <- afro_combined |> 
  filter(round=="r8")

round_9 <- afro_combined |> 
  filter(round=="r9")



# cleaning 

afro_combined <- afro_combined |> 
  mutate(latitude = coalesce(latitude, ea_gps_la)) |> 
  mutate(longitude = coalesce(longitude, ea_gps_lo)) |> 
  mutate(q94 = coalesce(q94, q97))


# export
# writexl::write_xlsx(afro_combined, "Clean/final_clean_data.xlsx")
# write_csv(afro_combined, "Clean/final_clean_data.csv")
# write_rds(afro_combined, "Clean/final_clean_data.rds")


# bring in clean data 

# dependent variable 
df <- afro_combined |> 
  mutate(
    electricity_access = ifelse(ea_svc_a ==1, 1, 0),
    urbanity = factor(urbrur, levels = 1:2, labels = c("Urban", "Rural")))


# education 
df <- df %>%
  mutate(
    round = as.character(round), 
    edu_src = case_when(
      round == "r4" ~ as.numeric(q89),
      round == "r5" ~ as.numeric(q97),
      round == "r6" ~ as.numeric(q97),
      round == "r7" ~ as.numeric(q97),
      round == "r8" ~ as.numeric(q97),
      round == "r9" ~ as.numeric(q94),
      TRUE       ~ NA_real_
    )
  )


# recode missings 
df <- df %>%
  mutate(education = recode_missing(edu_src)) 


# Build Categorical Education Variable 
df <- df |>
  mutate(
    education_cat = case_when(
      education %in% 0:3                  ~ 1,                 # low
      education %in% 4:6                  ~ 2,                 # medium
      education %in% 7:9                  ~ 3,                 # high
      TRUE                          ~ NA_real_
    ),
    education_cat = factor(
      education_cat,
      levels = c(1, 2, 3),
      labels = c("Low Education", "Medium Education", "High Education")
    )
  )



# employment 

df <- df %>%
  mutate(
    round = as.character(round), 
    employ = case_when(
      round == "r4" ~ as.numeric(q94),
      round == "r5" ~ as.numeric(q96),
      round == "r6" ~ as.numeric(q95),
      round == "r7" ~ as.numeric(q94),
      round == "r8" ~ as.numeric(q95a),
      round == "r9" ~ as.numeric(q93a),
      TRUE       ~ NA_real_
    )
  )

# recode missings and build employed variable 
df <- df |> 
  mutate(
    employment = recode_missing(employ), 
  
    employed = case_when(
    employment %in% c(2, 3, 4, 5) ~ 1L,  # employed (any)
    employment %in% c(0, 1)        ~ 0L,  # not employed
    TRUE ~ NA_integer_
    )
)




# asset ownership 

# TV 
df <- df |> 
  mutate(
    round = as.character(round), 
    tv = case_when(
      round == "r4" ~ as.numeric(q92b),
      round == "r5" ~ as.numeric(q90b),
      round == "r6" ~ as.numeric(q91b),
      round == "r7" ~ as.numeric(q89b),
      round == "r8" ~ as.numeric(q92b),
      round == "r9" ~ as.numeric(q90b),
      TRUE       ~ NA_real_
    )
  )



# recode missings and build TV variable 
df <- df |> 
  mutate(
    tv = recode_missing(tv, extra = c(9)), 
  
    tv_own = case_when(
    tv %in% c(1, 2) ~ 1L,  # own TV 
    tv %in% c(0)        ~ 0L,  # No TV
    TRUE ~ NA_integer_
    )
)



# gender  

# TV 
df <- df |> 
  mutate(
    round = as.character(round), 
    gender = case_when(
      round == "r4" ~ as.numeric(q101),
      round == "r5" ~ as.numeric(q101),
      round == "r6" ~ as.numeric(q101),
      round == "r7" ~ as.numeric(q101),
      round == "r8" ~ as.numeric(q101),
      round == "r9" ~ as.numeric(q100),
      TRUE       ~ NA_real_
    )
  )



# recode missings and build gender variable 
df <- df |>
  mutate(
    gender = recode_missing(gender),
    sex = case_when(
      gender == 1 ~ "Male",
      gender == 2 ~ "Female",
      TRUE        ~ NA_character_
    )
  )


# build county variable 
df <- df |>
  mutate(
    county = case_when(
      region == 380 ~ "Bomi",
      region == 381 ~ "Bong",
      region == 382 ~ "Gbarpolu",
      region == 383 ~ "Grand Bassa",
      region == 384 ~ "Grand Cape Mount",
      region == 385 ~ "Grand Gedeh",
      region == 386 ~ "Grand Kru",
      region == 387 ~ "Lofa",
      region == 388 ~ "Margibi",
      region == 389 ~ "Maryland",
      region == 390 ~ "Montserrado",
      region == 391 ~ "Nimba",
      region == 392 ~ "Rivercess",
      region == 393 ~ "River Gee",
      region == 394 ~ "Sinoe",
      TRUE         ~ NA_character_
    )
  )




# poverty

## food 

df <- df |> 
  mutate(
    round = as.character(round), 
    food = case_when(
      round == "r4" ~ as.numeric(q8a),
      round == "r5" ~ as.numeric(q8a),
      round == "r6" ~ as.numeric(q8a),
      round == "r7" ~ as.numeric(q8a),
      round == "r8" ~ as.numeric(q7a),
      round == "r9" ~ as.numeric(q6a),
      TRUE       ~ NA_real_
    )
  )



## water 

df <- df |> 
  mutate(
    round = as.character(round), 
    water = case_when(
      round == "r4" ~ as.numeric(q8b),
      round == "r5" ~ as.numeric(q8b),
      round == "r6" ~ as.numeric(q8b),
      round == "r7" ~ as.numeric(q8b),
      round == "r8" ~ as.numeric(q7b),
      round == "r9" ~ as.numeric(q6b),
      TRUE       ~ NA_real_
    )
  )


## medicine  

df <- df |> 
  mutate(
    round = as.character(round), 
    medicine = case_when(
      round == "r4" ~ as.numeric(q8c),
      round == "r5" ~ as.numeric(q8c),
      round == "r6" ~ as.numeric(q8c),
      round == "r7" ~ as.numeric(q8c),
      round == "r8" ~ as.numeric(q7c),
      round == "r9" ~ as.numeric(q6c),
      TRUE       ~ NA_real_
    )
  )


## fuel  

df <- df |> 
  mutate(
    round = as.character(round), 
    fuel = case_when(
      round == "r4" ~ as.numeric(q8d),
      round == "r5" ~ as.numeric(q8d),
      round == "r6" ~ as.numeric(q8d),
      round == "r7" ~ as.numeric(q8d),
      round == "r8" ~ as.numeric(q7d),
      round == "r9" ~ as.numeric(q6d),
      TRUE       ~ NA_real_
    )
  )

## cash  

df <- df |> 
  mutate(
    round = as.character(round), 
    cash = case_when(
      round == "r4" ~ as.numeric(q8e),
      round == "r5" ~ as.numeric(q8e),
      round == "r6" ~ as.numeric(q8e),
      round == "r7" ~ as.numeric(q8e),
      round == "r8" ~ as.numeric(q7e),
      round == "r9" ~ as.numeric(q6e),
      TRUE       ~ NA_real_
    )
  )



## build LPI 

build_lpi <- function(df) {
  df %>%
    mutate(
      lpi_food = recode_missing(food, extra = c(8, 9)),
      lpi_water = recode_missing(water, extra = c(8, 9)), 
      lpi_med = recode_missing(medicine, extra = c(8, 9)),
      lpi_fuel = recode_missing(fuel, extra = c(8, 9)),
      lpi_cash = recode_missing(cash, extra = c(8, 9)),
      
    # ── Count of valid (non-missing) items per respondent ──
      lpi_n_valid = rowSums(!is.na(cbind(
        lpi_food, lpi_water, lpi_med, lpi_fuel, lpi_cash
      ))),
      lpi = ifelse(
        lpi_n_valid == 5,
        rowMeans(cbind(lpi_food, lpi_water, lpi_med, lpi_fuel, lpi_cash),
                 na.rm = FALSE),
        NA_real_
      ),
      lpi_cat = case_when(
        is.na(lpi)       ~ NA_character_,
        lpi == 0         ~ "No lived poverty",
        lpi <= 1.0       ~ "Low lived poverty",
        lpi <= 2.0       ~ "Moderate lived poverty",
        lpi >  2.0       ~ "High lived poverty"
      ),

      lpi_cat = factor(lpi_cat, levels = c(
        "No lived poverty",
        "Low lived poverty",
        "Moderate lived poverty",
        "High lived poverty"
      )),
      lpi_high = case_when(
        is.na(lpi)  ~ NA_real_,
        lpi >= 2.20 ~ 1,
        TRUE        ~ 0
      ),

      freq_food  = if_else(lpi_food  >= 3, 1L, 0L, missing = NA_integer_),
      freq_water = if_else(lpi_water >= 3, 1L, 0L, missing = NA_integer_),
      freq_med   = if_else(lpi_med   >= 3, 1L, 0L, missing = NA_integer_),
      freq_fuel  = if_else(lpi_fuel  >= 3, 1L, 0L, missing = NA_integer_),
      freq_cash  = if_else(lpi_cash  >= 3, 1L, 0L, missing = NA_integer_)
    )
}


# add to dataframe  
df <- df %>%
  rename_with(tolower) %>%
  build_lpi() 


## merge 
df <- df  %>%
  mutate(
    # Merge the two weight column names into one
    withinwt = coalesce(
      suppressWarnings(as.numeric(withinwt)),
      suppressWarnings(as.numeric(withinwt_hh))
    ),
    wt_imputed = is.na(withinwt),
    withinwt   = if_else(is.na(withinwt) | withinwt <= 0, 1, withinwt)
  )

# verify 
df %>%
  group_by(round) %>%
  summarise(
    wt_class    = class(withinwt)[1],
    wt_min      = min(withinwt),
    wt_max      = max(withinwt),
    pct_imputed = mean(wt_imputed) * 100,
    .groups     = "drop"
  ) %>%
  kable(
    digits    = 2,
    caption   = "Round Weights Verification",
    col.names = c("Round", "Weight Class", "Min Weight", "Max Weight", "% Imputed")
  ) %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
  row_spec(0, bold = TRUE)
Round Weights Verification
Round Weight Class Min Weight Max Weight % Imputed
r4 numeric 0.09 3.96 0
r5 numeric 0.13 3.47 0
r6 numeric 0.06 3.51 0
r7 numeric 0.14 3.03 0
r8 numeric 0.08 2.41 0
r9 numeric 0.04 5.37 0
Code
# Validation of LPI

df %>%
  group_by(round) %>%
  summarise(
    n              = n(),
    lpi_mean       = weighted.mean(lpi, w = withinwt, na.rm = TRUE),
    pct_high_lpi   = weighted.mean(lpi_high, w = withinwt, na.rm = TRUE) * 100,
    pct_no         = weighted.mean(lpi_cat == "No lived poverty",
                                   w = withinwt, na.rm = TRUE) * 100,
    pct_low        = weighted.mean(lpi_cat == "Low lived poverty",
                                   w = withinwt, na.rm = TRUE) * 100,
    pct_moderate   = weighted.mean(lpi_cat == "Moderate lived poverty",
                                   w = withinwt, na.rm = TRUE) * 100,
    pct_high_cat   = weighted.mean(lpi_cat == "High lived poverty",
                                   w = withinwt, na.rm = TRUE) * 100,
    freq_food_pct  = weighted.mean(freq_food,  w = withinwt, na.rm = TRUE) * 100,
    freq_water_pct = weighted.mean(freq_water, w = withinwt, na.rm = TRUE) * 100,
    freq_med_pct   = weighted.mean(freq_med,   w = withinwt, na.rm = TRUE) * 100,
    freq_fuel_pct  = weighted.mean(freq_fuel,  w = withinwt, na.rm = TRUE) * 100,
    freq_cash_pct  = weighted.mean(freq_cash,  w = withinwt, na.rm = TRUE) * 100,
    .groups = "drop"
  ) %>%
  kable(
    digits    = 2,
    caption   = "LPI Category Distribution and Deprivation Dimensions by Round",
    col.names = c(
      "Round", "N", "Mean LPI", "% High LPI",
      "% No Poverty", "% Low", "% Moderate", "% High",
      "% Food", "% Water", "% Medical", "% Fuel", "% Cash"
    )
  ) %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
  add_header_above(c(
    " " = 4,
    "LPI Category (%)" = 4,
    "Frequent Shortage by Dimension (%)" = 5
  )) %>%
  row_spec(0, bold = TRUE)
LPI Category Distribution and Deprivation Dimensions by Round
LPI Category (%)
Frequent Shortage by Dimension (%)
Round N Mean LPI % High LPI % No Poverty % Low % Moderate % High % Food % Water % Medical % Fuel % Cash
r4 1200 1.24 13.57 11.36 35.34 39.73 13.57 18.46 18.44 23.66 11.81 31.53
r5 1199 1.46 26.36 9.13 29.88 34.63 26.36 20.70 21.87 29.55 13.45 49.03
r6 1199 1.72 34.89 9.39 20.66 35.06 34.89 22.64 27.65 28.52 21.15 47.99
r7 1200 1.31 17.99 9.79 33.87 38.35 17.99 16.39 20.19 24.66 11.79 37.17
r8 1200 1.38 18.90 5.31 37.51 38.28 18.90 18.14 18.30 20.61 16.47 24.31
r9 1200 1.47 23.67 7.54 29.51 39.28 23.67 21.52 17.18 33.76 13.65 32.29
Code
# summary stats of LPI
lpi_summary <- df %>%
  group_by(round) %>%
  summarise(
    n            = n(),
    lpi_mean     = weighted.mean(lpi,      w = withinwt, na.rm = TRUE),
    pct_high_lpi = weighted.mean(lpi_high, w = withinwt, na.rm = TRUE) * 100,
    .groups = "drop"
  ) %>%
  mutate(
    lpi_change      = lpi_mean     - lag(lpi_mean),
    high_lpi_change = pct_high_lpi - lag(pct_high_lpi)
  )

# print LPI Summary 
lpi_summary %>%
  kable(
    digits  = 3,
    caption = "LPI Summary Statistics by Afrobarometer Round",
    col.names = c("Round", "N", "Mean LPI", "% High LPI", "Δ Mean LPI", "Δ % High LPI")
  ) %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
  row_spec(0, bold = TRUE)
LPI Summary Statistics by Afrobarometer Round
Round N Mean LPI % High LPI Δ Mean LPI Δ % High LPI
r4 1200 1.238 13.569 NA NA
r5 1199 1.461 26.361 0.223 12.791
r6 1199 1.716 34.888 0.255 8.527
r7 1200 1.311 17.989 -0.405 -16.899
r8 1200 1.375 18.901 0.064 0.912
r9 1200 1.474 23.666 0.098 4.765
Code
# Filter to operational infrastructure only
# Drop 2 under-construction 225KV CLSG lines (regional interconnector)
lines_op <- lines |>
  filter(is.na(Comment) |
         !str_detect(Comment, regex("under construction", ignore_case = TRUE)))

stations_op <- stations |>
  filter(!str_detect(Business, regex("under construction", ignore_case = TRUE)))

Figure 1: Conceptual Framework

Code
p <- grViz("
digraph conceptual_framework {
  graph [layout = dot, rankdir = TB]
  node [shape = box, style = filled, color = lightgrey]
  A [label = 'Household Socioeconomic Factors\n(Lived Poverty, Education, Employment)']
  B [label = 'Place-Based Factors\n(Urban/Rural, Proximity to Grid Infrastructure)']
  C [label = 'Electricity Access']
  D [label = 'Spatial Inequality\n(Bright vs Dark Zones)']
  E [label = 'Variance Decomposition\n(Within vs Between, ICC)']
  A -> C
  B -> C
  C -> D
  D -> E
}
")

p
Table 1: Observations per Round of Afrobarometer Data
Round 4 Round 5 Round 6 Round 7 Round 8 Round 9
Overall Sample 1,200 1,199 1,199 1,200 1,200 1,200

Table 2: Descriptive Statistics: Accesss to Electricity in Liberia

Code
df_descptive <- df %>%
  filter(
    !is.na(electricity_access),
    !is.na(employed),
    !is.na(lpi_cat),
    !is.na(education_cat)) %>%
  mutate(
    round_label = case_when(
      round == "r4" ~ "Round 4",
      round == "r5" ~ "Round 5",
      round == "r6" ~ "Round 6",
      round == "r7" ~ "Round 7",
      round == "r8" ~ "Round 8",
      round == "r9" ~ "Round 9",
      TRUE          ~ as.character(round)
    ),
    electricity_access = case_when(
      electricity_access == 1 ~ "Has electricity access",
      electricity_access == 0 ~ "Does not have electricity access"
    ),
    employed = case_when(
      employed == 1 ~ "Employed",
      employed == 0 ~ "Not employed"
    )
  )


# helper to compute n (%) per variable
summarise_cat <- function(df, var, label) {
  df %>%
    count(level = as.character(.data[[var]])) %>%
    mutate(
      variable = label,
      stat     = paste0(n, " (", round(n / sum(n) * 100, 1), "%)")
    ) %>%
    select(variable, level, stat)
}

vars <- list(
  electricity_access = "Electricity Access",
  lpi_cat            = "Lived Poverty Index",
  education_cat      = "Education Level",
  employed           = "Employment Status",
  sex                = "Gender",
  urbanity           = "Urbanity",
  county             = "county",
  round_label        = "Survey Round"
)

tbl_desc <- map_dfr(names(vars), ~ summarise_cat(df_descptive, .x, vars[[.x]])) %>%
  rename(
    "Characteristic" = variable,
    " "              = level,
    "N (%)"          = stat
  )

tbl_desc %>%
  kable(
    caption = "Table 2: Descriptive Statistics: Access to Electricity in Liberia",
    align   = c("l", "l", "r")
  ) %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
  row_spec(0, bold = TRUE) %>%
  pack_rows(index = table(fct_inorder(tbl_desc$Characteristic))) %>%
  kableExtra::footnote(
    general       = "N = 7,013. N (%) reported for all categorical variables. Source: Afrobarometer rounds 4–9, Liberia. Author's own calculations.",
    general_title = "Notes:"
  )
Table 2: Descriptive Statistics: Access to Electricity in Liberia
Characteristic N (%)
Electricity Access
Electricity Access Does not have electricity access 4913 (70.1%)
Electricity Access Has electricity access 2100 (29.9%)
Lived Poverty Index
Lived Poverty Index High lived poverty 1591 (22.7%)
Lived Poverty Index Low lived poverty 2185 (31.2%)
Lived Poverty Index Moderate lived poverty 2662 (38%)
Lived Poverty Index No lived poverty 575 (8.2%)
Education Level
Education Level High Education 936 (13.3%)
Education Level Low Education 3253 (46.4%)
Education Level Medium Education 2824 (40.3%)
Employment Status
Employment Status Employed 1537 (21.9%)
Employment Status Not employed 5476 (78.1%)
Gender
Gender Female 3504 (50%)
Gender Male 3509 (50%)
Urbanity
Urbanity Rural 3622 (51.6%)
Urbanity Urban 3391 (48.4%)
county
county Bomi 228 (3.3%)
county Bong 569 (8.1%)
county Gbarpolu 252 (3.6%)
county Grand Bassa 423 (6%)
county Grand Cape Mount 217 (3.1%)
county Grand Gedeh 283 (4%)
county Grand Kru 95 (1.4%)
county Lofa 546 (7.8%)
county Margibi 422 (6%)
county Maryland 255 (3.6%)
county Montserrado 2359 (33.6%)
county Nimba 893 (12.7%)
county River Gee 143 (2%)
county Rivercess 136 (1.9%)
county Sinoe 192 (2.7%)
Survey Round
Survey Round Round 4 1169 (16.7%)
Survey Round Round 5 1106 (15.8%)
Survey Round Round 6 1162 (16.6%)
Survey Round Round 7 1189 (17%)
Survey Round Round 8 1192 (17%)
Survey Round Round 9 1195 (17%)
Notes:
N = 7,013. N (%) reported for all categorical variables. Source: Afrobarometer rounds 4–9, Liberia. Author's own calculations.
Code
# export 

# as a png 
# tbl_desc %>%
#   kable(
#     caption = "Table 2: Descriptive Statistics: Access to Electricity in Liberia",
#     align   = c("l", "l", "r")
#   ) %>%
#   kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
#   row_spec(0, bold = TRUE) %>%
#   pack_rows(index = table(fct_inorder(tbl_desc$Characteristic))) %>%
#   footnote(
#     general       = "N = 7,013. N (%) reported for all categorical variables. Source: Afrobarometer rounds 4–9, Liberia. Author's own calculations.",
#     general_title = "Notes:"
#   ) %>%
#   save_kable("table2_descriptive.png", zoom = 2)  
# 
# as a table for docx 
# tbl_desc %>%
#   flextable() %>%
#   set_caption("Table 2: Descriptive Statistics: Access to Electricity in Liberia") %>%
#   bold(part = "header") %>%
#   set_table_properties(width = 1, layout = "autofit") %>%
#   footnote(
#     i = 1, j = 1,
#     value = as_paragraph("Notes: N = 7,013. N (%) reported for all categorical variables. Source: Afrobarometer rounds 4–9, Liberia. Author's own calculations."),
#     ref_symbols = " ",
#     part = "header"
#   ) %>%
#   save_as_docx(path = "table2_descriptive.docx")

Table 3: Key Sample Statistics by Survey Round

Code
time_table <- df_descptive |>
  group_by(round, round_label) |>
  summarise(
    n            = n(),
    pct_access   = round(100 * mean(electricity_access == "Has electricity access", na.rm = TRUE), 1),
    pct_urban    = round(100 * mean(urbanity == "Urban", na.rm = TRUE), 1),
    mean_lpi     = round(mean(as.numeric(lpi), na.rm = TRUE), 2),
    pct_employed = round(100 * mean(employed == "Employed", na.rm = TRUE), 1),
    .groups = "drop"
  ) |>
  arrange(round) |>
  select(round_label, n, pct_urban, pct_employed, mean_lpi, pct_access) |>
  rename(
    "Round" = round_label,
    "N"            = n,
    "% Urban"      = pct_urban,
    "% Employed"   = pct_employed,
    "Mean LPI"     = mean_lpi,
    "% Electricity access"     = pct_access
  )

time_table %>%
  kable(
    caption = "Table 3: Key Sample Statistics by Survey Round",
    align   = c("l", "r", "r", "r", "r", "r")
  ) %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
  row_spec(0, bold = TRUE) %>%
  kableExtra::footnote(
    general       = "Source: Afrobarometer rounds 4–9, Liberia. Author's own calculations.",
    general_title = "Notes:"
  )
Table 3: Key Sample Statistics by Survey Round
Round N % Urban % Employed Mean LPI % Electricity access
Round 4 1169 47.7 18.4 1.27 9.2
Round 5 1106 49.0 27.2 1.48 19.3
Round 6 1162 48.5 27.6 1.74 32.4
Round 7 1189 47.9 16.0 1.30 31.4
Round 8 1192 48.7 21.8 1.38 40.0
Round 9 1195 48.2 20.9 1.47 46.2
Notes:
Source: Afrobarometer rounds 4–9, Liberia. Author's own calculations.

Figure 2: Access to Electricity in Liberia by Urbanity, 2008-2021

Code
fig_time_urban <- df_descptive |>
  mutate(
    elec_num   = as.numeric(electricity_access == "Has electricity access"),
    round_year = case_when(
      round == "r4" ~ 2008,
      round == "r5" ~ 2012,
      round == "r6" ~ 2014,
      round == "r7" ~ 2017,
      round == "r8" ~ 2019,
      round == "r9" ~ 2021
    )
  ) |>
  group_by(round_year, urbanity) |>
  summarise(
    elec_rate = mean(elec_num, na.rm = TRUE),
    se        = sd(elec_num, na.rm = TRUE) / sqrt(sum(!is.na(elec_num))),
    .groups   = "drop"
  ) |>
  filter(!is.na(urbanity)) |>
  ggplot(aes(x = round_year, y = elec_rate,
             colour = urbanity, group = urbanity)) +
  geom_ribbon(aes(ymin = elec_rate - 1.96 * se,
                  ymax = elec_rate + 1.96 * se,
                  fill = urbanity),
              alpha = 0.12, colour = NA) +
  geom_line(linewidth = 1.0) +
  geom_point(size = 3, shape = 21,
             aes(fill = urbanity), colour = "white", stroke = 1.5) +
  geom_text(
    aes(label = percent(elec_rate, accuracy = 1)),
    vjust       = -1.2,
    size        = 2.5,
    fontface    = "bold",
    show.legend = FALSE
  ) +
  geom_vline(xintercept = 2016.5, linetype = "dashed",
             colour = COL_POST, linewidth = 0.6) +
  annotate("text", x = 2017, y = 0.02,
           label = "Mount Coffee\nonline", hjust = 0,
           size = 2.7, colour = COL_POST) +
  scale_colour_manual(values = c("Urban" = COL_TREAT, "Rural" = COL_CONTROL)) +
  scale_fill_manual(  values = c("Urban" = COL_TREAT, "Rural" = COL_CONTROL)) +
  scale_y_continuous(labels = percent_format(accuracy = 1), limits = c(0, 1)) +
  scale_x_continuous(breaks = c(2008, 2012, 2014, 2017, 2019, 2021)) +
  coord_cartesian(ylim = c(0, 1)) +
  labs(
    title    = "Access to Electricity in Liberia by Urbanity, 2008–2021",
    subtitle = "Urban-rural gap widens significantly after Mount Coffee rehabilitation in 2016",
    x        = NULL, y = "Share with electricity access",
    colour   = NULL, fill = NULL
  ) +
  theme_thesis()

print(fig_time_urban)

Table 4: Electricity Access by county (Pooled 2008-2021, descending order by access rate)

Code
# Investigating county Access 

county_table <- df_descptive |>
  group_by(county) |>
  summarise(
    n            = n(),
    n_access     = sum(electricity_access == "Has electricity access", na.rm = TRUE),
    pct_access   = round(100 * mean(electricity_access == "Has electricity access", na.rm = TRUE), 1),
    mean_lpi     = round(mean(as.numeric(lpi), na.rm = TRUE), 2),
    pct_urban    = round(100 * mean(urbanity == "Urban", na.rm = TRUE), 1),
    .groups = "drop"
  ) |>
  arrange(desc(pct_access)) |>
  rename(
    county               = county,
    N        = n,
    "(N) with access"   = n_access,
    "% with access" = pct_access,
    "Mean LPI"           = mean_lpi,
    "% Urban"            = pct_urban
  )

county_table %>%
  kable(
    caption = "Table 4: Electricity Access by county (Pooled 2008-2021, descending order by access rate)",
    align   = c("l", "r", "r", "r", "r", "r")
  ) %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
  row_spec(0, bold = TRUE) %>%
  kableExtra::footnote(
    general       = "Source: Afrobarometer rounds 4–9, Liberia. Author's own calculations.",
    general_title = "Notes:"
  )
Table 4: Electricity Access by county (Pooled 2008-2021, descending order by access rate)
county N (N) with access % with access Mean LPI % Urban
Montserrado 2359 1369 58.0 1.23 91.9
Grand Gedeh 283 110 38.9 1.41 33.9
Maryland 255 87 34.1 1.33 34.5
Margibi 422 133 31.5 1.61 44.3
Grand Bassa 423 79 18.7 1.66 25.3
Nimba 893 133 14.9 1.55 20.7
Bomi 228 32 14.0 1.66 28.1
Gbarpolu 252 32 12.7 1.51 18.7
Lofa 546 69 12.6 1.52 33.3
Sinoe 192 16 8.3 1.50 24.0
River Gee 143 8 5.6 1.53 28.0
Bong 569 24 4.2 1.62 29.0
Grand Cape Mount 217 8 3.7 1.46 3.7
Grand Kru 95 0 0.0 1.32 0.0
Rivercess 136 0 0.0 1.64 5.1
Notes:
Source: Afrobarometer rounds 4–9, Liberia. Author's own calculations.
Code
#export

# as a table for docx 

# county_table %>%
#   flextable() %>%
#   set_caption("Electricity Access by county (Pooled 2008-2021, descending order by access rate") %>%
#   bold(part = "header") %>%
#   set_table_properties(width = 1, layout = "autofit") %>%
#   footnote(
#     i = 1, j = 1,
#     value = as_paragraph("Source: Afrobarometer rounds 4–9, Liberia. Author's own calculations."),
#     ref_symbols = " ",
#     part = "header"
#   ) %>%
#   save_as_docx(path = "table4_descriptive.docx")

Figure 3: LPI Distribution by Electricity Access Status in Liberia

Code
fig_lpi <- df_descptive |>
  filter(!is.na(lpi_cat), !is.na(electricity_access)) |>
  mutate(
    access_label = if_else(electricity_access == "Has electricity access",
                           "Has access to electricity", "No access to electricity"),
    lpi_cat = factor(lpi_cat,
                     levels = c("No lived poverty", "Low lived poverty",
                                "Moderate lived poverty", "High lived poverty"))
  ) |>
  count(access_label, lpi_cat) |>
  group_by(access_label) |>
  mutate(pct = n / sum(n)) |>
  ungroup() |>
  ggplot(aes(x = lpi_cat, y = pct, fill = access_label)) +
  geom_col(position = "dodge", width = 0.7, alpha = 0.85) +
  geom_text(
    aes(label = percent(pct, accuracy = 0.1)),
    position    = position_dodge(width = 0.7),
    vjust       = -0.5,
    size        = 2.8,
    fontface    = "bold",
    show.legend = FALSE
  ) +
  scale_fill_manual(values = c("Has access to electricity" = COL_TREAT,
                               "No access to electricity"  = COL_CONTROL)) +
  scale_y_continuous(labels = percent_format(accuracy = 1)) +
  coord_cartesian(ylim = c(0, 0.5)) +
  labs(
    title    = "LPI Distribution by Electricity Access Status in Liberia",
    subtitle = "Households without electricity are disproportionately in high-poverty categories",
    x        = "LPI Category",
    y        = "Share of respondents",
    fill     = NULL
  ) +
  theme_thesis()

print(fig_lpi)

Figure 4: Electricity access by poverty level and education

Code
fig_edu_lpi <- df_descptive |>
  filter(!is.na(lpi_cat), !is.na(education_cat)) |>
  mutate(
    lpi_cat = factor(lpi_cat,
                     levels = c("No lived poverty", "Low lived poverty",
                                "Moderate lived poverty", "High lived poverty")),
    education_cat = factor(education_cat,
                           levels = c("Low Education", "Medium Education", "High Education"))
  ) |>
  group_by(lpi_cat, education_cat) |>
  summarise(
    elec_rate = mean(electricity_access == "Has electricity access", na.rm = TRUE),
    .groups   = "drop"
  ) |>
  ggplot(aes(x = lpi_cat, y = elec_rate, fill = education_cat)) +
  geom_col(position = "dodge", width = 0.75, alpha = 0.85) +
  geom_text(
    aes(label = percent(elec_rate, accuracy = 0.1)),
    position    = position_dodge(width = 0.75),
    vjust       = -0.5,
    size        = 2.8,
    fontface    = "bold",
    show.legend = FALSE
  ) +
  scale_fill_manual(values = c(
    "Low Education"    = "#B0C4D8",
    "Medium Education" = "#457B9D",
    "High Education"   = COL_TREAT
  )) +
  scale_y_continuous(labels = percent_format(accuracy = 1)) +
  coord_cartesian(ylim = c(0, 1)) +
  labs(
    title    = "Electricity Access by Poverty and Education Levels in Liberia",
    subtitle = "Higher education mitigates but does not eliminate the poverty-access gradient",
    x        = "LPI Category",
    y        = "Share with access to electricity",
    fill     = "Education level"
  ) +
  theme_thesis() +
  theme(axis.text.x = element_text(angle = 0, hjust = 0.5))

print(fig_edu_lpi)

Figure 5: Electricity access rate by county and survey round

Code
fig_heat <- df_descptive |>
  mutate(
    elec_num   = as.numeric(electricity_access == "Has electricity access"),
    round_year = case_when(
      round == "r4" ~ 2008,
      round == "r5" ~ 2012,
      round == "r6" ~ 2014,
      round == "r7" ~ 2017,
      round == "r8" ~ 2019,
      round == "r9" ~ 2021
    )
  ) |>
  group_by(county, round_year) |>
  summarise(elec_rate = mean(elec_num, na.rm = TRUE), .groups = "drop") |>
  mutate(county = fct_reorder(county, elec_rate, .fun = mean)) |>
  ggplot(aes(x = factor(round_year), y = county, fill = elec_rate)) +
  geom_tile(colour = "white", linewidth = 0.5) +
  geom_text(aes(label = paste0(round(elec_rate * 100), "%")),
            size = 2.5, colour = "white", fontface = "bold") +
  scale_fill_gradient(
    low    = "#EAF3FB",
    high   = COL_TREAT,
    name   = "Access rate",
    labels = percent_format(accuracy = 1)
  ) +
  scale_x_discrete(labels = c("2008\n(Round 4)", "2012\n(Round 5)", "2014\n(Round 6)",
                               "2017\n(Round 7)", "2019\n(Round 8)", "2021\n(Round 9)")) +
  labs(
    title    = "Electricity Access Rate by county and Survey Round",
    subtitle = "Sharp post-2016 increase in Montserrado; minimal changes elsewhere",
    x        = NULL, y = NULL
  ) +
  theme_thesis() +
  theme(
    panel.grid       = element_blank(),
    axis.text.y      = element_text(size = 8),
    legend.key.width = unit(1.5, "cm")
  )

print(fig_heat)

Access Maps per Round

Code
# bring in spatial data 
spatial <- df 

# recode missings 
spatial <- spatial |> 
  mutate(
    ea_svc_a = recode_missing(ea_svc_a, extra = c(9))
  )

# filter NAs 
spatial <- spatial |> 
  filter(!is.na(latitude)) |> 
  filter(!is.na(longitude))

#set coords 
spatial_sf <- st_as_sf(
  spatial,
  coords = c("longitude", "latitude"),
  crs = 4326
)

# transform
liberia_adm1 <- suppressMessages(st_transform(liberia_adm1, st_crs(spatial_sf)))


# round 4 

#load data 
spatial_r4 <- spatial |> 
  filter(round=="r4")


#set coords 
spatial_sf_r4 <- st_as_sf(
  spatial_r4,
  coords = c("longitude", "latitude"),
  crs = 4326
)


# transform
liberia_adm1_r4 <- suppressMessages(
  st_transform(liberia_adm1, st_crs(spatial_sf_r4))
) 


# Aggregate data to counties 
county_access_agg_r4 <- spatial_sf_r4 %>%
  st_join(liberia_adm1_r4) %>%
  st_drop_geometry() %>% 
  group_by(region) %>%
  summarise(
    access_rate = mean(ea_svc_a == 1, na.rm = TRUE),
    n = n(),
    .groups = "drop"
  )


# data prep 
liberia_access_r4 <- liberia_adm1_r4 %>%
  mutate(
    region = case_when(
      county == "Bomi"           ~ 380,
      county == "Bong"           ~ 381,
      county == "Gbarpolu"       ~ 382,
      county == "Grand Bassa"    ~ 383,
      county == "Grand Cape Mount" ~ 384,
      county == "Grand Gedeh"    ~ 385,
      county == "Grand Kru"      ~ 386,
      county == "Lofa"           ~ 387,
      county == "Margibi"        ~ 388,
      county == "Maryland"       ~ 389,
      county == "Montserrado"    ~ 390,
      county == "Nimba"          ~ 391,
      county == "Rivercess"      ~ 392,
      county == "River Gee"      ~ 393,
      county == "Sinoe"          ~ 394,
      TRUE ~ NA_real_
    )
  )

# Join to polygons and map
liberia_access_r4 <- liberia_access_r4 |> 
  mutate(region = as.character(region)) |> 
  left_join(county_access_agg_r4, by = "region")

aggregate_map_r4 <- ggplot() +
  geom_sf(data = liberia_access_r4, aes(fill = access_rate), color = "black", alpha = 0.8) +
  scale_fill_viridis_c(
    name = "Electricity Access Rate",
    labels = scales::percent,
    option = "plasma",  
    na.value = "grey90"
  ) +
  labs(title = "Electricity Access by county in Liberia (Round 4)") +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
    legend.position = "right"
  )

plot(aggregate_map_r4)
Code
# round 5 

#load data 
spatial_r5 <- spatial |> 
  filter(round=="r5")


#set coords 
spatial_sf_r5 <- st_as_sf(
  spatial_r5,
  coords = c("longitude", "latitude"),
  crs = 4326
)


# transform
liberia_adm1_r5 <- suppressMessages(
  st_transform(liberia_adm1, st_crs(spatial_sf_r5))
) 


# Aggregate data to counties (keep only attributes)
county_access_agg_r5 <- spatial_sf_r5 %>%
  st_join(liberia_adm1_r5) %>%
  st_drop_geometry() %>%  
  group_by(region) %>%
  summarise(
    access_rate = mean(ea_svc_a == 1, na.rm = TRUE),
    n = n(),
    .groups = "drop"
  )


# data prep 
liberia_access_r5 <- liberia_adm1_r5 %>%
  mutate(
    region = case_when(
      county == "Bomi"           ~ 380,
      county == "Bong"           ~ 381,
      county == "Gbarpolu"       ~ 382,
      county == "Grand Bassa"    ~ 383,
      county == "Grand Cape Mount" ~ 384,
      county == "Grand Gedeh"    ~ 385,
      county == "Grand Kru"      ~ 386,
      county == "Lofa"           ~ 387,
      county == "Margibi"        ~ 388,
      county == "Maryland"       ~ 389,
      county == "Montserrado"    ~ 390,
      county == "Nimba"          ~ 391,
      county == "Rivercess"      ~ 392,
      county == "River Gee"      ~ 393,
      county == "Sinoe"          ~ 394,
      TRUE ~ NA_real_
    )
  )

# Join to polygons and map
liberia_access_r5 <- liberia_access_r5 |> 
  mutate(region = as.character(region)) |> 
  left_join(county_access_agg_r5, by = "region")

aggregate_map_r5 <- ggplot() +
  geom_sf(data = liberia_access_r5, aes(fill = access_rate), color = "black", alpha = 0.8) +
  scale_fill_viridis_c(
    name = "Electricity Access Rate",
    labels = scales::percent,
    option = "plasma",  
    na.value = "grey90"
  ) +
  labs(title = "Electricity Access by county in Liberia (Round 5)") +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
    legend.position = "right"
  )

plot(aggregate_map_r5)
Code
# round 6 

#load data 
spatial_r6 <- spatial |> 
  filter(round=="r6")


#set coords 
spatial_sf_r6 <- st_as_sf(
  spatial_r6,
  coords = c("longitude", "latitude"),
  crs = 4326
)


# transform
liberia_adm1_r6 <- suppressMessages(
  st_transform(liberia_adm1, st_crs(spatial_sf_r6))
) 


# Aggregate data to counties (keep only attributes)
county_access_agg_r6 <- spatial_sf_r6 %>%
  st_join(liberia_adm1_r6) %>%
  st_drop_geometry() %>%  
  group_by(region) %>%
  summarise(
    access_rate = mean(ea_svc_a == 1, na.rm = TRUE),
    n = n(),
    .groups = "drop"
  )


# data prep 
liberia_access_r6 <- liberia_adm1_r6 %>%
  mutate(
    region = case_when(
      county == "Bomi"           ~ 380,
      county == "Bong"           ~ 381,
      county == "Gbarpolu"       ~ 382,
      county == "Grand Bassa"    ~ 383,
      county == "Grand Cape Mount" ~ 384,
      county == "Grand Gedeh"    ~ 385,
      county == "Grand Kru"      ~ 386,
      county == "Lofa"           ~ 387,
      county == "Margibi"        ~ 388,
      county == "Maryland"       ~ 389,
      county == "Montserrado"    ~ 390,
      county == "Nimba"          ~ 391,
      county == "Rivercess"      ~ 392,
      county == "River Gee"      ~ 393,
      county == "Sinoe"          ~ 394,
      TRUE ~ NA_real_
    )
  )

# Join to polygons and map
liberia_access_r6 <- liberia_access_r6 |> 
  mutate(region = as.character(region)) |> 
  left_join(county_access_agg_r6, by = "region")

aggregate_map_r6 <- ggplot() +
  geom_sf(data = liberia_access_r6, aes(fill = access_rate), color = "black", alpha = 0.8) +
  scale_fill_viridis_c(
    name = "Electricity Access Rate",
    labels = scales::percent,
    option = "plasma",  
    na.value = "grey90"
  ) +
  labs(title = "Electricity Access by county in Liberia (Round 6)") +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
    legend.position = "right"
  )

plot(aggregate_map_r6)
Code
# round 7 

#load data 
spatial_r7 <- spatial |> 
  filter(round=="r7")


#set coords 
spatial_sf_r7 <- st_as_sf(
  spatial_r7,
  coords = c("longitude", "latitude"),
  crs = 4326
)


# transform
liberia_adm1_r7 <- suppressMessages(
  st_transform(liberia_adm1, st_crs(spatial_sf_r7)) 
)


# Aggregate data to counties (keep only attributes)
county_access_agg_r7 <- spatial_sf_r7 %>%
  st_join(liberia_adm1_r7) %>%
  st_drop_geometry() %>%  
  group_by(region) %>%
  summarise(
    access_rate = mean(ea_svc_a == 1, na.rm = TRUE),
    n = n(),
    .groups = "drop"
  )


# data prep 
liberia_access_r7 <- liberia_adm1_r7 %>%
  mutate(
    region = case_when(
      county == "Bomi"           ~ 380,
      county == "Bong"           ~ 381,
      county == "Gbarpolu"       ~ 382,
      county == "Grand Bassa"    ~ 383,
      county == "Grand Cape Mount" ~ 384,
      county == "Grand Gedeh"    ~ 385,
      county == "Grand Kru"      ~ 386,
      county == "Lofa"           ~ 387,
      county == "Margibi"        ~ 388,
      county == "Maryland"       ~ 389,
      county == "Montserrado"    ~ 390,
      county == "Nimba"          ~ 391,
      county == "Rivercess"      ~ 392,
      county == "River Gee"      ~ 393,
      county == "Sinoe"          ~ 394,
      TRUE ~ NA_real_
    )
  )

# Join to polygons and map
liberia_access_r7 <- liberia_access_r7 |> 
  mutate(region = as.character(region)) |> 
  left_join(county_access_agg_r7, by = "region")

aggregate_map_r7 <- ggplot() +
  geom_sf(data = liberia_access_r7, aes(fill = access_rate), color = "black", alpha = 0.8) +
  scale_fill_viridis_c(
    name = "Electricity Access Rate",
    labels = scales::percent,
    option = "plasma",  
    na.value = "grey90"
  ) +
  labs(title = "Electricity Access by county in Liberia (Round 7)") +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
    legend.position = "right"
  )

plot(aggregate_map_r7)
Code
# round 8 

#load data 
spatial_r8 <- spatial |> 
  filter(round=="r8")


#set coords 
spatial_sf_r8 <- st_as_sf(
  spatial_r8,
  coords = c("longitude", "latitude"),
  crs = 4326
)


# transform
liberia_adm1_r8 <- suppressMessages(
 st_transform(liberia_adm1, st_crs(spatial_sf_r8)) 
) 


# Aggregate data to counties (keep only attributes)
county_access_agg_r8 <- spatial_sf_r8 %>%
  st_join(liberia_adm1_r8) %>%
  st_drop_geometry() %>%  
  group_by(region) %>%
  summarise(
    access_rate = mean(ea_svc_a == 1, na.rm = TRUE),
    n = n(),
    .groups = "drop"
  )


# data prep 
liberia_access_r8 <- liberia_adm1_r8 %>%
  mutate(
    region = case_when(
      county == "Bomi"           ~ 380,
      county == "Bong"           ~ 381,
      county == "Gbarpolu"       ~ 382,
      county == "Grand Bassa"    ~ 383,
      county == "Grand Cape Mount" ~ 384,
      county == "Grand Gedeh"    ~ 385,
      county == "Grand Kru"      ~ 386,
      county == "Lofa"           ~ 387,
      county == "Margibi"        ~ 388,
      county == "Maryland"       ~ 389,
      county == "Montserrado"    ~ 390,
      county == "Nimba"          ~ 391,
      county == "Rivercess"      ~ 392,
      county == "River Gee"      ~ 393,
      county == "Sinoe"          ~ 394,
      TRUE ~ NA_real_
    )
  )

# Join to polygons and map
liberia_access_r8 <- liberia_access_r8 |> 
  mutate(region = as.character(region)) |> 
  left_join(county_access_agg_r8, by = "region")

aggregate_map_r8 <- ggplot() +
  geom_sf(data = liberia_access_r8, aes(fill = access_rate), color = "black", alpha = 0.8) +
  scale_fill_viridis_c(
    name = "Electricity Access Rate",
    labels = scales::percent,
    option = "plasma",  
    na.value = "grey90"
  ) +
  labs(title = "Electricity Access by county in Liberia (Round 8)") +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
    legend.position = "right"
  )

plot(aggregate_map_r8)
Code
# round 9 

#load data 
spatial_r9 <- spatial |> 
  filter(round=="r9")


#set coords 
spatial_sf_r9 <- st_as_sf(
  spatial_r9,
  coords = c("longitude", "latitude"),
  crs = 4326
)


# transform
liberia_adm1_r9 <- suppressMessages(
  st_transform(liberia_adm1, st_crs(spatial_sf_r9))
) 


# Aggregate data to counties (keep only attributes)
county_access_agg_r9 <- spatial_sf_r9 %>%
  st_join(liberia_adm1_r9) %>%
  st_drop_geometry() %>%  
  group_by(region) %>%
  summarise(
    access_rate = mean(ea_svc_a == 1, na.rm = TRUE),
    n = n(),
    .groups = "drop"
  )


# data prep 
liberia_access_r9 <- liberia_adm1_r9 %>%
  mutate(
    region = case_when(
      county == "Bomi"           ~ 380,
      county == "Bong"           ~ 381,
      county == "Gbarpolu"       ~ 382,
      county == "Grand Bassa"    ~ 383,
      county == "Grand Cape Mount" ~ 384,
      county == "Grand Gedeh"    ~ 385,
      county == "Grand Kru"      ~ 386,
      county == "Lofa"           ~ 387,
      county == "Margibi"        ~ 388,
      county == "Maryland"       ~ 389,
      county == "Montserrado"    ~ 390,
      county == "Nimba"          ~ 391,
      county == "Rivercess"      ~ 392,
      county == "River Gee"      ~ 393,
      county == "Sinoe"          ~ 394,
      TRUE ~ NA_real_
    )
  )

# Join to polygons and map
liberia_access_r9 <- liberia_access_r9 |> 
  mutate(region = as.character(region)) |> 
  left_join(county_access_agg_r9, by = "region")

aggregate_map_r9 <- ggplot() +
  geom_sf(data = liberia_access_r9, aes(fill = access_rate), color = "black", alpha = 0.8) +
  scale_fill_viridis_c(
    name = "Electricity Access Rate",
    labels = scales::percent,
    option = "plasma",  
    na.value = "grey90"
  ) +
  labs(title = "Electricity Access by county in Liberia (Round 9)") +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
    legend.position = "right"
  )

plot(aggregate_map_r9)

Figure 6: Electricity Access by county in Liberia (2008-2021)

Code
# Pre-compute county centroids 
county_centroids <- suppressWarnings(st_centroid(liberia_adm1)) |>
  mutate(
    X = st_coordinates(geometry)[, 1],
    Y = st_coordinates(geometry)[, 2]
  ) |>
  st_drop_geometry()

# inset locator map 
africa <- ne_countries(continent = "Africa", returnclass = "sf")

inset_map <- ggplot(africa) +
  geom_sf(fill = "grey85", color = "white", linewidth = 0.2) +
  geom_sf(data = liberia_adm1, fill = "orange", color = NA) +
  theme_void() +
  theme(
    panel.border = element_rect(color = "grey40", fill = NA, linewidth = 0.5)
  )

# Shared scale limits 
access_limits <- c(0, 1)

# helpful function to build map
make_access_map <- function(data, round_label, show_legend = FALSE) {
  ggplot() +
    geom_sf(data = data, aes(fill = access_rate), color = "white", linewidth = 0.3) +
    scale_fill_viridis_c(
      name     = "Access Rate",
      labels   = scales::percent,
      option   = "plasma",
      limits   = access_limits,
      na.value = "grey90"
    ) +
    # county labels on every panel
    geom_text_repel(
      data          = county_centroids,
      aes(x = X, y = Y, label = county),
      size          = 1.6,
      color         = "white",
      fontface      = "bold",
      segment.size  = 0.2,
      segment.color = "grey60",
      max.overlaps  = 20,
      inherit.aes   = FALSE
    ) +
    labs(title = round_label) +
    theme_void() +
    theme(
      plot.title      = element_text(hjust = 0.5, size = 13, margin = margin(b = 5)),
      legend.position = if (show_legend) "right" else "none"
    )
}

# Build individual maps
aggregate_map_r4 <- make_access_map(liberia_access_r4, "Round 4")
aggregate_map_r5 <- make_access_map(liberia_access_r5, "Round 5")
aggregate_map_r6 <- make_access_map(liberia_access_r6, "Round 6", show_legend = TRUE)
aggregate_map_r7 <- make_access_map(liberia_access_r7, "Round 7")
aggregate_map_r8 <- make_access_map(liberia_access_r8, "Round 8")

# r9 carries scale bar, north arrow, and inset 
aggregate_map_r9 <- make_access_map(liberia_access_r9, "Round 9", show_legend = TRUE) +
  annotation_scale(
    location   = "bl",
    width_hint = 0.3,
    style      = "ticks",
    text_cex   = 0.6
  ) +
  annotation_north_arrow(
    location    = "tr",
    which_north = "true",
    style       = north_arrow_minimal(),
    height      = unit(0.8, "cm"),
    width       = unit(0.8, "cm")
  )

# Overlay inset on r9
aggregate_map_r9 <- aggregate_map_r9 +
  inset_element(inset_map, left = 0, bottom = 0.05, right = 0.32, top = 0.37)

# combine map
new_combined_map <- (aggregate_map_r4 | aggregate_map_r5 | aggregate_map_r6) /
                (aggregate_map_r7 | aggregate_map_r8 | aggregate_map_r9) +
  plot_annotation(
    title    = "Electricity Access by county in Liberia",
    subtitle = "Shows access rate for each county spaning rounds 4–9",
    theme = theme(
      plot.title    = element_text(hjust = 0.5, size = 16, face = "bold"),
      plot.subtitle = element_text(hjust = 0.5, size = 12, color = "black"),
      plot.margin   = margin(10, 10, 10, 10)
    )
  )

plot(new_combined_map)

Figure 7: Bright Zones and Dark Zones of Electricity Access in Liberia

Code
# Aggregate access rates
county_access_all_rounds <- spatial |>
  filter(round %in% c("r4", "r5", "r6", "r7", "r8", "r9")) |>
  st_as_sf(coords = c("longitude", "latitude"), crs = 4326) |>
  st_join(st_transform(liberia_adm1, 4326)) |>
  st_drop_geometry() |>
  rename(county = county.x) |>          # <-- fix column name conflict
  group_by(county, round) |>
  summarise(
    access_rate = mean(ea_svc_a == 1, na.rm = TRUE),
    n           = n(),
    .groups     = "drop"
  )

# Global breakpoints 
global_breaks <- classIntervals(
  county_access_all_rounds$access_rate,
  n     = 2,
  style = "equal"
)

# Apply zones 
county_access_zones <- county_access_all_rounds |>
  mutate(
    zone = cut(
      access_rate,
      global_breaks$brks,
      labels         = c("Dark Zone", "Bright Zone"),
      include.lowest = TRUE
    )
  ) |>
  filter(!is.na(county))

# Attach region codes 
liberia_adm1_coded <- liberia_adm1 |>
  mutate(
    region = case_when(
      county == "Bomi"             ~ "380",
      county == "Bong"             ~ "381",
      county == "Gbarpolu"         ~ "382",
      county == "Grand Bassa"      ~ "383",
      county == "Grand Cape Mount" ~ "384",
      county == "Grand Gedeh"      ~ "385",
      county == "Grand Kru"        ~ "386",
      county == "Lofa"             ~ "387",
      county == "Margibi"          ~ "388",
      county == "Maryland"         ~ "389",
      county == "Montserrado"      ~ "390",
      county == "Nimba"            ~ "391",
      county == "Rivercess"        ~ "392",
      county == "River Gee"        ~ "393",
      county == "Sinoe"            ~ "394",
      TRUE ~ NA_character_
    )
  )

# County population (2022 Liberia Census) 
liberia_population <- tibble(
  county = c("Bomi", "Bong", "Gbarpolu", "Grand Bassa", "Grand Cape Mount",
             "Grand Gedeh", "Grand Kru", "Lofa", "Margibi", "Maryland",
             "Montserrado", "Nimba", "Rivercess", "River Gee", "Sinoe"),
  population = c(133705, 467561, 95995, 293689, 178867,
                 216692, 109342, 367376, 304946, 172587,
                 1920965, 621841, 90819, 124653, 151149)
)

# County centroids with population 
county_centroids_pop <- suppressWarnings(st_centroid(liberia_adm1_coded)) |>
  mutate(
    X = st_coordinates(geometry)[, 1],
    Y = st_coordinates(geometry)[, 2]
  ) |>
  st_drop_geometry() |>
  left_join(liberia_population, by = "county")

# Build inset locator map 
africa <- ne_countries(continent = "Africa", returnclass = "sf")

inset_map <- ggplot(africa) +
  geom_sf(fill = "grey85", color = "white", linewidth = 0.2) +
  geom_sf(data = liberia_adm1, fill = "orange", color = NA) +
  theme_void() +
  theme(
    panel.border = element_rect(color = "grey40", fill = NA, linewidth = 0.5)
  )

# Join zones to polygons 
liberia_zones_rounds <- county_access_zones |>
  left_join(liberia_adm1_coded, by = "county") |>
  st_as_sf()

# Map building function 
make_zone_map <- function(data, round_label, show_legend = FALSE) {
  ggplot(data) +
    geom_sf(aes(fill = zone), color = "white", linewidth = 0.3) +
    scale_fill_manual(
      values       = c("Dark Zone" = "black", "Bright Zone" = "orange"),
      name         = "Electricity Access Zone",
      na.value     = "grey90",
      na.translate = FALSE,
      drop         = FALSE
    ) +
    geom_point(
      data        = county_centroids_pop,
      aes(x = X, y = Y, size = population),
      shape       = 21,
      fill        = "white",
      color       = "grey30",
      alpha       = 0.4,
      inherit.aes = FALSE
    ) +
    scale_size_area(
      name     = "Population",
      max_size = 10,
      labels   = scales::comma,
      breaks   = c(100000, 500000, 1000000, 1500000)
    ) +
    geom_text_repel(
      data          = county_centroids_pop,
      aes(x = X, y = Y, label = county),
      size          = 1.6,
      color         = "white",
      fontface      = "bold",
      segment.size  = 0.2,
      segment.color = "grey60",
      max.overlaps  = 20,
      inherit.aes   = FALSE
    ) +
    labs(title = round_label) +
    theme_void() +
    theme(
      plot.title      = element_text(hjust = 0.5, size = 11, face = "bold"),
      legend.title    = element_text(size = 10, face = "bold"),
      legend.text     = element_text(size = 9),
      legend.position = if (show_legend) "right" else "none"
    )
}

# Build panels 
zone_r4 <- make_zone_map(filter(liberia_zones_rounds, round == "r4"), "Round 4")
zone_r5 <- make_zone_map(filter(liberia_zones_rounds, round == "r5"), "Round 5")
zone_r6 <- make_zone_map(filter(liberia_zones_rounds, round == "r6"), "Round 6")
zone_r7 <- make_zone_map(filter(liberia_zones_rounds, round == "r7"), "Round 7")
zone_r8 <- make_zone_map(filter(liberia_zones_rounds, round == "r8"), "Round 8")

zone_r9 <- make_zone_map(
    filter(liberia_zones_rounds, round == "r9"), "Round 9", show_legend = TRUE
  ) +
  annotation_scale(
    location   = "bl",
    width_hint = 0.3,
    style      = "ticks",
    text_cex   = 0.6
  ) +
  annotation_north_arrow(
    location    = "tr",
    which_north = "true",
    style       = north_arrow_minimal(),
    height      = unit(0.8, "cm"),
    width       = unit(0.8, "cm")
  )

zone_r9 <- zone_r9 +
  inset_element(inset_map, left = 0, bottom = 0.05, right = 0.32, top = 0.37)

# Combine 
final_zone_plot <- (zone_r4 | zone_r5 | zone_r6) /
                   (zone_r7 | zone_r8 | zone_r9) +
  plot_annotation(
    title    = "Bright Zones and Dark Zones of Electricity Access in Liberia",
    subtitle = "Shows zone type and the population of each county",
    theme = theme(
      plot.title    = element_text(hjust = 0.5, size = 15, face = "bold"),
      plot.subtitle = element_text(hjust = 0.5, size = 11, color = "black"),
      plot.margin   = margin(10, 10, 10, 10)
    )
  )

plot(final_zone_plot)

Table 5: Determinants of Household Electricity Access in Liberia

Code
# data 
df_clean <- df |> 
  mutate(
    electricity_access = as.numeric(electricity_access),
    sex        = factor(sex, levels = c("Male", "Female")),
    employed = factor(employed, levels = c(0, 1), labels = c("Not employed", "Employed"))
  )

# coefficient labels
coef_labels <- c(
  "lpi_catLow lived poverty"      = "Low Lived Poverty",
  "lpi_catModerate lived poverty" = "Moderate Lived Poverty",
  "lpi_catHigh lived poverty"     = "High Lived Poverty",
  "education_catMedium Education" = "Medium Education",
  "education_catHigh Education"   = "High Education",
  "employedEmployed"            = "Employed",
  "sexFemale"                       = "Female",
  "urbanityRural"                 = "Rural (vs Urban)",
  "roundr5"                       = "Round 5",
  "roundr6"                       = "Round 6",
  "roundr7"                       = "Round 7",
  "roundr8"                       = "Round 8",
  "roundr9"                       = "Round 9"
)

# model 1: SES only
model_1 <- glmer(
  electricity_access ~ 
    lpi_cat + 
    education_cat + 
    employed + 
    sex +
    (1 | county),
  data   = df_clean,
  family = binomial(link = "logit")
)

# model 2: SES + geography
model_2 <- glmer(
  electricity_access ~ 
    lpi_cat + 
    education_cat + 
    employed + 
    sex +
    urbanity +
    (1 | county),
  data   = df_clean,
  family = binomial(link = "logit")
)

# model 3: full model
model_3 <- glmer(
  electricity_access ~ 
    lpi_cat + 
    education_cat + 
    employed + 
    sex +
    urbanity +
    round +
    (1 | county),
  data   = df_clean,
  family = binomial(link = "logit")
)

# ICC
get_icc <- function(model) {
  var_county <- as.numeric(VarCorr(model)$county[1])
  icc        <- var_county / (var_county + (pi^2 / 3))
  return(round(icc, 2))
}

icc_1 <- get_icc(model_1)
icc_2 <- get_icc(model_2)
icc_3 <- get_icc(model_3)

# table
modelsummary(
  list(
    "SES Only"             = model_1,
    "SES + Geography"      = model_2,
    "Full Model"           = model_3
  ),
  exponentiate = TRUE,
  fmt          = 2,
  coef_map     = coef_labels,
  statistic    = "({conf.low}, {conf.high})",
  stars        = c("*" = .1, "**" = .05, "***" = .01),
  gof_map      = data.frame(
    raw   = c("nobs", "AIC", "BIC"),
    clean = c("Observations", "AIC", "BIC"),
    fmt   = c(0, 1, 1)
  ),
  add_rows = data.frame(
    term             = "ICC (County)",
    "SES Only"       = icc_1,
    "SES + Geography" = icc_2,
    "Full Model"     = icc_3
  ),
  title = "Determinants of Household Electricity Access in Liberia",
  notes = "Odds ratios reported. 95% confidence intervals in parentheses. Reference categories: No lived poverty, Low Education, Not employed, Female, Urban, Round 4. Source: Afrobarometer rounds 4–9, Liberia; authors' calculations.")
Determinants of Household Electricity Access in Liberia
SES Only SES + Geography Full Model
* p < 0.1, ** p < 0.05, *** p < 0.01
Odds ratios reported. 95% confidence intervals in parentheses. Reference categories: No lived poverty, Low Education, Not employed, Female, Urban, Round 4. Source: Afrobarometer rounds 4–9, Liberia; authors' calculations.
Low Lived Poverty 0.87 0.88 0.68***
(0.71, 1.08) (0.70, 1.10) (0.53, 0.88)
Moderate Lived Poverty 0.73*** 0.76** 0.58***
(0.59, 0.90) (0.61, 0.95) (0.45, 0.75)
High Lived Poverty 0.66*** 0.75** 0.57***
(0.52, 0.83) (0.59, 0.95) (0.43, 0.75)
Medium Education 1.71*** 1.40*** 1.48***
(1.49, 1.95) (1.21, 1.62) (1.26, 1.74)
High Education 2.66*** 1.91*** 1.53***
(2.21, 3.22) (1.57, 2.33) (1.23, 1.90)
Employed 1.06 0.96 0.92
(0.92, 1.22) (0.82, 1.11) (0.78, 1.09)
Female 1.15** 1.08 1.06
(1.02, 1.30) (0.95, 1.23) (0.92, 1.22)
Rural (vs Urban) 0.10*** 0.07***
(0.09, 0.12) (0.06, 0.09)
Round 5 2.92***
(2.22, 3.86)
Round 6 8.50***
(6.47, 11.17)
Round 7 7.51***
(5.75, 9.82)
Round 8 14.63***
(11.09, 19.29)
Round 9 25.99***
(19.58, 34.51)
Observations 7013 7013 7013
ICC (County) 0.38 0.26 0.32

Figure 8: Coefficient Plot for Robustness Check on Multilevel Logistic Regression Model

Code
# coefficient plots for both robustness models 


# Coefficient labels 
coef_labels_glmer <- c(
  "lpi_catLow lived poverty"      = "LPI: Low",
  "lpi_catModerate lived poverty" = "LPI: Moderate",
  "lpi_catHigh lived poverty"     = "LPI: High",
  "education_catMedium Education" = "Education: Medium",
  "education_catHigh Education"   = "Education: High",
  "employment1"                   = "Employed",
  "sexFemale"                     = "Female",
  "urbanityRural"                 = "Rural",
  "roundr5"                       = "Round 5",
  "roundr6"                       = "Round 6",
  "roundr7"                       = "Round 7",
  "roundr8"                       = "Round 8",
  "roundr9"                       = "Round 9"
)


# model 1
model_fe_logit <- suppressMessages(
  feglm(
  electricity_access ~ lpi_cat + education_cat + employment + sex + urbanity |
    county + round,
  data   = df_clean,
  family = binomial(link = "logit"), 
  cluster = ~county
))


# model 2 
df_no_capital <- df_clean %>%
  filter(county != "Montserrado")

model_no_capital <- suppressMessages(
  glmer(
  electricity_access ~ 
    lpi_cat + education_cat + employment + sex + urbanity + round +
    (1 | county),
  data = df_no_capital,
  family = binomial(link = "logit")
)) 


# combine model and make plot 
model_list_rob <- list(
  "Fixed Effect"          = model_fe_logit,
  "Excluding Montserrado" = model_no_capital
)

# colors
model_colours_rob <- c(
  "Fixed Effect"          = "#E05C5C",
  "Excluding Montserrado" = "#4C7EAF"
)

model_shapes_rob <- c(
  "Fixed Effect"          = 16,
  "Excluding Montserrado" = 17
)

# extra plot data 
pd_rob <- modelplot(
  model_list_rob,
  coef_map     = coef_labels_glmer, 
  exponentiate = TRUE,
  draw         = FALSE
)

# Add significance & factor ordering 
pd_rob <- pd_rob |>
  mutate(
    sig = case_when(
      p.value < 0.01 ~ "p < 0.01",
      p.value < 0.05 ~ "p < 0.05",
      p.value < 0.10 ~ "p < 0.10",
      TRUE           ~ "Not significant"
    ),
    sig   = factor(sig, levels = c("p < 0.01", "p < 0.05", "p < 0.10", "Not significant")),
    term  = factor(term, levels = rev(unique(term))),
    model = factor(model, levels = names(model_list_rob))
  )

# Build plot 
p_rob <- ggplot(pd_rob, aes(x = estimate, y = term,
                             xmin = conf.low, xmax = conf.high,
                             colour = model, shape = model,
                             alpha  = sig)) +
  geom_vline(xintercept = 1, linetype = "dashed", colour = "grey40") +
  geom_pointrange(position = position_dodge(width = 0.5), size = 0.55) +
  scale_colour_manual(values = model_colours_rob) +
  scale_shape_manual(values  = model_shapes_rob) +
  scale_alpha_manual(
    values = c(
      "p < 0.01"        = 1.00,
      "p < 0.05"        = 1.00,
      "p < 0.10"        = 0.55,
      "Not significant" = 0.20
    ),
    name = "Significance"
  ) +
  scale_x_log10() +
  labs(
    title   = "Robustness Check: Determinants of Household Electricity Access in Liberia",
    x       = "Odds Ratio",
    y       = NULL,
    colour  = "Model",
    shape   = "Model") +
  theme_bw(base_size = 12) +
  theme(
    legend.position  = "right",
    panel.grid.minor = element_blank(),
    axis.text.y      = element_text(hjust = 1),
    plot.title       = element_text(face = "bold", size = 13)
  )

p_rob

Code
# load clean data 
ab <- df_clean

ab <- ab |>
  mutate(
    # Round: was coded "r4"..."r9" — strip prefix
    round = as.integer(str_remove(round, "^r")),

    # Survey years
    year = case_when(
      round == 4 ~ 2008,
      round == 5 ~ 2012,
      round == 6 ~ 2014,
      round == 7 ~ 2017,
      round == 8 ~ 2019,
      round == 9 ~ 2021
    ),

    # Post-treatment: Mt. Coffee fully online by end of 2016
    post_2016 = as.integer(year >= 2017),

    # County names from region codes
    county = case_when(
      region == "380" ~ "Bomi",
      region == "381" ~ "Bong",
      region == "382" ~ "Gbarpolu",
      region == "383" ~ "Grand Bassa",
      region == "384" ~ "Grand Cape Mount",
      region == "385" ~ "Grand Gedeh",
      region == "386" ~ "Grand Kru",
      region == "387" ~ "Lofa",
      region == "388" ~ "Margibi",
      region == "389" ~ "Maryland",
      region == "390" ~ "Montserrado",
      region == "391" ~ "Nimba",
      region == "392" ~ "River Cess",
      region == "393" ~ "River Gee",
      region == "394" ~ "Sinoe",
      TRUE            ~ NA_character_
    ),

    # Electricity: ea_svc_a — observer-coded EA-level grid presence
    # 0 = absent, 1 = present, 9 = missing → NA
    electricity = case_when(
      ea_svc_a == "0" ~ 0L,
      ea_svc_a == "1" ~ 1L,
      TRUE            ~ NA_integer_
    ),

    # LPI: already harmonised across rounds
    lpi = as.numeric(lpi),

    # Coordinates: stored as character → numeric
    latitude  = as.numeric(latitude),
    longitude = as.numeric(longitude),

    # Urban: urbrur 1 = urban, 2 = rural → recode to binary
    urban = as.integer(urbrur == "1"),

    # Bounding box filter: Liberia lon -11.6 to -7.4, lat 4.3 to 8.6
    coords_ok = between(longitude, -12, -7) & between(latitude, 4, 9)
  )

# Audit
cat("\nCoordinate audit:\n");  ab |> count(coords_ok)    |> print()
cat("Electricity coding:\n"); ab |> count(electricity)   |> print()
cat("Round × year:\n");       ab |> count(round, year)   |> print()

# convert to sf, reproject, compute distances 
ab_sf <- ab |>
  filter(coords_ok, !is.na(longitude), !is.na(latitude)) |>
  st_as_sf(coords = c("longitude", "latitude"), crs = 4326) |>
  st_transform(UTM29N)

cat("\nCoordinate range after reproject (metres):\n")
cat("Easting: ",  range(st_coordinates(ab_sf)[, 1]), "\n")
cat("Northing:", range(st_coordinates(ab_sf)[, 2]), "\n")

cat("\nComputing distances — may take ~30s...\n")

dist_line <- st_distance(ab_sf, lines_op)     # n × 6 matrix
dist_mc   <- st_distance(ab_sf, plants)       # n × 1 (single plant)
dist_sub  <- st_distance(ab_sf, stations_op)  # n × 6 matrix

ab_sf <- ab_sf |>
  mutate(
    dist_line_m  = as.numeric(apply(dist_line, 1, min)),
    dist_line_km = dist_line_m / 1000,
    dist_mc_m    = as.numeric(dist_mc),
    dist_mc_km   = dist_mc_m / 1000,
    dist_sub_m   = as.numeric(apply(dist_sub, 1, min)),
    dist_sub_km  = dist_sub_m / 1000
  )

# Build analysis panel 
ab_panel <- ab_sf |>
  st_drop_geometry() |>
  mutate(
    treat_5km  = as.integer(dist_line_km <  5),
    treat_10km = as.integer(dist_line_km < 10),
    treat_20km = as.integer(dist_line_km < 20),

    did_5km  = treat_5km  * post_2016,
    did_10km = treat_10km * post_2016,
    did_20km = treat_20km * post_2016,

    # pmax() prevents log(0) when EA sits exactly on a line
    log_dist_line = log(pmax(dist_line_km, 0.01)),
    log_dist_mc   = log(pmax(dist_mc_km,   0.01)),
    log_dist_sub  = log(pmax(dist_sub_km,  0.01))
  )

# Diagnostics 
cat("\n=== Buffer coverage (unique EAs) ===\n")
ab_panel |>
  distinct(uniqueea, treat_5km, treat_10km, treat_20km) |>
  summarise(
    n_ea     = n(),
    n_5km    = sum(treat_5km,  na.rm = TRUE),
    n_10km   = sum(treat_10km, na.rm = TRUE),
    n_20km   = sum(treat_20km, na.rm = TRUE),
    pct_5km  = round(100 * n_5km  / n_ea, 1),
    pct_10km = round(100 * n_10km / n_ea, 1),
    pct_20km = round(100 * n_20km / n_ea, 1)
  ) |> print()

cat("\nCounties in 10km treatment buffer:\n")
ab_panel |>
  filter(treat_10km == 1) |>
  distinct(uniqueea, county) |>
  count(county, sort = TRUE) |> print()

# Confirm repeated cross-section structure (drives FE choice)
cat("\nEA appearances across rounds:\n")
ab_panel |>
  group_by(uniqueea) |>
  summarise(n_rounds = n_distinct(round), .groups = "drop") |>
  count(n_rounds) |> print()
# → ~390 EAs appear once; EA FEs are not feasible → use county + round FEs

Table 7: DiD and IV Effect on Electricity Access

Code
# main results table 

main_rows <- tribble(
  ~term,       ~`(1) Elec. 5km`, ~`(2) Elec. 10km`,
               ~`(3) Elec. 20km`, ~`(4) LPI`, ~`(5) LPI IV`,
  "County FE", "Yes", "Yes", "Yes", "Yes", "Yes",
  "Round FE",  "Yes", "Yes", "Yes", "Yes", "Yes"
)

modelsummary(
  list(
    "5km"  = did_elec_5,
    "10km" = did_elec_10,
    "20km" = did_elec_20,
    "LPI"        = did_lpi_10,
    "LPI IV"     = iv_single
  ),
  stars    = c("*" = 0.1, "**" = 0.05, "***" = 0.01),
  coef_map = c(
    "did_5km"         = "Treated × Post (5km)",
    "did_10km"        = "Treated × Post (10km)",
    "did_20km"        = "Treated × Post (20km)",
    "fit_electricity" = "Electricity Access (IV)",
    "urban"           = "Urban"
  ),
  gof_map = tribble(
    ~raw,         ~clean,            ~fmt,
    "nobs",       "Observations",    0,
    "r.squared",  "R²",              3,
    "ivf.stat",   "First-stage F",   1
  ),
  add_rows = main_rows,
  notes    = paste(
    "County-clustered SEs in parentheses.",
    "Treated = households in EAs centroid within Xkm of nearest operational 22/66KV line.",
    "Post = Afrobarometer rounds 7-9 (2017-2021).",
    "IV instrument: log(distance to nearest operational line).",
    "Source: Afrobarometer rounds 4–9, Liberia; authors' calculations."))
5km 10km 20km LPI LPI IV
* p < 0.1, ** p < 0.05, *** p < 0.01
County-clustered SEs in parentheses. Treated = households in EAs centroid within Xkm of nearest operational 22/66KV line. Post = Afrobarometer rounds 7-9 (2017-2021). IV instrument: log(distance to nearest operational line). Source: Afrobarometer rounds 4–9, Liberia; authors' calculations.
Treated × Post (5km) 0.533***
(0.043)
Treated × Post (10km) 0.440*** -0.244***
(0.040) (0.065)
Treated × Post (20km) 0.349***
(0.106)
Electricity Access (IV) -0.621
(0.622)
Urban 0.325*** 0.329*** 0.331*** -0.202*** 0.009
(0.041) (0.054) (0.058) (0.027) (0.205)
Observations 7126 7126 7126 7019 6988
0.467 0.444 0.425 0.071 0.066
County FE Yes Yes Yes Yes Yes
Round FE Yes Yes Yes Yes Yes

Appendix D: Coefficient Plot for Multilevel Regression Analysis

Code
# coefficient plots with stars 


# Model list 
model_list <- list(
  "SES Only"        = model_1,
  "SES + Geography" = model_2,
  "Full Model"      = model_3
)

# Coefficient labels 
coef_labels_glmer <- c(
  "lpi_catLow lived poverty"      = "LPI: Low",
  "lpi_catModerate lived poverty" = "LPI: Moderate",
  "lpi_catHigh lived poverty"     = "LPI: High",
  "education_catMedium Education" = "Education: Medium",
  "education_catHigh Education"   = "Education: High",
  "employment1"                   = "Employed",
  "sexFemale"                     = "Female",
  "urbanityRural"                 = "Rural",
  "roundr5"                       = "Round 5",
  "roundr6"                       = "Round 6",
  "roundr7"                       = "Round 7",
  "roundr8"                       = "Round 8",
  "roundr9"                       = "Round 9"
)

# Colour + shape scales
model_colours <- c(
  "SES Only"        = "#E05C5C",
  "SES + Geography" = "#4CAF82",
  "Full Model"      = "#4C7EAF"
)

model_shapes <- c(
  "SES Only"        = 16,
  "SES + Geography" = 17,
  "Full Model"      = 15
)

# Extract plot data
pd <- modelplot(
  model_list,
  coef_map     = coef_labels_glmer,
  exponentiate = TRUE,
  draw         = FALSE
)

# Add significance & factor ordering directly 
pd <- pd |>
  mutate(
    sig = case_when(
      p.value < 0.01 ~ "p < 0.01",
      p.value < 0.05 ~ "p < 0.05",
      p.value < 0.10 ~ "p < 0.10",
      TRUE           ~ "Not significant"
    ),
    sig   = factor(sig, levels = c("p < 0.01", "p < 0.05", "p < 0.10", "Not significant")),
    term  = factor(term, levels = rev(unique(term))),
    model = factor(model, levels = names(model_list))
  )

# Build plot
p_glmer <- ggplot(pd, aes(x = estimate, y = term,
                           xmin = conf.low, xmax = conf.high,
                           colour = model, shape = model,
                           alpha  = sig)) +
  geom_vline(xintercept = 1, linetype = "dashed", colour = "grey40") +
  geom_pointrange(position = position_dodge(width = 0.5), size = 0.55) +
  scale_colour_manual(values = model_colours) +
  scale_shape_manual(values  = model_shapes) +
  scale_alpha_manual(
    values = c(
      "p < 0.01"        = 1.00,
      "p < 0.05"        = 1.00,
      "p < 0.10"        = 0.50,
      "Not significant" = 0.25
    ),
    name = "Significance"
  ) +
  scale_x_log10() +
  labs(
    title   = "Determinants of Household Electricity Access in Liberia",
    x       = "Odds Ratio",
    y       = NULL,
    colour  = "Model",
    shape   = "Model") +
  theme_bw(base_size = 12) +
  theme(
    legend.position  = "right",
    panel.grid.minor = element_blank(),
    axis.text.y      = element_text(hjust = 1),
    plot.title       = element_text(face = "bold", size = 13)
  )

p_glmer

Table 8: Robustness Check: Determinants of Electricity Access in Liberia

Code
# robustness check model 

modelsummary(
  list(
    "Fixed Effect" = model_fe_logit,
    "Excluding Montserrado" = model_no_capital
  ),
  
  exponentiate = TRUE,
  fmt = 2,
  
  coef_map = coef_labels,
  
  statistic = "({conf.low}, {conf.high})",
  
  stars = c('*' = .1, '**' = .05, '***' = .01),
  
  gof_map = data.frame(
  raw = c("nobs", "AIC", "BIC"),
  clean = c("Observations", "AIC", "BIC"),
  fmt = c(0, 1, 1)
),
  
  title = "Robustness Check: Determinants of Household Electricity Access in Liberia",
  
  notes = "Odds ratios reported. 95% confidence intervals in parentheses.")
Robustness Check: Determinants of Household Electricity Access in Liberia
Fixed Effect Excluding Montserrado
* p < 0.1, ** p < 0.05, *** p < 0.01
Odds ratios reported. 95% confidence intervals in parentheses.
Low Lived Poverty 0.69 0.54***
(0.44, 1.09) (0.37, 0.80)
Moderate Lived Poverty 0.59* 0.43***
(0.33, 1.06) (0.29, 0.62)
High Lived Poverty 0.57* 0.43***
(0.31, 1.07) (0.29, 0.64)
Medium Education 1.46** 1.34***
(1.02, 2.07) (1.09, 1.66)
High Education 1.45** 1.62**
(1.07, 1.95) (1.12, 2.33)
Female 1.10*** 1.13
(1.03, 1.16) (0.93, 1.37)
Rural (vs Urban) 0.08*** 0.11***
(0.03, 0.20) (0.09, 0.13)
Round 5 3.05***
(2.00, 4.65)
Round 6 10.44***
(6.96, 15.65)
Round 7 1.69**
(1.09, 2.64)
Round 8 6.41***
(4.25, 9.66)
Round 9 9.56***
(6.38, 14.32)
Observations 6790 4660

Appendix F: Coefficient Plot for DiD and IV Models

Code
# colors 
COL_MAIN <- "#4C7EAF"   # blue  — main / baseline specs
COL_ALT  <- "#C0392B"   # red   — alternative outcomes / IV
COL_ROB  <- "#4CAF82"   # green — robustness variants


# DID and IV Models 

# --- Model list ---
main_model_list <- list(
  "Elec. (5km)"  = did_elec_5,
  "Elec. (10km)" = did_elec_10,
  "Elec. (20km)" = did_elec_20,
  "LPI (10km)"   = did_lpi_10,
  "LPI (IV)"     = iv_single
)

# --- Coefficient labels ---
coef_labels_main <- c(
  "did_5km"         = "Treated × Post (5km)",
  "did_10km"        = "Treated × Post (10km)",
  "did_20km"        = "Treated × Post (20km)",
  "fit_electricity" = "Electricity Access (IV)",
  "urban"           = "Urban"
)

# --- Colour + shape by model ---
main_colours <- c(
  "Elec. (5km)"  = COL_MAIN,
  "Elec. (10km)" = COL_MAIN,
  "Elec. (20km)" = COL_MAIN,
  "LPI (10km)"   = COL_ALT,
  "LPI (IV)"     = COL_ALT
)

main_shapes <- c(
  "Elec. (5km)"  = 21,
  "Elec. (10km)" = 22,
  "Elec. (20km)" = 23,
  "LPI (10km)"   = 21,
  "LPI (IV)"     = 24
)

# --- Extract plot data ---
pd_main <- modelplot(
  main_model_list,
  coef_map = coef_labels_main,
  draw     = FALSE
) |>
  mutate(
    sig = case_when(
      p.value < 0.01 ~ "p < 0.01",
      p.value < 0.05 ~ "p < 0.05",
      p.value < 0.10 ~ "p < 0.10",
      TRUE           ~ "Not significant"
    ),
    sig   = factor(sig, levels = c("p < 0.01", "p < 0.05",
                                   "p < 0.10", "Not significant")),
    term  = factor(term, levels = rev(unique(term))),
    model = factor(model, levels = names(main_model_list))
  )

# --- Build plot ---
fig_main_coef <- ggplot(pd_main,
                        aes(x = estimate, y = term,
                            xmin = conf.low, xmax = conf.high,
                            colour = model, shape = model,
                            alpha  = sig)) +
  geom_vline(xintercept = 0, colour = "grey50",
             linewidth = 0.4, linetype = "dashed") +
  geom_linerange(position = position_dodge(width = 0.55),
                 linewidth = 0.6) +
  geom_point(aes(fill = model),
             position = position_dodge(width = 0.55),
             size = 3, stroke = 1.1) +
  scale_colour_manual(values = main_colours) +
  scale_fill_manual(  values = main_colours) +
  scale_shape_manual( values = main_shapes)  +
  scale_alpha_manual(
    values = c(
      "p < 0.01"        = 1.00,
      "p < 0.05"        = 1.00,
      "p < 0.10"        = 0.55,
      "Not significant" = 0.20
    ),
    name = "Significance"
  ) +
  labs(
    title    = "Main DiD and IV Results: Estimates of Treatment Effect",
    subtitle = "County + round FE throughout | County-clustered SE | 95% CIs shown",
    x        = "Coefficient estimate",
    y        = NULL,
    colour   = "Specification",
    shape    = "Specification",
    fill     = "Specification")+
  theme_bw(base_size = 11) +
  theme(
    panel.grid.major.x = element_line(colour = "grey90", linewidth = 0.3),
    panel.grid.major.y = element_blank(),
    panel.grid.minor   = element_blank(),
    panel.border       = element_rect(colour = "grey80", linewidth = 0.4),
    axis.ticks         = element_line(colour = "grey70", linewidth = 0.3),
    axis.text          = element_text(size = 8.5, colour = "grey20"),
    axis.title.x       = element_text(size = 9, colour = "grey20",
                                      margin = margin(t = 6)),
    plot.title         = element_text(size = 10.5, face = "bold",
                                      colour = "grey10", margin = margin(b = 3)),
    plot.subtitle      = element_text(size = 8.5, colour = "grey40",
                                      margin = margin(b = 8)),
    plot.caption       = element_text(size = 7.5, colour = "grey45",
                                      hjust = 0, margin = margin(t = 6)),
    legend.position    = "right",
    legend.text        = element_text(size = 8),
    legend.key.size    = unit(0.4, "cm"),
    plot.margin        = margin(10, 14, 8, 10)
  )


fig_main_coef

Table 9: Robustness of DiD Estimates

Code
# robustness check 

rob_rows <- tribble(
  ~term,                     ~`(1) Baseline`, ~`(2) Nearby cos.`,
                             ~`(3) Co. trends`, ~`(4) LPI DiD`, ~`(5) LPI IV`,
  "County FE",               "Yes", "Yes", "Yes", "Yes", "Yes",
  "Round FE",                "Yes", "Yes", "Yes", "Yes", "Yes",
  "County × round trends",   "No",  "No",  "Yes", "No",  "No",
  "Sample",                  "Full","Adjacent","Full","Full","Full"
)

modelsummary(
  list(
    "Baseline"    = did_elec_10,
    "Nearby Counties." = did_nearby,
    "County Trends"  = did_trends,
    "LPI DiD"     = did_lpi_10,
    "LPI IV"      = iv_single
  ),
  stars    = c("*" = 0.1, "**" = 0.05, "***" = 0.01),
  coef_map = c(
    "did_10km"        = "Treated × Post (10km)",
    "fit_electricity" = "Electricity Access (IV)",
    "urban"           = "Urban"
  ),
  gof_map = tribble(
    ~raw,        ~clean,          ~fmt,
    "nobs",      "Observations",  0,
    "r.squared", "R²",            3,
    "ivf.stat",  "First-stage F", 1
  ),
  add_rows = rob_rows,
  notes    = paste(
    "County-clustered SEs in parentheses.",
    "Col. (2) restricts to adjacent counties: Bomi, Margibi, Grand Bassa,",
    "Grand Cape Mount. Col. (3) adds county-specific linear time trends.",
    "IV Sargan-Hansen test p = 0.52 (instruments valid).",
    "LPI = Lived Poverty Index (higher = more deprived)."))
Baseline Nearby Counties. County Trends LPI DiD LPI IV
* p < 0.1, ** p < 0.05, *** p < 0.01
County-clustered SEs in parentheses. Col. (2) restricts to adjacent counties: Bomi, Margibi, Grand Bassa, Grand Cape Mount. Col. (3) adds county-specific linear time trends. IV Sargan-Hansen test p = 0.52 (instruments valid). LPI = Lived Poverty Index (higher = more deprived).
Treated × Post (10km) 0.440*** 0.547*** 0.295*** -0.244***
(0.040) (0.066) (0.064) (0.065)
Electricity Access (IV) -0.621
(0.622)
Urban 0.329*** 0.336** 0.335*** -0.202*** 0.009
(0.054) (0.120) (0.056) (0.027) (0.205)
Observations 7126 3703 7126 7019 6988
0.444 0.450 0.461 0.071 0.066
County FE Yes Yes Yes Yes Yes
Round FE Yes Yes Yes Yes Yes
County × round trends No No Yes No No
Sample Full Adjacent Full Full Full

Appendix H: Coefficient plot for robustness checks on DiD and IV

Code
# Robustness 

# models
rob_model_list <- list(
  "Baseline (10km)"       = did_elec_10,
  "Nearby counties"       = did_nearby,
  "County trends"         = did_trends,
  "LPI outcome"           = did_lpi_10,
  "LPI (IV)"              = iv_single
)

# Coefficient labels
coef_labels_rob <- c(
  "did_10km"        = "Treated × Post (10km)",
  "fit_electricity" = "Electricity Access (IV)",
  "urban"           = "Urban"
)

# Colour + shape by model 
rob_colours <- c(
  "Baseline (10km)"  = COL_MAIN,
  "Nearby counties"  = COL_ROB,
  "County trends"    = COL_ROB,
  "LPI outcome"      = COL_ALT,
  "LPI (IV)"         = COL_ALT
)

rob_shapes <- c(
  "Baseline (10km)"  = 21,
  "Nearby counties"  = 22,
  "County trends"    = 23,
  "LPI outcome"      = 21,
  "LPI (IV)"         = 24
)

# Extract plot data 
pd_rob <- modelplot(
  rob_model_list,
  coef_map = coef_labels_rob,
  draw     = FALSE
) |>
  mutate(
    sig = case_when(
      p.value < 0.01 ~ "p < 0.01",
      p.value < 0.05 ~ "p < 0.05",
      p.value < 0.10 ~ "p < 0.10",
      TRUE           ~ "Not significant"
    ),
    sig   = factor(sig, levels = c("p < 0.01", "p < 0.05",
                                   "p < 0.10", "Not significant")),
    term  = factor(term, levels = rev(unique(term))),
    model = factor(model, levels = names(rob_model_list))
  )

# Build plot 
fig_rob_coef <- ggplot(pd_rob,
                       aes(x = estimate, y = term,
                           xmin = conf.low, xmax = conf.high,
                           colour = model, shape = model,
                           alpha  = sig)) +
  geom_vline(xintercept = 0, colour = "grey50",
             linewidth = 0.4, linetype = "dashed") +
  geom_linerange(position = position_dodge(width = 0.55),
                 linewidth = 0.6) +
  geom_point(aes(fill = model),
             position = position_dodge(width = 0.55),
             size = 3, stroke = 1.1) +
  scale_colour_manual(values = rob_colours) +
  scale_fill_manual(  values = rob_colours) +
  scale_shape_manual( values = rob_shapes)  +
  scale_alpha_manual(
    values = c(
      "p < 0.01"        = 1.00,
      "p < 0.05"        = 1.00,
      "p < 0.10"        = 0.55,
      "Not significant" = 0.20
    ),
    name = "Significance"
  ) +
  labs(
    title    = "Robustness Checks: Treatment Effect Estimates",
    subtitle = "County + round FE throughout | County-clustered SE | 95% CIs shown",
    x        = "Coefficient estimate",
    y        = NULL,
    colour   = "Specification",
    shape    = "Specification",
    fill     = "Specification",
  ) +
  theme_bw(base_size = 11) +
  theme(
    panel.grid.major.x = element_line(colour = "grey90", linewidth = 0.3),
    panel.grid.major.y = element_blank(),
    panel.grid.minor   = element_blank(),
    panel.border       = element_rect(colour = "grey80", linewidth = 0.4),
    axis.ticks         = element_line(colour = "grey70", linewidth = 0.3),
    axis.text          = element_text(size = 8.5, colour = "grey20"),
    axis.title.x       = element_text(size = 9, colour = "grey20",
                                      margin = margin(t = 6)),
    plot.title         = element_text(size = 10.5, face = "bold",
                                      colour = "grey10", margin = margin(b = 3)),
    plot.subtitle      = element_text(size = 8.5, colour = "grey40",
                                      margin = margin(b = 8)),
    plot.caption       = element_text(size = 7.5, colour = "grey45",
                                      hjust = 0, margin = margin(t = 6)),
    legend.position    = "right",
    legend.text        = element_text(size = 8),
    legend.key.size    = unit(0.4, "cm"),
    plot.margin        = margin(10, 14, 8, 10)
  )

fig_rob_coef