Bright Zone, Dark Zone: Spatial Electricity Inequality in Liberia

Replication Materials

Author

Affiliation

Vermon Washington

Hertie School

Published

April 28, 2026

Instructions and Overview

To complete this replication, you will need the following datasets: 1. Liberia’s Afrobarometer, Rounds 4-9 (geocoded version available upon request from Afrobarometer) 2. ESMAP files from the World Bank, and 3. Liberia’s shape files from HDX.

The code is structure according to the order that tables and figures appear in the thesis. If you have any questions, please do not hesitant to send an email. Thank you.

Code

#| output: false
#| message: false
#| warning: false

rm(list = ls()) 

# set up
pacman::p_load(dplyr, haven, tidyverse, ggplot2, kableExtra, readxl, janitor, tools, purrr, sf, classInt, tmap, patchwork, scales, gtsummary, webshot2, patchwork, cowplot, lubridate, gt, kableExtra, margins, broom, modelsummary, fixest, marginaleffects, broom.mixed, lme4, tibble, DiagrammeR, DiagrammeRsvg, rsvg, flextable, officer, ggspatial, rnaturalearth, ggrepel, viridis)

# helper functions 

# extract_round function 

extract_round <- function(filename) {
  str_extract(filename, regex("r[0-9]+", ignore_case = TRUE)) %>%
    str_to_lower()
}

makeVlist <- function(x) {
  labels <- sapply(x, function(col) {
    lbl <- attr(col, "label")
    if (is.null(lbl)) NA_character_ else as.character(lbl)
  })
  tibble(name = names(labels), label = labels)
}


# record_missing function  
recode_missing <- function(x, extra = NULL) {
  miss_vals <- c(-1, 98, 99, 998, 999)
  if (!is.null(extra)) miss_vals <- c(miss_vals, extra)
  ifelse(x %in% miss_vals, NA_real_, x)
}



# read data 

data_dir <- "Data"
files    <- list.files(data_dir, pattern = "\\.(sav|xlsx?)$", full.names = TRUE)

excel_files <- files[file_ext(files) %in% c("xls", "xlsx")]
sav_files   <- files[file_ext(files) == "sav"]

excel_list <- map(excel_files, ~ {
  read_excel(.x, col_types = "text") %>%   # ← force everything to character
    clean_names() %>%
    mutate(source_file = basename(.x),
           round       = extract_round(basename(.x)))
})

sav_list <- map(sav_files, ~ {
  haven::read_sav(.x) %>%
    clean_names() %>%
    mutate(source_file = basename(.x),
           round       = extract_round(basename(.x)))
})


# extract labels 

Labels <- map_dfr(sav_list, makeVlist) %>%
  distinct(name, .keep_all = TRUE) %>%
  filter(!is.na(label))

# standardize 

afro_list <- c(excel_list, sav_list) %>%
  lapply(function(x) {
    x[] <- lapply(x, as.character)
    x
  })

# combine
afro_combined <- bind_rows(afro_list)



# code book function 

make_codebook <- function(df) {
  map_dfr(names(df), function(var) {
    col <- df[[var]]
    
    var_label   <- attr(col, "label")       
    value_labels <- attr(col, "labels")   
    
    if (!is.null(value_labels)) {
      tibble(
        variable    = var,
        question    = if (is.null(var_label)) NA_character_ else as.character(var_label),
        value       = unname(value_labels),
        value_label = names(value_labels)
      )
    } else {
      tibble(
        variable    = var,
        question    = if (is.null(var_label)) NA_character_ else as.character(var_label),
        value       = NA_real_,
        value_label = NA_character_
      )
    }
  })
}

# build code book from all sav files and deduplicate
codebook <- map_dfr(sav_list, make_codebook) %>%
  distinct(variable, value, .keep_all = TRUE)


# cleaning labels 
coef_labels <- c(
  "lpi_catLow lived poverty" = "Low Lived Poverty",
  "lpi_catModerate lived poverty" = "Moderate Lived Poverty",
  "lpi_catHigh lived poverty" = "High Lived Poverty",
  "education_catMedium Education" = "Medium Education",
  "education_catHigh Education" = "High Education",
  "employment1" = "Employed",
  "sexFemale" = "Female",
  "urbanityRural" = "Rural (vs Urban)",
  "roundr5" = "Round 5",
  "roundr6" = "Round 6",
  "roundr7" = "Round 7",
  "roundr8" = "Round 8",
  "roundr9" = "Round 9"
)


# set chord 
UTM29N <- 32629


# set theme

theme_thesis <- function(base_size = 11) {
  theme_minimal(base_size = base_size) +
    theme(
      plot.title       = element_text(size = base_size + 2, face = "bold",
                                      margin = margin(b = 4)),
      plot.subtitle    = element_text(size = base_size - 0.5, colour = "grey40",
                                      margin = margin(b = 10)),
      plot.caption     = element_text(size = base_size - 2, colour = "grey50",
                                      hjust = 0, margin = margin(t = 8)),
      axis.title       = element_text(size = base_size - 0.5, colour = "grey30"),
      axis.text        = element_text(size = base_size - 1.5, colour = "grey40"),
      legend.title     = element_text(size = base_size - 0.5, face = "bold"),
      legend.text      = element_text(size = base_size - 1),
      panel.grid.major = element_line(colour = "grey92", linewidth = 0.4),
      panel.grid.minor = element_blank(),
      panel.border     = element_blank(),
      legend.position  = "bottom",
      legend.key.width = unit(1.2, "cm"),
      plot.margin      = margin(12, 12, 8, 12)
    )
}


# Colour palette  for consistent across all figures
COL_TREAT   <- "#1D6B8F"   # treated / near grid  — steel blue
COL_CONTROL <- "#B0B0B0"   # control / far         — grey
COL_POST    <- "#C14B28"   # post-treatment marker — rust
COL_ACCENT  <- "#E8A838"   # accent                — amber
COL_CONTROL <- "#B0B0B0"
COL_ALT <- "#C0392B"
COL_MAIN <- "#4C7EAF"
COL_POST <- "#C14B28"
COL_PRE <- "#4C7EAF"
COL_REF <- "grey40"
COL_ROB <- "#4CAF82"


#bring in grid files 
GRID_DIR <- "liberia_transmissiongridmapping"

lines    <- st_read(file.path(GRID_DIR, "Liberia_Powerline.shp"),          quiet = TRUE) |> st_transform(UTM29N)
towers   <- st_read(file.path(GRID_DIR, "Liberia_Powertower_withDEM.shp"), quiet = TRUE) |> st_transform(UTM29N)
plants   <- st_read(file.path(GRID_DIR, "Liberia_PowerPlant.shp"),         quiet = TRUE) |> st_transform(UTM29N)
stations <- st_read(file.path(GRID_DIR, "Liberia_Substation.shp"),         quiet = TRUE) |> st_transform(UTM29N)


# Filter to operational infrastructure only
# Drop 2 under-construction 225KV CLSG lines (regional interconnector)
lines_op <- lines |>
  filter(is.na(Comment) |
         !str_detect(Comment, regex("under construction", ignore_case = TRUE)))

stations_op <- stations |>
  filter(!str_detect(Business, regex("under construction", ignore_case = TRUE)))


# bring shape boundaries shape file 
liberia_adm1 <- st_read("Geo/liberia-county-boundary/", quiet = TRUE) |>
  st_transform(UTM29N)


# data prep and cleaning 

# filter by rounds 

round_4 <- afro_combined |> 
  filter(round=="r4")

round_5 <- afro_combined |> 
  filter(round=="r5")

round_6 <- afro_combined |> 
  filter(round=="r6")

round_7 <- afro_combined |> 
  filter(round=="r7")

round_8 <- afro_combined |> 
  filter(round=="r8")

round_9 <- afro_combined |> 
  filter(round=="r9")



# cleaning 

afro_combined <- afro_combined |> 
  mutate(latitude = coalesce(latitude, ea_gps_la)) |> 
  mutate(longitude = coalesce(longitude, ea_gps_lo)) |> 
  mutate(q94 = coalesce(q94, q97))


# export
# writexl::write_xlsx(afro_combined, "Clean/final_clean_data.xlsx")
# write_csv(afro_combined, "Clean/final_clean_data.csv")
# write_rds(afro_combined, "Clean/final_clean_data.rds")


# bring in clean data 

# dependent variable 
df <- afro_combined |> 
  mutate(
    electricity_access = ifelse(ea_svc_a ==1, 1, 0),
    urbanity = factor(urbrur, levels = 1:2, labels = c("Urban", "Rural")))


# education 
df <- df %>%
  mutate(
    round = as.character(round), 
    edu_src = case_when(
      round == "r4" ~ as.numeric(q89),
      round == "r5" ~ as.numeric(q97),
      round == "r6" ~ as.numeric(q97),
      round == "r7" ~ as.numeric(q97),
      round == "r8" ~ as.numeric(q97),
      round == "r9" ~ as.numeric(q94),
      TRUE       ~ NA_real_
    )
  )


# recode missings 
df <- df %>%
  mutate(education = recode_missing(edu_src)) 


# Build Categorical Education Variable 
df <- df |>
  mutate(
    education_cat = case_when(
      education %in% 0:3                  ~ 1,                 # low
      education %in% 4:6                  ~ 2,                 # medium
      education %in% 7:9                  ~ 3,                 # high
      TRUE                          ~ NA_real_
    ),
    education_cat = factor(
      education_cat,
      levels = c(1, 2, 3),
      labels = c("Low Education", "Medium Education", "High Education")
    )
  )



# employment 

df <- df %>%
  mutate(
    round = as.character(round), 
    employ = case_when(
      round == "r4" ~ as.numeric(q94),
      round == "r5" ~ as.numeric(q96),
      round == "r6" ~ as.numeric(q95),
      round == "r7" ~ as.numeric(q94),
      round == "r8" ~ as.numeric(q95a),
      round == "r9" ~ as.numeric(q93a),
      TRUE       ~ NA_real_
    )
  )

# recode missings and build employed variable 
df <- df |> 
  mutate(
    employment = recode_missing(employ), 
  
    employed = case_when(
    employment %in% c(2, 3, 4, 5) ~ 1L,  # employed (any)
    employment %in% c(0, 1)        ~ 0L,  # not employed
    TRUE ~ NA_integer_
    )
)




# asset ownership 

# TV 
df <- df |> 
  mutate(
    round = as.character(round), 
    tv = case_when(
      round == "r4" ~ as.numeric(q92b),
      round == "r5" ~ as.numeric(q90b),
      round == "r6" ~ as.numeric(q91b),
      round == "r7" ~ as.numeric(q89b),
      round == "r8" ~ as.numeric(q92b),
      round == "r9" ~ as.numeric(q90b),
      TRUE       ~ NA_real_
    )
  )



# recode missings and build TV variable 
df <- df |> 
  mutate(
    tv = recode_missing(tv, extra = c(9)), 
  
    tv_own = case_when(
    tv %in% c(1, 2) ~ 1L,  # own TV 
    tv %in% c(0)        ~ 0L,  # No TV
    TRUE ~ NA_integer_
    )
)



# gender  

# TV 
df <- df |> 
  mutate(
    round = as.character(round), 
    gender = case_when(
      round == "r4" ~ as.numeric(q101),
      round == "r5" ~ as.numeric(q101),
      round == "r6" ~ as.numeric(q101),
      round == "r7" ~ as.numeric(q101),
      round == "r8" ~ as.numeric(q101),
      round == "r9" ~ as.numeric(q100),
      TRUE       ~ NA_real_
    )
  )



# recode missings and build gender variable 
df <- df |>
  mutate(
    gender = recode_missing(gender),
    sex = case_when(
      gender == 1 ~ "Male",
      gender == 2 ~ "Female",
      TRUE        ~ NA_character_
    )
  )


# build county variable 
df <- df |>
  mutate(
    county = case_when(
      region == 380 ~ "Bomi",
      region == 381 ~ "Bong",
      region == 382 ~ "Gbarpolu",
      region == 383 ~ "Grand Bassa",
      region == 384 ~ "Grand Cape Mount",
      region == 385 ~ "Grand Gedeh",
      region == 386 ~ "Grand Kru",
      region == 387 ~ "Lofa",
      region == 388 ~ "Margibi",
      region == 389 ~ "Maryland",
      region == 390 ~ "Montserrado",
      region == 391 ~ "Nimba",
      region == 392 ~ "Rivercess",
      region == 393 ~ "River Gee",
      region == 394 ~ "Sinoe",
      TRUE         ~ NA_character_
    )
  )




# poverty

## food 

df <- df |> 
  mutate(
    round = as.character(round), 
    food = case_when(
      round == "r4" ~ as.numeric(q8a),
      round == "r5" ~ as.numeric(q8a),
      round == "r6" ~ as.numeric(q8a),
      round == "r7" ~ as.numeric(q8a),
      round == "r8" ~ as.numeric(q7a),
      round == "r9" ~ as.numeric(q6a),
      TRUE       ~ NA_real_
    )
  )



## water 

df <- df |> 
  mutate(
    round = as.character(round), 
    water = case_when(
      round == "r4" ~ as.numeric(q8b),
      round == "r5" ~ as.numeric(q8b),
      round == "r6" ~ as.numeric(q8b),
      round == "r7" ~ as.numeric(q8b),
      round == "r8" ~ as.numeric(q7b),
      round == "r9" ~ as.numeric(q6b),
      TRUE       ~ NA_real_
    )
  )


## medicine  

df <- df |> 
  mutate(
    round = as.character(round), 
    medicine = case_when(
      round == "r4" ~ as.numeric(q8c),
      round == "r5" ~ as.numeric(q8c),
      round == "r6" ~ as.numeric(q8c),
      round == "r7" ~ as.numeric(q8c),
      round == "r8" ~ as.numeric(q7c),
      round == "r9" ~ as.numeric(q6c),
      TRUE       ~ NA_real_
    )
  )


## fuel  

df <- df |> 
  mutate(
    round = as.character(round), 
    fuel = case_when(
      round == "r4" ~ as.numeric(q8d),
      round == "r5" ~ as.numeric(q8d),
      round == "r6" ~ as.numeric(q8d),
      round == "r7" ~ as.numeric(q8d),
      round == "r8" ~ as.numeric(q7d),
      round == "r9" ~ as.numeric(q6d),
      TRUE       ~ NA_real_
    )
  )

## cash  

df <- df |> 
  mutate(
    round = as.character(round), 
    cash = case_when(
      round == "r4" ~ as.numeric(q8e),
      round == "r5" ~ as.numeric(q8e),
      round == "r6" ~ as.numeric(q8e),
      round == "r7" ~ as.numeric(q8e),
      round == "r8" ~ as.numeric(q7e),
      round == "r9" ~ as.numeric(q6e),
      TRUE       ~ NA_real_
    )
  )



## build LPI 

build_lpi <- function(df) {
  df %>%
    mutate(
      lpi_food = recode_missing(food, extra = c(8, 9)),
      lpi_water = recode_missing(water, extra = c(8, 9)), 
      lpi_med = recode_missing(medicine, extra = c(8, 9)),
      lpi_fuel = recode_missing(fuel, extra = c(8, 9)),
      lpi_cash = recode_missing(cash, extra = c(8, 9)),
      
    # ── Count of valid (non-missing) items per respondent ──
      lpi_n_valid = rowSums(!is.na(cbind(
        lpi_food, lpi_water, lpi_med, lpi_fuel, lpi_cash
      ))),
      lpi = ifelse(
        lpi_n_valid == 5,
        rowMeans(cbind(lpi_food, lpi_water, lpi_med, lpi_fuel, lpi_cash),
                 na.rm = FALSE),
        NA_real_
      ),
      lpi_cat = case_when(
        is.na(lpi)       ~ NA_character_,
        lpi == 0         ~ "No lived poverty",
        lpi <= 1.0       ~ "Low lived poverty",
        lpi <= 2.0       ~ "Moderate lived poverty",
        lpi >  2.0       ~ "High lived poverty"
      ),

      lpi_cat = factor(lpi_cat, levels = c(
        "No lived poverty",
        "Low lived poverty",
        "Moderate lived poverty",
        "High lived poverty"
      )),
      lpi_high = case_when(
        is.na(lpi)  ~ NA_real_,
        lpi >= 2.20 ~ 1,
        TRUE        ~ 0
      ),

      freq_food  = if_else(lpi_food  >= 3, 1L, 0L, missing = NA_integer_),
      freq_water = if_else(lpi_water >= 3, 1L, 0L, missing = NA_integer_),
      freq_med   = if_else(lpi_med   >= 3, 1L, 0L, missing = NA_integer_),
      freq_fuel  = if_else(lpi_fuel  >= 3, 1L, 0L, missing = NA_integer_),
      freq_cash  = if_else(lpi_cash  >= 3, 1L, 0L, missing = NA_integer_)
    )
}


# add to dataframe  
df <- df %>%
  rename_with(tolower) %>%
  build_lpi() 


## merge 
df <- df  %>%
  mutate(
    # Merge the two weight column names into one
    withinwt = coalesce(
      suppressWarnings(as.numeric(withinwt)),
      suppressWarnings(as.numeric(withinwt_hh))
    ),
    wt_imputed = is.na(withinwt),
    withinwt   = if_else(is.na(withinwt) | withinwt <= 0, 1, withinwt)
  )

# verify 
df %>%
  group_by(round) %>%
  summarise(
    wt_class    = class(withinwt)[1],
    wt_min      = min(withinwt),
    wt_max      = max(withinwt),
    pct_imputed = mean(wt_imputed) * 100,
    .groups     = "drop"
  ) %>%
  kable(
    digits    = 2,
    caption   = "Round Weights Verification",
    col.names = c("Round", "Weight Class", "Min Weight", "Max Weight", "% Imputed")
  ) %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
  row_spec(0, bold = TRUE)

Round Weights Verification
Round	Weight Class	Min Weight	Max Weight
r4	numeric	0.09	3.96
r5	numeric	0.13	3.47
r6	numeric	0.06	3.51
r7	numeric	0.14	3.03
r8	numeric	0.08	2.41
r9	numeric	0.04	5.37

Code

# Validation of LPI

df %>%
  group_by(round) %>%
  summarise(
    n              = n(),
    lpi_mean       = weighted.mean(lpi, w = withinwt, na.rm = TRUE),
    pct_high_lpi   = weighted.mean(lpi_high, w = withinwt, na.rm = TRUE) * 100,
    pct_no         = weighted.mean(lpi_cat == "No lived poverty",
                                   w = withinwt, na.rm = TRUE) * 100,
    pct_low        = weighted.mean(lpi_cat == "Low lived poverty",
                                   w = withinwt, na.rm = TRUE) * 100,
    pct_moderate   = weighted.mean(lpi_cat == "Moderate lived poverty",
                                   w = withinwt, na.rm = TRUE) * 100,
    pct_high_cat   = weighted.mean(lpi_cat == "High lived poverty",
                                   w = withinwt, na.rm = TRUE) * 100,
    freq_food_pct  = weighted.mean(freq_food,  w = withinwt, na.rm = TRUE) * 100,
    freq_water_pct = weighted.mean(freq_water, w = withinwt, na.rm = TRUE) * 100,
    freq_med_pct   = weighted.mean(freq_med,   w = withinwt, na.rm = TRUE) * 100,
    freq_fuel_pct  = weighted.mean(freq_fuel,  w = withinwt, na.rm = TRUE) * 100,
    freq_cash_pct  = weighted.mean(freq_cash,  w = withinwt, na.rm = TRUE) * 100,
    .groups = "drop"
  ) %>%
  kable(
    digits    = 2,
    caption   = "LPI Category Distribution and Deprivation Dimensions by Round",
    col.names = c(
      "Round", "N", "Mean LPI", "% High LPI",
      "% No Poverty", "% Low", "% Moderate", "% High",
      "% Food", "% Water", "% Medical", "% Fuel", "% Cash"
    )
  ) %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
  add_header_above(c(
    " " = 4,
    "LPI Category (%)" = 4,
    "Frequent Shortage by Dimension (%)" = 5
  )) %>%
  row_spec(0, bold = TRUE)

LPI Category Distribution and Deprivation Dimensions by Round
				LPI Category (%)				Frequent Shortage by Dimension (%)
Round	N	Mean LPI	% High LPI	% No Poverty	% Low	% Moderate	% High	% Food	% Water	% Medical	% Fuel	% Cash
r4	1200	1.24	13.57	11.36	35.34	39.73	13.57	18.46	18.44	23.66	11.81	31.53
r5	1199	1.46	26.36	9.13	29.88	34.63	26.36	20.70	21.87	29.55	13.45	49.03
r6	1199	1.72	34.89	9.39	20.66	35.06	34.89	22.64	27.65	28.52	21.15	47.99
r7	1200	1.31	17.99	9.79	33.87	38.35	17.99	16.39	20.19	24.66	11.79	37.17
r8	1200	1.38	18.90	5.31	37.51	38.28	18.90	18.14	18.30	20.61	16.47	24.31
r9	1200	1.47	23.67	7.54	29.51	39.28	23.67	21.52	17.18	33.76	13.65	32.29

Code

# summary stats of LPI
lpi_summary <- df %>%
  group_by(round) %>%
  summarise(
    n            = n(),
    lpi_mean     = weighted.mean(lpi,      w = withinwt, na.rm = TRUE),
    pct_high_lpi = weighted.mean(lpi_high, w = withinwt, na.rm = TRUE) * 100,
    .groups = "drop"
  ) %>%
  mutate(
    lpi_change      = lpi_mean     - lag(lpi_mean),
    high_lpi_change = pct_high_lpi - lag(pct_high_lpi)
  )

# print LPI Summary 
lpi_summary %>%
  kable(
    digits  = 3,
    caption = "LPI Summary Statistics by Afrobarometer Round",
    col.names = c("Round", "N", "Mean LPI", "% High LPI", "Δ Mean LPI", "Δ % High LPI")
  ) %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
  row_spec(0, bold = TRUE)

LPI Summary Statistics by Afrobarometer Round
Round	N	Mean LPI	% High LPI	Δ Mean LPI	Δ % High LPI
r4	1200	1.238	13.569	NA	NA
r5	1199	1.461	26.361	0.223	12.791
r6	1199	1.716	34.888	0.255	8.527
r7	1200	1.311	17.989	-0.405	-16.899
r8	1200	1.375	18.901	0.064	0.912
r9	1200	1.474	23.666	0.098	4.765

Code

# Filter to operational infrastructure only
# Drop 2 under-construction 225KV CLSG lines (regional interconnector)
lines_op <- lines |>
  filter(is.na(Comment) |
         !str_detect(Comment, regex("under construction", ignore_case = TRUE)))

stations_op <- stations |>
  filter(!str_detect(Business, regex("under construction", ignore_case = TRUE)))

Figure 1: Conceptual Framework

Code

p <- grViz("
digraph conceptual_framework {
  graph [layout = dot, rankdir = TB]
  node [shape = box, style = filled, color = lightgrey]
  A [label = 'Household Socioeconomic Factors\n(Lived Poverty, Education, Employment)']
  B [label = 'Place-Based Factors\n(Urban/Rural, Proximity to Grid Infrastructure)']
  C [label = 'Electricity Access']
  D [label = 'Spatial Inequality\n(Bright vs Dark Zones)']
  E [label = 'Variance Decomposition\n(Within vs Between, ICC)']
  A -> C
  B -> C
  C -> D
  D -> E
}
")

p

Table 1: Observations per Round of Afrobarometer Data

	Round 4	Round 5	Round 6	Round 7	Round 8	Round 9
Overall Sample	1,200	1,199	1,199	1,200	1,200	1,200

Table 2: Descriptive Statistics: Accesss to Electricity in Liberia

Code

df_descptive <- df %>%
  filter(
    !is.na(electricity_access),
    !is.na(employed),
    !is.na(lpi_cat),
    !is.na(education_cat)) %>%
  mutate(
    round_label = case_when(
      round == "r4" ~ "Round 4",
      round == "r5" ~ "Round 5",
      round == "r6" ~ "Round 6",
      round == "r7" ~ "Round 7",
      round == "r8" ~ "Round 8",
      round == "r9" ~ "Round 9",
      TRUE          ~ as.character(round)
    ),
    electricity_access = case_when(
      electricity_access == 1 ~ "Has electricity access",
      electricity_access == 0 ~ "Does not have electricity access"
    ),
    employed = case_when(
      employed == 1 ~ "Employed",
      employed == 0 ~ "Not employed"
    )
  )


# helper to compute n (%) per variable
summarise_cat <- function(df, var, label) {
  df %>%
    count(level = as.character(.data[[var]])) %>%
    mutate(
      variable = label,
      stat     = paste0(n, " (", round(n / sum(n) * 100, 1), "%)")
    ) %>%
    select(variable, level, stat)
}

vars <- list(
  electricity_access = "Electricity Access",
  lpi_cat            = "Lived Poverty Index",
  education_cat      = "Education Level",
  employed           = "Employment Status",
  sex                = "Gender",
  urbanity           = "Urbanity",
  county             = "county",
  round_label        = "Survey Round"
)

tbl_desc <- map_dfr(names(vars), ~ summarise_cat(df_descptive, .x, vars[[.x]])) %>%
  rename(
    "Characteristic" = variable,
    " "              = level,
    "N (%)"          = stat
  )

tbl_desc %>%
  kable(
    caption = "Table 2: Descriptive Statistics: Access to Electricity in Liberia",
    align   = c("l", "l", "r")
  ) %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
  row_spec(0, bold = TRUE) %>%
  pack_rows(index = table(fct_inorder(tbl_desc$Characteristic))) %>%
  kableExtra::footnote(
    general       = "N = 7,013. N (%) reported for all categorical variables. Source: Afrobarometer rounds 4–9, Liberia. Author's own calculations.",
    general_title = "Notes:"
  )

Table 2: Descriptive Statistics: Access to Electricity in Liberia
Characteristic		N (%)
Electricity Access
Electricity Access	Does not have electricity access	4913 (70.1%)
Electricity Access	Has electricity access	2100 (29.9%)
Lived Poverty Index
Lived Poverty Index	High lived poverty	1591 (22.7%)
Lived Poverty Index	Low lived poverty	2185 (31.2%)
Lived Poverty Index	Moderate lived poverty	2662 (38%)
Lived Poverty Index	No lived poverty	575 (8.2%)
Education Level
Education Level	High Education	936 (13.3%)
Education Level	Low Education	3253 (46.4%)
Education Level	Medium Education	2824 (40.3%)
Employment Status
Employment Status	Employed	1537 (21.9%)
Employment Status	Not employed	5476 (78.1%)
Gender
Gender	Female	3504 (50%)
Gender	Male	3509 (50%)
Urbanity
Urbanity	Rural	3622 (51.6%)
Urbanity	Urban	3391 (48.4%)
county
county	Bomi	228 (3.3%)
county	Bong	569 (8.1%)
county	Gbarpolu	252 (3.6%)
county	Grand Bassa	423 (6%)
county	Grand Cape Mount	217 (3.1%)
county	Grand Gedeh	283 (4%)
county	Grand Kru	95 (1.4%)
county	Lofa	546 (7.8%)
county	Margibi	422 (6%)
county	Maryland	255 (3.6%)
county	Montserrado	2359 (33.6%)
county	Nimba	893 (12.7%)
county	River Gee	143 (2%)
county	Rivercess	136 (1.9%)
county	Sinoe	192 (2.7%)
Survey Round
Survey Round	Round 4	1169 (16.7%)
Survey Round	Round 5	1106 (15.8%)
Survey Round	Round 6	1162 (16.6%)
Survey Round	Round 7	1189 (17%)
Survey Round	Round 8	1192 (17%)
Survey Round	Round 9	1195 (17%)
Notes:
N = 7,013. N (%) reported for all categorical variables. Source: Afrobarometer rounds 4–9, Liberia. Author's own calculations.

Code

# export 

# as a png 
# tbl_desc %>%
#   kable(
#     caption = "Table 2: Descriptive Statistics: Access to Electricity in Liberia",
#     align   = c("l", "l", "r")
#   ) %>%
#   kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
#   row_spec(0, bold = TRUE) %>%
#   pack_rows(index = table(fct_inorder(tbl_desc$Characteristic))) %>%
#   footnote(
#     general       = "N = 7,013. N (%) reported for all categorical variables. Source: Afrobarometer rounds 4–9, Liberia. Author's own calculations.",
#     general_title = "Notes:"
#   ) %>%
#   save_kable("table2_descriptive.png", zoom = 2)  
# 
# as a table for docx 
# tbl_desc %>%
#   flextable() %>%
#   set_caption("Table 2: Descriptive Statistics: Access to Electricity in Liberia") %>%
#   bold(part = "header") %>%
#   set_table_properties(width = 1, layout = "autofit") %>%
#   footnote(
#     i = 1, j = 1,
#     value = as_paragraph("Notes: N = 7,013. N (%) reported for all categorical variables. Source: Afrobarometer rounds 4–9, Liberia. Author's own calculations."),
#     ref_symbols = " ",
#     part = "header"
#   ) %>%
#   save_as_docx(path = "table2_descriptive.docx")

Table 3: Key Sample Statistics by Survey Round

Code

time_table <- df_descptive |>
  group_by(round, round_label) |>
  summarise(
    n            = n(),
    pct_access   = round(100 * mean(electricity_access == "Has electricity access", na.rm = TRUE), 1),
    pct_urban    = round(100 * mean(urbanity == "Urban", na.rm = TRUE), 1),
    mean_lpi     = round(mean(as.numeric(lpi), na.rm = TRUE), 2),
    pct_employed = round(100 * mean(employed == "Employed", na.rm = TRUE), 1),
    .groups = "drop"
  ) |>
  arrange(round) |>
  select(round_label, n, pct_urban, pct_employed, mean_lpi, pct_access) |>
  rename(
    "Round" = round_label,
    "N"            = n,
    "% Urban"      = pct_urban,
    "% Employed"   = pct_employed,
    "Mean LPI"     = mean_lpi,
    "% Electricity access"     = pct_access
  )

time_table %>%
  kable(
    caption = "Table 3: Key Sample Statistics by Survey Round",
    align   = c("l", "r", "r", "r", "r", "r")
  ) %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
  row_spec(0, bold = TRUE) %>%
  kableExtra::footnote(
    general       = "Source: Afrobarometer rounds 4–9, Liberia. Author's own calculations.",
    general_title = "Notes:"
  )

Table 3: Key Sample Statistics by Survey Round
Round	N	% Urban	% Employed	Mean LPI	% Electricity access
Round 4	1169	47.7	18.4	1.27	9.2
Round 5	1106	49.0	27.2	1.48	19.3
Round 6	1162	48.5	27.6	1.74	32.4
Round 7	1189	47.9	16.0	1.30	31.4
Round 8	1192	48.7	21.8	1.38	40.0
Round 9	1195	48.2	20.9	1.47	46.2
Notes:
Source: Afrobarometer rounds 4–9, Liberia. Author's own calculations.

Figure 2: Access to Electricity in Liberia by Urbanity, 2008-2021

Code

fig_time_urban <- df_descptive |>
  mutate(
    elec_num   = as.numeric(electricity_access == "Has electricity access"),
    round_year = case_when(
      round == "r4" ~ 2008,
      round == "r5" ~ 2012,
      round == "r6" ~ 2014,
      round == "r7" ~ 2017,
      round == "r8" ~ 2019,
      round == "r9" ~ 2021
    )
  ) |>
  group_by(round_year, urbanity) |>
  summarise(
    elec_rate = mean(elec_num, na.rm = TRUE),
    se        = sd(elec_num, na.rm = TRUE) / sqrt(sum(!is.na(elec_num))),
    .groups   = "drop"
  ) |>
  filter(!is.na(urbanity)) |>
  ggplot(aes(x = round_year, y = elec_rate,
             colour = urbanity, group = urbanity)) +
  geom_ribbon(aes(ymin = elec_rate - 1.96 * se,
                  ymax = elec_rate + 1.96 * se,
                  fill = urbanity),
              alpha = 0.12, colour = NA) +
  geom_line(linewidth = 1.0) +
  geom_point(size = 3, shape = 21,
             aes(fill = urbanity), colour = "white", stroke = 1.5) +
  geom_text(
    aes(label = percent(elec_rate, accuracy = 1)),
    vjust       = -1.2,
    size        = 2.5,
    fontface    = "bold",
    show.legend = FALSE
  ) +
  geom_vline(xintercept = 2016.5, linetype = "dashed",
             colour = COL_POST, linewidth = 0.6) +
  annotate("text", x = 2017, y = 0.02,
           label = "Mount Coffee\nonline", hjust = 0,
           size = 2.7, colour = COL_POST) +
  scale_colour_manual(values = c("Urban" = COL_TREAT, "Rural" = COL_CONTROL)) +
  scale_fill_manual(  values = c("Urban" = COL_TREAT, "Rural" = COL_CONTROL)) +
  scale_y_continuous(labels = percent_format(accuracy = 1), limits = c(0, 1)) +
  scale_x_continuous(breaks = c(2008, 2012, 2014, 2017, 2019, 2021)) +
  coord_cartesian(ylim = c(0, 1)) +
  labs(
    title    = "Access to Electricity in Liberia by Urbanity, 2008–2021",
    subtitle = "Urban-rural gap widens significantly after Mount Coffee rehabilitation in 2016",
    x        = NULL, y = "Share with electricity access",
    colour   = NULL, fill = NULL
  ) +
  theme_thesis()

print(fig_time_urban)

Table 4: Electricity Access by county (Pooled 2008-2021, descending order by access rate)

Code

# Investigating county Access 

county_table <- df_descptive |>
  group_by(county) |>
  summarise(
    n            = n(),
    n_access     = sum(electricity_access == "Has electricity access", na.rm = TRUE),
    pct_access   = round(100 * mean(electricity_access == "Has electricity access", na.rm = TRUE), 1),
    mean_lpi     = round(mean(as.numeric(lpi), na.rm = TRUE), 2),
    pct_urban    = round(100 * mean(urbanity == "Urban", na.rm = TRUE), 1),
    .groups = "drop"
  ) |>
  arrange(desc(pct_access)) |>
  rename(
    county               = county,
    N        = n,
    "(N) with access"   = n_access,
    "% with access" = pct_access,
    "Mean LPI"           = mean_lpi,
    "% Urban"            = pct_urban
  )

county_table %>%
  kable(
    caption = "Table 4: Electricity Access by county (Pooled 2008-2021, descending order by access rate)",
    align   = c("l", "r", "r", "r", "r", "r")
  ) %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
  row_spec(0, bold = TRUE) %>%
  kableExtra::footnote(
    general       = "Source: Afrobarometer rounds 4–9, Liberia. Author's own calculations.",
    general_title = "Notes:"
  )

Table 4: Electricity Access by county (Pooled 2008-2021, descending order by access rate)
county	N	(N) with access	% with access	Mean LPI	% Urban
Montserrado	2359	1369	58.0	1.23	91.9
Grand Gedeh	283	110	38.9	1.41	33.9
Maryland	255	87	34.1	1.33	34.5
Margibi	422	133	31.5	1.61	44.3
Grand Bassa	423	79	18.7	1.66	25.3
Nimba	893	133	14.9	1.55	20.7
Bomi	228	32	14.0	1.66	28.1
Gbarpolu	252	32	12.7	1.51	18.7
Lofa	546	69	12.6	1.52	33.3
Sinoe	192	16	8.3	1.50	24.0
River Gee	143	8	5.6	1.53	28.0
Bong	569	24	4.2	1.62	29.0
Grand Cape Mount	217	8	3.7	1.46	3.7
Grand Kru	95	0	0.0	1.32	0.0
Rivercess	136	0	0.0	1.64	5.1
Notes:
Source: Afrobarometer rounds 4–9, Liberia. Author's own calculations.

Code

#export

# as a table for docx 

# county_table %>%
#   flextable() %>%
#   set_caption("Electricity Access by county (Pooled 2008-2021, descending order by access rate") %>%
#   bold(part = "header") %>%
#   set_table_properties(width = 1, layout = "autofit") %>%
#   footnote(
#     i = 1, j = 1,
#     value = as_paragraph("Source: Afrobarometer rounds 4–9, Liberia. Author's own calculations."),
#     ref_symbols = " ",
#     part = "header"
#   ) %>%
#   save_as_docx(path = "table4_descriptive.docx")

Figure 3: LPI Distribution by Electricity Access Status in Liberia

Code

fig_lpi <- df_descptive |>
  filter(!is.na(lpi_cat), !is.na(electricity_access)) |>
  mutate(
    access_label = if_else(electricity_access == "Has electricity access",
                           "Has access to electricity", "No access to electricity"),
    lpi_cat = factor(lpi_cat,
                     levels = c("No lived poverty", "Low lived poverty",
                                "Moderate lived poverty", "High lived poverty"))
  ) |>
  count(access_label, lpi_cat) |>
  group_by(access_label) |>
  mutate(pct = n / sum(n)) |>
  ungroup() |>
  ggplot(aes(x = lpi_cat, y = pct, fill = access_label)) +
  geom_col(position = "dodge", width = 0.7, alpha = 0.85) +
  geom_text(
    aes(label = percent(pct, accuracy = 0.1)),
    position    = position_dodge(width = 0.7),
    vjust       = -0.5,
    size        = 2.8,
    fontface    = "bold",
    show.legend = FALSE
  ) +
  scale_fill_manual(values = c("Has access to electricity" = COL_TREAT,
                               "No access to electricity"  = COL_CONTROL)) +
  scale_y_continuous(labels = percent_format(accuracy = 1)) +
  coord_cartesian(ylim = c(0, 0.5)) +
  labs(
    title    = "LPI Distribution by Electricity Access Status in Liberia",
    subtitle = "Households without electricity are disproportionately in high-poverty categories",
    x        = "LPI Category",
    y        = "Share of respondents",
    fill     = NULL
  ) +
  theme_thesis()

print(fig_lpi)

Figure 4: Electricity access by poverty level and education

Code

fig_edu_lpi <- df_descptive |>
  filter(!is.na(lpi_cat), !is.na(education_cat)) |>
  mutate(
    lpi_cat = factor(lpi_cat,
                     levels = c("No lived poverty", "Low lived poverty",
                                "Moderate lived poverty", "High lived poverty")),
    education_cat = factor(education_cat,
                           levels = c("Low Education", "Medium Education", "High Education"))
  ) |>
  group_by(lpi_cat, education_cat) |>
  summarise(
    elec_rate = mean(electricity_access == "Has electricity access", na.rm = TRUE),
    .groups   = "drop"
  ) |>
  ggplot(aes(x = lpi_cat, y = elec_rate, fill = education_cat)) +
  geom_col(position = "dodge", width = 0.75, alpha = 0.85) +
  geom_text(
    aes(label = percent(elec_rate, accuracy = 0.1)),
    position    = position_dodge(width = 0.75),
    vjust       = -0.5,
    size        = 2.8,
    fontface    = "bold",
    show.legend = FALSE
  ) +
  scale_fill_manual(values = c(
    "Low Education"    = "#B0C4D8",
    "Medium Education" = "#457B9D",
    "High Education"   = COL_TREAT
  )) +
  scale_y_continuous(labels = percent_format(accuracy = 1)) +
  coord_cartesian(ylim = c(0, 1)) +
  labs(
    title    = "Electricity Access by Poverty and Education Levels in Liberia",
    subtitle = "Higher education mitigates but does not eliminate the poverty-access gradient",
    x        = "LPI Category",
    y        = "Share with access to electricity",
    fill     = "Education level"
  ) +
  theme_thesis() +
  theme(axis.text.x = element_text(angle = 0, hjust = 0.5))

print(fig_edu_lpi)

Figure 5: Electricity access rate by county and survey round

Code

fig_heat <- df_descptive |>
  mutate(
    elec_num   = as.numeric(electricity_access == "Has electricity access"),
    round_year = case_when(
      round == "r4" ~ 2008,
      round == "r5" ~ 2012,
      round == "r6" ~ 2014,
      round == "r7" ~ 2017,
      round == "r8" ~ 2019,
      round == "r9" ~ 2021
    )
  ) |>
  group_by(county, round_year) |>
  summarise(elec_rate = mean(elec_num, na.rm = TRUE), .groups = "drop") |>
  mutate(county = fct_reorder(county, elec_rate, .fun = mean)) |>
  ggplot(aes(x = factor(round_year), y = county, fill = elec_rate)) +
  geom_tile(colour = "white", linewidth = 0.5) +
  geom_text(aes(label = paste0(round(elec_rate * 100), "%")),
            size = 2.5, colour = "white", fontface = "bold") +
  scale_fill_gradient(
    low    = "#EAF3FB",
    high   = COL_TREAT,
    name   = "Access rate",
    labels = percent_format(accuracy = 1)
  ) +
  scale_x_discrete(labels = c("2008\n(Round 4)", "2012\n(Round 5)", "2014\n(Round 6)",
                               "2017\n(Round 7)", "2019\n(Round 8)", "2021\n(Round 9)")) +
  labs(
    title    = "Electricity Access Rate by county and Survey Round",
    subtitle = "Sharp post-2016 increase in Montserrado; minimal changes elsewhere",
    x        = NULL, y = NULL
  ) +
  theme_thesis() +
  theme(
    panel.grid       = element_blank(),
    axis.text.y      = element_text(size = 8),
    legend.key.width = unit(1.5, "cm")
  )

print(fig_heat)

Access Maps per Round

Code

# bring in spatial data 
spatial <- df 

# recode missings 
spatial <- spatial |> 
  mutate(
    ea_svc_a = recode_missing(ea_svc_a, extra = c(9))
  )

# filter NAs 
spatial <- spatial |> 
  filter(!is.na(latitude)) |> 
  filter(!is.na(longitude))

#set coords 
spatial_sf <- st_as_sf(
  spatial,
  coords = c("longitude", "latitude"),
  crs = 4326
)

# transform
liberia_adm1 <- suppressMessages(st_transform(liberia_adm1, st_crs(spatial_sf)))


# round 4 

#load data 
spatial_r4 <- spatial |> 
  filter(round=="r4")


#set coords 
spatial_sf_r4 <- st_as_sf(
  spatial_r4,
  coords = c("longitude", "latitude"),
  crs = 4326
)


# transform
liberia_adm1_r4 <- suppressMessages(
  st_transform(liberia_adm1, st_crs(spatial_sf_r4))
) 


# Aggregate data to counties 
county_access_agg_r4 <- spatial_sf_r4 %>%
  st_join(liberia_adm1_r4) %>%
  st_drop_geometry() %>% 
  group_by(region) %>%
  summarise(
    access_rate = mean(ea_svc_a == 1, na.rm = TRUE),
    n = n(),
    .groups = "drop"
  )


# data prep 
liberia_access_r4 <- liberia_adm1_r4 %>%
  mutate(
    region = case_when(
      county == "Bomi"           ~ 380,
      county == "Bong"           ~ 381,
      county == "Gbarpolu"       ~ 382,
      county == "Grand Bassa"    ~ 383,
      county == "Grand Cape Mount" ~ 384,
      county == "Grand Gedeh"    ~ 385,
      county == "Grand Kru"      ~ 386,
      county == "Lofa"           ~ 387,
      county == "Margibi"        ~ 388,
      county == "Maryland"       ~ 389,
      county == "Montserrado"    ~ 390,
      county == "Nimba"          ~ 391,
      county == "Rivercess"      ~ 392,
      county == "River Gee"      ~ 393,
      county == "Sinoe"          ~ 394,
      TRUE ~ NA_real_
    )
  )

# Join to polygons and map
liberia_access_r4 <- liberia_access_r4 |> 
  mutate(region = as.character(region)) |> 
  left_join(county_access_agg_r4, by = "region")

aggregate_map_r4 <- ggplot() +
  geom_sf(data = liberia_access_r4, aes(fill = access_rate), color = "black", alpha = 0.8) +
  scale_fill_viridis_c(
    name = "Electricity Access Rate",
    labels = scales::percent,
    option = "plasma",  
    na.value = "grey90"
  ) +
  labs(title = "Electricity Access by county in Liberia (Round 4)") +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
    legend.position = "right"
  )

plot(aggregate_map_r4)

Code

# round 5 

#load data 
spatial_r5 <- spatial |> 
  filter(round=="r5")


#set coords 
spatial_sf_r5 <- st_as_sf(
  spatial_r5,
  coords = c("longitude", "latitude"),
  crs = 4326
)


# transform
liberia_adm1_r5 <- suppressMessages(
  st_transform(liberia_adm1, st_crs(spatial_sf_r5))
) 


# Aggregate data to counties (keep only attributes)
county_access_agg_r5 <- spatial_sf_r5 %>%
  st_join(liberia_adm1_r5) %>%
  st_drop_geometry() %>%  
  group_by(region) %>%
  summarise(
    access_rate = mean(ea_svc_a == 1, na.rm = TRUE),
    n = n(),
    .groups = "drop"
  )


# data prep 
liberia_access_r5 <- liberia_adm1_r5 %>%
  mutate(
    region = case_when(
      county == "Bomi"           ~ 380,
      county == "Bong"           ~ 381,
      county == "Gbarpolu"       ~ 382,
      county == "Grand Bassa"    ~ 383,
      county == "Grand Cape Mount" ~ 384,
      county == "Grand Gedeh"    ~ 385,
      county == "Grand Kru"      ~ 386,
      county == "Lofa"           ~ 387,
      county == "Margibi"        ~ 388,
      county == "Maryland"       ~ 389,
      county == "Montserrado"    ~ 390,
      county == "Nimba"          ~ 391,
      county == "Rivercess"      ~ 392,
      county == "River Gee"      ~ 393,
      county == "Sinoe"          ~ 394,
      TRUE ~ NA_real_
    )
  )

# Join to polygons and map
liberia_access_r5 <- liberia_access_r5 |> 
  mutate(region = as.character(region)) |> 
  left_join(county_access_agg_r5, by = "region")

aggregate_map_r5 <- ggplot() +
  geom_sf(data = liberia_access_r5, aes(fill = access_rate), color = "black", alpha = 0.8) +
  scale_fill_viridis_c(
    name = "Electricity Access Rate",
    labels = scales::percent,
    option = "plasma",  
    na.value = "grey90"
  ) +
  labs(title = "Electricity Access by county in Liberia (Round 5)") +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
    legend.position = "right"
  )

plot(aggregate_map_r5)

Code

# round 6 

#load data 
spatial_r6 <- spatial |> 
  filter(round=="r6")


#set coords 
spatial_sf_r6 <- st_as_sf(
  spatial_r6,
  coords = c("longitude", "latitude"),
  crs = 4326
)


# transform
liberia_adm1_r6 <- suppressMessages(
  st_transform(liberia_adm1, st_crs(spatial_sf_r6))
) 


# Aggregate data to counties (keep only attributes)
county_access_agg_r6 <- spatial_sf_r6 %>%
  st_join(liberia_adm1_r6) %>%
  st_drop_geometry() %>%  
  group_by(region) %>%
  summarise(
    access_rate = mean(ea_svc_a == 1, na.rm = TRUE),
    n = n(),
    .groups = "drop"
  )


# data prep 
liberia_access_r6 <- liberia_adm1_r6 %>%
  mutate(
    region = case_when(
      county == "Bomi"           ~ 380,
      county == "Bong"           ~ 381,
      county == "Gbarpolu"       ~ 382,
      county == "Grand Bassa"    ~ 383,
      county == "Grand Cape Mount" ~ 384,
      county == "Grand Gedeh"    ~ 385,
      county == "Grand Kru"      ~ 386,
      county == "Lofa"           ~ 387,
      county == "Margibi"        ~ 388,
      county == "Maryland"       ~ 389,
      county == "Montserrado"    ~ 390,
      county == "Nimba"          ~ 391,
      county == "Rivercess"      ~ 392,
      county == "River Gee"      ~ 393,
      county == "Sinoe"          ~ 394,
      TRUE ~ NA_real_
    )
  )

# Join to polygons and map
liberia_access_r6 <- liberia_access_r6 |> 
  mutate(region = as.character(region)) |> 
  left_join(county_access_agg_r6, by = "region")

aggregate_map_r6 <- ggplot() +
  geom_sf(data = liberia_access_r6, aes(fill = access_rate), color = "black", alpha = 0.8) +
  scale_fill_viridis_c(
    name = "Electricity Access Rate",
    labels = scales::percent,
    option = "plasma",  
    na.value = "grey90"
  ) +
  labs(title = "Electricity Access by county in Liberia (Round 6)") +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
    legend.position = "right"
  )

plot(aggregate_map_r6)

Code

# round 7 

#load data 
spatial_r7 <- spatial |> 
  filter(round=="r7")


#set coords 
spatial_sf_r7 <- st_as_sf(
  spatial_r7,
  coords = c("longitude", "latitude"),
  crs = 4326
)


# transform
liberia_adm1_r7 <- suppressMessages(
  st_transform(liberia_adm1, st_crs(spatial_sf_r7)) 
)


# Aggregate data to counties (keep only attributes)
county_access_agg_r7 <- spatial_sf_r7 %>%
  st_join(liberia_adm1_r7) %>%
  st_drop_geometry() %>%  
  group_by(region) %>%
  summarise(
    access_rate = mean(ea_svc_a == 1, na.rm = TRUE),
    n = n(),
    .groups = "drop"
  )


# data prep 
liberia_access_r7 <- liberia_adm1_r7 %>%
  mutate(
    region = case_when(
      county == "Bomi"           ~ 380,
      county == "Bong"           ~ 381,
      county == "Gbarpolu"       ~ 382,
      county == "Grand Bassa"    ~ 383,
      county == "Grand Cape Mount" ~ 384,
      county == "Grand Gedeh"    ~ 385,
      county == "Grand Kru"      ~ 386,
      county == "Lofa"           ~ 387,
      county == "Margibi"        ~ 388,
      county == "Maryland"       ~ 389,
      county == "Montserrado"    ~ 390,
      county == "Nimba"          ~ 391,
      county == "Rivercess"      ~ 392,
      county == "River Gee"      ~ 393,
      county == "Sinoe"          ~ 394,
      TRUE ~ NA_real_
    )
  )

# Join to polygons and map
liberia_access_r7 <- liberia_access_r7 |> 
  mutate(region = as.character(region)) |> 
  left_join(county_access_agg_r7, by = "region")

aggregate_map_r7 <- ggplot() +
  geom_sf(data = liberia_access_r7, aes(fill = access_rate), color = "black", alpha = 0.8) +
  scale_fill_viridis_c(
    name = "Electricity Access Rate",
    labels = scales::percent,
    option = "plasma",  
    na.value = "grey90"
  ) +
  labs(title = "Electricity Access by county in Liberia (Round 7)") +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
    legend.position = "right"
  )

plot(aggregate_map_r7)

Code

# round 8 

#load data 
spatial_r8 <- spatial |> 
  filter(round=="r8")


#set coords 
spatial_sf_r8 <- st_as_sf(
  spatial_r8,
  coords = c("longitude", "latitude"),
  crs = 4326
)


# transform
liberia_adm1_r8 <- suppressMessages(
 st_transform(liberia_adm1, st_crs(spatial_sf_r8)) 
) 


# Aggregate data to counties (keep only attributes)
county_access_agg_r8 <- spatial_sf_r8 %>%
  st_join(liberia_adm1_r8) %>%
  st_drop_geometry() %>%  
  group_by(region) %>%
  summarise(
    access_rate = mean(ea_svc_a == 1, na.rm = TRUE),
    n = n(),
    .groups = "drop"
  )


# data prep 
liberia_access_r8 <- liberia_adm1_r8 %>%
  mutate(
    region = case_when(
      county == "Bomi"           ~ 380,
      county == "Bong"           ~ 381,
      county == "Gbarpolu"       ~ 382,
      county == "Grand Bassa"    ~ 383,
      county == "Grand Cape Mount" ~ 384,
      county == "Grand Gedeh"    ~ 385,
      county == "Grand Kru"      ~ 386,
      county == "Lofa"           ~ 387,
      county == "Margibi"        ~ 388,
      county == "Maryland"       ~ 389,
      county == "Montserrado"    ~ 390,
      county == "Nimba"          ~ 391,
      county == "Rivercess"      ~ 392,
      county == "River Gee"      ~ 393,
      county == "Sinoe"          ~ 394,
      TRUE ~ NA_real_
    )
  )

# Join to polygons and map
liberia_access_r8 <- liberia_access_r8 |> 
  mutate(region = as.character(region)) |> 
  left_join(county_access_agg_r8, by = "region")

aggregate_map_r8 <- ggplot() +
  geom_sf(data = liberia_access_r8, aes(fill = access_rate), color = "black", alpha = 0.8) +
  scale_fill_viridis_c(
    name = "Electricity Access Rate",
    labels = scales::percent,
    option = "plasma",  
    na.value = "grey90"
  ) +
  labs(title = "Electricity Access by county in Liberia (Round 8)") +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
    legend.position = "right"
  )

plot(aggregate_map_r8)

Code

# round 9 

#load data 
spatial_r9 <- spatial |> 
  filter(round=="r9")


#set coords 
spatial_sf_r9 <- st_as_sf(
  spatial_r9,
  coords = c("longitude", "latitude"),
  crs = 4326
)


# transform
liberia_adm1_r9 <- suppressMessages(
  st_transform(liberia_adm1, st_crs(spatial_sf_r9))
) 


# Aggregate data to counties (keep only attributes)
county_access_agg_r9 <- spatial_sf_r9 %>%
  st_join(liberia_adm1_r9) %>%
  st_drop_geometry() %>%  
  group_by(region) %>%
  summarise(
    access_rate = mean(ea_svc_a == 1, na.rm = TRUE),
    n = n(),
    .groups = "drop"
  )


# data prep 
liberia_access_r9 <- liberia_adm1_r9 %>%
  mutate(
    region = case_when(
      county == "Bomi"           ~ 380,
      county == "Bong"           ~ 381,
      county == "Gbarpolu"       ~ 382,
      county == "Grand Bassa"    ~ 383,
      county == "Grand Cape Mount" ~ 384,
      county == "Grand Gedeh"    ~ 385,
      county == "Grand Kru"      ~ 386,
      county == "Lofa"           ~ 387,
      county == "Margibi"        ~ 388,
      county == "Maryland"       ~ 389,
      county == "Montserrado"    ~ 390,
      county == "Nimba"          ~ 391,
      county == "Rivercess"      ~ 392,
      county == "River Gee"      ~ 393,
      county == "Sinoe"          ~ 394,
      TRUE ~ NA_real_
    )
  )

# Join to polygons and map
liberia_access_r9 <- liberia_access_r9 |> 
  mutate(region = as.character(region)) |> 
  left_join(county_access_agg_r9, by = "region")

aggregate_map_r9 <- ggplot() +
  geom_sf(data = liberia_access_r9, aes(fill = access_rate), color = "black", alpha = 0.8) +
  scale_fill_viridis_c(
    name = "Electricity Access Rate",
    labels = scales::percent,
    option = "plasma",  
    na.value = "grey90"
  ) +
  labs(title = "Electricity Access by county in Liberia (Round 9)") +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
    legend.position = "right"
  )

plot(aggregate_map_r9)

Figure 6: Electricity Access by county in Liberia (2008-2021)

Code

# Pre-compute county centroids 
county_centroids <- suppressWarnings(st_centroid(liberia_adm1)) |>
  mutate(
    X = st_coordinates(geometry)[, 1],
    Y = st_coordinates(geometry)[, 2]
  ) |>
  st_drop_geometry()

# inset locator map 
africa <- ne_countries(continent = "Africa", returnclass = "sf")

inset_map <- ggplot(africa) +
  geom_sf(fill = "grey85", color = "white", linewidth = 0.2) +
  geom_sf(data = liberia_adm1, fill = "orange", color = NA) +
  theme_void() +
  theme(
    panel.border = element_rect(color = "grey40", fill = NA, linewidth = 0.5)
  )

# Shared scale limits 
access_limits <- c(0, 1)

# helpful function to build map
make_access_map <- function(data, round_label, show_legend = FALSE) {
  ggplot() +
    geom_sf(data = data, aes(fill = access_rate), color = "white", linewidth = 0.3) +
    scale_fill_viridis_c(
      name     = "Access Rate",
      labels   = scales::percent,
      option   = "plasma",
      limits   = access_limits,
      na.value = "grey90"
    ) +
    # county labels on every panel
    geom_text_repel(
      data          = county_centroids,
      aes(x = X, y = Y, label = county),
      size          = 1.6,
      color         = "white",
      fontface      = "bold",
      segment.size  = 0.2,
      segment.color = "grey60",
      max.overlaps  = 20,
      inherit.aes   = FALSE
    ) +
    labs(title = round_label) +
    theme_void() +
    theme(
      plot.title      = element_text(hjust = 0.5, size = 13, margin = margin(b = 5)),
      legend.position = if (show_legend) "right" else "none"
    )
}

# Build individual maps
aggregate_map_r4 <- make_access_map(liberia_access_r4, "Round 4")
aggregate_map_r5 <- make_access_map(liberia_access_r5, "Round 5")
aggregate_map_r6 <- make_access_map(liberia_access_r6, "Round 6", show_legend = TRUE)
aggregate_map_r7 <- make_access_map(liberia_access_r7, "Round 7")
aggregate_map_r8 <- make_access_map(liberia_access_r8, "Round 8")

# r9 carries scale bar, north arrow, and inset 
aggregate_map_r9 <- make_access_map(liberia_access_r9, "Round 9", show_legend = TRUE) +
  annotation_scale(
    location   = "bl",
    width_hint = 0.3,
    style      = "ticks",
    text_cex   = 0.6
  ) +
  annotation_north_arrow(
    location    = "tr",
    which_north = "true",
    style       = north_arrow_minimal(),
    height      = unit(0.8, "cm"),
    width       = unit(0.8, "cm")
  )

# Overlay inset on r9
aggregate_map_r9 <- aggregate_map_r9 +
  inset_element(inset_map, left = 0, bottom = 0.05, right = 0.32, top = 0.37)

# combine map
new_combined_map <- (aggregate_map_r4 | aggregate_map_r5 | aggregate_map_r6) /
                (aggregate_map_r7 | aggregate_map_r8 | aggregate_map_r9) +
  plot_annotation(
    title    = "Electricity Access by county in Liberia",
    subtitle = "Shows access rate for each county spaning rounds 4–9",
    theme = theme(
      plot.title    = element_text(hjust = 0.5, size = 16, face = "bold"),
      plot.subtitle = element_text(hjust = 0.5, size = 12, color = "black"),
      plot.margin   = margin(10, 10, 10, 10)
    )
  )

plot(new_combined_map)

Figure 7: Bright Zones and Dark Zones of Electricity Access in Liberia

Code

# Aggregate access rates
county_access_all_rounds <- spatial |>
  filter(round %in% c("r4", "r5", "r6", "r7", "r8", "r9")) |>
  st_as_sf(coords = c("longitude", "latitude"), crs = 4326) |>
  st_join(st_transform(liberia_adm1, 4326)) |>
  st_drop_geometry() |>
  rename(county = county.x) |>          # <-- fix column name conflict
  group_by(county, round) |>
  summarise(
    access_rate = mean(ea_svc_a == 1, na.rm = TRUE),
    n           = n(),
    .groups     = "drop"
  )

# Global breakpoints 
global_breaks <- classIntervals(
  county_access_all_rounds$access_rate,
  n     = 2,
  style = "equal"
)

# Apply zones 
county_access_zones <- county_access_all_rounds |>
  mutate(
    zone = cut(
      access_rate,
      global_breaks$brks,
      labels         = c("Dark Zone", "Bright Zone"),
      include.lowest = TRUE
    )
  ) |>
  filter(!is.na(county))

# Attach region codes 
liberia_adm1_coded <- liberia_adm1 |>
  mutate(
    region = case_when(
      county == "Bomi"             ~ "380",
      county == "Bong"             ~ "381",
      county == "Gbarpolu"         ~ "382",
      county == "Grand Bassa"      ~ "383",
      county == "Grand Cape Mount" ~ "384",
      county == "Grand Gedeh"      ~ "385",
      county == "Grand Kru"        ~ "386",
      county == "Lofa"             ~ "387",
      county == "Margibi"          ~ "388",
      county == "Maryland"         ~ "389",
      county == "Montserrado"      ~ "390",
      county == "Nimba"            ~ "391",
      county == "Rivercess"        ~ "392",
      county == "River Gee"        ~ "393",
      county == "Sinoe"            ~ "394",
      TRUE ~ NA_character_
    )
  )

# County population (2022 Liberia Census) 
liberia_population <- tibble(
  county = c("Bomi", "Bong", "Gbarpolu", "Grand Bassa", "Grand Cape Mount",
             "Grand Gedeh", "Grand Kru", "Lofa", "Margibi", "Maryland",
             "Montserrado", "Nimba", "Rivercess", "River Gee", "Sinoe"),
  population = c(133705, 467561, 95995, 293689, 178867,
                 216692, 109342, 367376, 304946, 172587,
                 1920965, 621841, 90819, 124653, 151149)
)

# County centroids with population 
county_centroids_pop <- suppressWarnings(st_centroid(liberia_adm1_coded)) |>
  mutate(
    X = st_coordinates(geometry)[, 1],
    Y = st_coordinates(geometry)[, 2]
  ) |>
  st_drop_geometry() |>
  left_join(liberia_population, by = "county")

# Build inset locator map 
africa <- ne_countries(continent = "Africa", returnclass = "sf")

inset_map <- ggplot(africa) +
  geom_sf(fill = "grey85", color = "white", linewidth = 0.2) +
  geom_sf(data = liberia_adm1, fill = "orange", color = NA) +
  theme_void() +
  theme(
    panel.border = element_rect(color = "grey40", fill = NA, linewidth = 0.5)
  )

# Join zones to polygons 
liberia_zones_rounds <- county_access_zones |>
  left_join(liberia_adm1_coded, by = "county") |>
  st_as_sf()

# Map building function 
make_zone_map <- function(data, round_label, show_legend = FALSE) {
  ggplot(data) +
    geom_sf(aes(fill = zone), color = "white", linewidth = 0.3) +
    scale_fill_manual(
      values       = c("Dark Zone" = "black", "Bright Zone" = "orange"),
      name         = "Electricity Access Zone",
      na.value     = "grey90",
      na.translate = FALSE,
      drop         = FALSE
    ) +
    geom_point(
      data        = county_centroids_pop,
      aes(x = X, y = Y, size = population),
      shape       = 21,
      fill        = "white",
      color       = "grey30",
      alpha       = 0.4,
      inherit.aes = FALSE
    ) +
    scale_size_area(
      name     = "Population",
      max_size = 10,
      labels   = scales::comma,
      breaks   = c(100000, 500000, 1000000, 1500000)
    ) +
    geom_text_repel(
      data          = county_centroids_pop,
      aes(x = X, y = Y, label = county),
      size          = 1.6,
      color         = "white",
      fontface      = "bold",
      segment.size  = 0.2,
      segment.color = "grey60",
      max.overlaps  = 20,
      inherit.aes   = FALSE
    ) +
    labs(title = round_label) +
    theme_void() +
    theme(
      plot.title      = element_text(hjust = 0.5, size = 11, face = "bold"),
      legend.title    = element_text(size = 10, face = "bold"),
      legend.text     = element_text(size = 9),
      legend.position = if (show_legend) "right" else "none"
    )
}

# Build panels 
zone_r4 <- make_zone_map(filter(liberia_zones_rounds, round == "r4"), "Round 4")
zone_r5 <- make_zone_map(filter(liberia_zones_rounds, round == "r5"), "Round 5")
zone_r6 <- make_zone_map(filter(liberia_zones_rounds, round == "r6"), "Round 6")
zone_r7 <- make_zone_map(filter(liberia_zones_rounds, round == "r7"), "Round 7")
zone_r8 <- make_zone_map(filter(liberia_zones_rounds, round == "r8"), "Round 8")

zone_r9 <- make_zone_map(
    filter(liberia_zones_rounds, round == "r9"), "Round 9", show_legend = TRUE
  ) +
  annotation_scale(
    location   = "bl",
    width_hint = 0.3,
    style      = "ticks",
    text_cex   = 0.6
  ) +
  annotation_north_arrow(
    location    = "tr",
    which_north = "true",
    style       = north_arrow_minimal(),
    height      = unit(0.8, "cm"),
    width       = unit(0.8, "cm")
  )

zone_r9 <- zone_r9 +
  inset_element(inset_map, left = 0, bottom = 0.05, right = 0.32, top = 0.37)

# Combine 
final_zone_plot <- (zone_r4 | zone_r5 | zone_r6) /
                   (zone_r7 | zone_r8 | zone_r9) +
  plot_annotation(
    title    = "Bright Zones and Dark Zones of Electricity Access in Liberia",
    subtitle = "Shows zone type and the population of each county",
    theme = theme(
      plot.title    = element_text(hjust = 0.5, size = 15, face = "bold"),
      plot.subtitle = element_text(hjust = 0.5, size = 11, color = "black"),
      plot.margin   = margin(10, 10, 10, 10)
    )
  )

plot(final_zone_plot)

Table 5: Determinants of Household Electricity Access in Liberia

Code

# data 
df_clean <- df |> 
  mutate(
    electricity_access = as.numeric(electricity_access),
    sex        = factor(sex, levels = c("Male", "Female")),
    employed = factor(employed, levels = c(0, 1), labels = c("Not employed", "Employed"))
  )

# coefficient labels
coef_labels <- c(
  "lpi_catLow lived poverty"      = "Low Lived Poverty",
  "lpi_catModerate lived poverty" = "Moderate Lived Poverty",
  "lpi_catHigh lived poverty"     = "High Lived Poverty",
  "education_catMedium Education" = "Medium Education",
  "education_catHigh Education"   = "High Education",
  "employedEmployed"            = "Employed",
  "sexFemale"                       = "Female",
  "urbanityRural"                 = "Rural (vs Urban)",
  "roundr5"                       = "Round 5",
  "roundr6"                       = "Round 6",
  "roundr7"                       = "Round 7",
  "roundr8"                       = "Round 8",
  "roundr9"                       = "Round 9"
)

# model 1: SES only
model_1 <- glmer(
  electricity_access ~ 
    lpi_cat + 
    education_cat + 
    employed + 
    sex +
    (1 | county),
  data   = df_clean,
  family = binomial(link = "logit")
)

# model 2: SES + geography
model_2 <- glmer(
  electricity_access ~ 
    lpi_cat + 
    education_cat + 
    employed + 
    sex +
    urbanity +
    (1 | county),
  data   = df_clean,
  family = binomial(link = "logit")
)

# model 3: full model
model_3 <- glmer(
  electricity_access ~ 
    lpi_cat + 
    education_cat + 
    employed + 
    sex +
    urbanity +
    round +
    (1 | county),
  data   = df_clean,
  family = binomial(link = "logit")
)

# ICC
get_icc <- function(model) {
  var_county <- as.numeric(VarCorr(model)$county[1])
  icc        <- var_county / (var_county + (pi^2 / 3))
  return(round(icc, 2))
}

icc_1 <- get_icc(model_1)
icc_2 <- get_icc(model_2)
icc_3 <- get_icc(model_3)

# table
modelsummary(
  list(
    "SES Only"             = model_1,
    "SES + Geography"      = model_2,
    "Full Model"           = model_3
  ),
  exponentiate = TRUE,
  fmt          = 2,
  coef_map     = coef_labels,
  statistic    = "({conf.low}, {conf.high})",
  stars        = c("*" = .1, "**" = .05, "***" = .01),
  gof_map      = data.frame(
    raw   = c("nobs", "AIC", "BIC"),
    clean = c("Observations", "AIC", "BIC"),
    fmt   = c(0, 1, 1)
  ),
  add_rows = data.frame(
    term             = "ICC (County)",
    "SES Only"       = icc_1,
    "SES + Geography" = icc_2,
    "Full Model"     = icc_3
  ),
  title = "Determinants of Household Electricity Access in Liberia",
  notes = "Odds ratios reported. 95% confidence intervals in parentheses. Reference categories: No lived poverty, Low Education, Not employed, Female, Urban, Round 4. Source: Afrobarometer rounds 4–9, Liberia; authors' calculations.")

Determinants of Household Electricity Access in Liberia
	SES Only	SES + Geography	Full Model
* p < 0.1, p < 0.05, * p < 0.01
Odds ratios reported. 95% confidence intervals in parentheses. Reference categories: No lived poverty, Low Education, Not employed, Female, Urban, Round 4. Source: Afrobarometer rounds 4–9, Liberia; authors' calculations.
Low Lived Poverty	0.87	0.88	0.68***
	(0.71, 1.08)	(0.70, 1.10)	(0.53, 0.88)
Moderate Lived Poverty	0.73***	0.76**	0.58***
	(0.59, 0.90)	(0.61, 0.95)	(0.45, 0.75)
High Lived Poverty	0.66***	0.75**	0.57***
	(0.52, 0.83)	(0.59, 0.95)	(0.43, 0.75)
Medium Education	1.71***	1.40***	1.48***
	(1.49, 1.95)	(1.21, 1.62)	(1.26, 1.74)
High Education	2.66***	1.91***	1.53***
	(2.21, 3.22)	(1.57, 2.33)	(1.23, 1.90)
Employed	1.06	0.96	0.92
	(0.92, 1.22)	(0.82, 1.11)	(0.78, 1.09)
Female	1.15**	1.08	1.06
	(1.02, 1.30)	(0.95, 1.23)	(0.92, 1.22)
Rural (vs Urban)		0.10***	0.07***
		(0.09, 0.12)	(0.06, 0.09)
Round 5			2.92***
			(2.22, 3.86)
Round 6			8.50***
			(6.47, 11.17)
Round 7			7.51***
			(5.75, 9.82)
Round 8			14.63***
			(11.09, 19.29)
Round 9			25.99***
			(19.58, 34.51)
Observations	7013	7013	7013
ICC (County)	0.38	0.26	0.32

Figure 8: Coefficient Plot for Robustness Check on Multilevel Logistic Regression Model

Code

# coefficient plots for both robustness models 


# Coefficient labels 
coef_labels_glmer <- c(
  "lpi_catLow lived poverty"      = "LPI: Low",
  "lpi_catModerate lived poverty" = "LPI: Moderate",
  "lpi_catHigh lived poverty"     = "LPI: High",
  "education_catMedium Education" = "Education: Medium",
  "education_catHigh Education"   = "Education: High",
  "employment1"                   = "Employed",
  "sexFemale"                     = "Female",
  "urbanityRural"                 = "Rural",
  "roundr5"                       = "Round 5",
  "roundr6"                       = "Round 6",
  "roundr7"                       = "Round 7",
  "roundr8"                       = "Round 8",
  "roundr9"                       = "Round 9"
)


# model 1
model_fe_logit <- suppressMessages(
  feglm(
  electricity_access ~ lpi_cat + education_cat + employment + sex + urbanity |
    county + round,
  data   = df_clean,
  family = binomial(link = "logit"), 
  cluster = ~county
))


# model 2 
df_no_capital <- df_clean %>%
  filter(county != "Montserrado")

model_no_capital <- suppressMessages(
  glmer(
  electricity_access ~ 
    lpi_cat + education_cat + employment + sex + urbanity + round +
    (1 | county),
  data = df_no_capital,
  family = binomial(link = "logit")
)) 


# combine model and make plot 
model_list_rob <- list(
  "Fixed Effect"          = model_fe_logit,
  "Excluding Montserrado" = model_no_capital
)

# colors
model_colours_rob <- c(
  "Fixed Effect"          = "#E05C5C",
  "Excluding Montserrado" = "#4C7EAF"
)

model_shapes_rob <- c(
  "Fixed Effect"          = 16,
  "Excluding Montserrado" = 17
)

# extra plot data 
pd_rob <- modelplot(
  model_list_rob,
  coef_map     = coef_labels_glmer, 
  exponentiate = TRUE,
  draw         = FALSE
)

# Add significance & factor ordering 
pd_rob <- pd_rob |>
  mutate(
    sig = case_when(
      p.value < 0.01 ~ "p < 0.01",
      p.value < 0.05 ~ "p < 0.05",
      p.value < 0.10 ~ "p < 0.10",
      TRUE           ~ "Not significant"
    ),
    sig   = factor(sig, levels = c("p < 0.01", "p < 0.05", "p < 0.10", "Not significant")),
    term  = factor(term, levels = rev(unique(term))),
    model = factor(model, levels = names(model_list_rob))
  )

# Build plot 
p_rob <- ggplot(pd_rob, aes(x = estimate, y = term,
                             xmin = conf.low, xmax = conf.high,
                             colour = model, shape = model,
                             alpha  = sig)) +
  geom_vline(xintercept = 1, linetype = "dashed", colour = "grey40") +
  geom_pointrange(position = position_dodge(width = 0.5), size = 0.55) +
  scale_colour_manual(values = model_colours_rob) +
  scale_shape_manual(values  = model_shapes_rob) +
  scale_alpha_manual(
    values = c(
      "p < 0.01"        = 1.00,
      "p < 0.05"        = 1.00,
      "p < 0.10"        = 0.55,
      "Not significant" = 0.20
    ),
    name = "Significance"
  ) +
  scale_x_log10() +
  labs(
    title   = "Robustness Check: Determinants of Household Electricity Access in Liberia",
    x       = "Odds Ratio",
    y       = NULL,
    colour  = "Model",
    shape   = "Model") +
  theme_bw(base_size = 12) +
  theme(
    legend.position  = "right",
    panel.grid.minor = element_blank(),
    axis.text.y      = element_text(hjust = 1),
    plot.title       = element_text(face = "bold", size = 13)
  )

p_rob

Code

# load clean data 
ab <- df_clean

ab <- ab |>
  mutate(
    # Round: was coded "r4"..."r9" — strip prefix
    round = as.integer(str_remove(round, "^r")),

    # Survey years
    year = case_when(
      round == 4 ~ 2008,
      round == 5 ~ 2012,
      round == 6 ~ 2014,
      round == 7 ~ 2017,
      round == 8 ~ 2019,
      round == 9 ~ 2021
    ),

    # Post-treatment: Mt. Coffee fully online by end of 2016
    post_2016 = as.integer(year >= 2017),

    # County names from region codes
    county = case_when(
      region == "380" ~ "Bomi",
      region == "381" ~ "Bong",
      region == "382" ~ "Gbarpolu",
      region == "383" ~ "Grand Bassa",
      region == "384" ~ "Grand Cape Mount",
      region == "385" ~ "Grand Gedeh",
      region == "386" ~ "Grand Kru",
      region == "387" ~ "Lofa",
      region == "388" ~ "Margibi",
      region == "389" ~ "Maryland",
      region == "390" ~ "Montserrado",
      region == "391" ~ "Nimba",
      region == "392" ~ "River Cess",
      region == "393" ~ "River Gee",
      region == "394" ~ "Sinoe",
      TRUE            ~ NA_character_
    ),

    # Electricity: ea_svc_a — observer-coded EA-level grid presence
    # 0 = absent, 1 = present, 9 = missing → NA
    electricity = case_when(
      ea_svc_a == "0" ~ 0L,
      ea_svc_a == "1" ~ 1L,
      TRUE            ~ NA_integer_
    ),

    # LPI: already harmonised across rounds
    lpi = as.numeric(lpi),

    # Coordinates: stored as character → numeric
    latitude  = as.numeric(latitude),
    longitude = as.numeric(longitude),

    # Urban: urbrur 1 = urban, 2 = rural → recode to binary
    urban = as.integer(urbrur == "1"),

    # Bounding box filter: Liberia lon -11.6 to -7.4, lat 4.3 to 8.6
    coords_ok = between(longitude, -12, -7) & between(latitude, 4, 9)
  )

# Audit
cat("\nCoordinate audit:\n");  ab |> count(coords_ok)    |> print()
cat("Electricity coding:\n"); ab |> count(electricity)   |> print()
cat("Round × year:\n");       ab |> count(round, year)   |> print()

# convert to sf, reproject, compute distances 
ab_sf <- ab |>
  filter(coords_ok, !is.na(longitude), !is.na(latitude)) |>
  st_as_sf(coords = c("longitude", "latitude"), crs = 4326) |>
  st_transform(UTM29N)

cat("\nCoordinate range after reproject (metres):\n")
cat("Easting: ",  range(st_coordinates(ab_sf)[, 1]), "\n")
cat("Northing:", range(st_coordinates(ab_sf)[, 2]), "\n")

cat("\nComputing distances — may take ~30s...\n")

dist_line <- st_distance(ab_sf, lines_op)     # n × 6 matrix
dist_mc   <- st_distance(ab_sf, plants)       # n × 1 (single plant)
dist_sub  <- st_distance(ab_sf, stations_op)  # n × 6 matrix

ab_sf <- ab_sf |>
  mutate(
    dist_line_m  = as.numeric(apply(dist_line, 1, min)),
    dist_line_km = dist_line_m / 1000,
    dist_mc_m    = as.numeric(dist_mc),
    dist_mc_km   = dist_mc_m / 1000,
    dist_sub_m   = as.numeric(apply(dist_sub, 1, min)),
    dist_sub_km  = dist_sub_m / 1000
  )

# Build analysis panel 
ab_panel <- ab_sf |>
  st_drop_geometry() |>
  mutate(
    treat_5km  = as.integer(dist_line_km <  5),
    treat_10km = as.integer(dist_line_km < 10),
    treat_20km = as.integer(dist_line_km < 20),

    did_5km  = treat_5km  * post_2016,
    did_10km = treat_10km * post_2016,
    did_20km = treat_20km * post_2016,

    # pmax() prevents log(0) when EA sits exactly on a line
    log_dist_line = log(pmax(dist_line_km, 0.01)),
    log_dist_mc   = log(pmax(dist_mc_km,   0.01)),
    log_dist_sub  = log(pmax(dist_sub_km,  0.01))
  )

# Diagnostics 
cat("\n=== Buffer coverage (unique EAs) ===\n")
ab_panel |>
  distinct(uniqueea, treat_5km, treat_10km, treat_20km) |>
  summarise(
    n_ea     = n(),
    n_5km    = sum(treat_5km,  na.rm = TRUE),
    n_10km   = sum(treat_10km, na.rm = TRUE),
    n_20km   = sum(treat_20km, na.rm = TRUE),
    pct_5km  = round(100 * n_5km  / n_ea, 1),
    pct_10km = round(100 * n_10km / n_ea, 1),
    pct_20km = round(100 * n_20km / n_ea, 1)
  ) |> print()

cat("\nCounties in 10km treatment buffer:\n")
ab_panel |>
  filter(treat_10km == 1) |>
  distinct(uniqueea, county) |>
  count(county, sort = TRUE) |> print()

# Confirm repeated cross-section structure (drives FE choice)
cat("\nEA appearances across rounds:\n")
ab_panel |>
  group_by(uniqueea) |>
  summarise(n_rounds = n_distinct(round), .groups = "drop") |>
  count(n_rounds) |> print()
# → ~390 EAs appear once; EA FEs are not feasible → use county + round FEs

Figure 9: Validating Parallel Trends Assumption

Code

# Event study
es_elec <- suppressMessages(
 feols(
  electricity ~ i(round, treat_10km, ref = 6) + urban | county + round,
  data    = ab_panel,
  cluster = ~county
)) 

# Primary DiD — buffer sensitivity

# 5km 
did_elec_5  <- suppressMessages(
 feols(electricity ~ did_5km  + urban | county + round,
                     data = ab_panel, cluster = ~county))

# 10km 
did_elec_10 <- suppressMessages(
 feols(electricity ~ did_10km + urban | county + round,
                     data = ab_panel, cluster = ~county))

# 20km
did_elec_20 <- suppressMessages(
 feols(electricity ~ did_20km + urban | county + round,
                     data = ab_panel, cluster = ~county)) 

# LPI — welfare channel
did_lpi_10 <- suppressMessages(
 feols(lpi ~ did_10km + urban | county + round,
                    data = ab_panel, cluster = ~county)) 

# Robustness: geographically adjacent counties only
nearby <- c("Montserrado", "Bomi", "Margibi", "Grand Bassa", "Grand Cape Mount")

did_nearby <- suppressMessages(
 feols(
  electricity ~ did_10km + urban | county + round,
  data    = ab_panel |> filter(county %in% nearby),
  cluster = ~county
))


# Robustness: county-specific linear time trends
did_trends <- suppressMessages(
 feols(
  electricity ~ did_10km + urban | county + round + county[round],
  data    = ab_panel,
  cluster = ~county
)) 

# IV / 2SLS

#1
iv_single <- suppressMessages(
 feols(
  lpi ~ urban | county + round | electricity ~ log_dist_line,
  data = ab_panel, cluster = ~county
))

#2
iv_double <- suppressMessages(
  feols(
  lpi ~ urban | county + round | electricity ~ log_dist_line + log_dist_mc,
  data = ab_panel, cluster = ~county
))




ct <- coeftable(es_elec)

es_coefs <- ct |>
  as.data.frame() |>
  rownames_to_column("term") |>
  rename(estimate = Estimate, std.error = `Std. Error`,
         statistic = `t value`, p.value = `Pr(>|t|)`) |>
  filter(str_detect(term, "treat_10km")) |>
  mutate(
    round_num = as.integer(str_extract(term, "(?<=round::)\\d+")),
    year      = case_when(
      round_num == 4 ~ 2008,
      round_num == 5 ~ 2012,
      round_num == 7 ~ 2017,
      round_num == 8 ~ 2019,
      round_num == 9 ~ 2021
    ),
    conf.low  = estimate - 1.96 * std.error,
    conf.high = estimate + 1.96 * std.error,
    post      = round_num >= 7
  ) |>
  # Add reference round (round 6, 2014) — all zeros by construction
  bind_rows(
    tibble(round_num = 6, year = 2014, estimate = 0,
           std.error = 0, conf.low = 0, conf.high = 0, post = FALSE)
  ) |>
  arrange(round_num)



# event study plot 

new_fig_es <- ggplot(es_coefs, aes(x = year, y = estimate)) +

  # Background shading 
  annotate("rect",
           xmin = -Inf, xmax = 2016.5,
           ymin = -Inf, ymax = Inf,
           fill = "#F7F7F7", alpha = 1) +

  # --- Zero line ---
  geom_hline(yintercept = 0, colour = "grey50",
             linewidth = 0.4, linetype = "solid") +

  # --- Treatment cutoff line ---
  geom_vline(xintercept = 2016.5,
             linetype = "dashed", colour = "grey30", linewidth = 0.45) +

  # --- CIs ---
  geom_linerange(aes(ymin = conf.low, ymax = conf.high, colour = post),
                 linewidth = 0.65) +

  # --- Points ---
  geom_point(aes(fill = post, colour = post),
             size = 3.2, shape = 21, stroke = 1.2) +

  # --- Reference point (open) ---
  geom_point(data = filter(es_coefs, round_num == 6),
             size = 3.2, shape = 21,
             fill = "white", colour = COL_REF, stroke = 1.2) +

  # --- Point labels (right side) ---
  ggrepel::geom_text_repel(
    data             = filter(es_coefs, round_num != 6),
    aes(label        = paste0(sprintf("%+.1f", estimate * 100), " pp"),
        colour       = post),
    size             = 2.8,
    fontface         = "plain",
    nudge_x          = 0.8,
    nudge_y          = 0,
    direction        = "y",
    hjust            = 0,
    segment.size     = 0.25,
    segment.colour   = "grey70",
    segment.linetype = "dotted",
    show.legend      = FALSE
  ) +

  # --- Region labels ---
  annotate("text", x = 2010.5, y = 0.54,
           label = "Pre-treatment", size = 2.6,
           colour = "grey45", fontface = "italic", hjust = 0.5) +
  annotate("text", x = 2019.5, y = 0.54,
           label = "Post-treatment", size = 2.6,
           colour = "grey45", fontface = "italic", hjust = 0.5) +

  # --- Mt. Coffee annotation ---
  annotate("text", x = 2016.7, y = -0.14,
           label = "Mt. Coffee\nonline (2016)",
           hjust = 0, size = 2.5,
           colour = "grey30", lineheight = 0.9) +

  # --- Scales ---
  scale_colour_manual(values = c("FALSE" = COL_PRE, "TRUE" = COL_POST)) +
  scale_fill_manual(  values = c("FALSE" = COL_PRE, "TRUE" = COL_POST)) +

  scale_x_continuous(
    breaks = c(2008, 2012, 2014, 2017, 2019, 2021),
    labels = c("2008", "2012", "2014\n(Reference Year)", "2017", "2019", "2021"),
    expand = expansion(mult = c(0.06, 0.12))   
  ) +

  scale_y_continuous(
    labels = function(x) paste0(round(x * 100), " pp"),
    expand = expansion(mult = 0.08)
  ) +

  coord_cartesian(ylim = c(-0.2, 0.6)) +

  labs(
    title    = "Event Study: Effect of Proximity to Grid Infrastructures on Household Electricity Access",
    subtitle = "10 km buffer treatment | Reference period: Round 6 (2014) | County + round FE, county-clustered SE",
    x        = NULL,
    y        = "Estimated effect (percentage points)",
    colour   = NULL, fill = NULL
  ) +

  theme_bw(base_size = 11) +
  theme(
    panel.grid.major.x = element_blank(),
    panel.grid.major.y = element_line(colour = "grey90", linewidth = 0.3),
    panel.grid.minor   = element_blank(),
    panel.border       = element_rect(colour = "grey80", linewidth = 0.4),

    axis.ticks         = element_line(colour = "grey70", linewidth = 0.3),
    axis.text          = element_text(size = 8.5, colour = "grey20"),
    axis.title.y       = element_text(size = 9, colour = "grey20",
                                      margin = margin(r = 8)),

    plot.title         = element_text(size = 10.5, face = "bold",
                                      colour = "grey10", margin = margin(b = 3)),
    plot.subtitle      = element_text(size = 8.5, colour = "grey40",
                                      margin = margin(b = 8)),
    plot.caption       = element_text(size = 7.5, colour = "grey45",
                                      hjust = 0, margin = margin(t = 6)),

    plot.margin        = margin(10, 14, 8, 10),
    legend.position    = "none"
  )


new_fig_es

Table 7: DiD and IV Effect on Electricity Access

Code

# main results table 

main_rows <- tribble(
  ~term,       ~`(1) Elec. 5km`, ~`(2) Elec. 10km`,
               ~`(3) Elec. 20km`, ~`(4) LPI`, ~`(5) LPI IV`,
  "County FE", "Yes", "Yes", "Yes", "Yes", "Yes",
  "Round FE",  "Yes", "Yes", "Yes", "Yes", "Yes"
)

modelsummary(
  list(
    "5km"  = did_elec_5,
    "10km" = did_elec_10,
    "20km" = did_elec_20,
    "LPI"        = did_lpi_10,
    "LPI IV"     = iv_single
  ),
  stars    = c("*" = 0.1, "**" = 0.05, "***" = 0.01),
  coef_map = c(
    "did_5km"         = "Treated × Post (5km)",
    "did_10km"        = "Treated × Post (10km)",
    "did_20km"        = "Treated × Post (20km)",
    "fit_electricity" = "Electricity Access (IV)",
    "urban"           = "Urban"
  ),
  gof_map = tribble(
    ~raw,         ~clean,            ~fmt,
    "nobs",       "Observations",    0,
    "r.squared",  "R²",              3,
    "ivf.stat",   "First-stage F",   1
  ),
  add_rows = main_rows,
  notes    = paste(
    "County-clustered SEs in parentheses.",
    "Treated = households in EAs centroid within Xkm of nearest operational 22/66KV line.",
    "Post = Afrobarometer rounds 7-9 (2017-2021).",
    "IV instrument: log(distance to nearest operational line).",
    "Source: Afrobarometer rounds 4–9, Liberia; authors' calculations."))

	5km	10km	20km	LPI	LPI IV
* p < 0.1, p < 0.05, * p < 0.01
County-clustered SEs in parentheses. Treated = households in EAs centroid within Xkm of nearest operational 22/66KV line. Post = Afrobarometer rounds 7-9 (2017-2021). IV instrument: log(distance to nearest operational line). Source: Afrobarometer rounds 4–9, Liberia; authors' calculations.
Treated × Post (5km)	0.533***
	(0.043)
Treated × Post (10km)		0.440***		-0.244***
		(0.040)		(0.065)
Treated × Post (20km)			0.349***
			(0.106)
Electricity Access (IV)					-0.621
					(0.622)
Urban	0.325***	0.329***	0.331***	-0.202***	0.009
	(0.041)	(0.054)	(0.058)	(0.027)	(0.205)
Observations	7126	7126	7126	7019	6988
R²	0.467	0.444	0.425	0.071	0.066
County FE	Yes	Yes	Yes	Yes	Yes
Round FE	Yes	Yes	Yes	Yes	Yes

Appendix D: Coefficient Plot for Multilevel Regression Analysis

Code

# coefficient plots with stars 


# Model list 
model_list <- list(
  "SES Only"        = model_1,
  "SES + Geography" = model_2,
  "Full Model"      = model_3
)

# Coefficient labels 
coef_labels_glmer <- c(
  "lpi_catLow lived poverty"      = "LPI: Low",
  "lpi_catModerate lived poverty" = "LPI: Moderate",
  "lpi_catHigh lived poverty"     = "LPI: High",
  "education_catMedium Education" = "Education: Medium",
  "education_catHigh Education"   = "Education: High",
  "employment1"                   = "Employed",
  "sexFemale"                     = "Female",
  "urbanityRural"                 = "Rural",
  "roundr5"                       = "Round 5",
  "roundr6"                       = "Round 6",
  "roundr7"                       = "Round 7",
  "roundr8"                       = "Round 8",
  "roundr9"                       = "Round 9"
)

# Colour + shape scales
model_colours <- c(
  "SES Only"        = "#E05C5C",
  "SES + Geography" = "#4CAF82",
  "Full Model"      = "#4C7EAF"
)

model_shapes <- c(
  "SES Only"        = 16,
  "SES + Geography" = 17,
  "Full Model"      = 15
)

# Extract plot data
pd <- modelplot(
  model_list,
  coef_map     = coef_labels_glmer,
  exponentiate = TRUE,
  draw         = FALSE
)

# Add significance & factor ordering directly 
pd <- pd |>
  mutate(
    sig = case_when(
      p.value < 0.01 ~ "p < 0.01",
      p.value < 0.05 ~ "p < 0.05",
      p.value < 0.10 ~ "p < 0.10",
      TRUE           ~ "Not significant"
    ),
    sig   = factor(sig, levels = c("p < 0.01", "p < 0.05", "p < 0.10", "Not significant")),
    term  = factor(term, levels = rev(unique(term))),
    model = factor(model, levels = names(model_list))
  )

# Build plot
p_glmer <- ggplot(pd, aes(x = estimate, y = term,
                           xmin = conf.low, xmax = conf.high,
                           colour = model, shape = model,
                           alpha  = sig)) +
  geom_vline(xintercept = 1, linetype = "dashed", colour = "grey40") +
  geom_pointrange(position = position_dodge(width = 0.5), size = 0.55) +
  scale_colour_manual(values = model_colours) +
  scale_shape_manual(values  = model_shapes) +
  scale_alpha_manual(
    values = c(
      "p < 0.01"        = 1.00,
      "p < 0.05"        = 1.00,
      "p < 0.10"        = 0.50,
      "Not significant" = 0.25
    ),
    name = "Significance"
  ) +
  scale_x_log10() +
  labs(
    title   = "Determinants of Household Electricity Access in Liberia",
    x       = "Odds Ratio",
    y       = NULL,
    colour  = "Model",
    shape   = "Model") +
  theme_bw(base_size = 12) +
  theme(
    legend.position  = "right",
    panel.grid.minor = element_blank(),
    axis.text.y      = element_text(hjust = 1),
    plot.title       = element_text(face = "bold", size = 13)
  )

p_glmer

Table 8: Robustness Check: Determinants of Electricity Access in Liberia

Code

# robustness check model 

modelsummary(
  list(
    "Fixed Effect" = model_fe_logit,
    "Excluding Montserrado" = model_no_capital
  ),
  
  exponentiate = TRUE,
  fmt = 2,
  
  coef_map = coef_labels,
  
  statistic = "({conf.low}, {conf.high})",
  
  stars = c('*' = .1, '**' = .05, '***' = .01),
  
  gof_map = data.frame(
  raw = c("nobs", "AIC", "BIC"),
  clean = c("Observations", "AIC", "BIC"),
  fmt = c(0, 1, 1)
),
  
  title = "Robustness Check: Determinants of Household Electricity Access in Liberia",
  
  notes = "Odds ratios reported. 95% confidence intervals in parentheses.")

Robustness Check: Determinants of Household Electricity Access in Liberia
	Fixed Effect	Excluding Montserrado
* p < 0.1, p < 0.05, * p < 0.01
Odds ratios reported. 95% confidence intervals in parentheses.
Low Lived Poverty	0.69	0.54***
	(0.44, 1.09)	(0.37, 0.80)
Moderate Lived Poverty	0.59*	0.43***
	(0.33, 1.06)	(0.29, 0.62)
High Lived Poverty	0.57*	0.43***
	(0.31, 1.07)	(0.29, 0.64)
Medium Education	1.46**	1.34***
	(1.02, 2.07)	(1.09, 1.66)
High Education	1.45**	1.62**
	(1.07, 1.95)	(1.12, 2.33)
Female	1.10***	1.13
	(1.03, 1.16)	(0.93, 1.37)
Rural (vs Urban)	0.08***	0.11***
	(0.03, 0.20)	(0.09, 0.13)
Round 5		3.05***
		(2.00, 4.65)
Round 6		10.44***
		(6.96, 15.65)
Round 7		1.69**
		(1.09, 2.64)
Round 8		6.41***
		(4.25, 9.66)
Round 9		9.56***
		(6.38, 14.32)
Observations	6790	4660

Appendix F: Coefficient Plot for DiD and IV Models

Code

# colors 
COL_MAIN <- "#4C7EAF"   # blue  — main / baseline specs
COL_ALT  <- "#C0392B"   # red   — alternative outcomes / IV
COL_ROB  <- "#4CAF82"   # green — robustness variants


# DID and IV Models 

# --- Model list ---
main_model_list <- list(
  "Elec. (5km)"  = did_elec_5,
  "Elec. (10km)" = did_elec_10,
  "Elec. (20km)" = did_elec_20,
  "LPI (10km)"   = did_lpi_10,
  "LPI (IV)"     = iv_single
)

# --- Coefficient labels ---
coef_labels_main <- c(
  "did_5km"         = "Treated × Post (5km)",
  "did_10km"        = "Treated × Post (10km)",
  "did_20km"        = "Treated × Post (20km)",
  "fit_electricity" = "Electricity Access (IV)",
  "urban"           = "Urban"
)

# --- Colour + shape by model ---
main_colours <- c(
  "Elec. (5km)"  = COL_MAIN,
  "Elec. (10km)" = COL_MAIN,
  "Elec. (20km)" = COL_MAIN,
  "LPI (10km)"   = COL_ALT,
  "LPI (IV)"     = COL_ALT
)

main_shapes <- c(
  "Elec. (5km)"  = 21,
  "Elec. (10km)" = 22,
  "Elec. (20km)" = 23,
  "LPI (10km)"   = 21,
  "LPI (IV)"     = 24
)

# --- Extract plot data ---
pd_main <- modelplot(
  main_model_list,
  coef_map = coef_labels_main,
  draw     = FALSE
) |>
  mutate(
    sig = case_when(
      p.value < 0.01 ~ "p < 0.01",
      p.value < 0.05 ~ "p < 0.05",
      p.value < 0.10 ~ "p < 0.10",
      TRUE           ~ "Not significant"
    ),
    sig   = factor(sig, levels = c("p < 0.01", "p < 0.05",
                                   "p < 0.10", "Not significant")),
    term  = factor(term, levels = rev(unique(term))),
    model = factor(model, levels = names(main_model_list))
  )

# --- Build plot ---
fig_main_coef <- ggplot(pd_main,
                        aes(x = estimate, y = term,
                            xmin = conf.low, xmax = conf.high,
                            colour = model, shape = model,
                            alpha  = sig)) +
  geom_vline(xintercept = 0, colour = "grey50",
             linewidth = 0.4, linetype = "dashed") +
  geom_linerange(position = position_dodge(width = 0.55),
                 linewidth = 0.6) +
  geom_point(aes(fill = model),
             position = position_dodge(width = 0.55),
             size = 3, stroke = 1.1) +
  scale_colour_manual(values = main_colours) +
  scale_fill_manual(  values = main_colours) +
  scale_shape_manual( values = main_shapes)  +
  scale_alpha_manual(
    values = c(
      "p < 0.01"        = 1.00,
      "p < 0.05"        = 1.00,
      "p < 0.10"        = 0.55,
      "Not significant" = 0.20
    ),
    name = "Significance"
  ) +
  labs(
    title    = "Main DiD and IV Results: Estimates of Treatment Effect",
    subtitle = "County + round FE throughout | County-clustered SE | 95% CIs shown",
    x        = "Coefficient estimate",
    y        = NULL,
    colour   = "Specification",
    shape    = "Specification",
    fill     = "Specification")+
  theme_bw(base_size = 11) +
  theme(
    panel.grid.major.x = element_line(colour = "grey90", linewidth = 0.3),
    panel.grid.major.y = element_blank(),
    panel.grid.minor   = element_blank(),
    panel.border       = element_rect(colour = "grey80", linewidth = 0.4),
    axis.ticks         = element_line(colour = "grey70", linewidth = 0.3),
    axis.text          = element_text(size = 8.5, colour = "grey20"),
    axis.title.x       = element_text(size = 9, colour = "grey20",
                                      margin = margin(t = 6)),
    plot.title         = element_text(size = 10.5, face = "bold",
                                      colour = "grey10", margin = margin(b = 3)),
    plot.subtitle      = element_text(size = 8.5, colour = "grey40",
                                      margin = margin(b = 8)),
    plot.caption       = element_text(size = 7.5, colour = "grey45",
                                      hjust = 0, margin = margin(t = 6)),
    legend.position    = "right",
    legend.text        = element_text(size = 8),
    legend.key.size    = unit(0.4, "cm"),
    plot.margin        = margin(10, 14, 8, 10)
  )


fig_main_coef

Table 9: Robustness of DiD Estimates

Code

# robustness check 

rob_rows <- tribble(
  ~term,                     ~`(1) Baseline`, ~`(2) Nearby cos.`,
                             ~`(3) Co. trends`, ~`(4) LPI DiD`, ~`(5) LPI IV`,
  "County FE",               "Yes", "Yes", "Yes", "Yes", "Yes",
  "Round FE",                "Yes", "Yes", "Yes", "Yes", "Yes",
  "County × round trends",   "No",  "No",  "Yes", "No",  "No",
  "Sample",                  "Full","Adjacent","Full","Full","Full"
)

modelsummary(
  list(
    "Baseline"    = did_elec_10,
    "Nearby Counties." = did_nearby,
    "County Trends"  = did_trends,
    "LPI DiD"     = did_lpi_10,
    "LPI IV"      = iv_single
  ),
  stars    = c("*" = 0.1, "**" = 0.05, "***" = 0.01),
  coef_map = c(
    "did_10km"        = "Treated × Post (10km)",
    "fit_electricity" = "Electricity Access (IV)",
    "urban"           = "Urban"
  ),
  gof_map = tribble(
    ~raw,        ~clean,          ~fmt,
    "nobs",      "Observations",  0,
    "r.squared", "R²",            3,
    "ivf.stat",  "First-stage F", 1
  ),
  add_rows = rob_rows,
  notes    = paste(
    "County-clustered SEs in parentheses.",
    "Col. (2) restricts to adjacent counties: Bomi, Margibi, Grand Bassa,",
    "Grand Cape Mount. Col. (3) adds county-specific linear time trends.",
    "IV Sargan-Hansen test p = 0.52 (instruments valid).",
    "LPI = Lived Poverty Index (higher = more deprived)."))

	Baseline	Nearby Counties.	County Trends	LPI DiD	LPI IV
* p < 0.1, p < 0.05, * p < 0.01
County-clustered SEs in parentheses. Col. (2) restricts to adjacent counties: Bomi, Margibi, Grand Bassa, Grand Cape Mount. Col. (3) adds county-specific linear time trends. IV Sargan-Hansen test p = 0.52 (instruments valid). LPI = Lived Poverty Index (higher = more deprived).
Treated × Post (10km)	0.440***	0.547***	0.295***	-0.244***
	(0.040)	(0.066)	(0.064)	(0.065)
Electricity Access (IV)					-0.621
					(0.622)
Urban	0.329***	0.336**	0.335***	-0.202***	0.009
	(0.054)	(0.120)	(0.056)	(0.027)	(0.205)
Observations	7126	3703	7126	7019	6988
R²	0.444	0.450	0.461	0.071	0.066
County FE	Yes	Yes	Yes	Yes	Yes
Round FE	Yes	Yes	Yes	Yes	Yes
County × round trends	No	No	Yes	No	No
Sample	Full	Adjacent	Full	Full	Full

Appendix H: Coefficient plot for robustness checks on DiD and IV

Code

# Robustness 

# models
rob_model_list <- list(
  "Baseline (10km)"       = did_elec_10,
  "Nearby counties"       = did_nearby,
  "County trends"         = did_trends,
  "LPI outcome"           = did_lpi_10,
  "LPI (IV)"              = iv_single
)

# Coefficient labels
coef_labels_rob <- c(
  "did_10km"        = "Treated × Post (10km)",
  "fit_electricity" = "Electricity Access (IV)",
  "urban"           = "Urban"
)

# Colour + shape by model 
rob_colours <- c(
  "Baseline (10km)"  = COL_MAIN,
  "Nearby counties"  = COL_ROB,
  "County trends"    = COL_ROB,
  "LPI outcome"      = COL_ALT,
  "LPI (IV)"         = COL_ALT
)

rob_shapes <- c(
  "Baseline (10km)"  = 21,
  "Nearby counties"  = 22,
  "County trends"    = 23,
  "LPI outcome"      = 21,
  "LPI (IV)"         = 24
)

# Extract plot data 
pd_rob <- modelplot(
  rob_model_list,
  coef_map = coef_labels_rob,
  draw     = FALSE
) |>
  mutate(
    sig = case_when(
      p.value < 0.01 ~ "p < 0.01",
      p.value < 0.05 ~ "p < 0.05",
      p.value < 0.10 ~ "p < 0.10",
      TRUE           ~ "Not significant"
    ),
    sig   = factor(sig, levels = c("p < 0.01", "p < 0.05",
                                   "p < 0.10", "Not significant")),
    term  = factor(term, levels = rev(unique(term))),
    model = factor(model, levels = names(rob_model_list))
  )

# Build plot 
fig_rob_coef <- ggplot(pd_rob,
                       aes(x = estimate, y = term,
                           xmin = conf.low, xmax = conf.high,
                           colour = model, shape = model,
                           alpha  = sig)) +
  geom_vline(xintercept = 0, colour = "grey50",
             linewidth = 0.4, linetype = "dashed") +
  geom_linerange(position = position_dodge(width = 0.55),
                 linewidth = 0.6) +
  geom_point(aes(fill = model),
             position = position_dodge(width = 0.55),
             size = 3, stroke = 1.1) +
  scale_colour_manual(values = rob_colours) +
  scale_fill_manual(  values = rob_colours) +
  scale_shape_manual( values = rob_shapes)  +
  scale_alpha_manual(
    values = c(
      "p < 0.01"        = 1.00,
      "p < 0.05"        = 1.00,
      "p < 0.10"        = 0.55,
      "Not significant" = 0.20
    ),
    name = "Significance"
  ) +
  labs(
    title    = "Robustness Checks: Treatment Effect Estimates",
    subtitle = "County + round FE throughout | County-clustered SE | 95% CIs shown",
    x        = "Coefficient estimate",
    y        = NULL,
    colour   = "Specification",
    shape    = "Specification",
    fill     = "Specification",
  ) +
  theme_bw(base_size = 11) +
  theme(
    panel.grid.major.x = element_line(colour = "grey90", linewidth = 0.3),
    panel.grid.major.y = element_blank(),
    panel.grid.minor   = element_blank(),
    panel.border       = element_rect(colour = "grey80", linewidth = 0.4),
    axis.ticks         = element_line(colour = "grey70", linewidth = 0.3),
    axis.text          = element_text(size = 8.5, colour = "grey20"),
    axis.title.x       = element_text(size = 9, colour = "grey20",
                                      margin = margin(t = 6)),
    plot.title         = element_text(size = 10.5, face = "bold",
                                      colour = "grey10", margin = margin(b = 3)),
    plot.subtitle      = element_text(size = 8.5, colour = "grey40",
                                      margin = margin(b = 8)),
    plot.caption       = element_text(size = 7.5, colour = "grey45",
                                      hjust = 0, margin = margin(t = 6)),
    legend.position    = "right",
    legend.text        = element_text(size = 8),
    legend.key.size    = unit(0.4, "cm"),
    plot.margin        = margin(10, 14, 8, 10)
  )

fig_rob_coef