Setup

Load Libraries

library(dplyr)
library(tidyverse)
library(ggplot2)
library(knitr)
library(readxl)
library(stringr)
library(countrycode)
library(data.table)
library(vtable)
library(corrplot)
library(ggcorrplot)
library(glmnet)
library(stargazer)
library(texreg)
library(marginaleffects)
library(modelsummary)
library(tinytex)

Import Data

### Import Case Data ###
# Citation: Gen, L., R. Z. Idris, A. U. Abubakar, and M. A. B. Omar (2024, May). Identity Dynamics and Diplomatic Narratives: Malaysian Chinese Perspectives. Revista de Gestão Social e Ambiental 18 (5), e07188. https://doi.org/10.24857/rgsa.v18n5-124. Accessed April 14, 2025.

case_source <- read_excel("Sources/WTO DS Case List.xlsx", sheet = 1) %>%
  select(1, 2, 3, 4, 5, 6, 7, 19, 20, 10, 11, 14, 15) %>%
  setNames(c("case", "comp", "resp", "title", "cons_req_date", "agreement", "policy", "mas_date", "mas_day", "panel_date", "panel_day", "report_date", "report_day"))


### Import Third Party Data ###
# Citation: Gen, L., R. Z. Idris, A. U. Abubakar, and M. A. B. Omar (2024, May). Identity Dynamics and Diplomatic Narratives: Malaysian Chinese Perspectives. Revista de Gestão Social e Ambiental 18 (5), e07188. https://doi.org/10.24857/rgsa.v18n5-124. Accessed April 14, 2025.

tp_source <- read_excel("Sources/WTO DS Case List.xlsx", sheet = 2) %>%
  select(1, 2) %>%
  setNames(c("case", "tp", "")) %>%
  separate('tp', into = paste0("tp", 1:35), sep = ",", fill = "right")


### Import DESTA Proximity Data ###
# Citation: Dür, A., L. Baccini, and M. Elsig (2014). The Design of International Trade Agreements: Introducing a New Dataset. The Review of International Organizations 9 (3), 353–375. Version 2.2 (2023). https://www.designoftradeagreements.org/downloads/. Accessed April 14, 2025.

proximity_source <- read_excel("Sources/Treaty Dyad List (DESTA).xlsx", sheet = 2) %>%
  select(1, 2, 6, 7, 11) %>%
  setNames(c("country1", "country2", "base_treaty", "name", "year"))


### Import DESTA Depth Data ###
# Citation: Dür, A., L. Baccini, and M. Elsig (2014). The Design of International Trade Agreements: Introducing a New Dataset. The Review of International Organizations 9 (3), 353–375. Version 2.2 (2023). https://www.designoftradeagreements.org/downloads/. Accessed April 14, 2025.
depth_source <- read_excel("Sources/Treaty Depth (DESTA).xlsx", sheet = 2) %>%
  select(2, 14)


# Import GDP Data ###
# Citation: World Bank (2024a). GDP (Current US$). World Development Indicators. https://databank.worldbank.org/reports.aspx?source=2&series=NY.GDP.MKTP.CD&country=#. Accessed April 14, 2025.

gdp_source <- read_excel("Sources/GDP (World Bank).xlsx", sheet = 1) %>%
  mutate(across(everything(), ~ na_if(.x, ".."))) %>%
  select(3, 5:27) %>%
  setNames(c("country", as.character(1995:2017)))


### Import GDP Per Capita Data ###
# Citation: World Bank (2024b). GDP Per Capita (Current US$). World Development Indicators. https://databank.worldbank.org/reports.aspx?source=2&series=NY.GDP.PCAP.CD&country=. Accessed April 14, 2025.

gdppc_source <- read_excel("Sources/GDP Per Capita (World Bank).xlsx", sheet = 1) %>%
  mutate(across(everything(), ~ na_if(.x, ".."))) %>%
  select(3, 5:27) %>%
  setNames(c("country", as.character(1995:2017)))


### Import V-Dem Data ###
# Citation: Coppedge, M., J. Gerring, C. H. Knutsen, S. I. Lindberg, J. Teorell, D. Altman, M. Bernhard, M. S. Fish, A. Glynn, A. Hicken, A. Lührmann, K. L. Marquardt, K. McMann, P. Paxton, D. Pemstein, B. Seim, R. Sigman, S.-E. Skaaning, J. Staton, A. Cornell, L. Gastaldi, H. Gjerløw, V. Mechkova, J. von Römer, A. Sundström, E. Tzelgov, L. Uberti, Y.-t. Wang, T. Wig, and D. Ziblatt (2024). V-dem [country-year] dataset v14. https://v-dem.net/data/the-v-dem-dataset/. Accessed April 14, 2025.

vdem_source <- read_csv("Sources/Libdem Score (VDem).csv") %>%
  select(1, 4, 27) %>%
  setNames(c("country", "year", "libdem"))
  

### Import Trade Data ###
# Citation: Barbieri, K. and O. M. G. O. Keshk (2016). Correlates of War Project Trade Data Set Codebook, Version 4.0. https://correlatesofwar.org/data-sets/bilateral-trade/. Accessed April 14, 2025.

trade_source <- read_csv("Sources/Trade Dyad (COW).csv") %>%
  select(3, 4, 5, 6, 7) %>%
  setNames(c("year", "country1", "country2", "one_two", "two_one"))


### Import Case Article and Issue Data ###
article_source <- read_excel("Sources/Case Article and Issue (Fawwaz Hafizh).xlsx", sheet = 1) %>%
  select (1, 4, 6)

Clean Data

### Use Country Code ###

eu_countries <- c("AUT", "BEL", "BGR", "HRV", "CYP", "CZE", "DNK", "EST", 
                  "FIN", "FRA", "DEU", "GRC", "HUN", "IRL", "ITA", "LVA", 
                  "LTU", "LUX", "MLT", "NLD", "POL", "PRT", "ROU", "SVK", 
                  "SVN", "ESP", "SWE")

convert_country_codes <- function(data, columns, origin = "country.name", destination = "iso3c") {
  for (col in columns) {
    data[[col]] <- countrycode(data[[col]], origin = origin, destination = destination, nomatch = NULL)
  }
  
  # If eu_countries is provided, label them as "EU"
  if (!is.null(eu_countries)) {
    data <- data %>%
      mutate(across(all_of(columns), ~ ifelse(. %in% eu_countries, "EU", .)))
  }
  
  return(data)
}
  
columns_to_convert1 <- c("comp", "resp")
case_clean <- convert_country_codes(case_source, columns_to_convert1)

columns_to_convert2 <- c(paste0("tp", 1:35))
tp_clean <- convert_country_codes(tp_source, columns_to_convert2)

columns_to_convert3 <- c("country1", "country2")
proximity_clean <- convert_country_codes(proximity_source, columns_to_convert3)
trade_clean <- convert_country_codes(trade_source, columns_to_convert3)

columns_to_convert4 <- c("country")
vdem_clean <- convert_country_codes(vdem_source, columns_to_convert4)
gdp_clean <- convert_country_codes(gdp_source, columns_to_convert4)
gdppc_clean <- convert_country_codes(gdppc_source, columns_to_convert4)


### Time Restriction: Cases Up to January 20, 2017 ###

case_clean <- case_clean %>%
  filter(panel_date < as.Date("2017-01-20") | (is.na(panel_date) & cons_req_date < as.Date("2017-01-20")))

vdem_clean <- vdem_clean %>%
  filter(year >= 1995 & year <= 2017)

trade_clean <- trade_clean %>%
  filter(year >= 1995 & year <= 2017)


### Recode Variables ###

standardize <- function(column) {
  (column - min(column, na.rm = TRUE)) / (max(column, na.rm = TRUE) - min(column, na.rm = TRUE))
}
depth_source$depth_idx <- standardize(depth_source$depth_rasch)
depth_clean <- depth_source

gdp_clean <- gdp_clean %>%
  pivot_longer(cols = 2:24, names_to = "year", values_to = "gdp")
gdp_clean$year <- as.numeric(gdp_clean$year)

gdppc_clean <- gdppc_clean %>%
  pivot_longer(cols = 2:24, names_to = "year", values_to = "gdp_percap")
gdppc_clean$year <- as.numeric(gdppc_clean$year)

article_source <- article_source %>%
  setNames(c("case", "xxii_cited", "systemic_cited"))

Create Master Dataset and Party Weights

master_unweighted <- left_join(case_clean, tp_clean, by = "case") %>%
  group_by(case) %>%
  summarise(
    resp = first(resp),
    title = first(title),
    cons_req_date = first(cons_req_date),
    agreement = first(agreement),
    policy = first(policy),
    mas_date = first(mas_date),
    mas_day = first(mas_day),
    panel_date = first(panel_date),
    panel_day = first(panel_day),
    report_date = first(panel_date),
    report_day = first(panel_day),
    across(starts_with("tp"), first),
    comp_list = list(unique(comp[!is.na(comp)]))
  ) %>%
  
  # Expand comp_list into multiple rows
  
  unnest_longer(comp_list, values_to = "comp") %>%
  group_by(case) %>%
  mutate(comp_idx = row_number()) %>%
  ungroup() %>%
  mutate(year = as.numeric(format(as.Date(cons_req_date, format = "%Y-%m-%d"), "%Y")))


### Create GDP of Third Parties ###

master_gdp <- master_unweighted

third_party_columns <- grep("^tp[0-9]+$", colnames(master_unweighted), value = TRUE)

for (gdp_col in third_party_columns) {
  gdp_col_name <- paste0(gdp_col, "_gdp")
  master_gdp <- master_gdp %>%
    left_join(gdp_clean, by = setNames(c("country", "year"), c(gdp_col, "year")), multiple = "first") %>%
    rename(!!gdp_col_name := gdp)
}

main_party_columns <- c("resp", "comp")

for (gdp_col in main_party_columns) {
  gdp_col_name <- paste0(gdp_col, "_gdp")
  master_gdp <- master_gdp %>%
    left_join(gdp_clean, by = setNames(c("country", "year"), c(gdp_col, "year")), multiple = "first") %>%
    rename(!!gdp_col_name := gdp)
}


### Create Weight Measures ###

tp_weight <- master_gdp
tp_gdp_columns <- grep("^tp[0-9]+_gdp$", colnames(tp_weight), value = TRUE)
tp_weight[tp_gdp_columns] <- lapply(tp_weight[tp_gdp_columns], as.numeric)

tp_weight <- tp_weight %>%
  mutate(total_tp_gdp = rowSums(select(., all_of(tp_gdp_columns)), na.rm = TRUE))

for (col in tp_gdp_columns) {
  weight_col_name <- gsub("_gdp", "_weight", col)
  tp_weight[[weight_col_name]] <- tp_weight[[col]] / tp_weight$total_tp_gdp
} 

tp_weight <- tp_weight %>%
  select("case", "comp_idx", "total_tp_gdp", paste0("tp", 1:35, "_weight"))


main_weight <- master_gdp
main_gdp_columns <- c("resp_gdp", "comp_gdp")
main_weight[main_gdp_columns] <- lapply(main_weight[main_gdp_columns], as.numeric)

main_weight <- main_weight %>%
  mutate(total_main_gdp = rowSums(select(., all_of(main_gdp_columns)), na.rm = TRUE))

for (col in main_gdp_columns) {
  weight_col_name <- paste0(col, "_weight")
  main_weight[[weight_col_name]] <- main_weight[[col]] / main_weight$total_main_gdp
} 

main_weight <- main_weight %>%
  select("case", "comp_idx", "resp_gdp", "comp_gdp", "total_main_gdp", "resp_gdp_weight", "comp_gdp_weight") %>%
  rename(
    "resp_weight" = "resp_gdp_weight",
    "comp_weight" = "comp_gdp_weight"
  )


master <- left_join(master_unweighted, tp_weight, by = c("case", "comp_idx")) %>%
  left_join(main_weight, by = c("case", "comp_idx"))

names(master) <- gsub("^(tp[0-9]+)$", "\\1_name", names(master))

write_csv(master, "Data/Master.csv")


tp_long <- master %>%
  select(case, year, comp, comp_idx, resp, matches("^tp[0-9]+_(name|weight)$")) %>%
  pivot_longer(
    cols = matches("^tp[0-9]+_(name|weight)$"),
    names_to = c("tp_idx", ".value"),
    names_pattern = "(tp[0-9]+)_(name|weight)"
  ) %>%
  select(case, comp_idx, year, resp, comp, tp_idx, name, weight) %>%
  rename(
    tp_name = name,
    tp_weight = weight
  )

write_csv(tp_long, "Data/TP Long List.csv")


main_long <- master %>%
  select(case, year, comp, comp_idx, resp, resp_weight, comp_weight) %>%
  pivot_longer(
    cols = ends_with("_weight"),
    names_to = c("main_idx", ".value"),
    names_pattern = "(.*)_(.*)"
  ) %>%
  select(case, comp_idx, year, resp, comp, main_idx, weight) %>%
  rename(
    resp_name = resp,
    comp_name = comp,
    main_weight = weight
  )

write_csv(main_long, "Data/MP Long List.csv")

main_long2 <- main_long %>%
  mutate(main_name = if_else(main_idx == "resp", resp_name, comp_name)) %>%
  select(-c("resp_name", "comp_name"))

Wrangle Data

Third Party Count and Dummy

### Count Third Parties ###

tp_count <- tp_long %>%
  group_by(case, comp_idx) %>%
  summarise(tp_count = sum(!is.na(tp_name))) %>%
  left_join(master, by = c("case", "comp_idx")) %>%
  select(case, comp_idx, tp_count)

write_csv(tp_count, "Data/TP Count.csv")

### Create Dummy for Third Parties ###

tp_dummy <- tp_count %>%
  mutate(tp_dummy = ifelse(tp_count > 0, 1, 0))

MTA Count and Depth

### Prepare for MTA Count and Depth ###

mtas <- left_join(proximity_clean, depth_clean, by = "base_treaty") %>%
  select(country1, country2, base_treaty, year, depth_idx) %>%
  filter(!is.na(year)) %>%
  mutate(year = as.integer(year))

years <- 1995:2017
all_years_pairs <- mtas %>%
  distinct(country1, country2) %>%
  crossing(year = years)

mtas_long <- mtas %>%
  inner_join(all_years_pairs, by = c("country1", "country2")) %>%
  filter(year.x <= year.y) %>%
  rename(treaty_year = year.x, year = year.y)

master_unweighted$year <- as.numeric(format(as.Date(master_unweighted$cons_req_date, format = "%Y-%m-%d"), "%Y"))

names(master) <- gsub("^(tp[0-9]+)$", "\\1_name", names(master))

write_csv(mtas_long, "Data/MTAs Long.csv")


### Find MTA Cumulative Count ###                       

# Compute cumulative count
cumulative_mta_count <- mtas_long %>%
  group_by(country1, country2, year) %>%
  summarise(mta_count = n_distinct(base_treaty), .groups = "drop")

cumulative_mta_count_full <- all_years_pairs %>%
  left_join(cumulative_mta_count, by = c("country1", "country2", "year")) %>%
  mutate(mta_count = replace_na(mta_count, 0)) %>%
  arrange(country1, country2, year)


# Evaluate Respondent - Third Party Cumulative MTA Depth #

tp_mtac_1 <- tp_long %>%
  left_join(cumulative_mta_count_full, by = c("resp" = "country1", "tp_name" = "country2", "year")) %>%
  mutate(mtac = mta_count * tp_weight) %>%
  select(case, comp_idx, mtac)

tp_mtac_2 <- tp_long %>%
  left_join(cumulative_mta_count_full, by = c("tp_name" = "country1", "resp" = "country2", "year")) %>%
  mutate(mtac = mta_count * tp_weight) %>%
  select(case, comp_idx, mtac)

resp_mtac <- bind_rows(tp_mtac_1, tp_mtac_2) %>%
  group_by(case, comp_idx) %>%
  summarise(total_resp_mtac = sum(mtac, na.rm = TRUE), .groups = "drop")

# Evaluate Complainant - Third Party Cumulative MTA Depth #

comp_mtac_1 <- tp_long %>%
  left_join(cumulative_mta_count_full, by = c("comp" = "country1", "tp_name" = "country2", "year")) %>%
  mutate(mtac = mta_count * tp_weight) %>%
  select(case, comp_idx, mtac)

comp_mtac_2 <- tp_long %>%
  left_join(cumulative_mta_count_full, by = c("tp_name" = "country1", "comp" = "country2", "year")) %>%
  mutate(mtac = mta_count * tp_weight) %>%
  select(case, comp_idx, mtac)

comp_mtac <- bind_rows(comp_mtac_1, comp_mtac_2) %>%
  group_by(case, comp_idx) %>%
  summarise(total_comp_mtac = sum(mtac, na.rm = TRUE), .groups = "drop")

# Evaluate Respondent - Complainant Cumulative MTA Depth #

main_mtac_1 <- main_long %>%
  left_join(cumulative_mta_count_full, by = c("comp_name" = "country1", "resp_name" = "country2", "year")) %>%
  mutate(mtac = mta_count) %>%
  select(case, comp_idx, mtac)

main_mtac_2 <- main_long %>%
  left_join(cumulative_mta_count_full, by = c("resp_name" = "country1", "comp_name" = "country2", "year")) %>%
  mutate(mtac = mta_count) %>%
  select(case, comp_idx, mtac)

main_mtac <- bind_rows(main_mtac_1, main_mtac_2) %>%
  group_by(case, comp_idx) %>%
  slice_tail(n = 1) %>%
  ungroup() %>%
  rename(total_main_mtac = mtac)


# Combine and Summarize Repondent and Complainant #

master_mta_count <- left_join(resp_mtac, comp_mtac, by = c("case", "comp_idx")) %>%
  left_join(main_mtac, by = c("case", "comp_idx")) %>%
  mutate(
    tp_total_resp_mtac = coalesce(total_resp_mtac, 0),
    tp_total_comp_mtac = coalesce(total_comp_mtac, 0),
    tp_total_mtac = tp_total_resp_mtac + tp_total_comp_mtac,
    main_total_mtac = coalesce(total_main_mtac, 0),
    total_mtac = tp_total_mtac + main_total_mtac,      ### ADDED THIS!
    mtac_all_ratio = tp_total_mtac/total_mtac,
    mtac_tp_mp_ratio = tp_total_mtac/main_total_mtac
  )

master_mta_count$mtac_tp_mp_ratio[is.nan(master_mta_count$mtac_tp_mp_ratio)] <- 0
master_mta_count$mtac_tp_mp_ratio[is.infinite(master_mta_count$mtac_tp_mp_ratio)] <- max(master_mta_count$mtac_tp_mp_ratio[is.finite(master_mta_count$mtac_tp_mp_ratio)], na.rm = TRUE)

write_csv(master_mta_count, "Data/MTA Count.csv")


### Find Cumulative MTA Depth ###

# Compute cumulative depth
cumulative_mta_depth <- mtas_long %>%
  group_by(country1, country2, year) %>%
  summarise(cumulative_depth_idx = sum(depth_idx, na.rm = TRUE), .groups = "drop")

cumulative_mta_depth_full <- all_years_pairs %>%
  left_join(cumulative_mta_depth, by = c("country1", "country2", "year")) %>%
  mutate(cumulative_depth_idx = replace_na(cumulative_depth_idx, 0)) %>%
  arrange(country1, country2, year)


# Evaluate Respondent - Third Party Cumulative MTA Depth #

tp_mtad_1 <- tp_long %>%
  left_join(cumulative_mta_depth_full, by = c("resp" = "country1", "tp_name" = "country2", "year")) %>%
  mutate(mtad = cumulative_depth_idx * tp_weight) %>%
  select(case, comp_idx, mtad)

tp_mtad_2 <- tp_long %>%
  left_join(cumulative_mta_depth_full, by = c("tp_name" = "country1", "resp" = "country2", "year")) %>%
  mutate(mtad = cumulative_depth_idx * tp_weight) %>%
  select(case, comp_idx, mtad)

resp_mtad <- bind_rows(tp_mtad_1, tp_mtad_2) %>%
  group_by(case, comp_idx) %>%
  summarise(total_resp_mtad = sum(mtad, na.rm = TRUE), .groups = "drop")

# Evaluate Complainant - Third Party Cumulative MTA Depth #

comp_mtad_1 <- tp_long %>%
  left_join(cumulative_mta_depth_full, by = c("comp" = "country1", "tp_name" = "country2", "year")) %>%
  mutate(mtad = cumulative_depth_idx * tp_weight) %>%
  select(case, comp_idx, mtad)

comp_mtad_2 <- tp_long %>%
  left_join(cumulative_mta_depth_full, by = c("tp_name" = "country1", "comp" = "country2", "year")) %>%
  mutate(mtad = cumulative_depth_idx * tp_weight) %>%
  select(case, comp_idx, mtad)

comp_mtad <- bind_rows(comp_mtad_1, comp_mtad_2) %>%
  group_by(case, comp_idx) %>%
  summarise(total_comp_mtad = sum(mtad, na.rm = TRUE), .groups = "drop")

# Evaluate Respondent - Complainant Cumulative MTA Depth #

main_mtad_1 <- main_long %>%
  left_join(cumulative_mta_depth_full, by = c("comp_name" = "country1", "resp_name" = "country2", "year")) %>%
  mutate(mtad = cumulative_depth_idx) %>%
  select(case, comp_idx, mtad)

main_mtad_2 <- main_long %>%
  left_join(cumulative_mta_depth_full, by = c("resp_name" = "country1", "comp_name" = "country2", "year")) %>%
  mutate(mtad = cumulative_depth_idx) %>%
  select(case, comp_idx, mtad)

main_mtad <- bind_rows(main_mtad_1, main_mtad_2) %>%
  group_by(case, comp_idx) %>%
  slice_tail(n = 1) %>%
  ungroup() %>%
  rename(total_main_mtad = mtad)

# Combine and Summarize Repondent and Complainant #

master_mta_depth <- left_join(resp_mtad, comp_mtad, by = c("case", "comp_idx")) %>%
  left_join(main_mtad, by = c("case", "comp_idx")) %>%
  mutate(
    tp_total_resp_mtad = coalesce(total_resp_mtad, 0),
    tp_total_comp_mtad = coalesce(total_comp_mtad, 0),
    tp_total_mtad = tp_total_resp_mtad + tp_total_comp_mtad,
    main_total_mtad = coalesce(total_main_mtad, 0),
    total_mtad = tp_total_mtad + main_total_mtad,      
    mtad_all_ratio = tp_total_mtad/total_mtad,
    mtad_tp_mp_ratio = tp_total_mtad/main_total_mtad
  )

master_mta_depth$mtad_tp_mp_ratio[is.nan(master_mta_depth$mtad_tp_mp_ratio)] <- 0
master_mta_depth$mtad_tp_mp_ratio[is.infinite(master_mta_depth$mtad_tp_mp_ratio)] <- max(master_mta_depth$mtad_tp_mp_ratio[is.finite(master_mta_depth$mtad_tp_mp_ratio)], na.rm = TRUE)

write_csv(master_mta_depth, "Data/MTA Depth.csv")

Trade Volume

### Prepare for Trade Measures ###

trade_volume <- trade_clean %>%
  mutate(total = one_two + two_one) %>%
  group_by(country1, country2, year) %>%
  summarize(total_trade = sum(total)) %>%
  ungroup()

trade_gap <- trade_clean %>%
  mutate(total_gap = abs(one_two - two_one)) %>%
  group_by(country1, country2, year) %>%
  summarize(total_gap = sum(total_gap)) %>%
  ungroup()

trade_clean2 <- left_join(trade_volume, trade_gap, by = c("country1", "country2", "year"))


### Evaluate Respondent - Third Party Trade Measures ###

resp_trade_1 <- tp_long %>%
  left_join(trade_clean2, by = c("resp" = "country1", "tp_name" = "country2", "year")) %>%
  mutate(sum_resp_trade = total_trade * tp_weight) %>%
  mutate(gap_resp_trade = total_gap * tp_weight) %>%
  select(case, comp_idx, sum_resp_trade, gap_resp_trade)

resp_trade_2 <- tp_long %>%
  left_join(trade_clean2, by = c("tp_name" = "country1", "resp" = "country2", "year")) %>%
  mutate(sum_resp_trade = total_trade * tp_weight) %>%
  mutate(gap_resp_trade = total_gap * tp_weight) %>%
  select(case, comp_idx, sum_resp_trade, gap_resp_trade)

resp_tp_trade <- bind_rows(resp_trade_1, resp_trade_2) %>%
  group_by(case, comp_idx) %>%
  summarise(
    total_resp_volume = sum(sum_resp_trade, na.rm = TRUE),
    total_resp_gap = sum(gap_resp_trade, na.rm = TRUE),
    .groups = "drop"
  )


### Evaluate Complainant - Third Party Trade Measures ###

comp_trade_1 <- tp_long %>%
  left_join(trade_clean2, by = c("comp" = "country1", "tp_name" = "country2", "year")) %>%
  mutate(sum_comp_trade = total_trade * tp_weight) %>%
  mutate(gap_comp_trade = total_gap * tp_weight) %>%
  select(case, comp_idx, sum_comp_trade, gap_comp_trade)

comp_trade_2 <- tp_long %>%
  left_join(trade_clean2, by = c("tp_name" = "country1", "comp" = "country2", "year")) %>%
  mutate(sum_comp_trade = total_trade * tp_weight) %>%
  mutate(gap_comp_trade = total_gap * tp_weight) %>%
  select(case, comp_idx, sum_comp_trade, gap_comp_trade)

comp_tp_trade <- bind_rows(comp_trade_1, comp_trade_2) %>%
  group_by(case, comp_idx) %>%
  summarise(
    total_comp_volume = sum(sum_comp_trade, na.rm = TRUE), 
    total_comp_gap = sum(gap_comp_trade, na.rm = TRUE), 
    .groups = "drop"
    )

### Evaluate Respondent - Complainant Trade Measures ###

main_trade_1 <- main_long %>%
  left_join(trade_clean2, by = c("comp_name" = "country1", "resp_name" = "country2", "year")) %>%
  mutate(sum_main_trade = total_trade) %>%
  mutate(gap_main_trade = total_gap) %>%
  select(case, comp_idx, sum_main_trade, gap_main_trade)

main_trade_2 <- main_long %>%
  left_join(trade_clean2, by = c("resp_name" = "country1", "comp_name" = "country2", "year")) %>%
  mutate(sum_main_trade = total_trade) %>%
  mutate(gap_main_trade = total_gap) %>%
  select(case, comp_idx, sum_main_trade, gap_main_trade)

main_trade <- bind_rows(main_trade_1, main_trade_2) %>%
  group_by(case, comp_idx) %>%
  summarise(
    total_main_volume = sum(sum_main_trade, na.rm = TRUE), 
    total_main_gap = sum(gap_main_trade, na.rm = TRUE), 
    .groups = "drop"
    )

### Combine and Summarize Repondent and Complainant ###

master_trade <- full_join(resp_tp_trade, comp_tp_trade, by = c("case", "comp_idx")) %>%
  left_join(main_trade, by = c("case", "comp_idx")) %>%
  mutate(
    total_resp_volume = coalesce(total_resp_volume, 0),
    total_comp_volume = coalesce(total_comp_volume, 0),
    total_tp_volume = total_resp_volume + total_comp_volume,
    total_main_volume = coalesce(total_main_volume, 0),
    total_volume = total_tp_volume + total_main_volume,      
    trade_volume_all_ratio = total_tp_volume/total_volume,
    trade_volume_tp_mp_ratio = total_tp_volume/total_main_volume,
    log_main_volume = log(total_main_volume),
    
    total_resp_gap = coalesce(total_resp_gap, 0),
    total_comp_gap = coalesce(total_comp_gap, 0),
    total_tp_gap = total_resp_gap + total_comp_gap,
    total_main_gap = coalesce(total_main_gap, 0),
    trade_gap_index = log(1+(total_tp_gap/total_main_gap)),
    log_main_gap = log(total_main_gap)
  )

write_csv(master_trade, "Data/Trade Volume and Gap.csv")

Liberal Democracy

### Code Regime Type ###

vdem_clean <- vdem_clean %>% 
  select(country, year, libdem) %>%
  distinct()

tp_libdem <- tp_long %>%
  left_join(vdem_clean, by = c("tp_name" = "country", "year")) %>%
  mutate(tp_libdem = libdem * tp_weight) %>%
  group_by(case, comp_idx) %>%
  summarise(total_tp_libdem = sum(tp_libdem, na.rm = TRUE), .groups = "drop")

main_libdem <- main_long2 %>%
  left_join(vdem_clean, by = c("main_name" = "country", "year")) %>%
  mutate(main_libdem = libdem * main_weight) %>%
  group_by(case, comp_idx) %>%
  summarise(total_main_libdem = sum(main_libdem, na.rm = TRUE), .groups = "drop")

main_libdem_ratio <- main_long2 %>%
  left_join(vdem_clean, by = c("main_name" = "country", "year")) %>%
  mutate(main_libdem = libdem) %>%
  group_by(case, comp_idx, main_idx) %>%
  summarise(total_libdem = sum(main_libdem, na.rm = TRUE), .groups = "drop") %>%
  pivot_wider(names_from = main_idx, values_from = total_libdem, names_prefix = "libdem_") %>%
  mutate(ratio_main_libdem = libdem_resp / libdem_comp)

master_libdem <- full_join(tp_libdem, main_libdem, by = c("case", "comp_idx")) %>%
  mutate(libdem_index = total_tp_libdem/total_main_libdem) %>%
  left_join(main_libdem_ratio, by = c("case", "comp_idx"))

master_libdem$ratio_main_libdem[is.nan(master_libdem$ratio_main_libdem)] <- 0
master_libdem$ratio_main_libdem[is.infinite(master_libdem$ratio_main_libdem)] <- max(master_libdem$ratio_main_libdem[is.finite(master_libdem$ratio_main_libdem)], na.rm = TRUE)

write_csv(master_libdem, "Data/Libdem Score.csv")

Case and Control Variables

### Code Case Outcome ###

case_outcome <- master %>%
  mutate(
    settle = ifelse(!is.na(mas_date) | !is.na(mas_day), 1, 0),
    early_settle = ifelse((!is.na(mas_date) | !is.na(mas_day)) & (is.na(panel_date) | !is.na(panel_day)), 1, 0),
    no_settle = ifelse(is.na(mas_date) & is.na(mas_day), 1, 0)
  ) %>%
  select(case, comp_idx, settle, no_settle)

write_csv(case_outcome, "Data/Case Outcome.csv")


### Code Case Complexity ###

complexity <- master %>%
  unnest_wider(agreement, names_sep = "") 

complexity_raw <- master %>%
  select(case, comp_idx, agreement) %>%
  setNames(c("case", "comp_idx", "agreement", "")) %>%
  separate('agreement', into = paste0("agreement", 1:6), sep = ", ", fill = "right")

complexity_expand <- complexity_raw %>%
  pivot_longer(cols = paste0("agreement", 1:6), names_to = "agreement_idx", values_to = "agreement_name") %>%
  filter(!is.na(agreement_name) | (is.na(agreement_name) & agreement_idx == "agreement1"))

case_complexity <- complexity_expand %>%
  group_by(case, comp_idx) %>%
  summarise(complexity = sum(!is.na(agreement_name))) %>%
  left_join(master, by = c("case", "comp_idx")) %>%
  select(case, comp_idx, complexity)

write_csv(case_complexity, "Data/Case Complexity.csv")


### Code Article XXII ###

case_article_xxii <- master %>%
  left_join(article_source) %>%
  select(case, comp_idx, xxii_cited)

write_csv(case_article_xxii, "Data/Case Article XXII Cited.csv")


### Code Systemic Issues ###

case_systemic_issue <- master %>%
  left_join(article_source) %>%
  select(case, comp_idx, systemic_cited)

write_csv(case_systemic_issue, "Data/Case Systemic Issue Cited.csv")


### Code Disputed Policy ###

categorical_policy <- master %>%
  select(case, comp_idx, policy)

categorical_policy <- categorical_policy %>%
  mutate(policy_group = case_when(
    # Group 1: Discrimination
    grepl("discriminat|preferential|gsp|origin", policy, ignore.case = TRUE) ~ 1,
    # Group 2: Export, Import, Technical, Safeguards Barriers
    grepl("export|import|ban|restriction|quota|licensing|customs valuation|customs|technical|safeguard|transit|quantitative", 
          policy, ignore.case = TRUE) ~ 2,
    # Group 3: Subsidies, Anti-dumping
    grepl("subsid|anti-dumping|dumping|countervailing|zeroing|sunset review|national", 
          policy, ignore.case = TRUE) ~ 3,
    # Group 4: Intellectual Property
    grepl("intellectual property|trips|patent|copyright|geographical indication", 
          policy, ignore.case = TRUE) ~ 4,
    # Group 5: Goods, Services, Investments
    grepl("goods|services|investment|tariff|market access|content requirement|visa|tax|taxation", 
          policy, ignore.case = TRUE) ~ 5,
    TRUE ~ 0
  ))

dummy_policy <- categorical_policy %>%
  mutate(
    policy_discrimination = if_else(policy_group == 1, 1, 0),
    policy_barriers = if_else(policy_group == 2, 1, 0),
    policy_subsidies = if_else(policy_group == 3, 1, 0),
    policy_ip = if_else(policy_group == 4, 1, 0),
    policy_gs = if_else(policy_group == 5, 1, 0)
  )

case_policy <- dummy_policy

write_csv(case_policy, "Data/Case Policy.csv")

Analysis

Combine Dataset

### Combine Dataset ###

final_master <- left_join(master, case_outcome, by = c("case", "comp_idx")) %>%
  left_join(tp_count) %>%
  left_join(tp_dummy) %>%
  left_join(master_mta_count) %>%
  left_join(master_mta_depth) %>%
  left_join(master_trade) %>%
  left_join(master_legal) %>%
  left_join(master_libdem) %>%
  left_join(case_complexity) %>%
  left_join(case_article_xxii) %>%
  left_join(case_systemic_issue) %>%
  left_join(case_policy) %>%
  select(case, comp_idx, settle, tp_count, tp_dummy, 
         tp_total_mtac, mtac_all_ratio, mtac_tp_mp_ratio, 
         tp_total_mtad, mtad_all_ratio, mtad_tp_mp_ratio,
         total_tp_volume, trade_volume_all_ratio, trade_volume_tp_mp_ratio,
         total_tp_legal, legal_index,
         total_tp_libdem, libdem_index,
         complexity, xxii_cited, systemic_cited, 
         policy_group, policy_discrimination, policy_barriers, policy_subsidies, policy_ip, policy_gs) %>%
  drop_na() %>%
  filter(tp_dummy == 1) %>%
  write_csv("Data/Final Dataset Unscaled.csv") %>%
  mutate(across(4:27, ~ scale(.)[,1])) %>%
  mutate(across(3:27, ~ as.numeric(.))) %>%
  write_csv("Data/Final Dataset.csv") %>%
  select(-tp_dummy)

final_master_unscaled <- read_csv("Data/Final Dataset Unscaled.csv") %>%
  select(-tp_dummy)

final_master_unscaled_df <- as.data.frame(final_master_unscaled) %>%
  select(-case, -comp_idx) %>%
  rename(
    "Early Settlement" = settle, 
    "Third Party Count" = tp_count, 
    "TP Raw MTA Count Total" = tp_total_mtac, 
    "TP to AP MTA Count Ratio" = mtac_all_ratio, 
    "TP to MP MTA Count Ratio" = mtac_tp_mp_ratio,
    "TP Raw MTA Depth Total" = tp_total_mtad, 
    "TP to AP MTA Depth Ratio" = mtad_all_ratio, 
    "TP to MP MTA Depth Ratio" = mtad_tp_mp_ratio,
    "TP Raw Trade Volume Total*" = total_tp_volume, 
    "TP to AP Trade Volume Ratio" = trade_volume_all_ratio, 
    "TP to MP Trade Volume Ratio" = trade_volume_tp_mp_ratio,
    "Average TP Legal Capacity**" = total_tp_legal, 
    "TP to MP Legal Capacity Ratio" = legal_index,
    "Average TP LibDem Score" = total_tp_libdem, 
    "TP to MP LibDem Score Ratio" = libdem_index,
    "Case Complexity" = complexity, 
    "Article XXII Cited" = xxii_cited, 
    "Systemic Issue Cited" = systemic_cited, 
    "Policy Group" = policy_group,
    "Discriminative Policy" = policy_discrimination, 
    "Trade Barrier Policy" = policy_barriers, 
    "Subsidies Policy" = policy_subsidies, 
    "Intellectual Property Policy" = policy_ip, 
    "Goods Services Policy" = policy_gs
  ) 

stargazer(final_master_unscaled_df, type= "latex", title = "Summary Statistics", summary = TRUE, median = TRUE, iqr = TRUE, digits = 3, out = "Output/Summary Statistics.tex", column.sep.width = "5pt", notes = "TP=Third Parties; AP=All Parties; MP=Main Parties; LibDem=Liberal Democracy. *Trade in millions of 2014 US dollars. **GDP Per Capita in 2024 US dollars.")

final_master_df <- as.data.frame(final_master) %>%
  select(-case, -comp_idx) %>%
  rename(
    "Early Settlement" = settle, 
    "Third Party Count" = tp_count, 
    "TP Raw MTA Count Total" = tp_total_mtac, 
    "TP to AP MTA Count Ratio" = mtac_all_ratio, 
    "TP to MP MTA Count Ratio" = mtac_tp_mp_ratio,
    "TP Raw MTA Depth Total" = tp_total_mtad, 
    "TP to AP MTA Depth Ratio" = mtad_all_ratio, 
    "TP to MP MTA Depth Ratio" = mtad_tp_mp_ratio,
    "TP Raw Trade Volume Total" = total_tp_volume, 
    "TP to AP Trade Volume Ratio" = trade_volume_all_ratio, 
    "TP to MP Trade Volume Ratio" = trade_volume_tp_mp_ratio,
    "Average TP Legal Capacity" = total_tp_legal, 
    "TP to MP Legal Capacity Ratio" = legal_index,
    "Average TP LibDem Score" = total_tp_libdem, 
    "TP to MP LibDem Score Ratio" = libdem_index,
    "Case Complexity" = complexity, 
    "Article XXII Cited" = xxii_cited, 
    "Systemic Issue Cited" = systemic_cited, 
    "Policy Group" = policy_group,
    "Discriminative Policy" = policy_discrimination, 
    "Trade Barrier Policy" = policy_barriers, 
    "Subsidies Policy" = policy_subsidies, 
    "Intellectual Property Policy" = policy_ip, 
    "Goods Services Policy" = policy_gs
  )

stargazer(final_master_df, type= "latex", title = "Standardized Summary Statistics", summary = TRUE, median = TRUE, iqr = TRUE, digits = 3, out = "Output/Standardized Summary Statistics.tex", column.sep.width = "5pt", notes = "TP=Third Parties; AP=All Parties; MP=Main Parties; LibDem=Liberal Democracy")

Check Correlations

### Check Variable Correlations ###

cor_data <- final_master %>%
  select(where(is.numeric)) %>%
  select(-comp_idx)


# Run together - From here #

cor_matrix <- cor(cor_data, use = "complete.obs")

colnames(cor_matrix) <- c("Early Settlement", "TP Count", 
                          "TP Raw MTA Count Total", "TP-AP MTA Count Ratio", "TP-MP MTA Count Ratio", 
                          "TP Raw MTA Depth Total", "TP-AP MTA Depth Ratio", "TP-MP MTA Depth Ratio",
                          "TP Raw Trade Volume Total", "TP-AP Trade Volume Ratio", "TP-MP Trade Volume Ratio",
                          "Ave TP Legal Capacity", "TP-MP Legal Capacity Ratio", "Ave TP Liberal Democracy Score", "TP-MP Liberal Democracy Score Ratio", 
                          "Case Complexity", "Article XXII Cited", "Systemic Issue Cited", 
                          "Policy Group", "Discriminative Policy", "Trade Barrier Policy", "Subsidies Policy", "Intellectual Property Policy", "Goods Services Policy")
rownames(cor_matrix) <- colnames(cor_matrix)

png("Output/Correlation Matrix.png", width = 8000, height = 6000, res = 100)

par(family = "serif")

corrplot(cor_matrix, method = "color", type = "upper",
         tl.cex = 5.5, number.cex = 5, cl.cex = 5.5, 
         tl.col = "black", tl.srt = 45, 
         addCoef.col = "black",
         addgrid.col = "black"
         )

# Run together - To here #


cor_matrix_df <- as.data.frame(cor_matrix)

write_csv(cor_matrix_df, "Data/Correlation Matrix.csv")

Run Probit Regression

### MTA Count Models (1-3) ###

model_tp_mtac <- glm(settle ~ tp_count + tp_total_mtac + legal_index + libdem_index + complexity + xxii_cited + systemic_cited + policy_group, data = final_master, family = "binomial"(link = "probit"))

model_mtac_all_ratio <- glm(settle ~ tp_count + mtac_all_ratio + legal_index + libdem_index + complexity + xxii_cited + systemic_cited + policy_group, data = final_master, family = "binomial"(link = "probit"))

model_mtac_tp_mp_ratio <- glm(settle ~ tp_count + mtac_tp_mp_ratio + legal_index + libdem_index + complexity + xxii_cited + systemic_cited + policy_group, data = final_master, family = "binomial"(link = "probit"))


### MTA Depth Models (4-6) ###

model_tp_mtad <- glm(settle ~ tp_count + tp_total_mtad + legal_index + libdem_index + complexity + xxii_cited + systemic_cited + policy_group, data = final_master, family = "binomial"(link = "probit"))

model_mtad_all_ratio <- glm(settle ~ tp_count + mtad_all_ratio + legal_index + libdem_index + complexity + xxii_cited + systemic_cited + policy_group, data = final_master, family = "binomial"(link = "probit"))

model_mtad_tp_mp_ratio <- glm(settle ~ tp_count + mtad_tp_mp_ratio + legal_index + libdem_index + complexity + xxii_cited + systemic_cited + policy_group, data = final_master, family = "binomial"(link = "probit"))


### Trade Volume Models (7-9) ###

model_tp_trade <- glm(settle ~ tp_count + total_tp_volume + legal_index + libdem_index + complexity + xxii_cited + systemic_cited + policy_group, data = final_master, family = "binomial"(link = "probit"))

model_trade_all_ratio <- glm(settle ~ tp_count + trade_volume_all_ratio + legal_index + libdem_index + complexity + xxii_cited + systemic_cited + policy_group, data = final_master, family = "binomial"(link = "probit"))

model_trade_tp_mp_ratio <- glm(settle ~ tp_count + trade_volume_tp_mp_ratio + legal_index + libdem_index + complexity + xxii_cited + systemic_cited + policy_group, data = final_master, family = "binomial"(link = "probit"))


### Collect all models ###

models <- list(
  model_tp_mtac, model_mtac_all_ratio, model_mtac_tp_mp_ratio,
  model_tp_mtad, model_mtad_all_ratio, model_mtad_tp_mp_ratio,
  model_tp_trade, model_trade_all_ratio, model_trade_tp_mp_ratio
)

Create Regression Table

texreg(l = models,
       file = "Output/Regression Results.tex",
       stars = c(0.001, 0.01, 0.05),
       caption.above = TRUE,
       caption = "Regression Results",
       label = "Regression Results",
       booktabs = TRUE,
       scalebox = 0.8,
       custom.model.names = c(
        "(1)",
        "(2)",
        "(3)",
        "(4)",
        "(5)",
        "(6)",
        "(7)",
        "(8)",
        "(9)"
       ),
       custom.header = list("MTA Count" = 1:3, "MTA Depth" = 4:6, "Trade Volume" = 7:9),
       custom.coef.map = list(
         "(Intercept)" = "Constant",
         "tp_count" = "Third Party Count",
         "tp_total_mtac" = "Third Party Raw Total",
         "mtac_all_ratio" = "TP to All Party Ratio",
         "mtac_tp_mp_ratio" = "TP to Main Party Ratio",
         "tp_total_mtad" = "Third Party Raw Total",
         "mtad_all_ratio" = "TP to All Party Ratio",
         "mtad_tp_mp_ratio" = "TP to Main Party Ratio",
         "total_tp_volume" = "Third Party Raw Total",
         "trade_volume_all_ratio" = "TP to All Party Ratio",
         "trade_volume_tp_mp_ratio" = "TP to Main Party Ratio",
         "legal_index" = "TP to MP Legal Capacity Ratio",
         "libdem_index" = "TP to MP LibDem Score Ratio",
         "complexity" = "Case Complexity",
         "xxii_cited" = "Article XXII Cited",
         "systemic_cited" = "Systemic Issue Cited",
         "policy_group" = "Policy Group"
       ),
       custom.note = "%stars. TP=Third Parties; MP=Main Parties; LibDem=Liberal Democracy.",
       digits = 3
)

Evaluate Marginal Effects

### Average Marginal Effects and Marginal Effects at the Mean ###

ame_results <- lapply(models, avg_slopes)
modelsummary(ame_results)


### Rename Models ###

model_names <- c(
  "Model (1)",
  "Model (2)",
  "Model (3)",
  "Model (4)",
  "Model (5)",
  "Model (6)",
  "Model (7)",
  "Model (8)",
  "Model (9)"
)

for (i in seq_along(ame_results)) {
  ame_results[[i]]$model_name <- model_names[i]
}


### Create Dataset ###

ame_mtac_df <- bind_rows(ame_results) %>%
  slice(1:24)

ame_mtad_df <- bind_rows(ame_results) %>%
  slice(25:48)

ame_trade_df <- bind_rows(ame_results) %>%
  slice(49:72)


### Visualize MTA count ###

figure_3 <- ggplot(ame_mtac_df %>% filter(term == "tp_total_mtac" | term == "mtac_all_ratio" | term == "mtac_tp_mp_ratio") %>%
  mutate(term = recode(term,
    "tp_total_mtac" = "Third Party Raw Total",
    "mtac_all_ratio" = " Third Party to All Party Ratio",
    "mtac_tp_mp_ratio" = "  Third Party to Main Party Ratio"
  )), 
  aes(x = estimate, y = term, color = model_name)) +
  geom_vline(xintercept = 0, color = "red", linetype = "dashed") +
  geom_point(position = position_dodge(width = 0.7)) +
  geom_errorbar(aes(xmin = conf.low, xmax = conf.high), width = 0.2, position = position_dodge(width = 0.7)) +
  labs(
    x = "Marginal Effects (Change in Settlement Probability)",
    y = "MTA Count Variables",
    color = "Model"
  ) +
  theme_bw(base_family = "serif") +
  theme(
    plot.title = element_text(size = 16),
    axis.title = element_text(size = 14),
    axis.text = element_text(size = 12),
    legend.title = element_text(size = 13),
    legend.text = element_text(size = 11),
    axis.title.x = element_text(margin = margin(t = 20)),
    axis.title.y = element_text(margin = margin(r = 10))
    )

ggsave("Output/Marginal Effects Across MTA Count Models.png", plot = figure_3, width = 10, height = 4, units = "in")


### Visualize MTA Depth ###

figure_4 <- ggplot(ame_mtad_df %>% filter(term == "tp_total_mtad" | term == "mtad_all_ratio" | term == "mtad_tp_mp_ratio") %>%
  mutate(term = recode(term,
    "tp_total_mtad" = "Third Party Raw Total",
    "mtad_all_ratio" = " Third Party to All Party Ratio",
    "mtad_tp_mp_ratio" = "  Third Party to Main Party Ratio"
  )), 
  aes(x = estimate, y = term, color = model_name)) +
  geom_vline(xintercept = 0, color = "red", linetype = "dashed") +
  geom_point(position = position_dodge(width = 0.7)) +
  geom_errorbar(aes(xmin = conf.low, xmax = conf.high), width = 0.2, position = position_dodge(width = 0.7)) +
  labs(
    x = "Marginal Effect (Change in Settlement Probability)",
    y = "MTA Depth Variables",
    color = "Model"
  ) +
  theme_bw(base_family = "serif") +
  theme(
    plot.title = element_text(size = 16),
    axis.title = element_text(size = 14),
    axis.text = element_text(size = 12),
    legend.title = element_text(size = 13),
    legend.text = element_text(size = 11),
    axis.title.x = element_text(margin = margin(t = 20)),
    axis.title.y = element_text(margin = margin(r = 10))
    )

ggsave("Output/Marginal Effects Across MTA Depth Models.png", plot = figure_4, width = 10, height = 4, units = "in")


### Visualize Trade Volume ###

figure_5 <- ggplot(ame_trade_df %>% filter(term == "total_tp_volume" | term == "trade_volume_all_ratio" | term == "trade_volume_tp_mp_ratio") %>%
  mutate(term = recode(term,
    "total_tp_volume" = "Third Party Raw Total",
    "trade_volume_all_ratio" = " Third Party to All Party Ratio",
    "trade_volume_tp_mp_ratio" = "  Third Party to Main Party Ratio"
  )), aes(x = estimate, y = term, color = model_name)) +
  geom_vline(xintercept = 0, color = "red", linetype = "dashed") +
  geom_point(position = position_dodge(width = 0.7)) +
  geom_errorbar(aes(xmin = conf.low, xmax = conf.high), width = 0.2, position = position_dodge(width = 0.7)) +
  labs(
    x = "Marginal Effect (Change in Settlement Probability)",
    y = "Trade Volume Variables",
    color = "Model"
  ) +
  theme_bw(base_family = "serif") +
  theme(
    plot.title = element_text(size = 16),
    axis.title = element_text(size = 14),
    axis.text = element_text(size = 12),
    legend.title = element_text(size = 13),
    legend.text = element_text(size = 11),
    axis.title.x = element_text(margin = margin(t = 20)),
    axis.title.y = element_text(margin = margin(r = 10))
    )

ggsave("Output/Marginal Effects Across Trade Volume Models.png", plot = figure_5, width = 10, height = 4, units = "in")


### Evaluate Predicted Probability ###

model_list <- list(
  model_tp_mtac,
  model_mtac_all_ratio,
  model_mtac_tp_mp_ratio,
  model_tp_mtad,
  model_mtad_all_ratio,
  model_mtad_tp_mp_ratio,
  model_tp_trade,
  model_trade_all_ratio,
  model_trade_tp_mp_ratio
)

model_names <- paste0("model_", 1:9)

vars_to_vary <- c("tp_total_mtac", "mtac_all_ratio", "mtac_tp_mp_ratio",
                  "tp_total_mtad", "mtad_all_ratio", "mtad_tp_mp_ratio",
                  "total_tp_volume", "trade_volume_all_ratio", "trade_tp_mp_ratio",
                  "complexity", "xxii_cited", "systemic_cited", "legal_index", "libdem_index")

prediction_results <- list()

for (i in seq_along(model_list)) {
  model <- model_list[[i]]
  model_name <- model_names[[i]]
  
  for (var in vars_to_vary) {

    if (!(var %in% names(coef(model)))) next
    
    newdata_grid <- data.frame(var_values = seq(-2, 2, by = 0.25)) %>%
      rename(!!var := var_values)
    
    vars_in_model <- names(coef(model))
    control_vars <- setdiff(vars_in_model, c("(Intercept)", var))
    for (cv in control_vars) {
      newdata_grid[[cv]] <- 0
    }

    pred <- predictions(model, newdata = newdata_grid)
    
    pred_out <- pred %>%
      mutate(model = model_name, variable = var) %>%
      rename(var_value = !!sym(var))
    
    prediction_results[[paste(model_name, var, sep = "_")]] <- pred_out
  }
}

pred_prob <- bind_rows(prediction_results)

predicted_probabilities <- pred_prob %>%
  mutate(percentile = case_when(
    var_value == -0.75 ~ "25th",
    var_value == 0     ~ "50th (Mean)",
    var_value == 0.75  ~ "75th"
  )) %>%
  select(c("model","variable", "percentile", "estimate", "p.value", "conf.low", "conf.high")) %>%
  filter(!is.na(percentile))

write_csv(predicted_probabilities, "Data/Predicted Probabilities.csv")


### Plot Predicted Probability for Select Models ###

figure_6 <- plot_predictions(model_tp_mtac, condition = "tp_total_mtac", draw = TRUE) +
  labs(
    x = "Third Party Raw MTA Count Total",
    y = "Probability of Early Settlement",
  ) +
  theme_bw(base_family = "serif") +
  theme(
    plot.title = element_text(size = 16),
    axis.title = element_text(size = 14),
    axis.text = element_text(size = 12),
    legend.title = element_text(size = 13),
    legend.text = element_text(size = 11),
    axis.title.x = element_text(margin = margin(t = 10)),
    axis.title.y = element_text(margin = margin(r = 10))
    )

ggsave("Output/Predicted Probability of MTA Count.png", plot = figure_6, width = 6, height = 5, units = "in")


figure_7 <- plot_predictions(model_tp_mtad, condition = "tp_total_mtad") +
  labs(
    x = "Third Party Raw MTA Depth Total",
    y = "Probability of Early Settlement",
  ) +
  theme_bw(base_family = "serif") +
  theme(
    plot.title = element_text(size = 16),
    axis.title = element_text(size = 14),
    axis.text = element_text(size = 12),
    legend.title = element_text(size = 13),
    legend.text = element_text(size = 11),
    axis.title.x = element_text(margin = margin(t = 10)),
    axis.title.y = element_text(margin = margin(r = 10))
  )

ggsave("Output/Predicted Probability of MTA Depth.png", plot = figure_7, width = 6, height = 5, units = "in")


figure_8 <- plot_predictions(model_tp_trade, condition = "total_tp_volume") +
  labs(
    x = "Third Party Raw Trade Volume Total",
    y = "Probability of Early Settlement",
  ) +
  theme_bw(base_family = "serif") +
  theme(
    plot.title = element_text(size = 16),
    axis.title = element_text(size = 14),
    axis.text = element_text(size = 12),
    legend.title = element_text(size = 13),
    legend.text = element_text(size = 11),
    axis.title.x = element_text(margin = margin(t = 10)),
    axis.title.y = element_text(margin = margin(r = 10))
  )

ggsave("Output/Predicted Probability of Trade Volume.png", plot = figure_8, width = 6, height = 5, units = "in")