Setup
Load Libraries
library(dplyr)
library(tidyverse)
library(ggplot2)
library(knitr)
library(readxl)
library(stringr)
library(countrycode)
library(data.table)
library(vtable)
library(corrplot)
library(ggcorrplot)
library(glmnet)
library(stargazer)
library(texreg)
library(marginaleffects)
library(modelsummary)
library(tinytex)
Import Data
### Import Case Data ###
# Citation: Gen, L., R. Z. Idris, A. U. Abubakar, and M. A. B. Omar (2024, May). Identity Dynamics and Diplomatic Narratives: Malaysian Chinese Perspectives. Revista de Gestão Social e Ambiental 18 (5), e07188. https://doi.org/10.24857/rgsa.v18n5-124. Accessed April 14, 2025.
case_source <- read_excel("Sources/WTO DS Case List.xlsx", sheet = 1) %>%
select(1, 2, 3, 4, 5, 6, 7, 19, 20, 10, 11, 14, 15) %>%
setNames(c("case", "comp", "resp", "title", "cons_req_date", "agreement", "policy", "mas_date", "mas_day", "panel_date", "panel_day", "report_date", "report_day"))
### Import Third Party Data ###
# Citation: Gen, L., R. Z. Idris, A. U. Abubakar, and M. A. B. Omar (2024, May). Identity Dynamics and Diplomatic Narratives: Malaysian Chinese Perspectives. Revista de Gestão Social e Ambiental 18 (5), e07188. https://doi.org/10.24857/rgsa.v18n5-124. Accessed April 14, 2025.
tp_source <- read_excel("Sources/WTO DS Case List.xlsx", sheet = 2) %>%
select(1, 2) %>%
setNames(c("case", "tp", "")) %>%
separate('tp', into = paste0("tp", 1:35), sep = ",", fill = "right")
### Import DESTA Proximity Data ###
# Citation: Dür, A., L. Baccini, and M. Elsig (2014). The Design of International Trade Agreements: Introducing a New Dataset. The Review of International Organizations 9 (3), 353–375. Version 2.2 (2023). https://www.designoftradeagreements.org/downloads/. Accessed April 14, 2025.
proximity_source <- read_excel("Sources/Treaty Dyad List (DESTA).xlsx", sheet = 2) %>%
select(1, 2, 6, 7, 11) %>%
setNames(c("country1", "country2", "base_treaty", "name", "year"))
### Import DESTA Depth Data ###
# Citation: Dür, A., L. Baccini, and M. Elsig (2014). The Design of International Trade Agreements: Introducing a New Dataset. The Review of International Organizations 9 (3), 353–375. Version 2.2 (2023). https://www.designoftradeagreements.org/downloads/. Accessed April 14, 2025.
depth_source <- read_excel("Sources/Treaty Depth (DESTA).xlsx", sheet = 2) %>%
select(2, 14)
# Import GDP Data ###
# Citation: World Bank (2024a). GDP (Current US$). World Development Indicators. https://databank.worldbank.org/reports.aspx?source=2&series=NY.GDP.MKTP.CD&country=#. Accessed April 14, 2025.
gdp_source <- read_excel("Sources/GDP (World Bank).xlsx", sheet = 1) %>%
mutate(across(everything(), ~ na_if(.x, ".."))) %>%
select(3, 5:27) %>%
setNames(c("country", as.character(1995:2017)))
### Import GDP Per Capita Data ###
# Citation: World Bank (2024b). GDP Per Capita (Current US$). World Development Indicators. https://databank.worldbank.org/reports.aspx?source=2&series=NY.GDP.PCAP.CD&country=. Accessed April 14, 2025.
gdppc_source <- read_excel("Sources/GDP Per Capita (World Bank).xlsx", sheet = 1) %>%
mutate(across(everything(), ~ na_if(.x, ".."))) %>%
select(3, 5:27) %>%
setNames(c("country", as.character(1995:2017)))
### Import V-Dem Data ###
# Citation: Coppedge, M., J. Gerring, C. H. Knutsen, S. I. Lindberg, J. Teorell, D. Altman, M. Bernhard, M. S. Fish, A. Glynn, A. Hicken, A. Lührmann, K. L. Marquardt, K. McMann, P. Paxton, D. Pemstein, B. Seim, R. Sigman, S.-E. Skaaning, J. Staton, A. Cornell, L. Gastaldi, H. Gjerløw, V. Mechkova, J. von Römer, A. Sundström, E. Tzelgov, L. Uberti, Y.-t. Wang, T. Wig, and D. Ziblatt (2024). V-dem [country-year] dataset v14. https://v-dem.net/data/the-v-dem-dataset/. Accessed April 14, 2025.
vdem_source <- read_csv("Sources/Libdem Score (VDem).csv") %>%
select(1, 4, 27) %>%
setNames(c("country", "year", "libdem"))
### Import Trade Data ###
# Citation: Barbieri, K. and O. M. G. O. Keshk (2016). Correlates of War Project Trade Data Set Codebook, Version 4.0. https://correlatesofwar.org/data-sets/bilateral-trade/. Accessed April 14, 2025.
trade_source <- read_csv("Sources/Trade Dyad (COW).csv") %>%
select(3, 4, 5, 6, 7) %>%
setNames(c("year", "country1", "country2", "one_two", "two_one"))
### Import Case Article and Issue Data ###
article_source <- read_excel("Sources/Case Article and Issue (Fawwaz Hafizh).xlsx", sheet = 1) %>%
select (1, 4, 6)
Clean Data
### Use Country Code ###
eu_countries <- c("AUT", "BEL", "BGR", "HRV", "CYP", "CZE", "DNK", "EST",
"FIN", "FRA", "DEU", "GRC", "HUN", "IRL", "ITA", "LVA",
"LTU", "LUX", "MLT", "NLD", "POL", "PRT", "ROU", "SVK",
"SVN", "ESP", "SWE")
convert_country_codes <- function(data, columns, origin = "country.name", destination = "iso3c") {
for (col in columns) {
data[[col]] <- countrycode(data[[col]], origin = origin, destination = destination, nomatch = NULL)
}
# If eu_countries is provided, label them as "EU"
if (!is.null(eu_countries)) {
data <- data %>%
mutate(across(all_of(columns), ~ ifelse(. %in% eu_countries, "EU", .)))
}
return(data)
}
columns_to_convert1 <- c("comp", "resp")
case_clean <- convert_country_codes(case_source, columns_to_convert1)
columns_to_convert2 <- c(paste0("tp", 1:35))
tp_clean <- convert_country_codes(tp_source, columns_to_convert2)
columns_to_convert3 <- c("country1", "country2")
proximity_clean <- convert_country_codes(proximity_source, columns_to_convert3)
trade_clean <- convert_country_codes(trade_source, columns_to_convert3)
columns_to_convert4 <- c("country")
vdem_clean <- convert_country_codes(vdem_source, columns_to_convert4)
gdp_clean <- convert_country_codes(gdp_source, columns_to_convert4)
gdppc_clean <- convert_country_codes(gdppc_source, columns_to_convert4)
### Time Restriction: Cases Up to January 20, 2017 ###
case_clean <- case_clean %>%
filter(panel_date < as.Date("2017-01-20") | (is.na(panel_date) & cons_req_date < as.Date("2017-01-20")))
vdem_clean <- vdem_clean %>%
filter(year >= 1995 & year <= 2017)
trade_clean <- trade_clean %>%
filter(year >= 1995 & year <= 2017)
### Recode Variables ###
standardize <- function(column) {
(column - min(column, na.rm = TRUE)) / (max(column, na.rm = TRUE) - min(column, na.rm = TRUE))
}
depth_source$depth_idx <- standardize(depth_source$depth_rasch)
depth_clean <- depth_source
gdp_clean <- gdp_clean %>%
pivot_longer(cols = 2:24, names_to = "year", values_to = "gdp")
gdp_clean$year <- as.numeric(gdp_clean$year)
gdppc_clean <- gdppc_clean %>%
pivot_longer(cols = 2:24, names_to = "year", values_to = "gdp_percap")
gdppc_clean$year <- as.numeric(gdppc_clean$year)
article_source <- article_source %>%
setNames(c("case", "xxii_cited", "systemic_cited"))
Create Master Dataset and Party Weights
master_unweighted <- left_join(case_clean, tp_clean, by = "case") %>%
group_by(case) %>%
summarise(
resp = first(resp),
title = first(title),
cons_req_date = first(cons_req_date),
agreement = first(agreement),
policy = first(policy),
mas_date = first(mas_date),
mas_day = first(mas_day),
panel_date = first(panel_date),
panel_day = first(panel_day),
report_date = first(panel_date),
report_day = first(panel_day),
across(starts_with("tp"), first),
comp_list = list(unique(comp[!is.na(comp)]))
) %>%
# Expand comp_list into multiple rows
unnest_longer(comp_list, values_to = "comp") %>%
group_by(case) %>%
mutate(comp_idx = row_number()) %>%
ungroup() %>%
mutate(year = as.numeric(format(as.Date(cons_req_date, format = "%Y-%m-%d"), "%Y")))
### Create GDP of Third Parties ###
master_gdp <- master_unweighted
third_party_columns <- grep("^tp[0-9]+$", colnames(master_unweighted), value = TRUE)
for (gdp_col in third_party_columns) {
gdp_col_name <- paste0(gdp_col, "_gdp")
master_gdp <- master_gdp %>%
left_join(gdp_clean, by = setNames(c("country", "year"), c(gdp_col, "year")), multiple = "first") %>%
rename(!!gdp_col_name := gdp)
}
main_party_columns <- c("resp", "comp")
for (gdp_col in main_party_columns) {
gdp_col_name <- paste0(gdp_col, "_gdp")
master_gdp <- master_gdp %>%
left_join(gdp_clean, by = setNames(c("country", "year"), c(gdp_col, "year")), multiple = "first") %>%
rename(!!gdp_col_name := gdp)
}
### Create Weight Measures ###
tp_weight <- master_gdp
tp_gdp_columns <- grep("^tp[0-9]+_gdp$", colnames(tp_weight), value = TRUE)
tp_weight[tp_gdp_columns] <- lapply(tp_weight[tp_gdp_columns], as.numeric)
tp_weight <- tp_weight %>%
mutate(total_tp_gdp = rowSums(select(., all_of(tp_gdp_columns)), na.rm = TRUE))
for (col in tp_gdp_columns) {
weight_col_name <- gsub("_gdp", "_weight", col)
tp_weight[[weight_col_name]] <- tp_weight[[col]] / tp_weight$total_tp_gdp
}
tp_weight <- tp_weight %>%
select("case", "comp_idx", "total_tp_gdp", paste0("tp", 1:35, "_weight"))
main_weight <- master_gdp
main_gdp_columns <- c("resp_gdp", "comp_gdp")
main_weight[main_gdp_columns] <- lapply(main_weight[main_gdp_columns], as.numeric)
main_weight <- main_weight %>%
mutate(total_main_gdp = rowSums(select(., all_of(main_gdp_columns)), na.rm = TRUE))
for (col in main_gdp_columns) {
weight_col_name <- paste0(col, "_weight")
main_weight[[weight_col_name]] <- main_weight[[col]] / main_weight$total_main_gdp
}
main_weight <- main_weight %>%
select("case", "comp_idx", "resp_gdp", "comp_gdp", "total_main_gdp", "resp_gdp_weight", "comp_gdp_weight") %>%
rename(
"resp_weight" = "resp_gdp_weight",
"comp_weight" = "comp_gdp_weight"
)
master <- left_join(master_unweighted, tp_weight, by = c("case", "comp_idx")) %>%
left_join(main_weight, by = c("case", "comp_idx"))
names(master) <- gsub("^(tp[0-9]+)$", "\\1_name", names(master))
write_csv(master, "Data/Master.csv")
tp_long <- master %>%
select(case, year, comp, comp_idx, resp, matches("^tp[0-9]+_(name|weight)$")) %>%
pivot_longer(
cols = matches("^tp[0-9]+_(name|weight)$"),
names_to = c("tp_idx", ".value"),
names_pattern = "(tp[0-9]+)_(name|weight)"
) %>%
select(case, comp_idx, year, resp, comp, tp_idx, name, weight) %>%
rename(
tp_name = name,
tp_weight = weight
)
write_csv(tp_long, "Data/TP Long List.csv")
main_long <- master %>%
select(case, year, comp, comp_idx, resp, resp_weight, comp_weight) %>%
pivot_longer(
cols = ends_with("_weight"),
names_to = c("main_idx", ".value"),
names_pattern = "(.*)_(.*)"
) %>%
select(case, comp_idx, year, resp, comp, main_idx, weight) %>%
rename(
resp_name = resp,
comp_name = comp,
main_weight = weight
)
write_csv(main_long, "Data/MP Long List.csv")
main_long2 <- main_long %>%
mutate(main_name = if_else(main_idx == "resp", resp_name, comp_name)) %>%
select(-c("resp_name", "comp_name"))
Wrangle Data
Third Party Count and Dummy
### Count Third Parties ###
tp_count <- tp_long %>%
group_by(case, comp_idx) %>%
summarise(tp_count = sum(!is.na(tp_name))) %>%
left_join(master, by = c("case", "comp_idx")) %>%
select(case, comp_idx, tp_count)
write_csv(tp_count, "Data/TP Count.csv")
### Create Dummy for Third Parties ###
tp_dummy <- tp_count %>%
mutate(tp_dummy = ifelse(tp_count > 0, 1, 0))
MTA Count and Depth
### Prepare for MTA Count and Depth ###
mtas <- left_join(proximity_clean, depth_clean, by = "base_treaty") %>%
select(country1, country2, base_treaty, year, depth_idx) %>%
filter(!is.na(year)) %>%
mutate(year = as.integer(year))
years <- 1995:2017
all_years_pairs <- mtas %>%
distinct(country1, country2) %>%
crossing(year = years)
mtas_long <- mtas %>%
inner_join(all_years_pairs, by = c("country1", "country2")) %>%
filter(year.x <= year.y) %>%
rename(treaty_year = year.x, year = year.y)
master_unweighted$year <- as.numeric(format(as.Date(master_unweighted$cons_req_date, format = "%Y-%m-%d"), "%Y"))
names(master) <- gsub("^(tp[0-9]+)$", "\\1_name", names(master))
write_csv(mtas_long, "Data/MTAs Long.csv")
### Find MTA Cumulative Count ###
# Compute cumulative count
cumulative_mta_count <- mtas_long %>%
group_by(country1, country2, year) %>%
summarise(mta_count = n_distinct(base_treaty), .groups = "drop")
cumulative_mta_count_full <- all_years_pairs %>%
left_join(cumulative_mta_count, by = c("country1", "country2", "year")) %>%
mutate(mta_count = replace_na(mta_count, 0)) %>%
arrange(country1, country2, year)
# Evaluate Respondent - Third Party Cumulative MTA Depth #
tp_mtac_1 <- tp_long %>%
left_join(cumulative_mta_count_full, by = c("resp" = "country1", "tp_name" = "country2", "year")) %>%
mutate(mtac = mta_count * tp_weight) %>%
select(case, comp_idx, mtac)
tp_mtac_2 <- tp_long %>%
left_join(cumulative_mta_count_full, by = c("tp_name" = "country1", "resp" = "country2", "year")) %>%
mutate(mtac = mta_count * tp_weight) %>%
select(case, comp_idx, mtac)
resp_mtac <- bind_rows(tp_mtac_1, tp_mtac_2) %>%
group_by(case, comp_idx) %>%
summarise(total_resp_mtac = sum(mtac, na.rm = TRUE), .groups = "drop")
# Evaluate Complainant - Third Party Cumulative MTA Depth #
comp_mtac_1 <- tp_long %>%
left_join(cumulative_mta_count_full, by = c("comp" = "country1", "tp_name" = "country2", "year")) %>%
mutate(mtac = mta_count * tp_weight) %>%
select(case, comp_idx, mtac)
comp_mtac_2 <- tp_long %>%
left_join(cumulative_mta_count_full, by = c("tp_name" = "country1", "comp" = "country2", "year")) %>%
mutate(mtac = mta_count * tp_weight) %>%
select(case, comp_idx, mtac)
comp_mtac <- bind_rows(comp_mtac_1, comp_mtac_2) %>%
group_by(case, comp_idx) %>%
summarise(total_comp_mtac = sum(mtac, na.rm = TRUE), .groups = "drop")
# Evaluate Respondent - Complainant Cumulative MTA Depth #
main_mtac_1 <- main_long %>%
left_join(cumulative_mta_count_full, by = c("comp_name" = "country1", "resp_name" = "country2", "year")) %>%
mutate(mtac = mta_count) %>%
select(case, comp_idx, mtac)
main_mtac_2 <- main_long %>%
left_join(cumulative_mta_count_full, by = c("resp_name" = "country1", "comp_name" = "country2", "year")) %>%
mutate(mtac = mta_count) %>%
select(case, comp_idx, mtac)
main_mtac <- bind_rows(main_mtac_1, main_mtac_2) %>%
group_by(case, comp_idx) %>%
slice_tail(n = 1) %>%
ungroup() %>%
rename(total_main_mtac = mtac)
# Combine and Summarize Repondent and Complainant #
master_mta_count <- left_join(resp_mtac, comp_mtac, by = c("case", "comp_idx")) %>%
left_join(main_mtac, by = c("case", "comp_idx")) %>%
mutate(
tp_total_resp_mtac = coalesce(total_resp_mtac, 0),
tp_total_comp_mtac = coalesce(total_comp_mtac, 0),
tp_total_mtac = tp_total_resp_mtac + tp_total_comp_mtac,
main_total_mtac = coalesce(total_main_mtac, 0),
total_mtac = tp_total_mtac + main_total_mtac, ### ADDED THIS!
mtac_all_ratio = tp_total_mtac/total_mtac,
mtac_tp_mp_ratio = tp_total_mtac/main_total_mtac
)
master_mta_count$mtac_tp_mp_ratio[is.nan(master_mta_count$mtac_tp_mp_ratio)] <- 0
master_mta_count$mtac_tp_mp_ratio[is.infinite(master_mta_count$mtac_tp_mp_ratio)] <- max(master_mta_count$mtac_tp_mp_ratio[is.finite(master_mta_count$mtac_tp_mp_ratio)], na.rm = TRUE)
write_csv(master_mta_count, "Data/MTA Count.csv")
### Find Cumulative MTA Depth ###
# Compute cumulative depth
cumulative_mta_depth <- mtas_long %>%
group_by(country1, country2, year) %>%
summarise(cumulative_depth_idx = sum(depth_idx, na.rm = TRUE), .groups = "drop")
cumulative_mta_depth_full <- all_years_pairs %>%
left_join(cumulative_mta_depth, by = c("country1", "country2", "year")) %>%
mutate(cumulative_depth_idx = replace_na(cumulative_depth_idx, 0)) %>%
arrange(country1, country2, year)
# Evaluate Respondent - Third Party Cumulative MTA Depth #
tp_mtad_1 <- tp_long %>%
left_join(cumulative_mta_depth_full, by = c("resp" = "country1", "tp_name" = "country2", "year")) %>%
mutate(mtad = cumulative_depth_idx * tp_weight) %>%
select(case, comp_idx, mtad)
tp_mtad_2 <- tp_long %>%
left_join(cumulative_mta_depth_full, by = c("tp_name" = "country1", "resp" = "country2", "year")) %>%
mutate(mtad = cumulative_depth_idx * tp_weight) %>%
select(case, comp_idx, mtad)
resp_mtad <- bind_rows(tp_mtad_1, tp_mtad_2) %>%
group_by(case, comp_idx) %>%
summarise(total_resp_mtad = sum(mtad, na.rm = TRUE), .groups = "drop")
# Evaluate Complainant - Third Party Cumulative MTA Depth #
comp_mtad_1 <- tp_long %>%
left_join(cumulative_mta_depth_full, by = c("comp" = "country1", "tp_name" = "country2", "year")) %>%
mutate(mtad = cumulative_depth_idx * tp_weight) %>%
select(case, comp_idx, mtad)
comp_mtad_2 <- tp_long %>%
left_join(cumulative_mta_depth_full, by = c("tp_name" = "country1", "comp" = "country2", "year")) %>%
mutate(mtad = cumulative_depth_idx * tp_weight) %>%
select(case, comp_idx, mtad)
comp_mtad <- bind_rows(comp_mtad_1, comp_mtad_2) %>%
group_by(case, comp_idx) %>%
summarise(total_comp_mtad = sum(mtad, na.rm = TRUE), .groups = "drop")
# Evaluate Respondent - Complainant Cumulative MTA Depth #
main_mtad_1 <- main_long %>%
left_join(cumulative_mta_depth_full, by = c("comp_name" = "country1", "resp_name" = "country2", "year")) %>%
mutate(mtad = cumulative_depth_idx) %>%
select(case, comp_idx, mtad)
main_mtad_2 <- main_long %>%
left_join(cumulative_mta_depth_full, by = c("resp_name" = "country1", "comp_name" = "country2", "year")) %>%
mutate(mtad = cumulative_depth_idx) %>%
select(case, comp_idx, mtad)
main_mtad <- bind_rows(main_mtad_1, main_mtad_2) %>%
group_by(case, comp_idx) %>%
slice_tail(n = 1) %>%
ungroup() %>%
rename(total_main_mtad = mtad)
# Combine and Summarize Repondent and Complainant #
master_mta_depth <- left_join(resp_mtad, comp_mtad, by = c("case", "comp_idx")) %>%
left_join(main_mtad, by = c("case", "comp_idx")) %>%
mutate(
tp_total_resp_mtad = coalesce(total_resp_mtad, 0),
tp_total_comp_mtad = coalesce(total_comp_mtad, 0),
tp_total_mtad = tp_total_resp_mtad + tp_total_comp_mtad,
main_total_mtad = coalesce(total_main_mtad, 0),
total_mtad = tp_total_mtad + main_total_mtad,
mtad_all_ratio = tp_total_mtad/total_mtad,
mtad_tp_mp_ratio = tp_total_mtad/main_total_mtad
)
master_mta_depth$mtad_tp_mp_ratio[is.nan(master_mta_depth$mtad_tp_mp_ratio)] <- 0
master_mta_depth$mtad_tp_mp_ratio[is.infinite(master_mta_depth$mtad_tp_mp_ratio)] <- max(master_mta_depth$mtad_tp_mp_ratio[is.finite(master_mta_depth$mtad_tp_mp_ratio)], na.rm = TRUE)
write_csv(master_mta_depth, "Data/MTA Depth.csv")
Trade Volume
### Prepare for Trade Measures ###
trade_volume <- trade_clean %>%
mutate(total = one_two + two_one) %>%
group_by(country1, country2, year) %>%
summarize(total_trade = sum(total)) %>%
ungroup()
trade_gap <- trade_clean %>%
mutate(total_gap = abs(one_two - two_one)) %>%
group_by(country1, country2, year) %>%
summarize(total_gap = sum(total_gap)) %>%
ungroup()
trade_clean2 <- left_join(trade_volume, trade_gap, by = c("country1", "country2", "year"))
### Evaluate Respondent - Third Party Trade Measures ###
resp_trade_1 <- tp_long %>%
left_join(trade_clean2, by = c("resp" = "country1", "tp_name" = "country2", "year")) %>%
mutate(sum_resp_trade = total_trade * tp_weight) %>%
mutate(gap_resp_trade = total_gap * tp_weight) %>%
select(case, comp_idx, sum_resp_trade, gap_resp_trade)
resp_trade_2 <- tp_long %>%
left_join(trade_clean2, by = c("tp_name" = "country1", "resp" = "country2", "year")) %>%
mutate(sum_resp_trade = total_trade * tp_weight) %>%
mutate(gap_resp_trade = total_gap * tp_weight) %>%
select(case, comp_idx, sum_resp_trade, gap_resp_trade)
resp_tp_trade <- bind_rows(resp_trade_1, resp_trade_2) %>%
group_by(case, comp_idx) %>%
summarise(
total_resp_volume = sum(sum_resp_trade, na.rm = TRUE),
total_resp_gap = sum(gap_resp_trade, na.rm = TRUE),
.groups = "drop"
)
### Evaluate Complainant - Third Party Trade Measures ###
comp_trade_1 <- tp_long %>%
left_join(trade_clean2, by = c("comp" = "country1", "tp_name" = "country2", "year")) %>%
mutate(sum_comp_trade = total_trade * tp_weight) %>%
mutate(gap_comp_trade = total_gap * tp_weight) %>%
select(case, comp_idx, sum_comp_trade, gap_comp_trade)
comp_trade_2 <- tp_long %>%
left_join(trade_clean2, by = c("tp_name" = "country1", "comp" = "country2", "year")) %>%
mutate(sum_comp_trade = total_trade * tp_weight) %>%
mutate(gap_comp_trade = total_gap * tp_weight) %>%
select(case, comp_idx, sum_comp_trade, gap_comp_trade)
comp_tp_trade <- bind_rows(comp_trade_1, comp_trade_2) %>%
group_by(case, comp_idx) %>%
summarise(
total_comp_volume = sum(sum_comp_trade, na.rm = TRUE),
total_comp_gap = sum(gap_comp_trade, na.rm = TRUE),
.groups = "drop"
)
### Evaluate Respondent - Complainant Trade Measures ###
main_trade_1 <- main_long %>%
left_join(trade_clean2, by = c("comp_name" = "country1", "resp_name" = "country2", "year")) %>%
mutate(sum_main_trade = total_trade) %>%
mutate(gap_main_trade = total_gap) %>%
select(case, comp_idx, sum_main_trade, gap_main_trade)
main_trade_2 <- main_long %>%
left_join(trade_clean2, by = c("resp_name" = "country1", "comp_name" = "country2", "year")) %>%
mutate(sum_main_trade = total_trade) %>%
mutate(gap_main_trade = total_gap) %>%
select(case, comp_idx, sum_main_trade, gap_main_trade)
main_trade <- bind_rows(main_trade_1, main_trade_2) %>%
group_by(case, comp_idx) %>%
summarise(
total_main_volume = sum(sum_main_trade, na.rm = TRUE),
total_main_gap = sum(gap_main_trade, na.rm = TRUE),
.groups = "drop"
)
### Combine and Summarize Repondent and Complainant ###
master_trade <- full_join(resp_tp_trade, comp_tp_trade, by = c("case", "comp_idx")) %>%
left_join(main_trade, by = c("case", "comp_idx")) %>%
mutate(
total_resp_volume = coalesce(total_resp_volume, 0),
total_comp_volume = coalesce(total_comp_volume, 0),
total_tp_volume = total_resp_volume + total_comp_volume,
total_main_volume = coalesce(total_main_volume, 0),
total_volume = total_tp_volume + total_main_volume,
trade_volume_all_ratio = total_tp_volume/total_volume,
trade_volume_tp_mp_ratio = total_tp_volume/total_main_volume,
log_main_volume = log(total_main_volume),
total_resp_gap = coalesce(total_resp_gap, 0),
total_comp_gap = coalesce(total_comp_gap, 0),
total_tp_gap = total_resp_gap + total_comp_gap,
total_main_gap = coalesce(total_main_gap, 0),
trade_gap_index = log(1+(total_tp_gap/total_main_gap)),
log_main_gap = log(total_main_gap)
)
write_csv(master_trade, "Data/Trade Volume and Gap.csv")
Legal Capacity
### Code Legal Capacity ###
gdppc_clean <- gdppc_clean %>%
select(country, year, gdp_percap) %>%
distinct() %>%
mutate(gdp_percap = as.numeric(as.character(gdp_percap)))
tp_legal <- tp_long %>%
left_join(gdppc_clean, by = c("tp_name" = "country", "year")) %>%
mutate(tp_legal = gdp_percap * tp_weight) %>%
group_by(case, comp_idx) %>%
summarise(total_tp_legal = sum(tp_legal, na.rm = TRUE), .groups = "drop")
main_legal <- main_long2 %>%
left_join(gdppc_clean, by = c("main_name" = "country", "year")) %>%
mutate(main_legal = gdp_percap) %>%
group_by(case, comp_idx) %>%
summarise(total_main_legal = sum(main_legal, na.rm = TRUE), .groups = "drop")
main_legal_ratio <- main_long2 %>%
left_join(gdppc_clean, by = c("main_name" = "country", "year")) %>%
mutate(main_legal = gdp_percap * main_weight) %>%
group_by(case, comp_idx, main_idx) %>%
summarise(total_legal = sum(main_legal, na.rm = TRUE), .groups = "drop") %>%
pivot_wider(names_from = main_idx, values_from = total_legal, names_prefix = "legal_") %>%
mutate(ratio_main_legal = legal_resp / legal_comp)
master_legal <- full_join(tp_legal, main_legal, by = c("case", "comp_idx")) %>%
mutate(
legal_index = total_tp_legal/total_main_legal
) %>%
left_join(main_legal_ratio, by = c("case", "comp_idx"))
master_legal$ratio_main_legal[is.nan(master_legal$ratio_main_legal)] <- 0
master_legal$ratio_main_legal[is.infinite(master_legal$ratio_main_legal)] <- max(master_legal$ratio_main_legal[is.finite(master_legal$ratio_main_legal)], na.rm = TRUE)
write_csv(master_legal, "Data/Legal Capacity.csv")
Liberal Democracy
### Code Regime Type ###
vdem_clean <- vdem_clean %>%
select(country, year, libdem) %>%
distinct()
tp_libdem <- tp_long %>%
left_join(vdem_clean, by = c("tp_name" = "country", "year")) %>%
mutate(tp_libdem = libdem * tp_weight) %>%
group_by(case, comp_idx) %>%
summarise(total_tp_libdem = sum(tp_libdem, na.rm = TRUE), .groups = "drop")
main_libdem <- main_long2 %>%
left_join(vdem_clean, by = c("main_name" = "country", "year")) %>%
mutate(main_libdem = libdem * main_weight) %>%
group_by(case, comp_idx) %>%
summarise(total_main_libdem = sum(main_libdem, na.rm = TRUE), .groups = "drop")
main_libdem_ratio <- main_long2 %>%
left_join(vdem_clean, by = c("main_name" = "country", "year")) %>%
mutate(main_libdem = libdem) %>%
group_by(case, comp_idx, main_idx) %>%
summarise(total_libdem = sum(main_libdem, na.rm = TRUE), .groups = "drop") %>%
pivot_wider(names_from = main_idx, values_from = total_libdem, names_prefix = "libdem_") %>%
mutate(ratio_main_libdem = libdem_resp / libdem_comp)
master_libdem <- full_join(tp_libdem, main_libdem, by = c("case", "comp_idx")) %>%
mutate(libdem_index = total_tp_libdem/total_main_libdem) %>%
left_join(main_libdem_ratio, by = c("case", "comp_idx"))
master_libdem$ratio_main_libdem[is.nan(master_libdem$ratio_main_libdem)] <- 0
master_libdem$ratio_main_libdem[is.infinite(master_libdem$ratio_main_libdem)] <- max(master_libdem$ratio_main_libdem[is.finite(master_libdem$ratio_main_libdem)], na.rm = TRUE)
write_csv(master_libdem, "Data/Libdem Score.csv")
Case and Control Variables
### Code Case Outcome ###
case_outcome <- master %>%
mutate(
settle = ifelse(!is.na(mas_date) | !is.na(mas_day), 1, 0),
early_settle = ifelse((!is.na(mas_date) | !is.na(mas_day)) & (is.na(panel_date) | !is.na(panel_day)), 1, 0),
no_settle = ifelse(is.na(mas_date) & is.na(mas_day), 1, 0)
) %>%
select(case, comp_idx, settle, no_settle)
write_csv(case_outcome, "Data/Case Outcome.csv")
### Code Case Complexity ###
complexity <- master %>%
unnest_wider(agreement, names_sep = "")
complexity_raw <- master %>%
select(case, comp_idx, agreement) %>%
setNames(c("case", "comp_idx", "agreement", "")) %>%
separate('agreement', into = paste0("agreement", 1:6), sep = ", ", fill = "right")
complexity_expand <- complexity_raw %>%
pivot_longer(cols = paste0("agreement", 1:6), names_to = "agreement_idx", values_to = "agreement_name") %>%
filter(!is.na(agreement_name) | (is.na(agreement_name) & agreement_idx == "agreement1"))
case_complexity <- complexity_expand %>%
group_by(case, comp_idx) %>%
summarise(complexity = sum(!is.na(agreement_name))) %>%
left_join(master, by = c("case", "comp_idx")) %>%
select(case, comp_idx, complexity)
write_csv(case_complexity, "Data/Case Complexity.csv")
### Code Article XXII ###
case_article_xxii <- master %>%
left_join(article_source) %>%
select(case, comp_idx, xxii_cited)
write_csv(case_article_xxii, "Data/Case Article XXII Cited.csv")
### Code Systemic Issues ###
case_systemic_issue <- master %>%
left_join(article_source) %>%
select(case, comp_idx, systemic_cited)
write_csv(case_systemic_issue, "Data/Case Systemic Issue Cited.csv")
### Code Disputed Policy ###
categorical_policy <- master %>%
select(case, comp_idx, policy)
categorical_policy <- categorical_policy %>%
mutate(policy_group = case_when(
# Group 1: Discrimination
grepl("discriminat|preferential|gsp|origin", policy, ignore.case = TRUE) ~ 1,
# Group 2: Export, Import, Technical, Safeguards Barriers
grepl("export|import|ban|restriction|quota|licensing|customs valuation|customs|technical|safeguard|transit|quantitative",
policy, ignore.case = TRUE) ~ 2,
# Group 3: Subsidies, Anti-dumping
grepl("subsid|anti-dumping|dumping|countervailing|zeroing|sunset review|national",
policy, ignore.case = TRUE) ~ 3,
# Group 4: Intellectual Property
grepl("intellectual property|trips|patent|copyright|geographical indication",
policy, ignore.case = TRUE) ~ 4,
# Group 5: Goods, Services, Investments
grepl("goods|services|investment|tariff|market access|content requirement|visa|tax|taxation",
policy, ignore.case = TRUE) ~ 5,
TRUE ~ 0
))
dummy_policy <- categorical_policy %>%
mutate(
policy_discrimination = if_else(policy_group == 1, 1, 0),
policy_barriers = if_else(policy_group == 2, 1, 0),
policy_subsidies = if_else(policy_group == 3, 1, 0),
policy_ip = if_else(policy_group == 4, 1, 0),
policy_gs = if_else(policy_group == 5, 1, 0)
)
case_policy <- dummy_policy
write_csv(case_policy, "Data/Case Policy.csv")
Analysis
Combine Dataset
### Combine Dataset ###
final_master <- left_join(master, case_outcome, by = c("case", "comp_idx")) %>%
left_join(tp_count) %>%
left_join(tp_dummy) %>%
left_join(master_mta_count) %>%
left_join(master_mta_depth) %>%
left_join(master_trade) %>%
left_join(master_legal) %>%
left_join(master_libdem) %>%
left_join(case_complexity) %>%
left_join(case_article_xxii) %>%
left_join(case_systemic_issue) %>%
left_join(case_policy) %>%
select(case, comp_idx, settle, tp_count, tp_dummy,
tp_total_mtac, mtac_all_ratio, mtac_tp_mp_ratio,
tp_total_mtad, mtad_all_ratio, mtad_tp_mp_ratio,
total_tp_volume, trade_volume_all_ratio, trade_volume_tp_mp_ratio,
total_tp_legal, legal_index,
total_tp_libdem, libdem_index,
complexity, xxii_cited, systemic_cited,
policy_group, policy_discrimination, policy_barriers, policy_subsidies, policy_ip, policy_gs) %>%
drop_na() %>%
filter(tp_dummy == 1) %>%
write_csv("Data/Final Dataset Unscaled.csv") %>%
mutate(across(4:27, ~ scale(.)[,1])) %>%
mutate(across(3:27, ~ as.numeric(.))) %>%
write_csv("Data/Final Dataset.csv") %>%
select(-tp_dummy)
final_master_unscaled <- read_csv("Data/Final Dataset Unscaled.csv") %>%
select(-tp_dummy)
final_master_unscaled_df <- as.data.frame(final_master_unscaled) %>%
select(-case, -comp_idx) %>%
rename(
"Early Settlement" = settle,
"Third Party Count" = tp_count,
"TP Raw MTA Count Total" = tp_total_mtac,
"TP to AP MTA Count Ratio" = mtac_all_ratio,
"TP to MP MTA Count Ratio" = mtac_tp_mp_ratio,
"TP Raw MTA Depth Total" = tp_total_mtad,
"TP to AP MTA Depth Ratio" = mtad_all_ratio,
"TP to MP MTA Depth Ratio" = mtad_tp_mp_ratio,
"TP Raw Trade Volume Total*" = total_tp_volume,
"TP to AP Trade Volume Ratio" = trade_volume_all_ratio,
"TP to MP Trade Volume Ratio" = trade_volume_tp_mp_ratio,
"Average TP Legal Capacity**" = total_tp_legal,
"TP to MP Legal Capacity Ratio" = legal_index,
"Average TP LibDem Score" = total_tp_libdem,
"TP to MP LibDem Score Ratio" = libdem_index,
"Case Complexity" = complexity,
"Article XXII Cited" = xxii_cited,
"Systemic Issue Cited" = systemic_cited,
"Policy Group" = policy_group,
"Discriminative Policy" = policy_discrimination,
"Trade Barrier Policy" = policy_barriers,
"Subsidies Policy" = policy_subsidies,
"Intellectual Property Policy" = policy_ip,
"Goods Services Policy" = policy_gs
)
stargazer(final_master_unscaled_df, type= "latex", title = "Summary Statistics", summary = TRUE, median = TRUE, iqr = TRUE, digits = 3, out = "Output/Summary Statistics.tex", column.sep.width = "5pt", notes = "TP=Third Parties; AP=All Parties; MP=Main Parties; LibDem=Liberal Democracy. *Trade in millions of 2014 US dollars. **GDP Per Capita in 2024 US dollars.")
final_master_df <- as.data.frame(final_master) %>%
select(-case, -comp_idx) %>%
rename(
"Early Settlement" = settle,
"Third Party Count" = tp_count,
"TP Raw MTA Count Total" = tp_total_mtac,
"TP to AP MTA Count Ratio" = mtac_all_ratio,
"TP to MP MTA Count Ratio" = mtac_tp_mp_ratio,
"TP Raw MTA Depth Total" = tp_total_mtad,
"TP to AP MTA Depth Ratio" = mtad_all_ratio,
"TP to MP MTA Depth Ratio" = mtad_tp_mp_ratio,
"TP Raw Trade Volume Total" = total_tp_volume,
"TP to AP Trade Volume Ratio" = trade_volume_all_ratio,
"TP to MP Trade Volume Ratio" = trade_volume_tp_mp_ratio,
"Average TP Legal Capacity" = total_tp_legal,
"TP to MP Legal Capacity Ratio" = legal_index,
"Average TP LibDem Score" = total_tp_libdem,
"TP to MP LibDem Score Ratio" = libdem_index,
"Case Complexity" = complexity,
"Article XXII Cited" = xxii_cited,
"Systemic Issue Cited" = systemic_cited,
"Policy Group" = policy_group,
"Discriminative Policy" = policy_discrimination,
"Trade Barrier Policy" = policy_barriers,
"Subsidies Policy" = policy_subsidies,
"Intellectual Property Policy" = policy_ip,
"Goods Services Policy" = policy_gs
)
stargazer(final_master_df, type= "latex", title = "Standardized Summary Statistics", summary = TRUE, median = TRUE, iqr = TRUE, digits = 3, out = "Output/Standardized Summary Statistics.tex", column.sep.width = "5pt", notes = "TP=Third Parties; AP=All Parties; MP=Main Parties; LibDem=Liberal Democracy")
Check Correlations
### Check Variable Correlations ###
cor_data <- final_master %>%
select(where(is.numeric)) %>%
select(-comp_idx)
# Run together - From here #
cor_matrix <- cor(cor_data, use = "complete.obs")
colnames(cor_matrix) <- c("Early Settlement", "TP Count",
"TP Raw MTA Count Total", "TP-AP MTA Count Ratio", "TP-MP MTA Count Ratio",
"TP Raw MTA Depth Total", "TP-AP MTA Depth Ratio", "TP-MP MTA Depth Ratio",
"TP Raw Trade Volume Total", "TP-AP Trade Volume Ratio", "TP-MP Trade Volume Ratio",
"Ave TP Legal Capacity", "TP-MP Legal Capacity Ratio", "Ave TP Liberal Democracy Score", "TP-MP Liberal Democracy Score Ratio",
"Case Complexity", "Article XXII Cited", "Systemic Issue Cited",
"Policy Group", "Discriminative Policy", "Trade Barrier Policy", "Subsidies Policy", "Intellectual Property Policy", "Goods Services Policy")
rownames(cor_matrix) <- colnames(cor_matrix)
png("Output/Correlation Matrix.png", width = 8000, height = 6000, res = 100)
par(family = "serif")
corrplot(cor_matrix, method = "color", type = "upper",
tl.cex = 5.5, number.cex = 5, cl.cex = 5.5,
tl.col = "black", tl.srt = 45,
addCoef.col = "black",
addgrid.col = "black"
)
# Run together - To here #
cor_matrix_df <- as.data.frame(cor_matrix)
write_csv(cor_matrix_df, "Data/Correlation Matrix.csv")
Run Probit Regression
### MTA Count Models (1-3) ###
model_tp_mtac <- glm(settle ~ tp_count + tp_total_mtac + legal_index + libdem_index + complexity + xxii_cited + systemic_cited + policy_group, data = final_master, family = "binomial"(link = "probit"))
model_mtac_all_ratio <- glm(settle ~ tp_count + mtac_all_ratio + legal_index + libdem_index + complexity + xxii_cited + systemic_cited + policy_group, data = final_master, family = "binomial"(link = "probit"))
model_mtac_tp_mp_ratio <- glm(settle ~ tp_count + mtac_tp_mp_ratio + legal_index + libdem_index + complexity + xxii_cited + systemic_cited + policy_group, data = final_master, family = "binomial"(link = "probit"))
### MTA Depth Models (4-6) ###
model_tp_mtad <- glm(settle ~ tp_count + tp_total_mtad + legal_index + libdem_index + complexity + xxii_cited + systemic_cited + policy_group, data = final_master, family = "binomial"(link = "probit"))
model_mtad_all_ratio <- glm(settle ~ tp_count + mtad_all_ratio + legal_index + libdem_index + complexity + xxii_cited + systemic_cited + policy_group, data = final_master, family = "binomial"(link = "probit"))
model_mtad_tp_mp_ratio <- glm(settle ~ tp_count + mtad_tp_mp_ratio + legal_index + libdem_index + complexity + xxii_cited + systemic_cited + policy_group, data = final_master, family = "binomial"(link = "probit"))
### Trade Volume Models (7-9) ###
model_tp_trade <- glm(settle ~ tp_count + total_tp_volume + legal_index + libdem_index + complexity + xxii_cited + systemic_cited + policy_group, data = final_master, family = "binomial"(link = "probit"))
model_trade_all_ratio <- glm(settle ~ tp_count + trade_volume_all_ratio + legal_index + libdem_index + complexity + xxii_cited + systemic_cited + policy_group, data = final_master, family = "binomial"(link = "probit"))
model_trade_tp_mp_ratio <- glm(settle ~ tp_count + trade_volume_tp_mp_ratio + legal_index + libdem_index + complexity + xxii_cited + systemic_cited + policy_group, data = final_master, family = "binomial"(link = "probit"))
### Collect all models ###
models <- list(
model_tp_mtac, model_mtac_all_ratio, model_mtac_tp_mp_ratio,
model_tp_mtad, model_mtad_all_ratio, model_mtad_tp_mp_ratio,
model_tp_trade, model_trade_all_ratio, model_trade_tp_mp_ratio
)
Create Regression Table
texreg(l = models,
file = "Output/Regression Results.tex",
stars = c(0.001, 0.01, 0.05),
caption.above = TRUE,
caption = "Regression Results",
label = "Regression Results",
booktabs = TRUE,
scalebox = 0.8,
custom.model.names = c(
"(1)",
"(2)",
"(3)",
"(4)",
"(5)",
"(6)",
"(7)",
"(8)",
"(9)"
),
custom.header = list("MTA Count" = 1:3, "MTA Depth" = 4:6, "Trade Volume" = 7:9),
custom.coef.map = list(
"(Intercept)" = "Constant",
"tp_count" = "Third Party Count",
"tp_total_mtac" = "Third Party Raw Total",
"mtac_all_ratio" = "TP to All Party Ratio",
"mtac_tp_mp_ratio" = "TP to Main Party Ratio",
"tp_total_mtad" = "Third Party Raw Total",
"mtad_all_ratio" = "TP to All Party Ratio",
"mtad_tp_mp_ratio" = "TP to Main Party Ratio",
"total_tp_volume" = "Third Party Raw Total",
"trade_volume_all_ratio" = "TP to All Party Ratio",
"trade_volume_tp_mp_ratio" = "TP to Main Party Ratio",
"legal_index" = "TP to MP Legal Capacity Ratio",
"libdem_index" = "TP to MP LibDem Score Ratio",
"complexity" = "Case Complexity",
"xxii_cited" = "Article XXII Cited",
"systemic_cited" = "Systemic Issue Cited",
"policy_group" = "Policy Group"
),
custom.note = "%stars. TP=Third Parties; MP=Main Parties; LibDem=Liberal Democracy.",
digits = 3
)
Evaluate Marginal Effects
### Average Marginal Effects and Marginal Effects at the Mean ###
ame_results <- lapply(models, avg_slopes)
modelsummary(ame_results)
### Rename Models ###
model_names <- c(
"Model (1)",
"Model (2)",
"Model (3)",
"Model (4)",
"Model (5)",
"Model (6)",
"Model (7)",
"Model (8)",
"Model (9)"
)
for (i in seq_along(ame_results)) {
ame_results[[i]]$model_name <- model_names[i]
}
### Create Dataset ###
ame_mtac_df <- bind_rows(ame_results) %>%
slice(1:24)
ame_mtad_df <- bind_rows(ame_results) %>%
slice(25:48)
ame_trade_df <- bind_rows(ame_results) %>%
slice(49:72)
### Visualize MTA count ###
figure_3 <- ggplot(ame_mtac_df %>% filter(term == "tp_total_mtac" | term == "mtac_all_ratio" | term == "mtac_tp_mp_ratio") %>%
mutate(term = recode(term,
"tp_total_mtac" = "Third Party Raw Total",
"mtac_all_ratio" = " Third Party to All Party Ratio",
"mtac_tp_mp_ratio" = " Third Party to Main Party Ratio"
)),
aes(x = estimate, y = term, color = model_name)) +
geom_vline(xintercept = 0, color = "red", linetype = "dashed") +
geom_point(position = position_dodge(width = 0.7)) +
geom_errorbar(aes(xmin = conf.low, xmax = conf.high), width = 0.2, position = position_dodge(width = 0.7)) +
labs(
x = "Marginal Effects (Change in Settlement Probability)",
y = "MTA Count Variables",
color = "Model"
) +
theme_bw(base_family = "serif") +
theme(
plot.title = element_text(size = 16),
axis.title = element_text(size = 14),
axis.text = element_text(size = 12),
legend.title = element_text(size = 13),
legend.text = element_text(size = 11),
axis.title.x = element_text(margin = margin(t = 20)),
axis.title.y = element_text(margin = margin(r = 10))
)
ggsave("Output/Marginal Effects Across MTA Count Models.png", plot = figure_3, width = 10, height = 4, units = "in")
### Visualize MTA Depth ###
figure_4 <- ggplot(ame_mtad_df %>% filter(term == "tp_total_mtad" | term == "mtad_all_ratio" | term == "mtad_tp_mp_ratio") %>%
mutate(term = recode(term,
"tp_total_mtad" = "Third Party Raw Total",
"mtad_all_ratio" = " Third Party to All Party Ratio",
"mtad_tp_mp_ratio" = " Third Party to Main Party Ratio"
)),
aes(x = estimate, y = term, color = model_name)) +
geom_vline(xintercept = 0, color = "red", linetype = "dashed") +
geom_point(position = position_dodge(width = 0.7)) +
geom_errorbar(aes(xmin = conf.low, xmax = conf.high), width = 0.2, position = position_dodge(width = 0.7)) +
labs(
x = "Marginal Effect (Change in Settlement Probability)",
y = "MTA Depth Variables",
color = "Model"
) +
theme_bw(base_family = "serif") +
theme(
plot.title = element_text(size = 16),
axis.title = element_text(size = 14),
axis.text = element_text(size = 12),
legend.title = element_text(size = 13),
legend.text = element_text(size = 11),
axis.title.x = element_text(margin = margin(t = 20)),
axis.title.y = element_text(margin = margin(r = 10))
)
ggsave("Output/Marginal Effects Across MTA Depth Models.png", plot = figure_4, width = 10, height = 4, units = "in")
### Visualize Trade Volume ###
figure_5 <- ggplot(ame_trade_df %>% filter(term == "total_tp_volume" | term == "trade_volume_all_ratio" | term == "trade_volume_tp_mp_ratio") %>%
mutate(term = recode(term,
"total_tp_volume" = "Third Party Raw Total",
"trade_volume_all_ratio" = " Third Party to All Party Ratio",
"trade_volume_tp_mp_ratio" = " Third Party to Main Party Ratio"
)), aes(x = estimate, y = term, color = model_name)) +
geom_vline(xintercept = 0, color = "red", linetype = "dashed") +
geom_point(position = position_dodge(width = 0.7)) +
geom_errorbar(aes(xmin = conf.low, xmax = conf.high), width = 0.2, position = position_dodge(width = 0.7)) +
labs(
x = "Marginal Effect (Change in Settlement Probability)",
y = "Trade Volume Variables",
color = "Model"
) +
theme_bw(base_family = "serif") +
theme(
plot.title = element_text(size = 16),
axis.title = element_text(size = 14),
axis.text = element_text(size = 12),
legend.title = element_text(size = 13),
legend.text = element_text(size = 11),
axis.title.x = element_text(margin = margin(t = 20)),
axis.title.y = element_text(margin = margin(r = 10))
)
ggsave("Output/Marginal Effects Across Trade Volume Models.png", plot = figure_5, width = 10, height = 4, units = "in")
### Evaluate Predicted Probability ###
model_list <- list(
model_tp_mtac,
model_mtac_all_ratio,
model_mtac_tp_mp_ratio,
model_tp_mtad,
model_mtad_all_ratio,
model_mtad_tp_mp_ratio,
model_tp_trade,
model_trade_all_ratio,
model_trade_tp_mp_ratio
)
model_names <- paste0("model_", 1:9)
vars_to_vary <- c("tp_total_mtac", "mtac_all_ratio", "mtac_tp_mp_ratio",
"tp_total_mtad", "mtad_all_ratio", "mtad_tp_mp_ratio",
"total_tp_volume", "trade_volume_all_ratio", "trade_tp_mp_ratio",
"complexity", "xxii_cited", "systemic_cited", "legal_index", "libdem_index")
prediction_results <- list()
for (i in seq_along(model_list)) {
model <- model_list[[i]]
model_name <- model_names[[i]]
for (var in vars_to_vary) {
if (!(var %in% names(coef(model)))) next
newdata_grid <- data.frame(var_values = seq(-2, 2, by = 0.25)) %>%
rename(!!var := var_values)
vars_in_model <- names(coef(model))
control_vars <- setdiff(vars_in_model, c("(Intercept)", var))
for (cv in control_vars) {
newdata_grid[[cv]] <- 0
}
pred <- predictions(model, newdata = newdata_grid)
pred_out <- pred %>%
mutate(model = model_name, variable = var) %>%
rename(var_value = !!sym(var))
prediction_results[[paste(model_name, var, sep = "_")]] <- pred_out
}
}
pred_prob <- bind_rows(prediction_results)
predicted_probabilities <- pred_prob %>%
mutate(percentile = case_when(
var_value == -0.75 ~ "25th",
var_value == 0 ~ "50th (Mean)",
var_value == 0.75 ~ "75th"
)) %>%
select(c("model","variable", "percentile", "estimate", "p.value", "conf.low", "conf.high")) %>%
filter(!is.na(percentile))
write_csv(predicted_probabilities, "Data/Predicted Probabilities.csv")
### Plot Predicted Probability for Select Models ###
figure_6 <- plot_predictions(model_tp_mtac, condition = "tp_total_mtac", draw = TRUE) +
labs(
x = "Third Party Raw MTA Count Total",
y = "Probability of Early Settlement",
) +
theme_bw(base_family = "serif") +
theme(
plot.title = element_text(size = 16),
axis.title = element_text(size = 14),
axis.text = element_text(size = 12),
legend.title = element_text(size = 13),
legend.text = element_text(size = 11),
axis.title.x = element_text(margin = margin(t = 10)),
axis.title.y = element_text(margin = margin(r = 10))
)
ggsave("Output/Predicted Probability of MTA Count.png", plot = figure_6, width = 6, height = 5, units = "in")
figure_7 <- plot_predictions(model_tp_mtad, condition = "tp_total_mtad") +
labs(
x = "Third Party Raw MTA Depth Total",
y = "Probability of Early Settlement",
) +
theme_bw(base_family = "serif") +
theme(
plot.title = element_text(size = 16),
axis.title = element_text(size = 14),
axis.text = element_text(size = 12),
legend.title = element_text(size = 13),
legend.text = element_text(size = 11),
axis.title.x = element_text(margin = margin(t = 10)),
axis.title.y = element_text(margin = margin(r = 10))
)
ggsave("Output/Predicted Probability of MTA Depth.png", plot = figure_7, width = 6, height = 5, units = "in")
figure_8 <- plot_predictions(model_tp_trade, condition = "total_tp_volume") +
labs(
x = "Third Party Raw Trade Volume Total",
y = "Probability of Early Settlement",
) +
theme_bw(base_family = "serif") +
theme(
plot.title = element_text(size = 16),
axis.title = element_text(size = 14),
axis.text = element_text(size = 12),
legend.title = element_text(size = 13),
legend.text = element_text(size = 11),
axis.title.x = element_text(margin = margin(t = 10)),
axis.title.y = element_text(margin = margin(r = 10))
)
ggsave("Output/Predicted Probability of Trade Volume.png", plot = figure_8, width = 6, height = 5, units = "in")