Spaces:

bics-berkeley
/

partisan-disease-simulator

Sleeping

App Files Files Community

partisan-disease-simulator / R /data_processing.R

chrissoria

Fix invisible lines in plots and Total infected percentage bug

edefa8f 3 months ago

raw

history blame contribute delete

33 kB

	#' Derive per-party and population-wide epidemic metrics
	#'
	#' Appends compartment totals, population denominators, proportions and
	#' mortality columns to the raw simulation output. Variable naming matches
	#' disease_model_data_prep_helper.R. Input columns ending in `a` feed the
	#' `rep_*` outputs, `b` the `dem_*` outputs and `c` the `ind_*` outputs.
	#'
	#' @param data Raw simulation output from sir_three_group_pu(); the S/I/R/D
	#'   compartment columns (`SUa`, `SPa`, `IUa`, ..., `DPc`) are read here
	#' @return `data` with the epidemic metric columns added
	add_epidemic_metrics <- function(data) {
	  # Stage 1: head counts per compartment and the population denominators.
	  counts <- data %>%
	    mutate(
	      # Current infections (prevalence)
	      total_prevalence = (IUa + IPa) + (IUb + IPb) + (IUc + IPc),
	      rep_prevalence = IUa + IPa,
	      dem_prevalence = IUb + IPb,
	      ind_prevalence = IUc + IPc,
	      # Currently susceptible
	      rep_susceptible = SUa + SPa,
	      dem_susceptible = SUb + SPb,
	      ind_susceptible = SUc + SPc,
	      # Recovered
	      rep_recovered = RUa + RPa,
	      dem_recovered = RUb + RPb,
	      ind_recovered = RUc + RPc,
	      # Living population per group (S + I + R; the D columns are not included)
	      rep_pop = SUa + IUa + RUa + SPa + IPa + RPa,
	      dem_pop = SUb + IUb + RUb + SPb + IPb + RPb,
	      ind_pop = SUc + IUc + RUc + SPc + IPc + RPc,
	      total_pop = rep_pop + dem_pop + ind_pop
	    )

	  # Stage 2: the same quantities expressed as shares of the current population.
	  shares <- counts %>%
	    mutate(
	      rep_prop_susceptible = rep_susceptible / rep_pop,
	      dem_prop_susceptible = dem_susceptible / dem_pop,
	      ind_prop_susceptible = ind_susceptible / ind_pop,
	      rep_prop_prevalence = rep_prevalence / rep_pop,
	      dem_prop_prevalence = dem_prevalence / dem_pop,
	      ind_prop_prevalence = ind_prevalence / ind_pop,
	      # Summed left to right on purpose: keeps the exact floating-point result
	      total_prop_prevalence = (IUa + IPa + IUb + IPb + IUc + IPc) / total_pop,
	      rep_prop_recovered = rep_recovered / rep_pop,
	      dem_prop_recovered = dem_recovered / dem_pop,
	      ind_prop_recovered = ind_recovered / ind_pop
	    )

	  # Stage 3: cumulative deaths, per-step new deaths and death proportions
	  # (current population as denominator).
	  shares %>%
	    mutate(
	      total_dead = DUa + DPa + DUb + DPb + DUc + DPc,
	      rep_dead = DUa + DPa,
	      dem_dead = DUb + DPb,
	      ind_dead = DUc + DPc,
	      # First row has no predecessor, so it is differenced against 0
	      total_new_deaths = total_dead - lag(total_dead, default = 0),
	      total_prop_dead = total_dead / total_pop,
	      rep_prop_dead = rep_dead / rep_pop,
	      dem_prop_dead = dem_dead / dem_pop,
	      ind_prop_dead = ind_dead / ind_pop
	    )
	}

	#' Attach simulation parameters to a dataset as columns
	#'
	#' Each named element of `parameters_list` is written into `data` under its
	#' own name, except `time`, which is stored as `max_time`. Matches the function
	#' of the same name in disease_model_data_prep_helper.R.
	#'
	#' @param data Simulation data frame
	#' @param parameters_list Named list of parameters
	#' @return `data` with one column per parameter added (or overwritten)
	add_parameters_to_dataset <- function(data, parameters_list) {
	  param_names <- names(parameters_list)
	  for (i in seq_along(param_names)) {
	    source_name <- param_names[i]
	    target_col <- source_name
	    # The `time` parameter is stored under `max_time` rather than `time`
	    if (source_name == "time") {
	      target_col <- "max_time"
	    }
	    data[[target_col]] <- parameters_list[[source_name]]
	  }
	  data
	}

	#' Generate contact matrices for each time step
	#'
	#' Calls avg_contact_matrix_3gp() once per row of `df`, stores every returned
	#' matrix and spreads its first three rows into the nine `X_Y` contact columns
	#' (row 1 -> `R_*`, row 2 -> `D_*`, row 3 -> `I_*`). Matches the function of
	#' the same name in disease_model_data_prep_helper.R.
	#'
	#' @param df Simulation data frame with epidemic metrics; the columns `ca`,
	#'   `cb`, `cc`, `rep_pop`, `dem_pop` and `ind_pop` are read per row
	#' @param params List with beta_a and beta_b parameters
	#' @return List with `modified_df` (`df` plus contact and summary columns) and
	#'   `ac_mats` (list of per-step matrices named `step_1`, `step_2`, ...)
	generate_contact_matrices <- function(df, params = list()) {
	  contact_cols <- c("R_R", "R_D", "R_I", "D_R", "D_D", "D_I", "I_R", "I_D", "I_I")
	  n_steps <- nrow(df)

	  # Numeric NA (not logical) so the columns have a stable type even when no
	  # row is ever filled in.
	  for (col in contact_cols) {
	    df[[col]] <- rep(NA_real_, n_steps)
	  }

	  # Preallocate instead of growing the list inside the loop.
	  ac_mats <- vector("list", n_steps)
	  names(ac_mats) <- paste0("step_", seq_len(n_steps))

	  # seq_len() yields an empty sequence for a zero-row data frame, whereas
	  # 1:nrow(df) would iterate over c(1, 0).
	  for (r in seq_len(n_steps)) {
	    ac_mat <- avg_contact_matrix_3gp(
	      dbar_a = df$ca[r],
	      dbar_b = df$cb[r],
	      dbar_c = df$cc[r],
	      N_a = df$rep_pop[r],
	      N_b = df$dem_pop[r],
	      N_c = df$ind_pop[r],
	      beta_a = params$beta_a,
	      beta_b = params$beta_b
	    )

	    ac_mats[[r]] <- ac_mat
	    df[r, c("R_R", "R_D", "R_I")] <- ac_mat[1, ]
	    df[r, c("D_R", "D_D", "D_I")] <- ac_mat[2, ]
	    df[r, c("I_R", "I_D", "I_I")] <- ac_mat[3, ]
	  }

	  # Row totals plus the ca_*/cb_*/cc_* aliases used by the downstream
	  # force-of-infection calculations.
	  df <- df %>%
	    mutate(
	      total_ca = R_R + R_D + R_I,
	      ca_a = R_R,
	      ca_b = R_D,
	      ca_c = R_I,
	      total_cb = D_R + D_D + D_I,
	      cb_a = D_R,
	      cb_b = D_D,
	      cb_c = D_I,
	      total_cc = I_R + I_D + I_I,
	      cc_a = I_R,
	      cc_b = I_D,
	      cc_c = I_I
	    )

	  list(modified_df = df, ac_mats = ac_mats)
	}

	#' Add force of infection, incidence and related epidemic columns
	#'
	#' Extends a simulation data frame that already carries the epidemic metrics,
	#' the parameter columns and the `ca_*`/`cb_*`/`cc_*` contact columns. Matches
	#' the function of the same name in disease_model_data_prep_helper.R.
	#'
	#' @param data Simulation data frame with contact matrices
	#' @return `data` with force-of-infection, infection-source, incidence,
	#'   death-proportion, protection, case and contact columns added
	add_additional_epidemic_columns <- function(data) {
	  # Infectious share of one group, with protected infectious weighted by kappa.
	  infectious_share <- function(unprotected, protected, pop, kappa) {
	    unprotected / pop + kappa * protected / pop
	  }

	  # New infections among a group's susceptibles; protected susceptibles are
	  # scaled by kappa.
	  new_infections <- function(s_unprotected, s_protected, foi, kappa) {
	    (s_unprotected * foi) + (s_protected * foi * kappa)
	  }

	  # Force of infection: lambda_xy is the pressure on group x from group y.
	  with_foi <- data %>%
	    mutate(
	      lambda_aa = trans_p * (ca_a * infectious_share(IUa, IPa, rep_pop, kappa)),
	      lambda_ab = trans_p * (ca_b * infectious_share(IUb, IPb, dem_pop, kappa)),
	      lambda_ac = trans_p * (ca_c * infectious_share(IUc, IPc, ind_pop, kappa)),
	      lambda_a = lambda_aa + lambda_ab + lambda_ac,
	      lambda_ba = trans_p * (cb_a * infectious_share(IUa, IPa, rep_pop, kappa)),
	      lambda_bb = trans_p * (cb_b * infectious_share(IUb, IPb, dem_pop, kappa)),
	      lambda_bc = trans_p * (cb_c * infectious_share(IUc, IPc, ind_pop, kappa)),
	      lambda_b = lambda_ba + lambda_bb + lambda_bc,
	      lambda_ca = trans_p * (cc_a * infectious_share(IUa, IPa, rep_pop, kappa)),
	      lambda_cb = trans_p * (cc_b * infectious_share(IUb, IPb, dem_pop, kappa)),
	      lambda_cc = trans_p * (cc_c * infectious_share(IUc, IPc, ind_pop, kappa)),
	      lambda_c = lambda_ca + lambda_cb + lambda_cc,
	      lambda = lambda_a + lambda_b + lambda_c
	    )

	  # Share of each group's infection pressure attributable to each source group.
	  with_sources <- with_foi %>%
	    mutate(
	      prop_rep_infections_from_reps = lambda_aa / lambda_a,
	      prop_rep_infections_from_dems = lambda_ab / lambda_a,
	      prop_rep_infections_from_inds = lambda_ac / lambda_a,
	      prop_dem_infections_from_reps = lambda_ba / lambda_b,
	      prop_dem_infections_from_dems = lambda_bb / lambda_b,
	      prop_dem_infections_from_inds = lambda_bc / lambda_b,
	      prop_ind_infections_from_reps = lambda_ca / lambda_c,
	      prop_ind_infections_from_dems = lambda_cb / lambda_c,
	      prop_ind_infections_from_inds = lambda_cc / lambda_c
	    )

	  # Incidence. On the first row the unprotected and total incidence are the
	  # I0_* seed values and the protected incidence is 0.
	  with_incidence <- with_sources %>%
	    mutate(
	      # Incidence by protection status
	      rep_unprotected_incidence = if_else(row_number() == 1, I0_a[1], (SUa * lambda_a)),
	      rep_protected_incidence = if_else(row_number() == 1, 0, (SPa * lambda_a * kappa)),
	      dem_unprotected_incidence = if_else(row_number() == 1, I0_b[1], (SUb * lambda_b)),
	      dem_protected_incidence = if_else(row_number() == 1, 0, (SPb * lambda_b * kappa)),
	      ind_unprotected_incidence = if_else(row_number() == 1, I0_c[1], (SUc * lambda_c)),
	      ind_protected_incidence = if_else(row_number() == 1, 0, (SPc * lambda_c * kappa)),
	      # Total incidence per group, plus rates per 100,000
	      rep_incidence = if_else(
	        row_number() == 1, I0_a[1], new_infections(SUa, SPa, lambda_a, kappa)
	      ),
	      rep_incidence_rate = (rep_incidence / rep_pop) * 100000,
	      dem_incidence = if_else(
	        row_number() == 1, I0_b[1], new_infections(SUb, SPb, lambda_b, kappa)
	      ),
	      dem_incidence_rate = (dem_incidence / dem_pop) * 100000,
	      ind_incidence = if_else(
	        row_number() == 1, I0_c[1], new_infections(SUc, SPc, lambda_c, kappa)
	      ),
	      ind_incidence_rate = (ind_incidence / ind_pop) * 100000,
	      unprotected_incidence = rep_unprotected_incidence + dem_unprotected_incidence +
	        ind_unprotected_incidence,
	      protected_incidence = rep_protected_incidence + dem_protected_incidence +
	        ind_protected_incidence,
	      total_incidence = unprotected_incidence + protected_incidence,
	      total_incidence_rate = (total_incidence / total_pop) * 100000
	    )

	  with_incidence %>%
	    mutate(
	      # Incidence as proportions
	      rep_prop_infected = rep_incidence / rep_pop,
	      dem_prop_infected = dem_incidence / dem_pop,
	      ind_prop_infected = ind_incidence / ind_pop,
	      # Deaths as proportions of the initial population (first-row N0 and
	      # fractions); these replace any *_prop_dead columns already present
	      rep_prop_dead = rep_dead / (N0[1] * frac_a[1]),
	      dem_prop_dead = dem_dead / (N0[1] * frac_b[1]),
	      ind_prop_dead = ind_dead / (N0[1] * (1 - (frac_a[1] + frac_b[1]))),
	      total_prop_dead = total_dead / (N0[1]),
	      # Protection proportions
	      rep_prop_protected = (SPa + IPa + RPa) / rep_pop,
	      dem_prop_protected = (SPb + IPb + RPb) / dem_pop,
	      ind_prop_protected = (SPc + IPc + RPc) / ind_pop,
	      # New cases from the disease model's cumulative counters; the first row
	      # is differenced against 1
	      new_cases_a = Casesa - lag(Casesa, default = 1),
	      new_cases_b = Casesb - lag(Casesb, default = 1),
	      new_cases_c = Casesc - lag(Casesc, default = 1),
	      # Cumulative cases
	      rep_cumulative_cases = Casesa,
	      dem_cumulative_cases = Casesb,
	      ind_cumulative_cases = Casesc,
	      # Population-weighted average of the group contact rates
	      total_pop_contacts = ((ca * rep_pop) + (cb * dem_pop) + (cc * ind_pop)) / total_pop
	    )
	}

	#' Calculate R0 from simulation data
	#'
	#' Builds a 3 x 3 next-generation matrix (NGM) from the first row of `data`
	#' and returns the largest eigenvalue modulus. Entry `[i, j]` is
	#' `eff_trans[j] * contact[i, j] * infectious_period * eff_susc[i]`, where
	#' the contact matrix rows are `ca_*`, `cb_*`, `cc_*`. Matches the function of
	#' the same name in disease_model_data_prep_helper.R.
	#'
	#' @param data Simulation data frame with all metrics; the first-row values of
	#'   `trans_p`, `kappa`, `rho`, the nine `c?_?` contact columns and the three
	#'   `*_prop_protected` columns are used
	#' @return Numeric R0 value
	calculate_R0 <- function(data) {
	  contact_cols <- c(
	    "ca_a", "ca_b", "ca_c",
	    "cb_a", "cb_b", "cb_c",
	    "cc_a", "cc_b", "cc_c"
	  )
	  protected_cols <- c("rep_prop_protected", "dem_prop_protected", "ind_prop_protected")
	  required_cols <- c("trans_p", "kappa", "rho", contact_cols, protected_cols)

	  # Fail early with a readable message instead of an opaque
	  # "replacement has length zero" further down.
	  missing_cols <- setdiff(required_cols, names(data))
	  if (length(missing_cols) > 0) {
	    stop(
	      "calculate_R0(): missing required column(s): ",
	      paste(missing_cols, collapse = ", "),
	      call. = FALSE
	    )
	  }

	  # All inputs are taken from the first row.
	  first_value <- function(col) data[[col]][1]

	  trans_p <- first_value("trans_p")
	  kappa <- first_value("kappa")

	  # Infectious period (inverse of recovery rate)
	  infectious_period <- 1 / first_value("rho")

	  # Rows: receiving group (a, b, c); columns: source group (a, b, c).
	  contacts <- rbind(
	    c(first_value("ca_a"), first_value("ca_b"), first_value("ca_c")),
	    c(first_value("cb_a"), first_value("cb_b"), first_value("cb_c")),
	    c(first_value("cc_a"), first_value("cc_b"), first_value("cc_c"))
	  )

	  # Proportion of each party protected
	  prop_protected <- c(
	    first_value("rep_prop_protected"),
	    first_value("dem_prop_protected"),
	    first_value("ind_prop_protected")
	  )

	  # Protection factor: 1 for a fully unprotected group, kappa for a fully
	  # protected one. It enters both as transmissibility of the source group
	  # (columns) and as susceptibility of the receiving group (rows).
	  protection_factor <- (1 - prop_protected) + kappa * prop_protected
	  eff_trans <- trans_p * protection_factor
	  eff_susc <- protection_factor

	  # Next-generation matrix, multiplied in the same order as the scalar
	  # formula: source transmissibility, contacts, duration, susceptibility.
	  NGM <- sweep(contacts, 2, eff_trans, `*`) * infectious_period
	  NGM <- sweep(NGM, 1, eff_susc, `*`)

	  if (!all(is.finite(NGM))) {
	    stop(
	      "calculate_R0(): next-generation matrix has missing or non-finite ",
	      "entries; check the first row of `data`",
	      call. = FALSE
	    )
	  }

	  # R0 as the dominant eigenvalue of the NGM
	  max(abs(eigen(NGM)$values))
	}

	#' Calculate epidemic peak and timing metrics
	#'
	#' Accepts both the current column names (`total_prevalence`,
	#' `total_prop_prevalence`) and the legacy ones (`total_infected`,
	#' `total_prop_infected`).
	#'
	#' @param sim_data Processed simulation data with `time`, `total_new_deaths`
	#'   and the prevalence columns
	#' @return List with `peak_prevalence`, `peak_prop_prevalence`, their legacy
	#'   aliases `peak_infected` and `peak_proportion`, `crossing_time` (time of
	#'   the first row at which the proportion infected peaks) and
	#'   `inflection_point` (time of the first row at which new deaths peak)
	calculate_peak_metrics <- function(sim_data) {
	  available <- names(sim_data)

	  # Support both naming conventions
	  prevalence_col <- if ("total_prevalence" %in% available) {
	    "total_prevalence"
	  } else {
	    "total_infected"
	  }
	  prop_col <- if ("total_prop_prevalence" %in% available) {
	    "total_prop_prevalence"
	  } else {
	    "total_prop_infected"
	  }

	  missing_cols <- setdiff(c("time", prop_col, "total_new_deaths"), available)
	  if (length(missing_cols) > 0) {
	    stop(
	      "calculate_peak_metrics(): missing required column(s): ",
	      paste(missing_cols, collapse = ", "),
	      call. = FALSE
	    )
	  }

	  # which.max() ignores NA and returns the FIRST maximum, so both timing
	  # metrics are scalars even when the maximum is tied across several rows
	  # (e.g. a run with no deaths, where every row has 0 new deaths).
	  time_of_first_max <- function(values) {
	    sim_data[["time"]][which.max(values)]
	  }

	  peak_prevalence <- max(sim_data[[prevalence_col]], na.rm = TRUE)
	  peak_prop_prevalence <- max(sim_data[[prop_col]], na.rm = TRUE)
	  crossing_time <- time_of_first_max(sim_data[[prop_col]])
	  inflection_point <- time_of_first_max(sim_data[["total_new_deaths"]])

	  list(
	    peak_prevalence = peak_prevalence,
	    peak_prop_prevalence = peak_prop_prevalence,
	    # Legacy names for backward compatibility
	    peak_infected = peak_prevalence,
	    peak_proportion = peak_prop_prevalence,
	    crossing_time = crossing_time,
	    inflection_point = inflection_point
	  )
	}

	#' Calculate outbreak size and AUC metrics
	#'
	#' @param sim_data Processed simulation data
	#' @return List with `outbreak_size` (area under the incidence curve, or under
	#'   the prevalence curve when incidence is unavailable), `auc` (area under
	#'   the proportion-infected curve, 2 decimals) and
	#'   `cumulative_proportion_infected` (final recovered + dead over the initial
	#'   population, 4 decimals)
	calculate_outbreak_metrics <- function(sim_data) {
	  available <- names(sim_data)

	  # Preference order: total_incidence, total_prevalence, total_infected (legacy)
	  size_series <- if ("total_incidence" %in% available) {
	    sim_data$total_incidence
	  } else if ("total_prevalence" %in% available) {
	    sim_data$total_prevalence
	  } else {
	    sim_data$total_infected
	  }
	  outbreak_size <- auc(x = sim_data$time, y = size_series)

	  # Proportion prevalence under either naming convention
	  prop_series <- if ("total_prop_prevalence" %in% available) {
	    sim_data$total_prop_prevalence
	  } else {
	    sim_data$total_prop_infected
	  }
	  auc_value <- round(auc(x = sim_data$time, y = prop_series), digits = 2)

	  # Ever infected = those who have left the S and I compartments by the last
	  # row (recovered + dead), relative to the first-row population.
	  final_state <- sim_data %>% slice(n())
	  initial_pop <- (sim_data %>% slice(1))$total_pop
	  ever_infected <- with(
	    final_state,
	    (RUa + RPa + RUb + RPb + RUc + RPc) + (DUa + DPa + DUb + DPb + DUc + DPc)
	  )
	  cumulative_infected_prop <- round(ever_infected / initial_pop, digits = 4)

	  list(
	    outbreak_size = outbreak_size,
	    auc = auc_value,
	    cumulative_proportion_infected = cumulative_infected_prop
	  )
	}

	#' Calculate population shrinkage metrics
	#'
	#' Compares the first and last rows of the simulation. Works with both the
	#' current (`rep_pop`, `dem_pop`, `ind_pop`) and the legacy (`pop_a`, `pop_b`,
	#' `pop_c`) column names; `total_pop` is required in either case.
	#'
	#' @param sim_data Processed simulation data
	#' @return List with `rep_shrink`, `dem_shrink`, `ind_shrink` and `tot_shrink`,
	#'   each a percentage rounded to 2 decimals
	calculate_population_shrinkage <- function(sim_data) {
	  # Percentage of the initial head count that is gone by the final row.
	  pct_lost <- function(initial, final) {
	    round(((initial - final) / initial) * 100, 2)
	  }

	  start_state <- sim_data %>% slice(1)
	  end_state <- sim_data %>% slice(n())

	  if ("rep_pop" %in% names(sim_data)) {
	    rep_shrink <- pct_lost(pull(start_state, rep_pop), pull(end_state, rep_pop))
	    dem_shrink <- pct_lost(pull(start_state, dem_pop), pull(end_state, dem_pop))
	    ind_shrink <- pct_lost(pull(start_state, ind_pop), pull(end_state, ind_pop))
	  } else {
	    # Legacy column names
	    rep_shrink <- pct_lost(pull(start_state, pop_a), pull(end_state, pop_a))
	    dem_shrink <- pct_lost(pull(start_state, pop_b), pull(end_state, pop_b))
	    ind_shrink <- pct_lost(pull(start_state, pop_c), pull(end_state, pop_c))
	  }

	  list(
	    rep_shrink = rep_shrink,
	    dem_shrink = dem_shrink,
	    ind_shrink = ind_shrink,
	    tot_shrink = pct_lost(pull(start_state, total_pop), pull(end_state, total_pop))
	  )
	}

	#' Calculate partisan death comparison metrics
	#'
	#' Reads the death counts from the last row of the simulation, under either
	#' the current (`rep_dead`, ...) or the legacy (`dead_reps`, ...) column names.
	#'
	#' @param sim_data Processed simulation data
	#' @return List with the final death counts per party (under both the current
	#'   and the legacy element names) and `diff_deaths`, the percentage by which
	#'   Republican deaths differ from Democratic deaths (2 decimals)
	calculate_death_metrics <- function(sim_data) {
	  final_state <- slice_tail(sim_data, n = 1)

	  # Support both naming conventions (rep_dead vs dead_reps)
	  deaths <- if ("rep_dead" %in% names(sim_data)) {
	    list(
	      rep = pull(final_state, rep_dead),
	      dem = pull(final_state, dem_dead),
	      ind = pull(final_state, ind_dead)
	    )
	  } else {
	    list(
	      rep = pull(final_state, dead_reps),
	      dem = pull(final_state, dead_dems),
	      ind = pull(final_state, dead_inds)
	    )
	  }

	  # Relative difference, with Democratic deaths as the baseline
	  diff_deaths <- round(((deaths$rep - deaths$dem) / deaths$dem) * 100, digits = 2)

	  list(
	    rep_dead = deaths$rep,
	    dem_dead = deaths$dem,
	    ind_dead = deaths$ind,
	    # Legacy names for backward compatibility
	    rep_deaths = deaths$rep,
	    dem_deaths = deaths$dem,
	    ind_deaths = deaths$ind,
	    diff_deaths = diff_deaths
	  )
	}

	#' Calculate partisan-specific outbreak sizes
	#'
	#' Each outbreak size is the area under that party's incidence curve over
	#' `time`. The `*_incidence` columns are used when present, otherwise the
	#' legacy `*_new_cases` columns.
	#'
	#' @param sim_data Processed simulation data with incidence
	#' @return List with `rep_outbreak`, `dem_outbreak`, `ind_outbreak` and
	#'   `rep_vs_dem_percentage` (Republican relative to Democratic outbreak size,
	#'   in percent, 2 decimals)
	calculate_partisan_outbreak_sizes <- function(sim_data) {
	  available <- names(sim_data)
	  step_times <- sim_data$time

	  # Support both naming conventions (rep_incidence and rep_new_cases)
	  rep_series <- if ("rep_incidence" %in% available) {
	    sim_data$rep_incidence
	  } else {
	    sim_data$rep_new_cases
	  }
	  dem_series <- if ("dem_incidence" %in% available) {
	    sim_data$dem_incidence
	  } else {
	    sim_data$dem_new_cases
	  }
	  ind_series <- if ("ind_incidence" %in% available) {
	    sim_data$ind_incidence
	  } else {
	    sim_data$ind_new_cases
	  }

	  rep_outbreak <- auc(x = step_times, y = rep_series)
	  dem_outbreak <- auc(x = step_times, y = dem_series)
	  ind_outbreak <- auc(x = step_times, y = ind_series)

	  list(
	    rep_outbreak = rep_outbreak,
	    dem_outbreak = dem_outbreak,
	    ind_outbreak = ind_outbreak,
	    rep_vs_dem_percentage = round(((rep_outbreak - dem_outbreak) / dem_outbreak) * 100, 2)
	  )
	}

	#' Calculate comparison metrics between current and null models
	#'
	#' The peak proportion is read from `peak_prop_prevalence` when present and
	#' from the legacy `peak_proportion` element otherwise.
	#'
	#' @param current_metrics List of metrics for current model
	#' @param null_metrics List of metrics for null model
	#' @return List with `outbreak_size_increase` and `peak_increase` (percent
	#'   change relative to the null model, 2 decimals), `crossing_time_diff`
	#'   (null minus current) and `peak_diff` (current minus null)
	calculate_comparison_metrics <- function(current_metrics, null_metrics) {
	  # Percent change of `value` relative to `baseline`
	  pct_change <- function(value, baseline) {
	    ((value - baseline) / baseline) * 100
	  }

	  # Support both naming conventions (peak_prop_prevalence and peak_proportion)
	  peak_of <- function(metrics) {
	    if (is.null(metrics$peak_prop_prevalence)) {
	      metrics$peak_proportion
	    } else {
	      metrics$peak_prop_prevalence
	    }
	  }

	  outbreak_increase <- round(
	    pct_change(current_metrics$outbreak_size, null_metrics$outbreak_size),
	    2
	  )

	  current_peak <- peak_of(current_metrics)
	  null_peak <- peak_of(null_metrics)

	  list(
	    outbreak_size_increase = outbreak_increase,
	    peak_increase = round(pct_change(current_peak, null_peak), 2),
	    crossing_time_diff = null_metrics$crossing_time - current_metrics$crossing_time,
	    peak_diff = current_peak - null_peak
	  )
	}

	#' Run full data processing pipeline
	#'
	#' Takes raw simulation output through every processing stage (epidemic
	#' metrics, parameter columns, contact matrices, force of infection and
	#' incidence), optionally attaches reference-model columns, and computes all
	#' summary metrics.
	#'
	#' @param sim_output Raw simulation output from sir_three_group_pu()
	#' @param current_params List of current parameters
	#' @param null_model Optional processed null model for comparisons
	#' @param dem_model Optional processed dem model for comparisons
	#' @return List with `sim_data`, `contact_matrices`, `peak_metrics`,
	#'   `outbreak_metrics`, `pop_shrinkage`, `death_metrics`,
	#'   `partisan_outbreaks` and `R0`
	run_full_processing <- function(sim_output, current_params, null_model = NULL, dem_model = NULL) {
	  # Steps 1-2: basic epidemic metrics, then the parameters as columns
	  with_params <- add_parameters_to_dataset(
	    add_epidemic_metrics(sim_output),
	    current_params
	  )

	  # Step 3: contact matrices (only the two beta parameters are passed on)
	  contact_result <- generate_contact_matrices(
	    with_params,
	    list(beta_a = current_params$beta_a, beta_b = current_params$beta_b)
	  )

	  # Step 4: force of infection, incidence, etc.
	  sim_data <- add_additional_epidemic_columns(contact_result$modified_df)

	  # Step 5: comparison columns, only for the reference models supplied
	  if (!is.null(null_model)) {
	    sim_data <- mutate(
	      sim_data,
	      null_total_prop_prevalence = null_model$total_prop_prevalence,
	      null_total_prop_dead = null_model$total_prop_dead,
	      null_total_pop = null_model$total_pop,
	      null_total_incidence = null_model$total_incidence
	    )
	  }

	  if (!is.null(dem_model)) {
	    sim_data <- mutate(
	      sim_data,
	      dem_total_prop_prevalence = dem_model$total_prop_prevalence,
	      dem_total_prop_dead = dem_model$total_prop_dead,
	      dem_total_pop = dem_model$total_pop,
	      dem_total_incidence = dem_model$total_incidence
	    )
	  }

	  # All summary metrics are computed from the fully processed data
	  list(
	    sim_data = sim_data,
	    contact_matrices = contact_result$ac_mats,
	    peak_metrics = calculate_peak_metrics(sim_data),
	    outbreak_metrics = calculate_outbreak_metrics(sim_data),
	    pop_shrinkage = calculate_population_shrinkage(sim_data),
	    death_metrics = calculate_death_metrics(sim_data),
	    partisan_outbreaks = calculate_partisan_outbreak_sizes(sim_data),
	    R0 = calculate_R0(sim_data)
	  )
	}

	# ============================================================================
	# Legacy function names for backward compatibility with existing Shiny code
	# These wrap the new functions but maintain the old interface
	# ============================================================================

	#' Process simulation output (legacy wrapper)
	#'
	#' Keeps the old function name available for existing Shiny code; it forwards
	#' `sim_output` unchanged to add_epidemic_metrics() and returns its result.
	#'
	#' @param sim_output Raw simulation output
	#' @return Data frame with epidemic metrics
	process_simulation_output <- function(sim_output) {
	add_epidemic_metrics(sim_output)
	}

	#' Process main simulation with comparison models (legacy wrapper)
	#'
	#' Runs add_epidemic_metrics() on the raw output and then adds the legacy
	#' column names expected by existing code, together with comparison columns
	#' taken from the null and dem reference models. Both reference models are
	#' required and are combined column-wise with the simulation rows.
	#'
	#' @param sim_output Raw simulation output
	#' @param null_model Processed null model
	#' @param dem_model Processed dem model
	#' @return Data frame with all metrics
	process_main_simulation <- function(sim_output, null_model, dem_model) {
	  sim_data <- add_epidemic_metrics(sim_output)

	  sim_data %>%
	    mutate(
	      # Legacy column names for compatibility
	      total_infected = total_prevalence,
	      rep_infected = rep_prevalence,
	      dem_infected = dem_prevalence,
	      ind_infected = ind_prevalence,
	      cumulative_infected = cumsum(total_prevalence),
	      cumulative_rep_infected = cumsum(rep_prevalence),
	      cumulative_dem_infected = cumsum(dem_prevalence),
	      cumulative_ind_infected = cumsum(ind_prevalence),
	      total_new_cases = total_prevalence - lag(total_prevalence, default = 0),
	      pop_a = rep_pop,
	      pop_b = dem_pop,
	      pop_c = ind_pop,
	      total_prop_infected = total_prop_prevalence,
	      rep_prop_infected = rep_prop_prevalence,
	      dem_prop_infected = dem_prop_prevalence,
	      ind_prop_infected = ind_prop_prevalence,
	      cumulative_prop_infected = cumsum(total_prop_prevalence) / n(),
	      cumulative_rep_prop_infected = cumsum(rep_prop_prevalence),
	      cumulative_dem_prop_infected = cumsum(dem_prop_prevalence),
	      cumulative_ind_prop_infected = cumsum(ind_prop_prevalence),
	      # Comparison columns from the reference models
	      null_total_prop_infected = null_model$total_prop_prevalence,
	      dem_total_prop_infected = dem_model$total_prop_prevalence,
	      dead_reps = rep_dead,
	      dead_dems = dem_dead,
	      dead_inds = ind_dead,
	      total_prop_dead = total_dead / total_pop,
	      null_total_prop_dead = null_model$total_prop_dead,
	      dem_total_prop_dead = dem_model$total_prop_dead,
	      # Elementwise `|`: rows with a missing or zero population get NA instead
	      # of a division by zero. NA_real_ keeps the column numeric even if
	      # every row is NA.
	      total_daily_mortality_rate = ifelse(
	        is.na(total_pop) | total_pop == 0,
	        NA_real_,
	        total_new_deaths / total_pop
	      ),
	      dem_total_pop = dem_model$total_pop,
	      dem_total_new_cases = dem_model$total_prevalence -
	        lag(dem_model$total_prevalence, default = 0),
	      null_total_pop = null_model$total_pop,
	      null_total_new_cases = null_model$total_prevalence -
	        lag(null_model$total_prevalence, default = 0),
	      null_total_prop_cases = null_total_new_cases / null_model$total_pop,
	      # Null incidence rate (per 100,000)
	      null_total_incidence_rate = null_total_new_cases / null_model$total_pop * 100000
	    )
	}

	#' Calculate contacts over time (legacy wrapper)
	#'
	#' Calls avg_contact_matrix_3gp() once per row of `sim_data` and stores the
	#' nine entries of the returned matrix in the `X_Y` columns (matrix row 1 ->
	#' `R_*`, row 2 -> `D_*`, row 3 -> `I_*`). Group sizes come from
	#' `rep_pop`/`dem_pop`/`ind_pop` when present, otherwise from the legacy
	#' `pop_a`/`pop_b`/`pop_c` columns.
	#'
	#' @param sim_data Processed simulation data with `time`, `ca`, `cb`, `cc`
	#'   and the group population columns
	#' @param beta_a Homophily parameter A
	#' @param beta_b Homophily parameter B
	#' @return Data frame with one row per time step: `time`, the nine contact
	#'   columns, the per-group contact totals and the group sizes `N_a`, `N_b`,
	#'   `N_c`
	calculate_contacts_over_time <- function(sim_data, beta_a, beta_b) {
	  # The population columns do not change inside the loop, so resolve the
	  # naming convention once.
	  available <- names(sim_data)
	  pop_a_vec <- if ("rep_pop" %in% available) sim_data$rep_pop else sim_data$pop_a
	  pop_b_vec <- if ("dem_pop" %in% available) sim_data$dem_pop else sim_data$pop_b
	  pop_c_vec <- if ("ind_pop" %in% available) sim_data$ind_pop else sim_data$pop_c

	  # Contacts data frame in the column order expected by legacy code. Numeric
	  # NA keeps the column type stable even when no row is filled in.
	  contacts_df <- data.frame(time = sim_data$time) %>%
	    mutate(
	      R_R = NA_real_, R_D = NA_real_, R_I = NA_real_,
	      D_D = NA_real_, D_R = NA_real_, D_I = NA_real_,
	      I_I = NA_real_, I_R = NA_real_, I_D = NA_real_
	    )

	  # seq_len() yields an empty sequence for zero-row input, whereas
	  # 1:nrow(sim_data) would iterate over c(1, 0) and fail.
	  for (r in seq_len(nrow(sim_data))) {
	    ac_mat <- avg_contact_matrix_3gp(
	      dbar_a = sim_data$ca[r],
	      dbar_b = sim_data$cb[r],
	      dbar_c = sim_data$cc[r],
	      N_a = pop_a_vec[r],
	      N_b = pop_b_vec[r],
	      N_c = pop_c_vec[r],
	      beta_a = beta_a,
	      beta_b = beta_b
	    )

	    contacts_df$R_R[r] <- ac_mat[1, 1]
	    contacts_df$R_D[r] <- ac_mat[1, 2]
	    contacts_df$R_I[r] <- ac_mat[1, 3]

	    contacts_df$D_R[r] <- ac_mat[2, 1]
	    contacts_df$D_D[r] <- ac_mat[2, 2]
	    contacts_df$D_I[r] <- ac_mat[2, 3]

	    contacts_df$I_R[r] <- ac_mat[3, 1]
	    contacts_df$I_D[r] <- ac_mat[3, 2]
	    contacts_df$I_I[r] <- ac_mat[3, 3]
	  }

	  contacts_df %>%
	    mutate(
	      Total_R_Contacts = R_R + R_D + R_I,
	      Total_D_Contacts = D_R + D_D + D_I,
	      Total_I_Contacts = I_R + I_D + I_I,
	      N_a = pop_a_vec,
	      N_b = pop_b_vec,
	      N_c = pop_c_vec
	    )
	}

	#' Add force of infection to simulation data (legacy wrapper)
	#'
	#' Writes the model parameters and the contact-matrix entries into `sim_data`
	#' as columns, then derives force-of-infection terms, infection-source
	#' proportions, new cases by source, incidence rates and protection
	#' proportions. Everything happens in a single mutate() call, so later
	#' expressions see the columns created earlier in the same call.
	#'
	#' @param sim_data Processed simulation data; must contain the S/I/R
	#'   compartment columns, `total_pop` and either `rep_pop`/`dem_pop`/`ind_pop`
	#'   or the legacy `pop_a`/`pop_b`/`pop_c`
	#' @param contacts_df Contact matrix data with the nine `X_Y` contact columns
	#'   (`R_R` ... `I_I`), combined column-wise with the rows of `sim_data`
	#' @param current_params Current parameters list
	#' @return Data frame with force of infection metrics
	add_force_of_infection <- function(sim_data, contacts_df, current_params) {
	# Use rep_pop/dem_pop/ind_pop if available, otherwise the legacy pop_* columns.
	# These are plain vectors held outside the data frame, not new columns.
	pop_a_col <- if ("rep_pop" %in% names(sim_data)) sim_data$rep_pop else sim_data$pop_a
	pop_b_col <- if ("dem_pop" %in% names(sim_data)) sim_data$dem_pop else sim_data$pop_b
	pop_c_col <- if ("ind_pop" %in% names(sim_data)) sim_data$ind_pop else sim_data$pop_c

	sim_data %>%
	mutate(
	# Model parameters copied in as columns. trans_p and kappa are set here
	# before the formulas below use them.
	N0 = current_params$N0,
	trans_p = current_params$trans_p,
	kappa = current_params$kappa,
	ell = current_params$ell,
	rho = current_params$rho,
	vstart = current_params$vstart,
	gamma = current_params$gamma,
	frac_a = current_params$frac_a,
	frac_b = current_params$frac_b,
	pi_a = current_params$pi_a,
	pi_b = current_params$pi_b,
	pi_c = current_params$pi_c,
	phi_a = current_params$phi_a,
	phi_b = current_params$phi_b,
	phi_c = current_params$phi_c,
	zeta_a = current_params$zeta_a,
	zeta_b = current_params$zeta_b,
	zeta_c = current_params$zeta_c,
	mu_a = current_params$mu_a,
	mu_b = current_params$mu_b,
	mu_c = current_params$mu_c,
	vacc_a = current_params$vacc_a,
	vacc_b = current_params$vacc_b,
	vacc_c = current_params$vacc_c,
	I0_a = current_params$I0_a,
	I0_b = current_params$I0_b,
	I0_c = current_params$I0_c,
	beta_a = current_params$beta_a,
	beta_b = current_params$beta_b,
	# Contact-matrix entries: c<x>_<y> is taken from the X_Y column of
	# contacts_df, with a = R, b = D, c = I.
	ca_a = contacts_df$R_R,
	ca_b = contacts_df$R_D,
	ca_c = contacts_df$R_I,
	cb_a = contacts_df$D_R,
	cb_b = contacts_df$D_D,
	cb_c = contacts_df$D_I,
	cc_a = contacts_df$I_R,
	cc_b = contacts_df$I_D,
	cc_c = contacts_df$I_I,
	# Force of infection components: lambda_xy is the term for group x arising
	# from group y; IP* (protected infectious) is weighted by kappa.
	lambda_aa = trans_p * (ca_a * (IUa / pop_a_col + kappa * IPa / pop_a_col)),
	lambda_ab = trans_p * (ca_b * (IUb / pop_b_col + kappa * IPb / pop_b_col)),
	lambda_ac = trans_p * (ca_c * (IUc / pop_c_col + kappa * IPc / pop_c_col)),
	lambda_a = lambda_aa + lambda_ab + lambda_ac,
	lambda_ba = trans_p * (cb_a * (IUa / pop_a_col + kappa * IPa / pop_a_col)),
	lambda_bb = trans_p * (cb_b * (IUb / pop_b_col + kappa * IPb / pop_b_col)),
	lambda_bc = trans_p * (cb_c * (IUc / pop_c_col + kappa * IPc / pop_c_col)),
	lambda_b = lambda_ba + lambda_bb + lambda_bc,
	lambda_ca = trans_p * (cc_a * (IUa / pop_a_col + kappa * IPa / pop_a_col)),
	lambda_cb = trans_p * (cc_b * (IUb / pop_b_col + kappa * IPb / pop_b_col)),
	lambda_cc = trans_p * (cc_c * (IUc / pop_c_col + kappa * IPc / pop_c_col)),
	lambda_c = lambda_ca + lambda_cb + lambda_cc,
	# Proportion of each group's force of infection by source group
	prop_rep_infections_from_reps = lambda_aa / lambda_a,
	prop_rep_infections_from_dems = lambda_ab / lambda_a,
	prop_rep_infections_from_inds = lambda_ac / lambda_a,
	prop_dem_infections_from_reps = lambda_ba / lambda_b,
	prop_dem_infections_from_dems = lambda_bb / lambda_b,
	prop_dem_infections_from_inds = lambda_bc / lambda_b,
	prop_ind_infections_from_reps = lambda_ca / lambda_c,
	prop_ind_infections_from_dems = lambda_cb / lambda_c,
	prop_ind_infections_from_inds = lambda_cc / lambda_c,
	# Incidence by source for Republicans. The first row is set to 1/3 per
	# source, so the three sources add up to one new case for the group.
	rep_from_rep_new_cases = if_else(row_number() == 1, 1 / 3,
	(SUa * lambda_aa) + (SPa * lambda_aa * kappa)
	),
	rep_from_dem_new_cases = if_else(row_number() == 1, 1 / 3,
	(SUa * lambda_ab) + (SPa * lambda_ab * kappa)
	),
	rep_from_ind_new_cases = if_else(row_number() == 1, 1 / 3,
	(SUa * lambda_ac) + (SPa * lambda_ac * kappa)
	),
	rep_new_cases = rep_from_rep_new_cases + rep_from_dem_new_cases + rep_from_ind_new_cases,
	# Incidence by source for Democrats (same first-row seeding)
	dem_from_rep_new_cases = if_else(row_number() == 1, 1 / 3,
	(SUb * lambda_ba) + (SPb * lambda_ba * kappa)
	),
	dem_from_dem_new_cases = if_else(row_number() == 1, 1 / 3,
	(SUb * lambda_bb) + (SPb * lambda_bb * kappa)
	),
	dem_from_ind_new_cases = if_else(row_number() == 1, 1 / 3,
	(SUb * lambda_bc) + (SPb * lambda_bc * kappa)
	),
	dem_new_cases = dem_from_rep_new_cases + dem_from_dem_new_cases + dem_from_ind_new_cases,
	# Incidence by source for Independents (same first-row seeding)
	ind_from_rep_new_cases = if_else(row_number() == 1, 1 / 3,
	(SUc * lambda_ca) + (SPc * lambda_ca * kappa)
	),
	ind_from_dem_new_cases = if_else(row_number() == 1, 1 / 3,
	(SUc * lambda_cb) + (SPc * lambda_cb * kappa)
	),
	ind_from_ind_new_cases = if_else(row_number() == 1, 1 / 3,
	(SUc * lambda_cc) + (SPc * lambda_cc * kappa)
	),
	ind_new_cases = ind_from_rep_new_cases + ind_from_dem_new_cases + ind_from_ind_new_cases,
	# Replaces any total_new_cases column already present in sim_data
	total_new_cases = rep_new_cases + dem_new_cases + ind_new_cases,
	# Incidence rates (per 100,000)
	rep_incidence_rate = (rep_new_cases / pop_a_col) * 100000,
	dem_incidence_rate = (dem_new_cases / pop_b_col) * 100000,
	ind_incidence_rate = (ind_new_cases / pop_c_col) * 100000,
	total_incidence_rate = (total_new_cases / total_pop) * 100000,
	# Protection proportions: protected S + I + R over the group population
	rep_prop_protected = (SPa + IPa + RPa) / pop_a_col,
	dem_prop_protected = (SPb + IPb + RPb) / pop_b_col,
	ind_prop_protected = (SPc + IPc + RPc) / pop_c_col
	)
	}

	#' Extract reference model metrics
	#'
	#' Convenience function that gathers the peak and outbreak metrics of the
	#' null (baseline) model and the peak metrics of the cautious model into one
	#' nested list.
	#'
	#' @param null_model Processed null model output
	#' @param cautious_model Processed cautious model output
	#' @return List with two elements: `null` (`inflection_point`,
	#'   `outbreak_size`, `auc`, `peak_infected`, `peak_proportion`,
	#'   `crossing_time`) and `cautious` (`peak_proportion`, `crossing_time`)
	extract_reference_model_metrics <- function(null_model, cautious_model) {
	  # Null (baseline) model: peak/timing metrics plus outbreak size and AUC
	  baseline_peak <- calculate_peak_metrics(null_model)
	  baseline_outbreak <- calculate_outbreak_metrics(null_model)
	  null_summary <- list(
	    inflection_point = baseline_peak$inflection_point,
	    outbreak_size = baseline_outbreak$outbreak_size,
	    auc = baseline_outbreak$auc,
	    peak_infected = baseline_peak$peak_infected,
	    peak_proportion = baseline_peak$peak_proportion,
	    crossing_time = baseline_peak$crossing_time
	  )

	  # Cautious model: only the peak proportion and its timing are reported
	  careful_peak <- calculate_peak_metrics(cautious_model)
	  cautious_summary <- list(
	    peak_proportion = careful_peak$peak_proportion,
	    crossing_time = careful_peak$crossing_time
	  )

	  list(null = null_summary, cautious = cautious_summary)
	}