# partisan-disease-simulator / R / data_processing.R
# Last commit edefa8f (chrissoria):
#   Fix invisible lines in plots and Total infected percentage bug
#' Derive per-party and population-wide epidemic metrics
#' Column names match disease_model_data_prep_helper.R
#'
#' Compartment columns are read by name: `S`/`I`/`R`/`D` followed by `U` or
#' `P` and a group suffix. Suffix `a` feeds the `rep_*` columns, `b` the
#' `dem_*` columns and `c` the `ind_*` columns.
#'
#' @param data Raw simulation output from sir_three_group_pu()
#' @return `data` with prevalence, susceptible, recovered, population,
#'   proportion and mortality columns added
add_epidemic_metrics <- function(data) {
  mutate(
    data,
    # Current infections (U + P infected compartments), overall and by party
    total_prevalence = (IUa + IPa) + (IUb + IPb) + (IUc + IPc),
    rep_prevalence = IUa + IPa,
    dem_prevalence = IUb + IPb,
    ind_prevalence = IUc + IPc,
    # Susceptible counts by party
    rep_susceptible = SUa + SPa,
    dem_susceptible = SUb + SPb,
    ind_susceptible = SUc + SPc,
    # Recovered counts by party
    rep_recovered = RUa + RPa,
    dem_recovered = RUb + RPb,
    ind_recovered = RUc + RPc,
    # Denominators: S + I + R of each party (the D compartments are excluded)
    rep_pop = SUa + IUa + RUa + SPa + IPa + RPa,
    dem_pop = SUb + IUb + RUb + SPb + IPb + RPb,
    ind_pop = SUc + IUc + RUc + SPc + IPc + RPc,
    total_pop = rep_pop + dem_pop + ind_pop,
    # Share of each party that is susceptible
    rep_prop_susceptible = rep_susceptible / rep_pop,
    dem_prop_susceptible = dem_susceptible / dem_pop,
    ind_prop_susceptible = ind_susceptible / ind_pop,
    # Share of each party (and of everyone) that is currently infected
    rep_prop_prevalence = rep_prevalence / rep_pop,
    dem_prop_prevalence = dem_prevalence / dem_pop,
    ind_prop_prevalence = ind_prevalence / ind_pop,
    total_prop_prevalence = (IUa + IPa + IUb + IPb + IUc + IPc) / total_pop,
    # Share of each party that has recovered
    rep_prop_recovered = rep_recovered / rep_pop,
    dem_prop_recovered = dem_recovered / dem_pop,
    ind_prop_recovered = ind_recovered / ind_pop,
    # Deaths, overall and by party
    total_dead = DUa + DPa + DUb + DPb + DUc + DPc,
    rep_dead = DUa + DPa,
    dem_dead = DUb + DPb,
    ind_dead = DUc + DPc,
    # Row-to-row change in total_dead; the first row is compared against 0
    total_new_deaths = total_dead - lag(total_dead, default = 0),
    # Deaths relative to the current (S + I + R) population
    total_prop_dead = total_dead / total_pop,
    rep_prop_dead = rep_dead / rep_pop,
    dem_prop_dead = dem_dead / dem_pop,
    ind_prop_dead = ind_dead / ind_pop
  )
}
#' Attach every model parameter to the data as its own column
#' Matches function from disease_model_data_prep_helper.R
#'
#' Each named element of `parameters_list` is assigned to the column of the
#' same name. The one exception is a parameter called "time", which is
#' written to `max_time` instead.
#'
#' @param data Simulation data frame
#' @param parameters_list Named list of parameters
#' @return Data frame with parameter columns added
add_parameters_to_dataset <- function(data, parameters_list) {
  param_names <- names(parameters_list)
  # Destination columns: identical to the parameter names except for "time"
  target_cols <- param_names
  target_cols[param_names == "time"] <- "max_time"
  for (i in seq_along(param_names)) {
    data[[target_cols[i]]] <- parameters_list[[param_names[i]]]
  }
  data
}
#' Generate contact matrices for each time step
#' Matches function from disease_model_data_prep_helper.R
#'
#' Calls avg_contact_matrix_3gp() (defined outside this file) once per row of
#' `df` and stores the first three rows of the result, row by row, in the
#' columns R_R, R_D, R_I, D_R, D_D, D_I, I_R, I_D, I_I. Row totals
#' (`total_ca`, `total_cb`, `total_cc`) and the aliases `ca_a` ... `cc_c` are
#' then added. A zero-row `df` yields a zero-row result and an empty matrix
#' list.
#'
#' @param df Simulation data frame with epidemic metrics; the columns `ca`,
#'   `cb`, `cc`, `rep_pop`, `dem_pop` and `ind_pop` are read
#' @param params List with beta_a and beta_b parameters
#' @return List with modified_df and ac_mats (one matrix per row, named
#'   "step_1", "step_2", ...)
generate_contact_matrices <- function(df, params = list()) {
  contact_cols <- c("R_R", "R_D", "R_I", "D_R", "D_D", "D_I", "I_R", "I_D", "I_I")
  n_steps <- nrow(df)

  # Preallocate: one matrix row per time step, columns ordered as contact_cols
  contacts <- matrix(NA_real_, nrow = n_steps, ncol = length(contact_cols))
  ac_mats <- vector("list", n_steps)
  # sprintf() returns character(0) for zero steps, unlike paste0()
  names(ac_mats) <- sprintf("step_%d", seq_len(n_steps))

  # seq_len() skips the loop entirely when df has no rows
  for (r in seq_len(n_steps)) {
    ac_mat <- avg_contact_matrix_3gp(
      dbar_a = df[["ca"]][r],
      dbar_b = df[["cb"]][r],
      dbar_c = df[["cc"]][r],
      N_a = df[["rep_pop"]][r],
      N_b = df[["dem_pop"]][r],
      N_c = df[["ind_pop"]][r],
      beta_a = params$beta_a,
      beta_b = params$beta_b
    )
    ac_mats[[r]] <- ac_mat
    # Transposing first flattens the matrix in row order (row 1, row 2, row 3)
    contacts[r, ] <- as.vector(t(ac_mat[1:3, , drop = FALSE]))
  }

  for (k in seq_along(contact_cols)) {
    df[[contact_cols[k]]] <- contacts[, k]
  }

  # Row totals and the ca_*/cb_*/cc_* aliases used by later processing steps
  df[["total_ca"]] <- df[["R_R"]] + df[["R_D"]] + df[["R_I"]]
  df[["ca_a"]] <- df[["R_R"]]
  df[["ca_b"]] <- df[["R_D"]]
  df[["ca_c"]] <- df[["R_I"]]
  df[["total_cb"]] <- df[["D_R"]] + df[["D_D"]] + df[["D_I"]]
  df[["cb_a"]] <- df[["D_R"]]
  df[["cb_b"]] <- df[["D_D"]]
  df[["cb_c"]] <- df[["D_I"]]
  df[["total_cc"]] <- df[["I_R"]] + df[["I_D"]] + df[["I_I"]]
  df[["cc_a"]] <- df[["I_R"]]
  df[["cc_b"]] <- df[["I_D"]]
  df[["cc_c"]] <- df[["I_I"]]

  list(modified_df = df, ac_mats = ac_mats)
}
#' Append force-of-infection, incidence and related derived columns
#' Matches function from disease_model_data_prep_helper.R
#'
#' Expects the parameter columns (`trans_p`, `kappa`, `N0`, `frac_a`,
#' `frac_b`, `I0_a`, `I0_b`, `I0_c`), the contact columns (`ca_a` ... `cc_c`,
#' `ca`, `cb`, `cc`), the epidemic metric columns and the `Casesa`/`Casesb`/
#' `Casesc` columns to be present already. The `*_prop_dead` columns are
#' overwritten here using the initial population as denominator.
#'
#' @param data Simulation data frame with contact matrices
#' @return Data frame with additional columns
add_additional_epidemic_columns <- function(data) {
  mutate(
    data,
    # Force of infection: lambda_xy is the pressure on group x from group y.
    # Infected in the P compartment are weighted by kappa.
    lambda_aa = trans_p * (ca_a * (IUa / rep_pop + kappa * IPa / rep_pop)),
    lambda_ab = trans_p * (ca_b * (IUb / dem_pop + kappa * IPb / dem_pop)),
    lambda_ac = trans_p * (ca_c * (IUc / ind_pop + kappa * IPc / ind_pop)),
    lambda_a = lambda_aa + lambda_ab + lambda_ac,
    lambda_ba = trans_p * (cb_a * (IUa / rep_pop + kappa * IPa / rep_pop)),
    lambda_bb = trans_p * (cb_b * (IUb / dem_pop + kappa * IPb / dem_pop)),
    lambda_bc = trans_p * (cb_c * (IUc / ind_pop + kappa * IPc / ind_pop)),
    lambda_b = lambda_ba + lambda_bb + lambda_bc,
    lambda_ca = trans_p * (cc_a * (IUa / rep_pop + kappa * IPa / rep_pop)),
    lambda_cb = trans_p * (cc_b * (IUb / dem_pop + kappa * IPb / dem_pop)),
    lambda_cc = trans_p * (cc_c * (IUc / ind_pop + kappa * IPc / ind_pop)),
    lambda_c = lambda_ca + lambda_cb + lambda_cc,
    lambda = lambda_a + lambda_b + lambda_c,
    # Share of each group's force of infection attributable to each source
    prop_rep_infections_from_reps = lambda_aa / lambda_a,
    prop_rep_infections_from_dems = lambda_ab / lambda_a,
    prop_rep_infections_from_inds = lambda_ac / lambda_a,
    prop_dem_infections_from_reps = lambda_ba / lambda_b,
    prop_dem_infections_from_dems = lambda_bb / lambda_b,
    prop_dem_infections_from_inds = lambda_bc / lambda_b,
    prop_ind_infections_from_reps = lambda_ca / lambda_c,
    prop_ind_infections_from_dems = lambda_cb / lambda_c,
    prop_ind_infections_from_inds = lambda_cc / lambda_c,
    # Incidence split by protection status. The first row holds the seed
    # infections (I0_*) for the unprotected and 0 for the protected.
    rep_unprotected_incidence = if_else(row_number() == 1, I0_a[1], SUa * lambda_a),
    rep_protected_incidence = if_else(row_number() == 1, 0, SPa * lambda_a * kappa),
    dem_unprotected_incidence = if_else(row_number() == 1, I0_b[1], SUb * lambda_b),
    dem_protected_incidence = if_else(row_number() == 1, 0, SPb * lambda_b * kappa),
    ind_unprotected_incidence = if_else(row_number() == 1, I0_c[1], SUc * lambda_c),
    ind_protected_incidence = if_else(row_number() == 1, 0, SPc * lambda_c * kappa),
    # Per-party incidence is the unprotected plus the protected part
    # (first row: I0_* + 0), followed by the rate per 100,000
    rep_incidence = rep_unprotected_incidence + rep_protected_incidence,
    rep_incidence_rate = (rep_incidence / rep_pop) * 100000,
    dem_incidence = dem_unprotected_incidence + dem_protected_incidence,
    dem_incidence_rate = (dem_incidence / dem_pop) * 100000,
    ind_incidence = ind_unprotected_incidence + ind_protected_incidence,
    ind_incidence_rate = (ind_incidence / ind_pop) * 100000,
    unprotected_incidence = rep_unprotected_incidence +
      dem_unprotected_incidence +
      ind_unprotected_incidence,
    protected_incidence = rep_protected_incidence +
      dem_protected_incidence +
      ind_protected_incidence,
    total_incidence = unprotected_incidence + protected_incidence,
    total_incidence_rate = (total_incidence / total_pop) * 100000,
    # Incidence relative to the party's current population
    rep_prop_infected = rep_incidence / rep_pop,
    dem_prop_infected = dem_incidence / dem_pop,
    ind_prop_infected = ind_incidence / ind_pop,
    # Deaths relative to the initial population (N0 times the party fraction)
    rep_prop_dead = rep_dead / (N0[1] * frac_a[1]),
    dem_prop_dead = dem_dead / (N0[1] * frac_b[1]),
    ind_prop_dead = ind_dead / (N0[1] * (1 - (frac_a[1] + frac_b[1]))),
    total_prop_dead = total_dead / (N0[1]),
    # Share of each party in a P compartment
    rep_prop_protected = (SPa + IPa + RPa) / rep_pop,
    dem_prop_protected = (SPb + IPb + RPb) / dem_pop,
    ind_prop_protected = (SPc + IPc + RPc) / ind_pop,
    # Row-to-row change in the model's Cases* columns (first row vs 1)
    new_cases_a = Casesa - lag(Casesa, default = 1),
    new_cases_b = Casesb - lag(Casesb, default = 1),
    new_cases_c = Casesc - lag(Casesc, default = 1),
    # Cumulative cases as reported by the model
    rep_cumulative_cases = Casesa,
    dem_cumulative_cases = Casesb,
    ind_cumulative_cases = Casesc,
    # Population-weighted average of ca, cb and cc
    total_pop_contacts = ((ca * rep_pop) + (cb * dem_pop) + (cc * ind_pop)) / total_pop
  )
}
#' Calculate R0 from simulation data
#' Matches function from disease_model_data_prep_helper.R
#'
#' Builds a 3 x 3 next-generation matrix (NGM) from the first row of `data`
#' and returns the largest eigenvalue modulus. Entry [i, j] is
#' (effective transmissibility of source group j) * (contacts of group i
#' with group j) * (infectious period) * (effective susceptibility of
#' receiving group i).
#'
#' @param data Simulation data frame with all metrics; the first row of
#'   `trans_p`, `kappa`, `rho`, `ca_a` ... `cc_c` and `*_prop_protected` is used
#' @return Numeric R0 value
calculate_R0 <- function(data) {
  trans_p <- data$trans_p[1]
  kappa <- data$kappa[1]

  # Contacts at the first time step: rows receive infection, columns transmit
  contacts <- matrix(
    c(
      data$ca_a[1], data$ca_b[1], data$ca_c[1],
      data$cb_a[1], data$cb_b[1], data$cb_c[1],
      data$cc_a[1], data$cc_b[1], data$cc_c[1]
    ),
    nrow = 3,
    byrow = TRUE
  )

  # Share of each group that is protected, in the order rep, dem, ind
  prop_protected <- c(
    data$rep_prop_protected[1],
    data$dem_prop_protected[1],
    data$ind_prop_protected[1]
  )

  # Protected individuals count with weight kappa, both when receiving
  # (eff_susc) and when transmitting (eff_trans)
  eff_susc <- (1 - prop_protected) + kappa * prop_protected
  eff_trans <- trans_p * eff_susc

  # Infectious period (inverse of recovery rate)
  infectious_period <- 1 / data$rho[1]

  # rep(eff_trans, each = 3) scales column j by eff_trans[j]; the trailing
  # eff_susc is recycled down the columns and so scales row i by eff_susc[i]
  NGM <- contacts * rep(eff_trans, each = 3) * infectious_period * eff_susc

  # R0 as the dominant eigenvalue of the NGM
  max(abs(eigen(NGM)$values))
}
#' Calculate epidemic peak and timing metrics
#'
#' Works with both column naming conventions: `total_prevalence` /
#' `total_prop_prevalence` are used when present, otherwise the legacy
#' `total_infected` / `total_prop_infected`.
#'
#' @param sim_data Processed simulation data with `time` and
#'   `total_new_deaths` columns
#' @return List with peak and timing metrics:
#'   `peak_prevalence` / `peak_infected` (maximum prevalence),
#'   `peak_prop_prevalence` / `peak_proportion` (maximum prevalence
#'   proportion), `crossing_time` (first time the proportion reaches its
#'   maximum) and `inflection_point` (first time `total_new_deaths` reaches
#'   its maximum)
calculate_peak_metrics <- function(sim_data) {
  available <- names(sim_data)

  # Support both naming conventions
  prevalence <- if ("total_prevalence" %in% available) {
    sim_data[["total_prevalence"]]
  } else {
    sim_data[["total_infected"]]
  }
  prop_prevalence <- if ("total_prop_prevalence" %in% available) {
    sim_data[["total_prop_prevalence"]]
  } else {
    sim_data[["total_prop_infected"]]
  }

  peak_prevalence <- max(prevalence, na.rm = TRUE)
  peak_prop_prevalence <- max(prop_prevalence, na.rm = TRUE)

  # which.max() ignores NA and returns the FIRST maximum, so each timing
  # metric is a single value even when the maximum is tied, for example when
  # total_new_deaths is 0 in every row
  crossing_time <- sim_data[["time"]][which.max(prop_prevalence)]
  inflection_point <- sim_data[["time"]][which.max(sim_data[["total_new_deaths"]])]

  list(
    peak_prevalence = peak_prevalence,
    peak_prop_prevalence = peak_prop_prevalence,
    # Legacy names for backward compatibility
    peak_infected = peak_prevalence,
    peak_proportion = peak_prop_prevalence,
    crossing_time = crossing_time,
    inflection_point = inflection_point
  )
}
#' Summarise outbreak size, prevalence AUC and cumulative infection share
#'
#' Areas are computed with auc(), which is defined outside this file.
#'
#' @param sim_data Processed simulation data
#' @return List with `outbreak_size` (area under total_incidence, falling
#'   back to total_prevalence and then the legacy total_infected), `auc`
#'   (area under the prevalence proportion, rounded to 2 digits) and
#'   `cumulative_proportion_infected` (final recovered + dead over the
#'   first-row total_pop, rounded to 4 digits)
calculate_outbreak_metrics <- function(sim_data) {
  available <- names(sim_data)

  # Preferred series for outbreak size, in order of priority
  size_col <- if ("total_incidence" %in% available) {
    "total_incidence"
  } else if ("total_prevalence" %in% available) {
    "total_prevalence"
  } else {
    "total_infected"
  }
  outbreak_size <- auc(x = sim_data$time, y = sim_data[[size_col]])

  # Prevalence proportion under either naming convention
  prop_col <- if ("total_prop_prevalence" %in% available) {
    "total_prop_prevalence"
  } else {
    "total_prop_infected"
  }
  auc_value <- round(auc(x = sim_data$time, y = sim_data[[prop_col]]), digits = 2)

  # Ever infected = recovered + dead in the final row, relative to the
  # population in the first row
  final_state <- slice(sim_data, n())
  initial_pop <- slice(sim_data, 1)$total_pop
  ever_recovered <- with(final_state, RUa + RPa + RUb + RPb + RUc + RPc)
  ever_dead <- with(final_state, DUa + DPa + DUb + DPb + DUc + DPc)
  cumulative_infected_prop <- round((ever_recovered + ever_dead) / initial_pop, digits = 4)

  list(
    outbreak_size = outbreak_size,
    auc = auc_value,
    cumulative_proportion_infected = cumulative_infected_prop
  )
}
#' Percentage of each population lost between the first and last row
#'
#' @param sim_data Processed simulation data
#' @return List with population shrinkage percentages (`rep_shrink`,
#'   `dem_shrink`, `ind_shrink`, `tot_shrink`), each rounded to 2 digits
calculate_population_shrinkage <- function(sim_data) {
  start <- slice(sim_data, 1)
  end <- slice(sim_data, n())

  # Relative loss from `initial` to `final`, as a percentage
  pct_lost <- function(initial, final) {
    round(((initial - final) / initial) * 100, 2)
  }

  # Party populations are called rep_pop/dem_pop/ind_pop in current data and
  # pop_a/pop_b/pop_c in legacy data
  if ("rep_pop" %in% names(sim_data)) {
    rep_shrink <- pct_lost(pull(start, rep_pop), pull(end, rep_pop))
    dem_shrink <- pct_lost(pull(start, dem_pop), pull(end, dem_pop))
    ind_shrink <- pct_lost(pull(start, ind_pop), pull(end, ind_pop))
  } else {
    rep_shrink <- pct_lost(pull(start, pop_a), pull(end, pop_a))
    dem_shrink <- pct_lost(pull(start, pop_b), pull(end, pop_b))
    ind_shrink <- pct_lost(pull(start, pop_c), pull(end, pop_c))
  }

  list(
    rep_shrink = rep_shrink,
    dem_shrink = dem_shrink,
    ind_shrink = ind_shrink,
    tot_shrink = pct_lost(pull(start, total_pop), pull(end, total_pop))
  )
}
#' Final death counts by party and the Republican-vs-Democrat gap
#'
#' @param sim_data Processed simulation data
#' @return List with the last-row death counts under both the current names
#'   (`rep_dead`, `dem_dead`, `ind_dead`) and the legacy names (`rep_deaths`,
#'   `dem_deaths`, `ind_deaths`), plus `diff_deaths`: the percentage by which
#'   Republican deaths differ from Democrat deaths, rounded to 2 digits
calculate_death_metrics <- function(sim_data) {
  final_state <- slice_tail(sim_data, n = 1)

  # Death columns are rep_dead/... in current data, dead_reps/... in legacy data
  if ("rep_dead" %in% names(sim_data)) {
    deaths <- list(
      rep = pull(final_state, rep_dead),
      dem = pull(final_state, dem_dead),
      ind = pull(final_state, ind_dead)
    )
  } else {
    deaths <- list(
      rep = pull(final_state, dead_reps),
      dem = pull(final_state, dead_dems),
      ind = pull(final_state, dead_inds)
    )
  }

  rep_vs_dem_pct <- round(((deaths$rep - deaths$dem) / deaths$dem) * 100, digits = 2)

  list(
    rep_dead = deaths$rep,
    dem_dead = deaths$dem,
    ind_dead = deaths$ind,
    # Legacy names for backward compatibility
    rep_deaths = deaths$rep,
    dem_deaths = deaths$dem,
    ind_deaths = deaths$ind,
    diff_deaths = rep_vs_dem_pct
  )
}
#' Area under each party's incidence curve
#'
#' Areas are computed with auc(), which is defined outside this file.
#'
#' @param sim_data Processed simulation data with incidence; for each party
#'   `<party>_incidence` is used when present, otherwise `<party>_new_cases`
#' @return List with `rep_outbreak`, `dem_outbreak`, `ind_outbreak` and
#'   `rep_vs_dem_percentage` (percentage difference of the Republican area
#'   relative to the Democrat area, rounded to 2 digits)
calculate_partisan_outbreak_sizes <- function(sim_data) {
  # Incidence series for one party under either naming convention
  incidence_for <- function(party) {
    current_name <- paste0(party, "_incidence")
    if (current_name %in% names(sim_data)) {
      sim_data[[current_name]]
    } else {
      sim_data[[paste0(party, "_new_cases")]]
    }
  }

  rep_series <- incidence_for("rep")
  dem_series <- incidence_for("dem")
  ind_series <- incidence_for("ind")

  rep_area <- auc(x = sim_data$time, y = rep_series)
  dem_area <- auc(x = sim_data$time, y = dem_series)
  ind_area <- auc(x = sim_data$time, y = ind_series)

  list(
    rep_outbreak = rep_area,
    dem_outbreak = dem_area,
    ind_outbreak = ind_area,
    rep_vs_dem_percentage = round(((rep_area - dem_area) / dem_area) * 100, 2)
  )
}
#' Compare the current model's headline metrics against the null model
#'
#' @param current_metrics List of metrics for current model
#' @param null_metrics List of metrics for null model
#' @return List with `outbreak_size_increase` and `peak_increase` (percentage
#'   change relative to the null model, rounded to 2 digits),
#'   `crossing_time_diff` (null minus current crossing time) and `peak_diff`
#'   (current minus null peak proportion)
calculate_comparison_metrics <- function(current_metrics, null_metrics) {
  # Percentage change of `value` relative to `baseline`
  pct_change <- function(value, baseline) {
    round(((value - baseline) / baseline) * 100, 2)
  }

  # Peak proportion under either name (peak_prop_prevalence, else the legacy
  # peak_proportion)
  peak_of <- function(metrics) {
    if (is.null(metrics$peak_prop_prevalence)) {
      metrics$peak_proportion
    } else {
      metrics$peak_prop_prevalence
    }
  }

  current_peak <- peak_of(current_metrics)
  null_peak <- peak_of(null_metrics)

  list(
    outbreak_size_increase = pct_change(
      current_metrics$outbreak_size,
      null_metrics$outbreak_size
    ),
    peak_increase = pct_change(current_peak, null_peak),
    crossing_time_diff = null_metrics$crossing_time - current_metrics$crossing_time,
    peak_diff = current_peak - null_peak
  )
}
#' Run full data processing pipeline
#' Takes raw simulation output through every processing stage and then
#' computes all summary metrics
#'
#' @param sim_output Raw simulation output from sir_three_group_pu()
#' @param current_params List of current parameters
#' @param null_model Optional processed null model for comparisons
#' @param dem_model Optional processed dem model for comparisons
#' @return List with processed simulation data (`sim_data`), the per-step
#'   contact matrices (`contact_matrices`) and all metrics (`peak_metrics`,
#'   `outbreak_metrics`, `pop_shrinkage`, `death_metrics`,
#'   `partisan_outbreaks`, `R0`)
run_full_processing <- function(sim_output, current_params, null_model = NULL, dem_model = NULL) {
  # Stages 1-2: basic epidemic metrics, then the parameters as columns
  sim_data <- add_parameters_to_dataset(
    add_epidemic_metrics(sim_output),
    current_params
  )

  # Stage 3: contact matrices for every time step
  homophily <- list(beta_a = current_params$beta_a, beta_b = current_params$beta_b)
  contact_result <- generate_contact_matrices(sim_data, homophily)

  # Stage 4: force of infection, incidence and the other derived columns
  sim_data <- add_additional_epidemic_columns(contact_result$modified_df)

  # Stage 5: reference-model columns, only for the models that were supplied
  if (!is.null(null_model)) {
    sim_data <- mutate(
      sim_data,
      null_total_prop_prevalence = null_model$total_prop_prevalence,
      null_total_prop_dead = null_model$total_prop_dead,
      null_total_pop = null_model$total_pop,
      null_total_incidence = null_model$total_incidence
    )
  }
  if (!is.null(dem_model)) {
    sim_data <- mutate(
      sim_data,
      dem_total_prop_prevalence = dem_model$total_prop_prevalence,
      dem_total_prop_dead = dem_model$total_prop_dead,
      dem_total_pop = dem_model$total_pop,
      dem_total_incidence = dem_model$total_incidence
    )
  }

  # Every summary metric is computed from the fully processed data
  list(
    sim_data = sim_data,
    contact_matrices = contact_result$ac_mats,
    peak_metrics = calculate_peak_metrics(sim_data),
    outbreak_metrics = calculate_outbreak_metrics(sim_data),
    pop_shrinkage = calculate_population_shrinkage(sim_data),
    death_metrics = calculate_death_metrics(sim_data),
    partisan_outbreaks = calculate_partisan_outbreak_sizes(sim_data),
    R0 = calculate_R0(sim_data)
  )
}
# ============================================================================
# Legacy function names for backward compatibility with existing Shiny code
# These wrap the new functions but maintain the old interface
# ============================================================================
#' Process simulation output (legacy wrapper)
#' Keeps the old function name and forwards to add_epidemic_metrics()
#' @param sim_output Raw simulation output
#' @return Data frame with epidemic metrics
process_simulation_output <- function(sim_output) {
  with_metrics <- add_epidemic_metrics(sim_output)
  with_metrics
}
#' Process main simulation with comparison models (legacy wrapper)
#' Runs add_epidemic_metrics() and then adds the legacy column names together
#' with the columns taken from the null and dem reference models
#' @param sim_output Raw simulation output
#' @param null_model Processed null model
#' @param dem_model Processed dem model
#' @return Data frame with all metrics
process_main_simulation <- function(sim_output, null_model, dem_model) {
  sim_data <- add_epidemic_metrics(sim_output)

  # Row-to-row change in each reference model's prevalence (first row vs 0)
  null_prev_change <- null_model$total_prevalence -
    lag(null_model$total_prevalence, default = 0)
  dem_prev_change <- dem_model$total_prevalence -
    lag(dem_model$total_prevalence, default = 0)

  mutate(
    sim_data,
    # Legacy aliases for the prevalence columns
    total_infected = total_prevalence,
    rep_infected = rep_prevalence,
    dem_infected = dem_prevalence,
    ind_infected = ind_prevalence,
    # Running sums of prevalence
    cumulative_infected = cumsum(total_prevalence),
    cumulative_rep_infected = cumsum(rep_prevalence),
    cumulative_dem_infected = cumsum(dem_prevalence),
    cumulative_ind_infected = cumsum(ind_prevalence),
    total_new_cases = total_prevalence - lag(total_prevalence, default = 0),
    # Legacy aliases for the population columns
    pop_a = rep_pop,
    pop_b = dem_pop,
    pop_c = ind_pop,
    # Legacy aliases for the prevalence proportions
    total_prop_infected = total_prop_prevalence,
    rep_prop_infected = rep_prop_prevalence,
    dem_prop_infected = dem_prop_prevalence,
    ind_prop_infected = ind_prop_prevalence,
    # The total is divided by the number of rows; the per-party sums are not
    cumulative_prop_infected = cumsum(total_prop_prevalence) / n(),
    cumulative_rep_prop_infected = cumsum(rep_prop_prevalence),
    cumulative_dem_prop_infected = cumsum(dem_prop_prevalence),
    cumulative_ind_prop_infected = cumsum(ind_prop_prevalence),
    null_total_prop_infected = null_model$total_prop_prevalence,
    dem_total_prop_infected = dem_model$total_prop_prevalence,
    # Legacy aliases for the death columns
    dead_reps = rep_dead,
    dead_dems = dem_dead,
    dead_inds = ind_dead,
    total_prop_dead = total_dead / total_pop,
    null_total_prop_dead = null_model$total_prop_dead,
    dem_total_prop_dead = dem_model$total_prop_dead,
    # NA wherever the population is missing or zero
    total_daily_mortality_rate = ifelse(
      is.na(total_pop) | total_pop == 0,
      NA,
      total_new_deaths / total_pop
    ),
    dem_total_pop = dem_model$total_pop,
    dem_total_new_cases = dem_prev_change,
    null_total_pop = null_model$total_pop,
    null_total_new_cases = null_prev_change,
    null_total_prop_cases = null_prev_change / null_model$total_pop,
    # Calculate null incidence rate (per 100,000)
    null_total_incidence_rate = null_prev_change / null_model$total_pop * 100000
  )
}
#' Calculate contacts over time (legacy wrapper)
#'
#' Calls avg_contact_matrix_3gp() (defined outside this file) once per row of
#' `sim_data` and returns the upper-left 3 x 3 entries of each result as
#' columns, together with row totals and the party populations that were
#' used. A zero-row `sim_data` yields a zero-row result.
#'
#' @param sim_data Processed simulation data; `time`, `ca`, `cb`, `cc` and
#'   the party populations (rep_pop/dem_pop/ind_pop, or the legacy
#'   pop_a/pop_b/pop_c) are read
#' @param beta_a Homophily parameter A
#' @param beta_b Homophily parameter B
#' @return Data frame with contact matrices: `time`, the nine pair columns
#'   (R_R ... I_D), `Total_R_Contacts`, `Total_D_Contacts`,
#'   `Total_I_Contacts`, `N_a`, `N_b`, `N_c`
calculate_contacts_over_time <- function(sim_data, beta_a, beta_b) {
  # Use rep_pop/dem_pop/ind_pop if available, otherwise fall back to
  # pop_a/pop_b/pop_c
  pick_pop <- function(current_name, legacy_name) {
    if (current_name %in% names(sim_data)) {
      sim_data[[current_name]]
    } else {
      sim_data[[legacy_name]]
    }
  }
  N_a <- pick_pop("rep_pop", "pop_a")
  N_b <- pick_pop("dem_pop", "pop_b")
  N_c <- pick_pop("ind_pop", "pop_c")

  # Preallocate one row per time step; columns follow the matrix row by row
  n_steps <- nrow(sim_data)
  row_major_cols <- c("R_R", "R_D", "R_I", "D_R", "D_D", "D_I", "I_R", "I_D", "I_I")
  contacts <- matrix(
    NA_real_,
    nrow = n_steps,
    ncol = length(row_major_cols),
    dimnames = list(NULL, row_major_cols)
  )

  # seq_len() skips the loop entirely when sim_data has no rows
  for (r in seq_len(n_steps)) {
    ac_mat <- avg_contact_matrix_3gp(
      dbar_a = sim_data$ca[r],
      dbar_b = sim_data$cb[r],
      dbar_c = sim_data$cc[r],
      N_a = N_a[r],
      N_b = N_b[r],
      N_c = N_c[r],
      beta_a = beta_a,
      beta_b = beta_b
    )
    # Transposing first flattens the 3 x 3 block in row order
    contacts[r, ] <- as.vector(t(ac_mat[1:3, 1:3]))
  }

  # Column order expected by the legacy code (differs from row-major order)
  contacts_df <- data.frame(time = sim_data$time)
  legacy_order <- c("R_R", "R_D", "R_I", "D_D", "D_R", "D_I", "I_I", "I_R", "I_D")
  for (col in legacy_order) {
    # unname(): a one-row matrix would otherwise hand back a named scalar
    contacts_df[[col]] <- unname(contacts[, col])
  }

  contacts_df$Total_R_Contacts <- contacts_df$R_R + contacts_df$R_D + contacts_df$R_I
  contacts_df$Total_D_Contacts <- contacts_df$D_R + contacts_df$D_D + contacts_df$D_I
  contacts_df$Total_I_Contacts <- contacts_df$I_R + contacts_df$I_D + contacts_df$I_I
  contacts_df$N_a <- N_a
  contacts_df$N_b <- N_b
  contacts_df$N_c <- N_c

  contacts_df
}
#' Add force of infection to simulation data (legacy wrapper)
#'
#' Copies the model parameters and the contact matrix entries onto
#' `sim_data`, then derives force of infection, infection-source shares, new
#' cases by source, incidence rates and protection shares.
#'
#' @param sim_data Processed simulation data
#' @param contacts_df Contact matrix data (columns R_R ... I_I)
#' @param current_params Current parameters list
#' @return Data frame with force of infection metrics
add_force_of_infection <- function(sim_data, contacts_df, current_params) {
  # Party populations: rep_pop/dem_pop/ind_pop if available, otherwise the
  # legacy pop_a/pop_b/pop_c
  available <- names(sim_data)
  pop_a_col <- if ("rep_pop" %in% available) sim_data[["rep_pop"]] else sim_data[["pop_a"]]
  pop_b_col <- if ("dem_pop" %in% available) sim_data[["dem_pop"]] else sim_data[["pop_b"]]
  pop_c_col <- if ("ind_pop" %in% available) sim_data[["ind_pop"]] else sim_data[["pop_c"]]

  mutate(
    sim_data,
    # Model parameters as columns
    N0 = current_params$N0,
    trans_p = current_params$trans_p,
    kappa = current_params$kappa,
    ell = current_params$ell,
    rho = current_params$rho,
    vstart = current_params$vstart,
    gamma = current_params$gamma,
    frac_a = current_params$frac_a,
    frac_b = current_params$frac_b,
    pi_a = current_params$pi_a,
    pi_b = current_params$pi_b,
    pi_c = current_params$pi_c,
    phi_a = current_params$phi_a,
    phi_b = current_params$phi_b,
    phi_c = current_params$phi_c,
    zeta_a = current_params$zeta_a,
    zeta_b = current_params$zeta_b,
    zeta_c = current_params$zeta_c,
    mu_a = current_params$mu_a,
    mu_b = current_params$mu_b,
    mu_c = current_params$mu_c,
    vacc_a = current_params$vacc_a,
    vacc_b = current_params$vacc_b,
    vacc_c = current_params$vacc_c,
    I0_a = current_params$I0_a,
    I0_b = current_params$I0_b,
    I0_c = current_params$I0_c,
    beta_a = current_params$beta_a,
    beta_b = current_params$beta_b,
    # Contact matrix entries: c<x>_<y> is group x's contacts with group y
    ca_a = contacts_df$R_R,
    ca_b = contacts_df$R_D,
    ca_c = contacts_df$R_I,
    cb_a = contacts_df$D_R,
    cb_b = contacts_df$D_D,
    cb_c = contacts_df$D_I,
    cc_a = contacts_df$I_R,
    cc_b = contacts_df$I_D,
    cc_c = contacts_df$I_I,
    # Force of infection: lambda_xy is the pressure on group x from group y.
    # Infected in the P compartment are weighted by kappa.
    lambda_aa = trans_p * (ca_a * (IUa / pop_a_col + kappa * IPa / pop_a_col)),
    lambda_ab = trans_p * (ca_b * (IUb / pop_b_col + kappa * IPb / pop_b_col)),
    lambda_ac = trans_p * (ca_c * (IUc / pop_c_col + kappa * IPc / pop_c_col)),
    lambda_a = lambda_aa + lambda_ab + lambda_ac,
    lambda_ba = trans_p * (cb_a * (IUa / pop_a_col + kappa * IPa / pop_a_col)),
    lambda_bb = trans_p * (cb_b * (IUb / pop_b_col + kappa * IPb / pop_b_col)),
    lambda_bc = trans_p * (cb_c * (IUc / pop_c_col + kappa * IPc / pop_c_col)),
    lambda_b = lambda_ba + lambda_bb + lambda_bc,
    lambda_ca = trans_p * (cc_a * (IUa / pop_a_col + kappa * IPa / pop_a_col)),
    lambda_cb = trans_p * (cc_b * (IUb / pop_b_col + kappa * IPb / pop_b_col)),
    lambda_cc = trans_p * (cc_c * (IUc / pop_c_col + kappa * IPc / pop_c_col)),
    lambda_c = lambda_ca + lambda_cb + lambda_cc,
    # Share of each group's force of infection attributable to each source
    prop_rep_infections_from_reps = lambda_aa / lambda_a,
    prop_rep_infections_from_dems = lambda_ab / lambda_a,
    prop_rep_infections_from_inds = lambda_ac / lambda_a,
    prop_dem_infections_from_reps = lambda_ba / lambda_b,
    prop_dem_infections_from_dems = lambda_bb / lambda_b,
    prop_dem_infections_from_inds = lambda_bc / lambda_b,
    prop_ind_infections_from_reps = lambda_ca / lambda_c,
    prop_ind_infections_from_dems = lambda_cb / lambda_c,
    prop_ind_infections_from_inds = lambda_cc / lambda_c,
    # New Republican cases by source; the first row is fixed at 1/3 per source
    rep_from_rep_new_cases = if_else(
      row_number() == 1, 1 / 3,
      (SUa * lambda_aa) + (SPa * lambda_aa * kappa)
    ),
    rep_from_dem_new_cases = if_else(
      row_number() == 1, 1 / 3,
      (SUa * lambda_ab) + (SPa * lambda_ab * kappa)
    ),
    rep_from_ind_new_cases = if_else(
      row_number() == 1, 1 / 3,
      (SUa * lambda_ac) + (SPa * lambda_ac * kappa)
    ),
    rep_new_cases = rep_from_rep_new_cases + rep_from_dem_new_cases + rep_from_ind_new_cases,
    # New Democrat cases by source
    dem_from_rep_new_cases = if_else(
      row_number() == 1, 1 / 3,
      (SUb * lambda_ba) + (SPb * lambda_ba * kappa)
    ),
    dem_from_dem_new_cases = if_else(
      row_number() == 1, 1 / 3,
      (SUb * lambda_bb) + (SPb * lambda_bb * kappa)
    ),
    dem_from_ind_new_cases = if_else(
      row_number() == 1, 1 / 3,
      (SUb * lambda_bc) + (SPb * lambda_bc * kappa)
    ),
    dem_new_cases = dem_from_rep_new_cases + dem_from_dem_new_cases + dem_from_ind_new_cases,
    # New Independent cases by source
    ind_from_rep_new_cases = if_else(
      row_number() == 1, 1 / 3,
      (SUc * lambda_ca) + (SPc * lambda_ca * kappa)
    ),
    ind_from_dem_new_cases = if_else(
      row_number() == 1, 1 / 3,
      (SUc * lambda_cb) + (SPc * lambda_cb * kappa)
    ),
    ind_from_ind_new_cases = if_else(
      row_number() == 1, 1 / 3,
      (SUc * lambda_cc) + (SPc * lambda_cc * kappa)
    ),
    ind_new_cases = ind_from_rep_new_cases + ind_from_dem_new_cases + ind_from_ind_new_cases,
    total_new_cases = rep_new_cases + dem_new_cases + ind_new_cases,
    # Incidence rates (per 100,000)
    rep_incidence_rate = (rep_new_cases / pop_a_col) * 100000,
    dem_incidence_rate = (dem_new_cases / pop_b_col) * 100000,
    ind_incidence_rate = (ind_new_cases / pop_c_col) * 100000,
    total_incidence_rate = (total_new_cases / total_pop) * 100000,
    # Share of each party in a P compartment
    rep_prop_protected = (SPa + IPa + RPa) / pop_a_col,
    dem_prop_protected = (SPb + IPb + RPb) / pop_b_col,
    ind_prop_protected = (SPc + IPc + RPc) / pop_c_col
  )
}
#' Extract All Model Metrics
#'
#' Collects the peak and outbreak metrics of the null model and the peak
#' metrics of the cautious model into one nested list.
#'
#' @param null_model Processed null model output
#' @param cautious_model Processed cautious model output
#' @return List containing all extracted metrics: `null` (inflection_point,
#'   outbreak_size, auc, peak_infected, peak_proportion, crossing_time) and
#'   `cautious` (peak_proportion, crossing_time)
extract_reference_model_metrics <- function(null_model, cautious_model) {
  # Null (baseline) model: peak/timing metrics plus outbreak size and AUC
  null_peak <- calculate_peak_metrics(null_model)
  null_outbreak <- calculate_outbreak_metrics(null_model)
  null_summary <- list(
    inflection_point = null_peak$inflection_point,
    outbreak_size = null_outbreak$outbreak_size,
    auc = null_outbreak$auc,
    peak_infected = null_peak$peak_infected,
    peak_proportion = null_peak$peak_proportion,
    crossing_time = null_peak$crossing_time
  )

  # Cautious model: only the peak proportion and its timing are kept
  cautious_peak <- calculate_peak_metrics(cautious_model)
  cautious_summary <- list(
    peak_proportion = cautious_peak$peak_proportion,
    crossing_time = cautious_peak$crossing_time
  )

  list(null = null_summary, cautious = cautious_summary)
}