Spaces:

bics-berkeley
/

partisan-disease-simulator

Sleeping

File size: 32,996 Bytes

#' Derive per-group and population-wide epidemic summaries
#'
#' Appends prevalence, susceptible, recovered, population, mortality and
#' proportion columns to raw simulation output. Compartment suffix `a` feeds
#' the `rep_*` columns, `b` the `dem_*` columns and `c` the `ind_*` columns.
#' Column names follow disease_model_data_prep_helper.R.
#'
#' @param data Raw simulation output from sir_three_group_pu(). Must contain
#'   the `S`, `I`, `R` and `D` compartment columns for every `U`/`P` and
#'   `a`/`b`/`c` combination (e.g. `SUa`, `IPb`, `DUc`).
#' @return `data` with the epidemic metric columns appended.
add_epidemic_metrics <- function(data) {
  # Stage 1: raw head counts per group. The population denominators only sum
  # S, I and R compartments, so the dead are not part of `*_pop`.
  with_counts <- mutate(
    data,
    total_prevalence = (IUa + IPa) + (IUb + IPb) + (IUc + IPc),
    rep_prevalence = IUa + IPa,
    dem_prevalence = IUb + IPb,
    ind_prevalence = IUc + IPc,
    rep_susceptible = SUa + SPa,
    dem_susceptible = SUb + SPb,
    ind_susceptible = SUc + SPc,
    rep_recovered = RUa + RPa,
    dem_recovered = RUb + RPb,
    ind_recovered = RUc + RPc,
    rep_pop = SUa + IUa + RUa + SPa + IPa + RPa,
    dem_pop = SUb + IUb + RUb + SPb + IPb + RPb,
    ind_pop = SUc + IUc + RUc + SPc + IPc + RPc,
    total_pop = rep_pop + dem_pop + ind_pop
  )

  # Stage 2: the same counts expressed as a share of each group's current
  # (living) population.
  with_shares <- mutate(
    with_counts,
    rep_prop_susceptible = rep_susceptible / rep_pop,
    dem_prop_susceptible = dem_susceptible / dem_pop,
    ind_prop_susceptible = ind_susceptible / ind_pop,
    rep_prop_prevalence = rep_prevalence / rep_pop,
    dem_prop_prevalence = dem_prevalence / dem_pop,
    ind_prop_prevalence = ind_prevalence / ind_pop,
    total_prop_prevalence = (IUa + IPa + IUb + IPb + IUc + IPc) / total_pop,
    rep_prop_recovered = rep_recovered / rep_pop,
    dem_prop_recovered = dem_recovered / dem_pop,
    ind_prop_recovered = ind_recovered / ind_pop
  )

  # Stage 3: mortality. `total_new_deaths` is the step-to-step change in the
  # cumulative death count; the first row is compared against zero.
  mutate(
    with_shares,
    total_dead = DUa + DPa + DUb + DPb + DUc + DPc,
    rep_dead = DUa + DPa,
    dem_dead = DUb + DPb,
    ind_dead = DUc + DPc,
    total_new_deaths = total_dead - lag(total_dead, default = 0),
    # Deaths relative to the current (S + I + R) population
    total_prop_dead = total_dead / total_pop,
    rep_prop_dead = rep_dead / rep_pop,
    dem_prop_dead = dem_dead / dem_pop,
    ind_prop_dead = ind_dead / ind_pop
  )
}

#' Attach model parameters to a simulation data frame as columns
#'
#' Each named element of `parameters_list` becomes a column of the same name.
#' The one exception is `time`, which is stored as `max_time`. Mirrors the
#' function of the same name in disease_model_data_prep_helper.R.
#'
#' @param data Simulation data frame.
#' @param parameters_list Named list of parameters.
#' @return `data` with one column per parameter added (or overwritten).
add_parameters_to_dataset <- function(data, parameters_list) {
  for (key in names(parameters_list)) {
    # `time` is renamed on the way in; every other parameter keeps its name
    target <- key
    if (key == "time") {
      target <- "max_time"
    }
    data[[target]] <- parameters_list[[key]]
  }
  data
}

#' Generate contact matrices for each time step
#'
#' Calls avg_contact_matrix_3gp() once per row of `df`, stores the result in
#' nine contact columns (`R_R` ... `I_I`, row-major) and derives the `ca_*`,
#' `cb_*`, `cc_*` and `total_c*` summary columns from them. Mirrors the
#' function of the same name in disease_model_data_prep_helper.R.
#'
#' @param df Simulation data frame with epidemic metrics. The columns `ca`,
#'   `cb`, `cc`, `rep_pop`, `dem_pop` and `ind_pop` are read for every row.
#' @param params List with `beta_a` and `beta_b`, forwarded unchanged to
#'   avg_contact_matrix_3gp().
#' @return List with `modified_df` (`df` plus the contact columns) and
#'   `ac_mats` (one matrix per row, named `step_1`, `step_2`, ...). A zero-row
#'   `df` yields empty contact columns and an empty `ac_mats`.
generate_contact_matrices <- function(df, params = list()) {
  contact_cols <- c("R_R", "R_D", "R_I", "D_R", "D_D", "D_I", "I_R", "I_D", "I_I")
  n_steps <- nrow(df)

  # Collect the values in a numeric matrix and write them to `df` once at the
  # end, rather than assigning into the data frame one row at a time.
  contact_values <- matrix(
    NA_real_,
    nrow = n_steps,
    ncol = length(contact_cols),
    dimnames = list(NULL, contact_cols)
  )

  # Preallocated; sprintf() (unlike paste0()) returns character(0) for n = 0
  ac_mats <- vector("list", n_steps)
  names(ac_mats) <- sprintf("step_%d", seq_len(n_steps))

  # seq_len() keeps the loop from running on a zero-row data frame, where
  # 1:nrow(df) would iterate over c(1, 0)
  for (r in seq_len(n_steps)) {
    # NOTE(review): avg_contact_matrix_3gp() is defined outside this file; it
    # is assumed to return a 3x3 matrix (rows 1-3 feed the R_*, D_*, I_*
    # columns respectively) - confirm against its definition.
    ac_mat <- avg_contact_matrix_3gp(
      dbar_a = df$ca[r],
      dbar_b = df$cb[r],
      dbar_c = df$cc[r],
      N_a = df$rep_pop[r],
      N_b = df$dem_pop[r],
      N_c = df$ind_pop[r],
      beta_a = params$beta_a,
      beta_b = params$beta_b
    )

    ac_mats[[r]] <- ac_mat
    contact_values[r, ] <- c(ac_mat[1, ], ac_mat[2, ], ac_mat[3, ])
  }

  for (col in contact_cols) {
    df[[col]] <- contact_values[, col]
  }

  # Per-group totals plus short aliases for the individual matrix cells
  df <- df %>%
    mutate(
      total_ca = R_R + R_D + R_I,
      ca_a = R_R,
      ca_b = R_D,
      ca_c = R_I,
      total_cb = D_R + D_D + D_I,
      cb_a = D_R,
      cb_b = D_D,
      cb_c = D_I,
      total_cc = I_R + I_D + I_I,
      cc_a = I_R,
      cc_b = I_D,
      cc_c = I_I
    )

  list(modified_df = df, ac_mats = ac_mats)
}

#' Add force of infection, incidence and related columns
#'
#' Builds on the contact-matrix columns (`ca_a` ... `cc_c`) and the parameter
#' columns (`trans_p`, `kappa`, `N0`, `frac_a`, `frac_b`, `I0_a`, `I0_b`,
#' `I0_c`) already present in `data`. Mirrors the function of the same name in
#' disease_model_data_prep_helper.R.
#'
#' Note that `rep_prop_dead`, `dem_prop_dead`, `ind_prop_dead` and
#' `total_prop_dead` are recomputed here against the initial population and
#' overwrite any existing columns with those names.
#'
#' @param data Simulation data frame with contact matrices and parameters.
#' @return `data` with the additional columns appended.
add_additional_epidemic_columns <- function(data) {
  # Stage 1: force of infection. lambda_xy is the pressure on group x coming
  # from infectious members of group y; protected infectious individuals are
  # weighted by kappa.
  with_foi <- mutate(
    data,
    lambda_aa = trans_p * (ca_a * (IUa / rep_pop + kappa * IPa / rep_pop)),
    lambda_ab = trans_p * (ca_b * (IUb / dem_pop + kappa * IPb / dem_pop)),
    lambda_ac = trans_p * (ca_c * (IUc / ind_pop + kappa * IPc / ind_pop)),
    lambda_a = lambda_aa + lambda_ab + lambda_ac,
    lambda_ba = trans_p * (cb_a * (IUa / rep_pop + kappa * IPa / rep_pop)),
    lambda_bb = trans_p * (cb_b * (IUb / dem_pop + kappa * IPb / dem_pop)),
    lambda_bc = trans_p * (cb_c * (IUc / ind_pop + kappa * IPc / ind_pop)),
    lambda_b = lambda_ba + lambda_bb + lambda_bc,
    lambda_ca = trans_p * (cc_a * (IUa / rep_pop + kappa * IPa / rep_pop)),
    lambda_cb = trans_p * (cc_b * (IUb / dem_pop + kappa * IPb / dem_pop)),
    lambda_cc = trans_p * (cc_c * (IUc / ind_pop + kappa * IPc / ind_pop)),
    lambda_c = lambda_ca + lambda_cb + lambda_cc,
    lambda = lambda_a + lambda_b + lambda_c
  )

  # Stage 2: share of each group's force of infection attributable to each
  # source group
  with_sources <- mutate(
    with_foi,
    prop_rep_infections_from_reps = lambda_aa / lambda_a,
    prop_rep_infections_from_dems = lambda_ab / lambda_a,
    prop_rep_infections_from_inds = lambda_ac / lambda_a,
    prop_dem_infections_from_reps = lambda_ba / lambda_b,
    prop_dem_infections_from_dems = lambda_bb / lambda_b,
    prop_dem_infections_from_inds = lambda_bc / lambda_b,
    prop_ind_infections_from_reps = lambda_ca / lambda_c,
    prop_ind_infections_from_dems = lambda_cb / lambda_c,
    prop_ind_infections_from_inds = lambda_cc / lambda_c
  )

  # Stage 3: incidence. The first row is seeded with the initial infections
  # (I0_*), all counted as unprotected; later rows use susceptibles times
  # force of infection, with protected susceptibles scaled by kappa.
  with_incidence <- mutate(
    with_sources,
    rep_unprotected_incidence = if_else(
      row_number() == 1, I0_a[1], SUa * lambda_a
    ),
    rep_protected_incidence = if_else(
      row_number() == 1, 0, SPa * lambda_a * kappa
    ),
    dem_unprotected_incidence = if_else(
      row_number() == 1, I0_b[1], SUb * lambda_b
    ),
    dem_protected_incidence = if_else(
      row_number() == 1, 0, SPb * lambda_b * kappa
    ),
    ind_unprotected_incidence = if_else(
      row_number() == 1, I0_c[1], SUc * lambda_c
    ),
    ind_protected_incidence = if_else(
      row_number() == 1, 0, SPc * lambda_c * kappa
    ),
    rep_incidence = if_else(
      row_number() == 1, I0_a[1], SUa * lambda_a + SPa * lambda_a * kappa
    ),
    rep_incidence_rate = rep_incidence / rep_pop * 100000,
    dem_incidence = if_else(
      row_number() == 1, I0_b[1], SUb * lambda_b + SPb * lambda_b * kappa
    ),
    dem_incidence_rate = dem_incidence / dem_pop * 100000,
    ind_incidence = if_else(
      row_number() == 1, I0_c[1], SUc * lambda_c + SPc * lambda_c * kappa
    ),
    ind_incidence_rate = ind_incidence / ind_pop * 100000,
    unprotected_incidence = rep_unprotected_incidence +
      dem_unprotected_incidence + ind_unprotected_incidence,
    protected_incidence = rep_protected_incidence +
      dem_protected_incidence + ind_protected_incidence,
    total_incidence = unprotected_incidence + protected_incidence,
    total_incidence_rate = total_incidence / total_pop * 100000,
    rep_prop_infected = rep_incidence / rep_pop,
    dem_prop_infected = dem_incidence / dem_pop,
    ind_prop_infected = ind_incidence / ind_pop
  )

  # Stage 4: deaths against the initial population, protection shares,
  # model-reported cases and population-average contacts
  mutate(
    with_incidence,
    rep_prop_dead = rep_dead / (N0[1] * frac_a[1]),
    dem_prop_dead = dem_dead / (N0[1] * frac_b[1]),
    ind_prop_dead = ind_dead / (N0[1] * (1 - (frac_a[1] + frac_b[1]))),
    total_prop_dead = total_dead / N0[1],
    rep_prop_protected = (SPa + IPa + RPa) / rep_pop,
    dem_prop_protected = (SPb + IPb + RPb) / dem_pop,
    ind_prop_protected = (SPc + IPc + RPc) / ind_pop,
    # Step-to-step change in the model's cumulative case counters; the first
    # row is compared against 1
    new_cases_a = Casesa - lag(Casesa, default = 1),
    new_cases_b = Casesb - lag(Casesb, default = 1),
    new_cases_c = Casesc - lag(Casesc, default = 1),
    rep_cumulative_cases = Casesa,
    dem_cumulative_cases = Casesb,
    ind_cumulative_cases = Casesc,
    # Population-weighted mean of the per-group contact levels
    total_pop_contacts = (ca * rep_pop + cb * dem_pop + cc * ind_pop) / total_pop
  )
}

#' Calculate R0 from simulation data
#'
#' Builds a 3x3 next-generation matrix (NGM) from the first row of `data` and
#' returns its spectral radius. Mirrors the function of the same name in
#' disease_model_data_prep_helper.R.
#'
#' Entry `[i, j]` of the NGM is
#' `eff_trans[j] * contact[i, j] * (1 / rho) * eff_susc[i]`, where
#' `eff_susc = (1 - p) + kappa * p` for a group with protected share `p`, and
#' `eff_trans = trans_p * eff_susc`. The protection factor therefore enters
#' once for the infecting group and once for the receiving group.
#'
#' @param data Simulation data frame with all metrics. Required columns:
#'   `trans_p`, `kappa`, `rho`, the nine contact columns `ca_a` ... `cc_c`,
#'   and `rep_prop_protected`, `dem_prop_protected`, `ind_prop_protected`.
#' @return Numeric R0 value (largest eigenvalue modulus of the NGM).
calculate_R0 <- function(data) {
  contact_cols <- c(
    "ca_a", "ca_b", "ca_c",
    "cb_a", "cb_b", "cb_c",
    "cc_a", "cc_b", "cc_c"
  )
  protected_cols <- c("rep_prop_protected", "dem_prop_protected", "ind_prop_protected")

  # Fail with a readable message instead of a cryptic assignment error when
  # an input column is absent
  absent <- setdiff(
    c("trans_p", "kappa", "rho", contact_cols, protected_cols),
    names(data)
  )
  if (length(absent) > 0) {
    stop(
      "calculate_R0(): missing required column(s): ",
      paste(absent, collapse = ", "),
      call. = FALSE
    )
  }

  # All inputs are taken from the first row
  initial_value <- function(col) data[[col]][1]

  trans_p <- initial_value("trans_p")
  kappa <- initial_value("kappa")

  # Infectious period (inverse of recovery rate)
  infectious_period <- 1 / initial_value("rho")

  # Rows = receiving group, columns = source group
  contact <- matrix(
    vapply(contact_cols, initial_value, numeric(1), USE.NAMES = FALSE),
    nrow = 3,
    byrow = TRUE
  )

  prop_protected <- vapply(
    protected_cols, initial_value, numeric(1),
    USE.NAMES = FALSE
  )

  # Protection-weighted factor per group; used both for receiving infection
  # (rows) and, scaled by trans_p, for passing it on (columns)
  eff_susc <- (1 - prop_protected) + kappa * prop_protected
  eff_trans <- trans_p * eff_susc

  # rep(..., each = 3) lines eff_trans up with the columns of `contact`
  # (matrices are stored column-major); the trailing `* eff_susc` recycles
  # down the rows.
  NGM <- contact * rep(eff_trans, each = 3) * infectious_period * eff_susc

  # R0 as the dominant eigenvalue of the NGM
  max(abs(eigen(NGM)$values))
}

#' Calculate epidemic peak and timing metrics
#'
#' Accepts either the current column names (`total_prevalence`,
#' `total_prop_prevalence`) or the legacy ones (`total_infected`,
#' `total_prop_infected`).
#'
#' @param sim_data Processed simulation data. Must contain `time` and
#'   `total_new_deaths` in addition to the prevalence columns.
#' @return List with:
#'   * `peak_prevalence` / `peak_infected`: maximum prevalence count.
#'   * `peak_prop_prevalence` / `peak_proportion`: maximum prevalence share.
#'   * `crossing_time`: first `time` at which the prevalence share reaches
#'     its maximum.
#'   * `inflection_point`: first `time` at which `total_new_deaths` reaches
#'     its maximum.
calculate_peak_metrics <- function(sim_data) {
  available <- names(sim_data)

  absent <- setdiff(c("time", "total_new_deaths"), available)
  if (length(absent) > 0) {
    stop(
      "calculate_peak_metrics(): missing required column(s): ",
      paste(absent, collapse = ", "),
      call. = FALSE
    )
  }

  # Support both naming conventions
  prevalence <- if ("total_prevalence" %in% available) {
    sim_data$total_prevalence
  } else {
    sim_data$total_infected
  }
  prop_prevalence <- if ("total_prop_prevalence" %in% available) {
    sim_data$total_prop_prevalence
  } else {
    sim_data$total_prop_infected
  }

  peak_prevalence <- max(prevalence, na.rm = TRUE)
  peak_prop_prevalence <- max(prop_prevalence, na.rm = TRUE)

  # which.max() skips NA values and returns the FIRST maximum, so both
  # timings are a single value even when the maximum is tied (e.g. when
  # total_new_deaths is 0 on every row).
  crossing_time <- sim_data$time[which.max(prop_prevalence)]
  inflection_point <- sim_data$time[which.max(sim_data$total_new_deaths)]

  list(
    peak_prevalence = peak_prevalence,
    peak_prop_prevalence = peak_prop_prevalence,
    # Legacy names for backward compatibility
    peak_infected = peak_prevalence,
    peak_proportion = peak_prop_prevalence,
    crossing_time = crossing_time,
    inflection_point = inflection_point
  )
}

#' Calculate outbreak size and AUC metrics
#'
#' @param sim_data Processed simulation data. Needs `time`, `total_pop`, the
#'   `R**` and `D**` compartment columns, and one column from each of the two
#'   fallback chains described below.
#' @return List with:
#'   * `outbreak_size`: area under `total_incidence` over time, falling back
#'     to `total_prevalence` and then to the legacy `total_infected`.
#'   * `auc`: area under `total_prop_prevalence` (or the legacy
#'     `total_prop_infected`), rounded to 2 digits.
#'   * `cumulative_proportion_infected`: final recovered plus dead, divided
#'     by the first row's `total_pop`, rounded to 4 digits.
calculate_outbreak_metrics <- function(sim_data) {
  available <- names(sim_data)

  # Series integrated for the outbreak size, in order of preference
  size_series <- if ("total_incidence" %in% available) {
    sim_data$total_incidence
  } else if ("total_prevalence" %in% available) {
    sim_data$total_prevalence
  } else {
    sim_data$total_infected
  }
  outbreak_size <- auc(x = sim_data$time, y = size_series)

  # Prevalence share under either naming convention
  share_series <- if ("total_prop_prevalence" %in% available) {
    sim_data$total_prop_prevalence
  } else {
    sim_data$total_prop_infected
  }
  auc_value <- round(auc(x = sim_data$time, y = share_series), digits = 2)

  # Everyone who has left the S and I compartments by the final row counts
  # as ever infected: recovered plus dead.
  final_state <- slice(sim_data, n())
  initial_pop <- slice(sim_data, 1)$total_pop
  ever_infected <- with(
    final_state,
    (RUa + RPa + RUb + RPb + RUc + RPc) + (DUa + DPa + DUb + DPb + DUc + DPc)
  )
  cumulative_infected_prop <- round(ever_infected / initial_pop, digits = 4)

  list(
    outbreak_size = outbreak_size,
    auc = auc_value,
    cumulative_proportion_infected = cumulative_infected_prop
  )
}

#' Calculate population shrinkage metrics
#'
#' Compares each group's population in the first and last rows of the
#' simulation. Group populations are read from `rep_pop`/`dem_pop`/`ind_pop`
#' when `rep_pop` is present, otherwise from the legacy
#' `pop_a`/`pop_b`/`pop_c`.
#'
#' @param sim_data Processed simulation data (must also contain `total_pop`).
#' @return List with `rep_shrink`, `dem_shrink`, `ind_shrink` and
#'   `tot_shrink`: percentage decrease from first to last row, rounded to 2
#'   digits.
calculate_population_shrinkage <- function(sim_data) {
  # Percentage of the starting population that is gone by the end
  pct_lost <- function(start, end) {
    round(((start - end) / start) * 100, 2)
  }

  opening <- slice(sim_data, 1)
  closing <- slice(sim_data, n())

  if ("rep_pop" %in% names(sim_data)) {
    rep_end <- pull(closing, rep_pop)
    dem_end <- pull(closing, dem_pop)
    ind_end <- pull(closing, ind_pop)
    rep_start <- pull(opening, rep_pop)
    dem_start <- pull(opening, dem_pop)
    ind_start <- pull(opening, ind_pop)
  } else {
    # Legacy column names
    rep_end <- pull(closing, pop_a)
    dem_end <- pull(closing, pop_b)
    ind_end <- pull(closing, pop_c)
    rep_start <- pull(opening, pop_a)
    dem_start <- pull(opening, pop_b)
    ind_start <- pull(opening, pop_c)
  }
  tot_end <- pull(closing, total_pop)
  tot_start <- pull(opening, total_pop)

  list(
    rep_shrink = pct_lost(rep_start, rep_end),
    dem_shrink = pct_lost(dem_start, dem_end),
    ind_shrink = pct_lost(ind_start, ind_end),
    tot_shrink = pct_lost(tot_start, tot_end)
  )
}

#' Calculate partisan death comparison metrics
#'
#' Reads the cumulative death counts from the last row of the simulation,
#' using `rep_dead`/`dem_dead`/`ind_dead` when `rep_dead` is present and the
#' legacy `dead_reps`/`dead_dems`/`dead_inds` otherwise.
#'
#' @param sim_data Processed simulation data.
#' @return List with the final death counts under both the current
#'   (`rep_dead`, ...) and legacy (`rep_deaths`, ...) names, plus
#'   `diff_deaths`: the `rep` count relative to the `dem` count as a
#'   percentage difference, rounded to 2 digits.
calculate_death_metrics <- function(sim_data) {
  final_state <- slice_tail(sim_data, n = 1)

  if ("rep_dead" %in% names(sim_data)) {
    n_rep <- pull(final_state, rep_dead)
    n_dem <- pull(final_state, dem_dead)
    n_ind <- pull(final_state, ind_dead)
  } else {
    # Legacy column names
    n_rep <- pull(final_state, dead_reps)
    n_dem <- pull(final_state, dead_dems)
    n_ind <- pull(final_state, dead_inds)
  }

  pct_gap <- round(((n_rep - n_dem) / n_dem) * 100, digits = 2)

  list(
    rep_dead = n_rep,
    dem_dead = n_dem,
    ind_dead = n_ind,
    # Legacy names for backward compatibility
    rep_deaths = n_rep,
    dem_deaths = n_dem,
    ind_deaths = n_ind,
    diff_deaths = pct_gap
  )
}

#' Calculate partisan-specific outbreak sizes
#'
#' Integrates each group's incidence series over `time` with auc(). For each
#' group the `*_incidence` column is used when present, otherwise the
#' `*_new_cases` column.
#'
#' @param sim_data Processed simulation data with incidence.
#' @return List with `rep_outbreak`, `dem_outbreak`, `ind_outbreak` and
#'   `rep_vs_dem_percentage` (the `rep` size relative to the `dem` size as a
#'   percentage difference, rounded to 2 digits).
calculate_partisan_outbreak_sizes <- function(sim_data) {
  has_col <- function(col) col %in% names(sim_data)

  # Pick the incidence series for each group under either naming convention
  rep_series <- if (has_col("rep_incidence")) {
    sim_data$rep_incidence
  } else {
    sim_data$rep_new_cases
  }
  dem_series <- if (has_col("dem_incidence")) {
    sim_data$dem_incidence
  } else {
    sim_data$dem_new_cases
  }
  ind_series <- if (has_col("ind_incidence")) {
    sim_data$ind_incidence
  } else {
    sim_data$ind_new_cases
  }

  timeline <- sim_data$time
  size_rep <- auc(x = timeline, y = rep_series)
  size_dem <- auc(x = timeline, y = dem_series)
  size_ind <- auc(x = timeline, y = ind_series)

  list(
    rep_outbreak = size_rep,
    dem_outbreak = size_dem,
    ind_outbreak = size_ind,
    rep_vs_dem_percentage = round(((size_rep - size_dem) / size_dem) * 100, 2)
  )
}

#' Compare the current model's metrics against the null model's
#'
#' The peak is read from `peak_prop_prevalence` when that element exists and
#' from the legacy `peak_proportion` otherwise; this is decided separately
#' for each of the two lists.
#'
#' @param current_metrics List of metrics for current model (needs
#'   `outbreak_size`, `crossing_time` and a peak element).
#' @param null_metrics List of metrics for null model (same elements).
#' @return List with `outbreak_size_increase` and `peak_increase` (percentage
#'   change relative to the null model, rounded to 2 digits),
#'   `crossing_time_diff` (null minus current) and `peak_diff` (current minus
#'   null).
calculate_comparison_metrics <- function(current_metrics, null_metrics) {
  # Percentage change of `value` relative to `baseline`
  pct_change <- function(value, baseline) {
    round(((value - baseline) / baseline) * 100, 2)
  }

  # Peak share under either naming convention
  peak_of <- function(metrics) {
    preferred <- metrics$peak_prop_prevalence
    if (is.null(preferred)) metrics$peak_proportion else preferred
  }

  peak_now <- peak_of(current_metrics)
  peak_base <- peak_of(null_metrics)

  list(
    outbreak_size_increase = pct_change(
      current_metrics$outbreak_size,
      null_metrics$outbreak_size
    ),
    peak_increase = pct_change(peak_now, peak_base),
    crossing_time_diff = null_metrics$crossing_time - current_metrics$crossing_time,
    peak_diff = peak_now - peak_base
  )
}

#' Run the full data processing pipeline
#'
#' Takes raw simulation output through every processing stage (epidemic
#' metrics, parameter columns, contact matrices, force of infection and
#' incidence), optionally attaches reference-model columns, and computes all
#' summary metrics.
#'
#' @param sim_output Raw simulation output from sir_three_group_pu()
#' @param current_params List of current parameters; `beta_a` and `beta_b`
#'   are also forwarded to the contact-matrix step.
#' @param null_model Optional processed null model. When supplied, four of
#'   its columns are copied in with a `null_` prefix.
#' @param dem_model Optional processed dem model. When supplied, four of its
#'   columns are copied in with a `dem_` prefix.
#' @return List with `sim_data`, `contact_matrices`, `peak_metrics`,
#'   `outbreak_metrics`, `pop_shrinkage`, `death_metrics`,
#'   `partisan_outbreaks` and `R0`.
run_full_processing <- function(sim_output, current_params, null_model = NULL, dem_model = NULL) {
  # Steps 1-2: epidemic metrics, then one column per parameter
  processed <- add_parameters_to_dataset(
    add_epidemic_metrics(sim_output),
    current_params
  )

  # Step 3: contact matrices, one per row
  homophily <- list(beta_a = current_params$beta_a, beta_b = current_params$beta_b)
  contacts <- generate_contact_matrices(processed, homophily)

  # Step 4: force of infection, incidence, etc.
  processed <- add_additional_epidemic_columns(contacts$modified_df)

  # Step 5: reference-model columns, copied in by row position
  if (!is.null(null_model)) {
    processed <- mutate(
      processed,
      null_total_prop_prevalence = null_model$total_prop_prevalence,
      null_total_prop_dead = null_model$total_prop_dead,
      null_total_pop = null_model$total_pop,
      null_total_incidence = null_model$total_incidence
    )
  }

  if (!is.null(dem_model)) {
    processed <- mutate(
      processed,
      dem_total_prop_prevalence = dem_model$total_prop_prevalence,
      dem_total_prop_dead = dem_model$total_prop_dead,
      dem_total_pop = dem_model$total_pop,
      dem_total_incidence = dem_model$total_incidence
    )
  }

  # Summary metrics are computed straight into the result list, in the same
  # order as the list elements
  list(
    sim_data = processed,
    contact_matrices = contacts$ac_mats,
    peak_metrics = calculate_peak_metrics(processed),
    outbreak_metrics = calculate_outbreak_metrics(processed),
    pop_shrinkage = calculate_population_shrinkage(processed),
    death_metrics = calculate_death_metrics(processed),
    partisan_outbreaks = calculate_partisan_outbreak_sizes(processed),
    R0 = calculate_R0(processed)
  )
}

# ============================================================================
# Legacy function names for backward compatibility with existing Shiny code
# These wrap the new functions but maintain the old interface
# ============================================================================

#' Process simulation output (legacy wrapper)
#'
#' Kept so existing callers can keep using the old name; all work is done by
#' add_epidemic_metrics().
#'
#' @param sim_output Raw simulation output
#' @return Data frame with epidemic metrics
process_simulation_output <- function(sim_output) {
  with_metrics <- add_epidemic_metrics(sim_output)
  with_metrics
}

#' Process main simulation with comparison models (legacy wrapper)
#'
#' Runs add_epidemic_metrics() and then adds the column names the legacy code
#' expects: `*_infected` aliases of the prevalence columns, running sums,
#' `pop_*` and `dead_*` aliases, and columns copied or derived from the two
#' reference models by row position.
#'
#' In this function the `*_new_cases` columns are the step-to-step change in
#' prevalence (first row compared against zero).
#'
#' @param sim_output Raw simulation output
#' @param null_model Processed null model
#' @param dem_model Processed dem model
#' @return Data frame with all metrics
process_main_simulation <- function(sim_output, null_model, dem_model) {
  # Step-to-step prevalence change in each reference model; computed once and
  # reused for the derived columns below
  null_delta_prev <- null_model$total_prevalence -
    lag(null_model$total_prevalence, default = 0)
  dem_delta_prev <- dem_model$total_prevalence -
    lag(dem_model$total_prevalence, default = 0)

  base_metrics <- add_epidemic_metrics(sim_output)

  # Legacy aliases and running sums for the main simulation
  with_legacy <- mutate(
    base_metrics,
    total_infected = total_prevalence,
    rep_infected = rep_prevalence,
    dem_infected = dem_prevalence,
    ind_infected = ind_prevalence,
    cumulative_infected = cumsum(total_prevalence),
    cumulative_rep_infected = cumsum(rep_prevalence),
    cumulative_dem_infected = cumsum(dem_prevalence),
    cumulative_ind_infected = cumsum(ind_prevalence),
    total_new_cases = total_prevalence - lag(total_prevalence, default = 0),
    pop_a = rep_pop,
    pop_b = dem_pop,
    pop_c = ind_pop,
    total_prop_infected = total_prop_prevalence,
    rep_prop_infected = rep_prop_prevalence,
    dem_prop_infected = dem_prop_prevalence,
    ind_prop_infected = ind_prop_prevalence,
    # Only the population-wide running sum is divided by the row count
    cumulative_prop_infected = cumsum(total_prop_prevalence) / n(),
    cumulative_rep_prop_infected = cumsum(rep_prop_prevalence),
    cumulative_dem_prop_infected = cumsum(dem_prop_prevalence),
    cumulative_ind_prop_infected = cumsum(ind_prop_prevalence)
  )

  # Reference-model columns, death aliases and mortality rate
  mutate(
    with_legacy,
    null_total_prop_infected = null_model$total_prop_prevalence,
    dem_total_prop_infected = dem_model$total_prop_prevalence,
    dead_reps = rep_dead,
    dead_dems = dem_dead,
    dead_inds = ind_dead,
    total_prop_dead = total_dead / total_pop,
    null_total_prop_dead = null_model$total_prop_dead,
    dem_total_prop_dead = dem_model$total_prop_dead,
    # NA rather than a division by zero when the population is missing/empty
    total_daily_mortality_rate = ifelse(
      is.na(total_pop) | total_pop == 0,
      NA,
      total_new_deaths / total_pop
    ),
    dem_total_pop = dem_model$total_pop,
    dem_total_new_cases = dem_delta_prev,
    null_total_pop = null_model$total_pop,
    null_total_new_cases = null_delta_prev,
    null_total_prop_cases = null_delta_prev / null_model$total_pop,
    # Null-model rate per 100,000
    null_total_incidence_rate = null_delta_prev / null_model$total_pop * 100000
  )
}

#' Calculate contacts over time (legacy wrapper)
#'
#' Calls avg_contact_matrix_3gp() once per row of `sim_data` and returns the
#' matrix cells as columns, together with per-group contact totals and the
#' group sizes that were used.
#'
#' @param sim_data Processed simulation data. Needs `time`, `ca`, `cb`, `cc`
#'   and group sizes in `rep_pop`/`dem_pop`/`ind_pop` (or, for each one that
#'   is absent, the legacy `pop_a`/`pop_b`/`pop_c`).
#' @param beta_a Homophily parameter A
#' @param beta_b Homophily parameter B
#' @return Data frame with `time`, the nine contact columns (in the legacy
#'   column order), `Total_R_Contacts`, `Total_D_Contacts`,
#'   `Total_I_Contacts`, `N_a`, `N_b` and `N_c`. A zero-row `sim_data` yields
#'   a zero-row result.
calculate_contacts_over_time <- function(sim_data, beta_a, beta_b) {
  # Resolve the group-size columns once instead of on every iteration:
  # rep_pop/dem_pop/ind_pop if available, otherwise pop_a/pop_b/pop_c
  available <- names(sim_data)
  N_a <- if ("rep_pop" %in% available) sim_data$rep_pop else sim_data$pop_a
  N_b <- if ("dem_pop" %in% available) sim_data$dem_pop else sim_data$pop_b
  N_c <- if ("ind_pop" %in% available) sim_data$ind_pop else sim_data$pop_c

  n_steps <- nrow(sim_data)

  # Values are accumulated in a preallocated numeric matrix, one row per time
  # step, with the matrix cells laid out row by row
  cell_names <- c("R_R", "R_D", "R_I", "D_R", "D_D", "D_I", "I_R", "I_D", "I_I")
  cells <- matrix(
    NA_real_,
    nrow = n_steps,
    ncol = length(cell_names),
    dimnames = list(NULL, cell_names)
  )

  # seq_len() keeps the loop from running on zero-row input, where
  # 1:nrow(sim_data) would iterate over c(1, 0)
  for (r in seq_len(n_steps)) {
    ac_mat <- avg_contact_matrix_3gp(
      dbar_a = sim_data$ca[r],
      dbar_b = sim_data$cb[r],
      dbar_c = sim_data$cc[r],
      N_a = N_a[r],
      N_b = N_b[r],
      N_c = N_c[r],
      beta_a = beta_a,
      beta_b = beta_b
    )

    cells[r, ] <- c(
      ac_mat[1, 1], ac_mat[1, 2], ac_mat[1, 3],
      ac_mat[2, 1], ac_mat[2, 2], ac_mat[2, 3],
      ac_mat[3, 1], ac_mat[3, 2], ac_mat[3, 3]
    )
  }

  # Column order expected by the legacy code (same-group column first for the
  # D and I groups)
  legacy_order <- c("R_R", "R_D", "R_I", "D_D", "D_R", "D_I", "I_I", "I_R", "I_D")
  contacts_df <- data.frame(time = sim_data$time)
  for (col in legacy_order) {
    contacts_df[[col]] <- cells[, col]
  }

  contacts_df$Total_R_Contacts <- contacts_df$R_R + contacts_df$R_D + contacts_df$R_I
  contacts_df$Total_D_Contacts <- contacts_df$D_R + contacts_df$D_D + contacts_df$D_I
  contacts_df$Total_I_Contacts <- contacts_df$I_R + contacts_df$I_D + contacts_df$I_I
  contacts_df$N_a <- N_a
  contacts_df$N_b <- N_b
  contacts_df$N_c <- N_c

  contacts_df
}

#' Add force of infection to simulation data (legacy wrapper)
#'
#' Copies the model parameters and the contact-matrix cells into `sim_data`,
#' then derives force of infection, infection-source shares, new cases by
#' source, incidence rates and protection shares.
#'
#' On the first row every `*_from_*_new_cases` column is set to 1/3, so each
#' group's `*_new_cases` is 1 there.
#'
#' @param sim_data Processed simulation data
#' @param contacts_df Contact matrix data (columns `R_R` ... `I_I`), aligned
#'   with `sim_data` by row position
#' @param current_params Current parameters list
#' @return Data frame with force of infection metrics
add_force_of_infection <- function(sim_data, contacts_df, current_params) {
  # Group sizes: rep_pop/dem_pop/ind_pop if available, else the legacy names
  known_cols <- names(sim_data)
  size_grp_a <- if ("rep_pop" %in% known_cols) sim_data$rep_pop else sim_data$pop_a
  size_grp_b <- if ("dem_pop" %in% known_cols) sim_data$dem_pop else sim_data$pop_b
  size_grp_c <- if ("ind_pop" %in% known_cols) sim_data$ind_pop else sim_data$pop_c

  # Stage 1: model parameters as columns
  with_params <- mutate(
    sim_data,
    N0 = current_params$N0,
    trans_p = current_params$trans_p,
    kappa = current_params$kappa,
    ell = current_params$ell,
    rho = current_params$rho,
    vstart = current_params$vstart,
    gamma = current_params$gamma,
    frac_a = current_params$frac_a,
    frac_b = current_params$frac_b,
    pi_a = current_params$pi_a,
    pi_b = current_params$pi_b,
    pi_c = current_params$pi_c,
    phi_a = current_params$phi_a,
    phi_b = current_params$phi_b,
    phi_c = current_params$phi_c,
    zeta_a = current_params$zeta_a,
    zeta_b = current_params$zeta_b,
    zeta_c = current_params$zeta_c,
    mu_a = current_params$mu_a,
    mu_b = current_params$mu_b,
    mu_c = current_params$mu_c,
    vacc_a = current_params$vacc_a,
    vacc_b = current_params$vacc_b,
    vacc_c = current_params$vacc_c,
    I0_a = current_params$I0_a,
    I0_b = current_params$I0_b,
    I0_c = current_params$I0_c,
    beta_a = current_params$beta_a,
    beta_b = current_params$beta_b
  )

  # Stage 2: contact-matrix cells under their short names
  with_contacts <- mutate(
    with_params,
    ca_a = contacts_df$R_R,
    ca_b = contacts_df$R_D,
    ca_c = contacts_df$R_I,
    cb_a = contacts_df$D_R,
    cb_b = contacts_df$D_D,
    cb_c = contacts_df$D_I,
    cc_a = contacts_df$I_R,
    cc_b = contacts_df$I_D,
    cc_c = contacts_df$I_I
  )

  # Stage 3: force of infection (lambda_xy = pressure on group x from group
  # y) and the share of each group's total coming from each source
  with_foi <- mutate(
    with_contacts,
    lambda_aa = trans_p * (ca_a * (IUa / size_grp_a + kappa * IPa / size_grp_a)),
    lambda_ab = trans_p * (ca_b * (IUb / size_grp_b + kappa * IPb / size_grp_b)),
    lambda_ac = trans_p * (ca_c * (IUc / size_grp_c + kappa * IPc / size_grp_c)),
    lambda_a = lambda_aa + lambda_ab + lambda_ac,
    lambda_ba = trans_p * (cb_a * (IUa / size_grp_a + kappa * IPa / size_grp_a)),
    lambda_bb = trans_p * (cb_b * (IUb / size_grp_b + kappa * IPb / size_grp_b)),
    lambda_bc = trans_p * (cb_c * (IUc / size_grp_c + kappa * IPc / size_grp_c)),
    lambda_b = lambda_ba + lambda_bb + lambda_bc,
    lambda_ca = trans_p * (cc_a * (IUa / size_grp_a + kappa * IPa / size_grp_a)),
    lambda_cb = trans_p * (cc_b * (IUb / size_grp_b + kappa * IPb / size_grp_b)),
    lambda_cc = trans_p * (cc_c * (IUc / size_grp_c + kappa * IPc / size_grp_c)),
    lambda_c = lambda_ca + lambda_cb + lambda_cc,
    prop_rep_infections_from_reps = lambda_aa / lambda_a,
    prop_rep_infections_from_dems = lambda_ab / lambda_a,
    prop_rep_infections_from_inds = lambda_ac / lambda_a,
    prop_dem_infections_from_reps = lambda_ba / lambda_b,
    prop_dem_infections_from_dems = lambda_bb / lambda_b,
    prop_dem_infections_from_inds = lambda_bc / lambda_b,
    prop_ind_infections_from_reps = lambda_ca / lambda_c,
    prop_ind_infections_from_dems = lambda_cb / lambda_c,
    prop_ind_infections_from_inds = lambda_cc / lambda_c
  )

  # Stage 4: new cases by recipient and source group. Protected susceptibles
  # are scaled by kappa; the first row is fixed at 1/3 per source.
  with_cases <- mutate(
    with_foi,
    rep_from_rep_new_cases = if_else(
      row_number() == 1, 1 / 3, SUa * lambda_aa + SPa * lambda_aa * kappa
    ),
    rep_from_dem_new_cases = if_else(
      row_number() == 1, 1 / 3, SUa * lambda_ab + SPa * lambda_ab * kappa
    ),
    rep_from_ind_new_cases = if_else(
      row_number() == 1, 1 / 3, SUa * lambda_ac + SPa * lambda_ac * kappa
    ),
    rep_new_cases = rep_from_rep_new_cases + rep_from_dem_new_cases +
      rep_from_ind_new_cases,
    dem_from_rep_new_cases = if_else(
      row_number() == 1, 1 / 3, SUb * lambda_ba + SPb * lambda_ba * kappa
    ),
    dem_from_dem_new_cases = if_else(
      row_number() == 1, 1 / 3, SUb * lambda_bb + SPb * lambda_bb * kappa
    ),
    dem_from_ind_new_cases = if_else(
      row_number() == 1, 1 / 3, SUb * lambda_bc + SPb * lambda_bc * kappa
    ),
    dem_new_cases = dem_from_rep_new_cases + dem_from_dem_new_cases +
      dem_from_ind_new_cases,
    ind_from_rep_new_cases = if_else(
      row_number() == 1, 1 / 3, SUc * lambda_ca + SPc * lambda_ca * kappa
    ),
    ind_from_dem_new_cases = if_else(
      row_number() == 1, 1 / 3, SUc * lambda_cb + SPc * lambda_cb * kappa
    ),
    ind_from_ind_new_cases = if_else(
      row_number() == 1, 1 / 3, SUc * lambda_cc + SPc * lambda_cc * kappa
    ),
    ind_new_cases = ind_from_rep_new_cases + ind_from_dem_new_cases +
      ind_from_ind_new_cases,
    total_new_cases = rep_new_cases + dem_new_cases + ind_new_cases
  )

  # Stage 5: rates per 100,000 and protected share of each group
  mutate(
    with_cases,
    rep_incidence_rate = rep_new_cases / size_grp_a * 100000,
    dem_incidence_rate = dem_new_cases / size_grp_b * 100000,
    ind_incidence_rate = ind_new_cases / size_grp_c * 100000,
    total_incidence_rate = total_new_cases / total_pop * 100000,
    rep_prop_protected = (SPa + IPa + RPa) / size_grp_a,
    dem_prop_protected = (SPb + IPb + RPb) / size_grp_b,
    ind_prop_protected = (SPc + IPc + RPc) / size_grp_c
  )
}

#' Extract reference model metrics
#'
#' Convenience function that gathers the peak and outbreak metrics of the
#' null model and the peak metrics of the cautious model into one nested
#' list.
#'
#' @param null_model Processed null model output
#' @param cautious_model Processed cautious model output
#' @return List with two elements: `null` (`inflection_point`,
#'   `outbreak_size`, `auc`, `peak_infected`, `peak_proportion`,
#'   `crossing_time`) and `cautious` (`peak_proportion`, `crossing_time`).
extract_reference_model_metrics <- function(null_model, cautious_model) {
  baseline_peak <- calculate_peak_metrics(null_model)
  baseline_outbreak <- calculate_outbreak_metrics(null_model)
  careful_peak <- calculate_peak_metrics(cautious_model)

  # Null (baseline) model: timing, size and peak
  null_summary <- list(
    inflection_point = baseline_peak$inflection_point,
    outbreak_size = baseline_outbreak$outbreak_size,
    auc = baseline_outbreak$auc,
    peak_infected = baseline_peak$peak_infected,
    peak_proportion = baseline_peak$peak_proportion,
    crossing_time = baseline_peak$crossing_time
  )

  # Cautious model: only the peak share and its timing are kept
  cautious_summary <- list(
    peak_proportion = careful_peak$peak_proportion,
    crossing_time = careful_peak$crossing_time
  )

  list(null = null_summary, cautious = cautious_summary)
}