import itertools
import warnings
from typing import Dict, List, Optional

import pandas as pd


def build_scenarios(
    baseline_row: pd.Series,
    variable_options: Dict[str, List],
    max_scenarios: int = 2000,
) -> pd.DataFrame:
    """
    Create a scenario dataframe by varying selected variables around a
    baseline patient row.

    Every combination (Cartesian product) of the candidate values is applied
    on top of a copy of ``baseline_row``; combinations are generated in
    ``itertools.product`` order (the last variable varies fastest) and at most
    ``max_scenarios`` of them are kept.

    Args:
        baseline_row: one patient's input row (Series).
        variable_options: {variable_name: [candidate_values]}. A variable
            that is not already a label of ``baseline_row`` is added as a
            new entry.
        max_scenarios: upper bound on the number of rows returned.

    Returns:
        DataFrame with one row per kept combination. Rows are built from
        copies of ``baseline_row``, so the row labels follow pandas' rules
        for constructing a frame from a list of Series.

    Raises:
        ValueError: if ``baseline_row`` is None or empty, or if
            ``max_scenarios`` is negative.
    """
    if baseline_row is None or baseline_row.empty:
        raise ValueError("baseline_row is required (single patient input row).")
    if max_scenarios < 0:
        raise ValueError("max_scenarios must be non-negative.")

    variables = list(variable_options)
    choices = [variable_options[name] for name in variables]

    # Truncate lazily: the full product can be astronomically large, so never
    # materialise more combinations than will actually be returned.
    combos = itertools.islice(itertools.product(*choices), max_scenarios)

    # Work on an object-dtype template so that assigning a candidate value of
    # a different type (e.g. a float or string into an all-integer row) does
    # not depend on implicit dtype upcasting during item assignment. Column
    # dtypes are re-inferred when the DataFrame is constructed.
    template = baseline_row.astype(object)

    rows = []
    for combo in combos:
        row = template.copy()
        for name, value in zip(variables, combo):
            row[name] = value
        rows.append(row)
    return pd.DataFrame(rows)


def rank_scenarios(
    df_pred: pd.DataFrame,
    gvhd_col: str = "pred_aGVHD",
    surv_col: Optional[str] = "surv_1y",
    objective: str = "min_gvhd_max_survival",
) -> pd.DataFrame:
    """
    Rank scenarios and return them best-first with a fresh 0..n-1 index.

    Objectives:
    - min_gvhd: sort by gvhd_col ascending
    - max_survival: sort by surv_col descending
    - min_gvhd_max_survival: gvhd low first, survival high next

    Fallbacks (kept for backward compatibility):
    - ``max_survival`` when ``surv_col`` is None/empty or not a column of
      ``df_pred`` falls back to the combined ranking and emits a
      ``UserWarning``.
    - An unrecognised ``objective`` is treated as ``min_gvhd_max_survival``
      and emits a ``UserWarning``.
    - The combined ranking uses only ``gvhd_col`` when the survival column
      is unavailable.

    Sorting is stable, so scenarios that tie on the sort keys keep their
    relative order from ``df_pred``. The input frame is not modified.

    Raises:
        KeyError: if a required sort column (e.g. ``gvhd_col``) is missing,
            as raised by ``DataFrame.sort_values``.
    """
    has_survival = bool(surv_col) and surv_col in df_pred.columns

    if objective == "min_gvhd":
        sort_cols, ascending = [gvhd_col], [True]
    elif objective == "max_survival" and has_survival:
        sort_cols, ascending = [surv_col], [False]
    else:
        if objective == "max_survival":
            # The caller asked for a survival ranking we cannot provide;
            # make the fallback visible instead of silently ranking by GVHD.
            warnings.warn(
                f"Survival column {surv_col!r} is not available; "
                f"ranking by {gvhd_col!r} instead.",
                UserWarning,
                stacklevel=2,
            )
        elif objective != "min_gvhd_max_survival":
            warnings.warn(
                f"Unknown objective {objective!r}; "
                "falling back to 'min_gvhd_max_survival'.",
                UserWarning,
                stacklevel=2,
            )
        sort_cols, ascending = [gvhd_col], [True]
        if has_survival:
            sort_cols.append(surv_col)
            ascending.append(False)

    # mergesort is stable, which keeps tied scenarios in a deterministic
    # order; sort_values returns a new frame, so df_pred is left untouched.
    ranked = df_pred.sort_values(sort_cols, ascending=ascending, kind="mergesort")
    return ranked.reset_index(drop=True)