from __future__ import annotations import json from pathlib import Path from typing import List import pandas as pd from .utils import ( family_label, fmt_float, fmt_sci, fmt_text, safe_read_csv, split_flags, warning_badges, ) def export_results(df: pd.DataFrame, out_dir: str | Path) -> None: out_dir = Path(out_dir) out_dir.mkdir(parents=True, exist_ok=True) df.to_csv(out_dir / "rf_mode_pack_v5_1_all.csv", index=False) passed = df[df["hard_pass"]].copy() passed.sort_values(["balanced_v5", "delivered"], ascending=False).to_csv( out_dir / "rf_mode_pack_v5_1_passed.csv", index=False, ) shortlist = passed[passed["candidate_role"] != ""].copy() shortlist_cols = [ "candidate_id", "type", "family", "candidate_role", "freq_GHz", "Eacc_MVpm", "L_m", "Vc_MV", "delivered", "P_wall_peak_kW", "P_wall_avg_kW", "pulse_length_ns", "rep_rate_Hz", "duty_cycle", "deltaT_pulse", "thermal_margin", "BDR_eff", "BDR_margin", "R_f", "R_phi", "R_c", "R_tol", "C_src", "C_cool", "mat_rf_wall", "mat_insulator", "mat_thermal_shield", "mat_structural", "suggested_primary_material", "regime", "c_capture", "c_loss", "balanced_v5", "frontier_v5", "hard_pass", "warning_flags", ] shortlist[shortlist_cols].sort_values( ["family", "candidate_role", "balanced_v5"], ascending=[True, True, False], ).to_csv(out_dir / "rf_mode_pack_v5_1_shortlist_3x3.csv", index=False) family_summary = passed.groupby("family").agg( count=("family", "count"), median_delivered=("delivered", "median"), median_P_wall_avg_kW=("P_wall_avg_kW", "median"), median_deltaT=("deltaT_pulse", "median"), median_BDR=("BDR_eff", "median"), median_R_f=("R_f", "median"), median_R_phi=("R_phi", "median"), median_R_c=("R_c", "median"), median_R_tol=("R_tol", "median"), median_C_src=("C_src", "median"), median_C_cool=("C_cool", "median"), median_balanced_v5=("balanced_v5", "median"), median_frontier_v5=("frontier_v5", "median"), ).reset_index() family_summary.to_csv(out_dir / "rf_mode_pack_v5_1_family_summary.csv", index=False) rejected = df[~df["hard_pass"]].copy() reject_counts = ( rejected.assign(fail_reasons_split=rejected["fail_reasons"].str.split("|")) .explode("fail_reasons_split") .dropna(subset=["fail_reasons_split"]) .query("fail_reasons_split != ''") .groupby(["family", "fail_reasons_split"]) .size() .reset_index(name="count") .rename(columns={"fail_reasons_split": "reason"}) ) reject_counts.to_csv(out_dir / "rf_mode_pack_v5_1_reject_reasons.csv", index=False) with open(out_dir / "rf_mode_pack_v5_1_meta.json", "w", encoding="utf-8") as f: json.dump({ "rows_total": int(len(df)), "rows_passed": int(df["hard_pass"].sum()), "rows_failed": int((~df["hard_pass"]).sum()), }, f, ensure_ascii=False, indent=2) # ========================================================= # GLOBAL VIEWS # ========================================================= def deduplicate_global_best(df: pd.DataFrame, score_col="balanced_v5") -> pd.DataFrame: if df.empty: return df.copy() s = df.sort_values([score_col, "delivered"], ascending=False).copy() return s.drop_duplicates(subset=["candidate_id"], keep="first") def deduplicate_global_safest(df: pd.DataFrame) -> pd.DataFrame: if df.empty: return df.copy() s = df.sort_values(["R_tol", "C_cool", "C_src", "balanced_v5"], ascending=[False, False, False, False]).copy() return s.drop_duplicates(subset=["candidate_id"], keep="first") def export_global_views(all_class_outputs: List[pd.DataFrame], out_dir: Path, reference_class: str) -> None: out_dir.mkdir(parents=True, exist_ok=True) concat_all = pd.concat(all_class_outputs, axis=0, ignore_index=True) passing = concat_all[concat_all["hard_pass"]].copy() passing.to_csv(out_dir / "rf_mode_pack_v5_1_global_concat_passed.csv", index=False) dedup_balanced = deduplicate_global_best(passing, score_col="balanced_v5") dedup_frontier = deduplicate_global_best(passing, score_col="frontier_v5") dedup_safest = deduplicate_global_safest(passing) dedup_balanced.to_csv(out_dir / "rf_mode_pack_v5_1_global_dedup_balanced.csv", index=False) dedup_frontier.to_csv(out_dir / "rf_mode_pack_v5_1_global_dedup_frontier.csv", index=False) dedup_safest.to_csv(out_dir / "rf_mode_pack_v5_1_global_dedup_safest.csv", index=False) ref = concat_all[ (concat_all["feasibility_class"] == reference_class) & (concat_all["hard_pass"]) ].copy() ref.to_csv(out_dir / f"rf_mode_pack_v5_1_reference_{reference_class}.csv", index=False) fam_bal = ( dedup_balanced.groupby("family").agg( count=("family", "count"), median_delivered=("delivered", "median"), median_P_wall_avg_kW=("P_wall_avg_kW", "median"), median_deltaT=("deltaT_pulse", "median"), median_BDR=("BDR_eff", "median"), median_R_f=("R_f", "median"), median_R_phi=("R_phi", "median"), median_R_c=("R_c", "median"), median_R_tol=("R_tol", "median"), median_C_src=("C_src", "median"), median_C_cool=("C_cool", "median"), median_balanced_v5=("balanced_v5", "median"), median_frontier_v5=("frontier_v5", "median"), ).reset_index().sort_values("median_balanced_v5", ascending=False) if not dedup_balanced.empty else pd.DataFrame() ) fam_bal.to_csv(out_dir / "rf_mode_pack_v5_1_global_family_summary_balanced.csv", index=False) with open(out_dir / "rf_mode_pack_v5_1_global_meta.json", "w", encoding="utf-8") as f: json.dump({ "reference_class": reference_class, "rows_concat_passed": int(len(passing)), "rows_dedup_balanced": int(len(dedup_balanced)), "rows_dedup_frontier": int(len(dedup_frontier)), "rows_dedup_safest": int(len(dedup_safest)), "rows_reference_class": int(len(ref)), }, f, ensure_ascii=False, indent=2) # ========================================================= # REPORT # ========================================================= def top_line(df: pd.DataFrame, score_col: str, title: str) -> str: if df.empty: return f"- No candidate for {title}." row = df.sort_values([score_col, "delivered"], ascending=False).iloc[0] return ( f"- {title}: **{row['type']}** ({family_label(row['family'])}), " f"f={fmt_float(row['freq_GHz'],3)} GHz, " f"yield={fmt_sci(row['delivered'],2)}, " f"Pavg={fmt_float(row['P_wall_avg_kW'],2)} kW, " f"material={fmt_text(row.get('suggested_primary_material', ''))}, " f"regime={fmt_text(row['regime'])}, " f"warnings={warning_badges(row['warning_flags'])}." ) def safest_line(df: pd.DataFrame) -> str: if df.empty: return "- No safest candidate available." row = df.sort_values(["R_tol", "C_cool", "C_src", "balanced_v5"], ascending=[False, False, False, False]).iloc[0] return ( f"- Global safest (deduplicated): **{row['type']}** ({family_label(row['family'])}), " f"Rtol={fmt_float(row['R_tol'],2)}, " f"Csrc={fmt_float(row['C_src'],2)}, " f"Ccool={fmt_float(row['C_cool'],2)}, " f"material={fmt_text(row.get('suggested_primary_material', ''))}, " f"warnings={warning_badges(row['warning_flags'])}." ) def family_summary_table(df: pd.DataFrame) -> List[str]: if df.empty: return ["_No family summary available._"] s = df.copy() if "median_balanced_v5" in s.columns: s = s.sort_values("median_balanced_v5", ascending=False) elif "count" in s.columns: s = s.sort_values("count", ascending=False) lines = [ "| Family | Count | Median Yield | Median Pavg [kW] | Median ΔT [K] | Median BDR | Median Rf | Median Rφ | Median Rc | Median Rtol | Median Csrc | Median Ccool | Median Balanced | Median Frontier |", "|---|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:|", ] for _, r in s.iterrows(): lines.append( f"| {family_label(r['family'])} | {int(r['count'])} | " f"{fmt_sci(r['median_delivered'],2)} | " f"{fmt_float(r['median_P_wall_avg_kW'],2)} | " f"{fmt_float(r['median_deltaT'],2)} | " f"{fmt_sci(r['median_BDR'],2)} | " f"{fmt_float(r['median_R_f'],2)} | " f"{fmt_float(r['median_R_phi'],2)} | " f"{fmt_float(r['median_R_c'],2)} | " f"{fmt_float(r['median_R_tol'],2)} | " f"{fmt_float(r['median_C_src'],2)} | " f"{fmt_float(r['median_C_cool'],2)} | " f"{fmt_float(r['median_balanced_v5'],3)} | " f"{fmt_float(r['median_frontier_v5'],3)} |" ) return lines def shortlist_table(shortlist: pd.DataFrame) -> List[str]: if shortlist.empty: return ["_No shortlist candidates available._"] s = shortlist.copy().sort_values(["family", "candidate_role", "balanced_v5"], ascending=[True, True, False]) lines = [ "| Family | Type | Role | f [GHz] | Eacc [MV/m] | Vc [MV] | Yield | Pavg [kW] | ΔT [K] | BDR | Rtol | Csrc | Ccool | RF Wall | Insulator | Shield | Structural | Primary Material | Score | Regime | Warnings |", "|---|---|---|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:|---|---|---|---|---|---:|---|---|", ] for _, r in s.iterrows(): score_to_show = r["frontier_v5"] if r["candidate_role"] == "aggressive" else r["balanced_v5"] lines.append( f"| {family_label(r['family'])} | {r['type']} | {fmt_text(r['candidate_role'])} | " f"{fmt_float(r['freq_GHz'],3)} | {fmt_float(r['Eacc_MVpm'],2)} | {fmt_float(r['Vc_MV'],2)} | " f"{fmt_sci(r['delivered'],2)} | {fmt_float(r['P_wall_avg_kW'],2)} | {fmt_float(r['deltaT_pulse'],2)} | " f"{fmt_sci(r['BDR_eff'],2)} | {fmt_float(r['R_tol'],2)} | {fmt_float(r['C_src'],2)} | {fmt_float(r['C_cool'],2)} | " f"{fmt_text(r.get('mat_rf_wall', ''))} | {fmt_text(r.get('mat_insulator', ''))} | " f"{fmt_text(r.get('mat_thermal_shield', ''))} | {fmt_text(r.get('mat_structural', ''))} | " f"{fmt_text(r.get('suggested_primary_material', ''))} | {fmt_float(score_to_show,3)} | " f"{fmt_text(r['regime'])} | {warning_badges(r['warning_flags'])} |" ) return lines def reject_reason_table(df: pd.DataFrame, top_n=15) -> List[str]: if df.empty: return ["_No reject diagnostics available._"] s = df.copy().sort_values("count", ascending=False).head(top_n) lines = ["| Family | Reason | Count |", "|---|---|---:|"] for _, r in s.iterrows(): lines.append(f"| {family_label(r['family'])} | {r['reason']} | {int(r['count'])} |") return lines def warning_summary_global(passed: pd.DataFrame) -> List[str]: if passed.empty: return ["- No warning-bearing candidates."] warn = passed.copy() warn["warning_flags"] = warn["warning_flags"].fillna("") warn = warn[warn["warning_flags"] != ""] if warn.empty: return ["- No yellow warnings among qualified candidates."] exploded = ( warn.assign(flag=warn["warning_flags"].str.split("|")) .explode("flag") .groupby(["family", "flag"]) .size() .reset_index(name="count") .sort_values(["flag", "count"], ascending=[True, False]) ) lines, current_flag = [], None for _, r in exploded.iterrows(): if r["flag"] != current_flag: current_flag = r["flag"] lines.append(f"- {current_flag}:") lines.append(f" - {family_label(r['family'])}: {int(r['count'])}") return lines def class_conclusion(passed: pd.DataFrame) -> str: if passed.empty: return "No viable candidates in this class." fam_counts = passed["family"].value_counts() top_family_count = fam_counts.index[0] top_count = int(fam_counts.iloc[0]) fam_score = passed.groupby("family")["balanced_v5"].median().sort_values(ascending=False) top_family_score = fam_score.index[0] best = passed.sort_values(["balanced_v5", "delivered"], ascending=False).iloc[0] return ( f"Dominant family by count: **{family_label(top_family_count)}** ({top_count} qualified). " f"Top family by median balanced score: **{family_label(top_family_score)}**. " f"Best candidate: **{best['type']}**, " f"yield={fmt_sci(best['delivered'],2)}, " f"Pavg={fmt_float(best['P_wall_avg_kW'],2)} kW, " f"material={fmt_text(best.get('suggested_primary_material', ''))}, " f"regime={fmt_text(best['regime'])}." ) def build_class_section(base_dir: Path, class_name: str) -> List[str]: class_dir = base_dir / class_name passed = safe_read_csv(class_dir / "rf_mode_pack_v5_1_passed.csv") shortlist = safe_read_csv(class_dir / "rf_mode_pack_v5_1_shortlist_3x3.csv") family = safe_read_csv(class_dir / "rf_mode_pack_v5_1_family_summary.csv") rejects = safe_read_csv(class_dir / "rf_mode_pack_v5_1_reject_reasons.csv") lines = [ f"## {class_name}", "", f"- Qualified designs: **{len(passed)}**", f"- Conclusion: {class_conclusion(passed)}", "", "### Shortlist (safest / balanced / aggressive)", "", ] lines.extend(shortlist_table(shortlist)) lines += ["", "### Family summary", ""] lines.extend(family_summary_table(family)) lines += ["", "### Warning diagnostics", ""] lines.extend(warning_summary_global(passed)) lines += ["", "### Main rejection reasons", ""] lines.extend(reject_reason_table(rejects)) lines.append("") return lines def build_global_section(base_dir: Path, reference_class: str) -> List[str]: gdir = base_dir / "global_views" dedup_bal = safe_read_csv(gdir / "rf_mode_pack_v5_1_global_dedup_balanced.csv") dedup_front = safe_read_csv(gdir / "rf_mode_pack_v5_1_global_dedup_frontier.csv") dedup_safe = safe_read_csv(gdir / "rf_mode_pack_v5_1_global_dedup_safest.csv") fam_bal = safe_read_csv(gdir / "rf_mode_pack_v5_1_global_family_summary_balanced.csv") ref = safe_read_csv(gdir / f"rf_mode_pack_v5_1_reference_{reference_class}.csv") lines = [ "# RF Mode Pack v5.1 — Engineering Pass Report", "", "## Executive summary", "", top_line(dedup_bal, "balanced_v5", "Global best balanced (deduplicated)"), top_line(dedup_front, "frontier_v5", "Global best frontier (deduplicated)"), safest_line(dedup_safe), "", f"- Reference class for stable engineering view: **{reference_class}**", "", "## Global family ranking (deduplicated balanced view)", "", ] lines.extend(family_summary_table(fam_bal)) lines.append("") if not ref.empty: lines += [ f"## Reference-class snapshot ({reference_class})", "", top_line(ref, "balanced_v5", "Best balanced inside reference class"), top_line(ref, "frontier_v5", "Best frontier-scored candidate inside reference class"), "", "### Warning diagnostics in reference class", "", ] lines.extend(warning_summary_global(ref)) lines.append("") return lines def generate_report(base_dir: str | Path, output_path: str | Path, class_order: List[str], reference_class: str) -> None: base_dir = Path(base_dir) output_path = Path(output_path) lines = [] lines.extend(build_global_section(base_dir, reference_class)) for class_name in class_order: lines.extend(build_class_section(base_dir, class_name)) output_path.write_text("\n".join(lines), encoding="utf-8")