voxforge-world / engine /sim /rf /reporting.py
peiti's picture
Upload folder using huggingface_hub
b154e4c verified
from __future__ import annotations
import json
from pathlib import Path
from typing import List
import pandas as pd
from .utils import (
family_label,
fmt_float,
fmt_sci,
fmt_text,
safe_read_csv,
split_flags,
warning_badges,
)
def _summarise_families(passed: pd.DataFrame) -> pd.DataFrame:
    """Return one row per ``family`` with its row count and metric medians."""
    return passed.groupby("family").agg(
        count=("family", "count"),
        median_delivered=("delivered", "median"),
        median_P_wall_avg_kW=("P_wall_avg_kW", "median"),
        median_deltaT=("deltaT_pulse", "median"),
        median_BDR=("BDR_eff", "median"),
        median_R_f=("R_f", "median"),
        median_R_phi=("R_phi", "median"),
        median_R_c=("R_c", "median"),
        median_R_tol=("R_tol", "median"),
        median_C_src=("C_src", "median"),
        median_C_cool=("C_cool", "median"),
        median_balanced_v5=("balanced_v5", "median"),
        median_frontier_v5=("frontier_v5", "median"),
    ).reset_index()


def _count_reject_reasons(rejected: pd.DataFrame) -> pd.DataFrame:
    """Count pipe-separated ``fail_reasons`` tokens per family.

    Returns a frame with the columns ``family``, ``reason`` and ``count``.
    """
    reasons = rejected["fail_reasons"]
    # Missing reasons become "" and the column is forced to text, so the
    # ``.str`` accessor also works when the column is all-NaN (float64),
    # which is what happens when no candidate was rejected.
    tokens = reasons.where(reasons.notna(), "").astype(str).str.split("|")
    exploded = rejected.assign(fail_reasons_split=tokens).explode("fail_reasons_split")
    exploded = exploded.dropna(subset=["fail_reasons_split"])
    exploded = exploded[exploded["fail_reasons_split"] != ""]
    return (
        exploded.groupby(["family", "fail_reasons_split"])
        .size()
        .reset_index(name="count")
        .rename(columns={"fail_reasons_split": "reason"})
    )


def export_results(df: pd.DataFrame, out_dir: str | Path) -> None:
    """Write the CSV/JSON result files for one candidate table into ``out_dir``.

    Files written (all prefixed ``rf_mode_pack_v5_1_``):

    * ``all.csv`` - every row of ``df``.
    * ``passed.csv`` - rows where ``hard_pass`` is true, sorted by
      ``balanced_v5`` then ``delivered``, both descending.
    * ``shortlist_3x3.csv`` - passing rows that carry a ``candidate_role``,
      restricted to a fixed column list.
    * ``family_summary.csv`` - per-family count and medians of passing rows.
    * ``reject_reasons.csv`` - per-family counts of each failure reason.
    * ``meta.json`` - total / passed / failed row counts.

    Args:
        df: Candidate table. Must contain ``hard_pass``, ``fail_reasons``,
            ``candidate_role`` and every column named in the shortlist and
            family summary.
        out_dir: Target directory; created (with parents) if missing.
    """
    out_dir = Path(out_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    df.to_csv(out_dir / "rf_mode_pack_v5_1_all.csv", index=False)

    passed = df[df["hard_pass"]].copy()
    passed.sort_values(["balanced_v5", "delivered"], ascending=False).to_csv(
        out_dir / "rf_mode_pack_v5_1_passed.csv",
        index=False,
    )

    # A missing (NaN) role means "no role assigned", same as an empty string;
    # a bare `!= ""` comparison would let NaN rows into the shortlist.
    roles = passed["candidate_role"]
    shortlist = passed[roles.notna() & (roles != "")].copy()
    shortlist_cols = [
        "candidate_id", "type", "family", "candidate_role",
        "freq_GHz", "Eacc_MVpm", "L_m", "Vc_MV",
        "delivered", "P_wall_peak_kW", "P_wall_avg_kW",
        "pulse_length_ns", "rep_rate_Hz", "duty_cycle",
        "deltaT_pulse", "thermal_margin",
        "BDR_eff", "BDR_margin",
        "R_f", "R_phi", "R_c", "R_tol",
        "C_src", "C_cool",
        "mat_rf_wall", "mat_insulator", "mat_thermal_shield", "mat_structural",
        "suggested_primary_material",
        "regime", "c_capture", "c_loss",
        "balanced_v5", "frontier_v5",
        "hard_pass", "warning_flags",
    ]
    shortlist[shortlist_cols].sort_values(
        ["family", "candidate_role", "balanced_v5"],
        ascending=[True, True, False],
    ).to_csv(out_dir / "rf_mode_pack_v5_1_shortlist_3x3.csv", index=False)

    _summarise_families(passed).to_csv(
        out_dir / "rf_mode_pack_v5_1_family_summary.csv", index=False
    )

    rejected = df[~df["hard_pass"]].copy()
    _count_reject_reasons(rejected).to_csv(
        out_dir / "rf_mode_pack_v5_1_reject_reasons.csv", index=False
    )

    with open(out_dir / "rf_mode_pack_v5_1_meta.json", "w", encoding="utf-8") as f:
        json.dump({
            "rows_total": int(len(df)),
            "rows_passed": int(df["hard_pass"].sum()),
            "rows_failed": int((~df["hard_pass"]).sum()),
        }, f, ensure_ascii=False, indent=2)
# =========================================================
# GLOBAL VIEWS
# =========================================================
def deduplicate_global_best(df: pd.DataFrame, score_col="balanced_v5") -> pd.DataFrame:
    """Keep, per ``candidate_id``, the row ranking highest on ``score_col``.

    Rows are ordered by ``score_col`` and then ``delivered`` (both descending);
    the first row seen for each ``candidate_id`` is retained. An empty input
    is returned as a copy.
    """
    if df.empty:
        return df.copy()
    ranked = df.sort_values(by=[score_col, "delivered"], ascending=False)
    is_repeat = ranked.duplicated(subset=["candidate_id"], keep="first")
    return ranked[~is_repeat]
def deduplicate_global_safest(df: pd.DataFrame) -> pd.DataFrame:
    """Keep, per ``candidate_id``, the row ranking highest on the safety keys.

    Rows are ordered by ``R_tol``, ``C_cool``, ``C_src`` and ``balanced_v5``
    (all descending); the first row seen for each ``candidate_id`` is
    retained. An empty input is returned as a copy.
    """
    if df.empty:
        return df.copy()
    priority = ["R_tol", "C_cool", "C_src", "balanced_v5"]
    ranked = df.sort_values(by=priority, ascending=[False] * len(priority))
    is_repeat = ranked.duplicated(subset=["candidate_id"], keep="first")
    return ranked[~is_repeat]
def export_global_views(all_class_outputs: List[pd.DataFrame], out_dir: str | Path, reference_class: str) -> None:
    """Write cross-class ("global") CSV/JSON views into ``out_dir``.

    The per-class frames are concatenated, filtered to rows where
    ``hard_pass`` is true, and exported as:

    * ``global_concat_passed.csv`` - all passing rows.
    * ``global_dedup_balanced.csv`` / ``global_dedup_frontier.csv`` /
      ``global_dedup_safest.csv`` - one row per ``candidate_id`` as selected
      by ``deduplicate_global_best`` / ``deduplicate_global_safest``.
    * ``reference_<reference_class>.csv`` - passing rows whose
      ``feasibility_class`` equals ``reference_class``.
    * ``global_family_summary_balanced.csv`` - per-family count and medians
      of the balanced de-duplicated view.
    * ``global_meta.json`` - row counts of the views above.

    (All file names carry the ``rf_mode_pack_v5_1_`` prefix.)

    Args:
        all_class_outputs: One result frame per class; passed to ``pd.concat``.
        out_dir: Target directory, as ``str`` or ``Path``; created if missing.
        reference_class: Value matched against ``feasibility_class``; also
            embedded in the reference file name.
    """
    # Accept plain strings too, matching export_results / generate_report.
    out_dir = Path(out_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    concat_all = pd.concat(all_class_outputs, axis=0, ignore_index=True)
    passing = concat_all[concat_all["hard_pass"]].copy()
    passing.to_csv(out_dir / "rf_mode_pack_v5_1_global_concat_passed.csv", index=False)

    dedup_balanced = deduplicate_global_best(passing, score_col="balanced_v5")
    dedup_frontier = deduplicate_global_best(passing, score_col="frontier_v5")
    dedup_safest = deduplicate_global_safest(passing)
    dedup_balanced.to_csv(out_dir / "rf_mode_pack_v5_1_global_dedup_balanced.csv", index=False)
    dedup_frontier.to_csv(out_dir / "rf_mode_pack_v5_1_global_dedup_frontier.csv", index=False)
    dedup_safest.to_csv(out_dir / "rf_mode_pack_v5_1_global_dedup_safest.csv", index=False)

    ref = concat_all[
        (concat_all["feasibility_class"] == reference_class) &
        (concat_all["hard_pass"])
    ].copy()
    ref.to_csv(out_dir / f"rf_mode_pack_v5_1_reference_{reference_class}.csv", index=False)

    if dedup_balanced.empty:
        # Nothing to aggregate: an empty frame is written in place of a summary.
        fam_bal = pd.DataFrame()
    else:
        fam_bal = (
            dedup_balanced.groupby("family").agg(
                count=("family", "count"),
                median_delivered=("delivered", "median"),
                median_P_wall_avg_kW=("P_wall_avg_kW", "median"),
                median_deltaT=("deltaT_pulse", "median"),
                median_BDR=("BDR_eff", "median"),
                median_R_f=("R_f", "median"),
                median_R_phi=("R_phi", "median"),
                median_R_c=("R_c", "median"),
                median_R_tol=("R_tol", "median"),
                median_C_src=("C_src", "median"),
                median_C_cool=("C_cool", "median"),
                median_balanced_v5=("balanced_v5", "median"),
                median_frontier_v5=("frontier_v5", "median"),
            ).reset_index().sort_values("median_balanced_v5", ascending=False)
        )
    fam_bal.to_csv(out_dir / "rf_mode_pack_v5_1_global_family_summary_balanced.csv", index=False)

    with open(out_dir / "rf_mode_pack_v5_1_global_meta.json", "w", encoding="utf-8") as f:
        json.dump({
            "reference_class": reference_class,
            "rows_concat_passed": int(len(passing)),
            "rows_dedup_balanced": int(len(dedup_balanced)),
            "rows_dedup_frontier": int(len(dedup_frontier)),
            "rows_dedup_safest": int(len(dedup_safest)),
            "rows_reference_class": int(len(ref)),
        }, f, ensure_ascii=False, indent=2)
# =========================================================
# REPORT
# =========================================================
def top_line(df: pd.DataFrame, score_col: str, title: str) -> str:
    """Render a one-line Markdown bullet for the top-ranked row of ``df``.

    The top row is the first one after sorting by ``score_col`` and then
    ``delivered``, both descending. For an empty frame a fixed
    "No candidate" bullet mentioning ``title`` is returned instead.
    """
    if df.empty:
        return f"- No candidate for {title}."

    ranked = df.sort_values(by=[score_col, "delivered"], ascending=False)
    best = ranked.iloc[0]
    fragments = [
        f"- {title}: **{best['type']}** ({family_label(best['family'])}), ",
        f"f={fmt_float(best['freq_GHz'],3)} GHz, ",
        f"yield={fmt_sci(best['delivered'],2)}, ",
        f"Pavg={fmt_float(best['P_wall_avg_kW'],2)} kW, ",
        # The material column is optional; fall back to an empty string.
        f"material={fmt_text(best.get('suggested_primary_material', ''))}, ",
        f"regime={fmt_text(best['regime'])}, ",
        f"warnings={warning_badges(best['warning_flags'])}.",
    ]
    return "".join(fragments)
def safest_line(df: pd.DataFrame) -> str:
    """Render a one-line Markdown bullet for the "safest" row of ``df``.

    The row is the first one after sorting by ``R_tol``, ``C_cool``,
    ``C_src`` and ``balanced_v5``, all descending. For an empty frame a fixed
    placeholder bullet is returned.
    """
    if df.empty:
        return "- No safest candidate available."

    priority = ["R_tol", "C_cool", "C_src", "balanced_v5"]
    ranked = df.sort_values(by=priority, ascending=[False] * len(priority))
    pick = ranked.iloc[0]
    fragments = [
        f"- Global safest (deduplicated): **{pick['type']}** ({family_label(pick['family'])}), ",
        f"Rtol={fmt_float(pick['R_tol'],2)}, ",
        f"Csrc={fmt_float(pick['C_src'],2)}, ",
        f"Ccool={fmt_float(pick['C_cool'],2)}, ",
        # The material column is optional; fall back to an empty string.
        f"material={fmt_text(pick.get('suggested_primary_material', ''))}, ",
        f"warnings={warning_badges(pick['warning_flags'])}.",
    ]
    return "".join(fragments)
def family_summary_table(df: pd.DataFrame) -> List[str]:
    """Render a per-family summary frame as Markdown table lines.

    Rows are ordered by ``median_balanced_v5`` (descending) when that column
    exists, otherwise by ``count`` (descending) when that exists. An empty
    frame yields a single placeholder line.
    """
    if df.empty:
        return ["_No family summary available._"]

    # Pick the first available ordering column, if any.
    sort_key = next(
        (col for col in ("median_balanced_v5", "count") if col in df.columns),
        None,
    )
    table = df if sort_key is None else df.sort_values(sort_key, ascending=False)

    header = "| Family | Count | Median Yield | Median Pavg [kW] | Median ΔT [K] | Median BDR | Median Rf | Median Rφ | Median Rc | Median Rtol | Median Csrc | Median Ccool | Median Balanced | Median Frontier |"
    divider = "|---|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:|"
    lines = [header, divider]
    lines.extend(
        f"| {family_label(rec['family'])} | {int(rec['count'])} | "
        f"{fmt_sci(rec['median_delivered'],2)} | "
        f"{fmt_float(rec['median_P_wall_avg_kW'],2)} | "
        f"{fmt_float(rec['median_deltaT'],2)} | "
        f"{fmt_sci(rec['median_BDR'],2)} | "
        f"{fmt_float(rec['median_R_f'],2)} | "
        f"{fmt_float(rec['median_R_phi'],2)} | "
        f"{fmt_float(rec['median_R_c'],2)} | "
        f"{fmt_float(rec['median_R_tol'],2)} | "
        f"{fmt_float(rec['median_C_src'],2)} | "
        f"{fmt_float(rec['median_C_cool'],2)} | "
        f"{fmt_float(rec['median_balanced_v5'],3)} | "
        f"{fmt_float(rec['median_frontier_v5'],3)} |"
        for _, rec in table.iterrows()
    )
    return lines
def shortlist_table(shortlist: pd.DataFrame) -> List[str]:
    """Render the shortlist frame as Markdown table lines.

    Rows are ordered by ``family`` and ``candidate_role`` (ascending), then
    ``balanced_v5`` (descending). The "Score" cell shows ``frontier_v5`` for
    rows whose role is ``"aggressive"`` and ``balanced_v5`` for all others.
    An empty frame yields a single placeholder line.
    """
    if shortlist.empty:
        return ["_No shortlist candidates available._"]

    ordered = shortlist.copy().sort_values(
        by=["family", "candidate_role", "balanced_v5"],
        ascending=[True, True, False],
    )
    header = "| Family | Type | Role | f [GHz] | Eacc [MV/m] | Vc [MV] | Yield | Pavg [kW] | ΔT [K] | BDR | Rtol | Csrc | Ccool | RF Wall | Insulator | Shield | Structural | Primary Material | Score | Regime | Warnings |"
    divider = "|---|---|---|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:|---|---|---|---|---|---:|---|---|"
    lines = [header, divider]

    for _, rec in ordered.iterrows():
        if rec["candidate_role"] == "aggressive":
            score_to_show = rec["frontier_v5"]
        else:
            score_to_show = rec["balanced_v5"]
        # Material columns are optional and default to an empty string.
        row_text = (
            f"| {family_label(rec['family'])} | {rec['type']} | {fmt_text(rec['candidate_role'])} | "
            f"{fmt_float(rec['freq_GHz'],3)} | {fmt_float(rec['Eacc_MVpm'],2)} | {fmt_float(rec['Vc_MV'],2)} | "
            f"{fmt_sci(rec['delivered'],2)} | {fmt_float(rec['P_wall_avg_kW'],2)} | {fmt_float(rec['deltaT_pulse'],2)} | "
            f"{fmt_sci(rec['BDR_eff'],2)} | {fmt_float(rec['R_tol'],2)} | {fmt_float(rec['C_src'],2)} | {fmt_float(rec['C_cool'],2)} | "
            f"{fmt_text(rec.get('mat_rf_wall', ''))} | {fmt_text(rec.get('mat_insulator', ''))} | "
            f"{fmt_text(rec.get('mat_thermal_shield', ''))} | {fmt_text(rec.get('mat_structural', ''))} | "
            f"{fmt_text(rec.get('suggested_primary_material', ''))} | {fmt_float(score_to_show,3)} | "
            f"{fmt_text(rec['regime'])} | {warning_badges(rec['warning_flags'])} |"
        )
        lines.append(row_text)
    return lines
def reject_reason_table(df: pd.DataFrame, top_n=15) -> List[str]:
    """Render the ``top_n`` most frequent reject reasons as Markdown lines.

    ``df`` is expected to carry ``family``, ``reason`` and ``count`` columns;
    rows are ordered by ``count`` descending. An empty frame yields a single
    placeholder line.
    """
    if df.empty:
        return ["_No reject diagnostics available._"]

    ranked = df.copy().sort_values("count", ascending=False)
    most_common = ranked.head(top_n)

    lines = ["| Family | Reason | Count |", "|---|---|---:|"]
    lines.extend(
        f"| {family_label(rec['family'])} | {rec['reason']} | {int(rec['count'])} |"
        for _, rec in most_common.iterrows()
    )
    return lines
def warning_summary_global(passed: pd.DataFrame) -> List[str]:
    """Summarise warning flags of qualified candidates as nested Markdown bullets.

    ``warning_flags`` holds pipe-separated flag names. The result has one
    top-level bullet per flag, followed by one sub-bullet per family with the
    number of rows carrying that flag (families ordered by count, descending).

    Returns a single placeholder bullet when ``passed`` is empty, or when no
    row carries a non-empty flag.
    """
    if passed.empty:
        return ["- No warning-bearing candidates."]

    warn = passed.copy()
    warn["warning_flags"] = warn["warning_flags"].fillna("")
    warn = warn[warn["warning_flags"] != ""]
    if warn.empty:
        return ["- No yellow warnings among qualified candidates."]

    exploded = warn.assign(flag=warn["warning_flags"].str.split("|")).explode("flag")
    # Malformed strings such as "a|" or "|" split into empty tokens; drop them
    # so they do not produce a nameless "- :" heading (the reject-reason
    # export filters empty tokens the same way).
    exploded = exploded[exploded["flag"].notna() & (exploded["flag"] != "")]
    if exploded.empty:
        return ["- No yellow warnings among qualified candidates."]

    counts = (
        exploded.groupby(["family", "flag"])
        .size()
        .reset_index(name="count")
        .sort_values(["flag", "count"], ascending=[True, False])
    )

    lines: List[str] = []
    current_flag = None
    for _, r in counts.iterrows():
        # Rows are sorted by flag, so a change of flag starts a new heading.
        if r["flag"] != current_flag:
            current_flag = r["flag"]
            lines.append(f"- {current_flag}:")
        lines.append(f" - {family_label(r['family'])}: {int(r['count'])}")
    return lines
def class_conclusion(passed: pd.DataFrame) -> str:
    """Build the one-paragraph Markdown conclusion for a class.

    Names the family with the most rows, the family with the highest median
    ``balanced_v5``, and the best row (sorted by ``balanced_v5`` then
    ``delivered``, both descending). For an empty frame a fixed sentence is
    returned.
    """
    if passed.empty:
        return "No viable candidates in this class."

    # Family with the most qualified rows (value_counts is sorted descending).
    per_family = passed["family"].value_counts()
    top_family_count = per_family.index[0]
    top_count = int(per_family.iloc[0])

    # Family with the highest median balanced score.
    median_scores = passed.groupby("family")["balanced_v5"].median()
    top_family_score = median_scores.sort_values(ascending=False).index[0]

    ranked = passed.sort_values(by=["balanced_v5", "delivered"], ascending=False)
    best = ranked.iloc[0]

    fragments = [
        f"Dominant family by count: **{family_label(top_family_count)}** ({top_count} qualified). ",
        f"Top family by median balanced score: **{family_label(top_family_score)}**. ",
        f"Best candidate: **{best['type']}**, ",
        f"yield={fmt_sci(best['delivered'],2)}, ",
        f"Pavg={fmt_float(best['P_wall_avg_kW'],2)} kW, ",
        f"material={fmt_text(best.get('suggested_primary_material', ''))}, ",
        f"regime={fmt_text(best['regime'])}.",
    ]
    return "".join(fragments)
def build_class_section(base_dir: Path, class_name: str) -> List[str]:
    """Assemble the Markdown lines of the report section for one class.

    Reads the passed / shortlist / family-summary / reject-reason CSVs from
    ``base_dir / class_name`` via ``safe_read_csv`` and renders, in order:
    heading and conclusion, shortlist table, family summary, warning
    diagnostics and main rejection reasons.
    """
    class_dir = base_dir / class_name
    prefix = "rf_mode_pack_v5_1_"
    passed = safe_read_csv(class_dir / f"{prefix}passed.csv")
    shortlist = safe_read_csv(class_dir / f"{prefix}shortlist_3x3.csv")
    family = safe_read_csv(class_dir / f"{prefix}family_summary.csv")
    rejects = safe_read_csv(class_dir / f"{prefix}reject_reasons.csv")

    section: List[str] = []
    section.append(f"## {class_name}")
    section.append("")
    section.append(f"- Qualified designs: **{len(passed)}**")
    section.append(f"- Conclusion: {class_conclusion(passed)}")
    section.append("")
    section.append("### Shortlist (safest / balanced / aggressive)")
    section.append("")
    section += shortlist_table(shortlist)

    section.extend(["", "### Family summary", ""])
    section += family_summary_table(family)

    section.extend(["", "### Warning diagnostics", ""])
    section += warning_summary_global(passed)

    section.extend(["", "### Main rejection reasons", ""])
    section += reject_reason_table(rejects)

    # Trailing blank line separates this section from the next one.
    section.append("")
    return section
def build_global_section(base_dir: Path, reference_class: str) -> List[str]:
    """Assemble the Markdown lines of the report's global (cross-class) part.

    Reads the de-duplicated, family-summary and reference-class CSVs from
    ``base_dir / "global_views"`` via ``safe_read_csv`` and renders the report
    title, executive summary and global family ranking. A reference-class
    snapshot is appended only when the reference frame is non-empty.
    """
    views_dir = base_dir / "global_views"
    prefix = "rf_mode_pack_v5_1_"
    dedup_bal = safe_read_csv(views_dir / f"{prefix}global_dedup_balanced.csv")
    dedup_front = safe_read_csv(views_dir / f"{prefix}global_dedup_frontier.csv")
    dedup_safe = safe_read_csv(views_dir / f"{prefix}global_dedup_safest.csv")
    fam_bal = safe_read_csv(views_dir / f"{prefix}global_family_summary_balanced.csv")
    ref = safe_read_csv(views_dir / f"{prefix}reference_{reference_class}.csv")

    out: List[str] = []
    out.append("# RF Mode Pack v5.1 — Engineering Pass Report")
    out.append("")
    out.append("## Executive summary")
    out.append("")
    out.append(top_line(dedup_bal, "balanced_v5", "Global best balanced (deduplicated)"))
    out.append(top_line(dedup_front, "frontier_v5", "Global best frontier (deduplicated)"))
    out.append(safest_line(dedup_safe))
    out.append("")
    out.append(f"- Reference class for stable engineering view: **{reference_class}**")
    out.append("")
    out.append("## Global family ranking (deduplicated balanced view)")
    out.append("")
    out += family_summary_table(fam_bal)
    out.append("")

    if ref.empty:
        # No reference-class rows: skip the snapshot entirely.
        return out

    out.append(f"## Reference-class snapshot ({reference_class})")
    out.append("")
    out.append(top_line(ref, "balanced_v5", "Best balanced inside reference class"))
    out.append(top_line(ref, "frontier_v5", "Best frontier-scored candidate inside reference class"))
    out.append("")
    out.append("### Warning diagnostics in reference class")
    out.append("")
    out += warning_summary_global(ref)
    out.append("")
    return out
def generate_report(base_dir: str | Path, output_path: str | Path, class_order: List[str], reference_class: str) -> None:
    """Write the full Markdown report to ``output_path``.

    The report is the global section followed by one section per entry of
    ``class_order``, joined with newlines and written as UTF-8.

    Args:
        base_dir: Directory holding ``global_views`` and the per-class folders.
        output_path: Destination file; its parent directory is created if
            it does not exist yet.
        class_order: Class folder names, in the order they should appear.
        reference_class: Reference class name forwarded to the global section.
    """
    base_dir = Path(base_dir)
    output_path = Path(output_path)

    lines: List[str] = list(build_global_section(base_dir, reference_class))
    for class_name in class_order:
        lines.extend(build_class_section(base_dir, class_name))

    # The export functions create their own output folders; do the same here
    # so a fresh report location does not fail with FileNotFoundError.
    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_text("\n".join(lines), encoding="utf-8")