"""
Integrated markdown report for Phase 5 expert runs.
"""
from __future__ import annotations
from pathlib import Path
def _toxic_gap_pp(model_metrics: dict):
    """Return the train/test toxic-F1 gap in percentage points, or None if absent.

    Prefers the ready-made ``train_test_gap_toxic_pp`` value; otherwise derives
    it from the fractional ``train_test_gap_toxic``, rounded to hide float
    noise such as ``7.000000000000001``.
    """
    gap_pp = model_metrics.get("train_test_gap_toxic_pp")
    if gap_pp is not None:
        return gap_pp
    gap_fraction = model_metrics.get("train_test_gap_toxic")
    if gap_fraction is None:
        return None
    return round(gap_fraction * 100, 2)


def _toxic_gap_ok(model_metrics: dict, gap_pp) -> bool:
    """Return whether the toxic gap meets the ``|gap| < 5 pp`` target.

    An explicit ``gap_toxic_ok`` flag wins. Without one the result is computed
    from ``gap_pp``; an unknown gap is never reported as passing.
    """
    flag = model_metrics.get("gap_toxic_ok")
    if flag is not None:
        return bool(flag)
    return gap_pp is not None and abs(gap_pp) < 5


def write_expert_report(metrics: dict, path: Path) -> None:
    """Write the Phase 5 expert-run markdown report to ``path``.

    Args:
        metrics: Run metrics. Keys read here: ``run_id``; the per-model dicts
            ``transformer``, ``logistic_regression`` and ``ensemble`` (each
            may carry ``f1_toxic``, ``f1_toxic_train``, ``threshold``,
            ``train_test_gap_toxic_pp`` / ``train_test_gap_toxic`` and
            ``gap_toxic_ok``); and ``augmentation`` (``pivot_lang``,
            ``train_size_before``, ``train_size_after``, ``added_samples``).
            Missing or empty entries are skipped or shown as ``—``.
        path: Destination file. Parent directories are created if needed and
            the file is written as UTF-8.
    """
    run_id = metrics.get("run_id", "unknown")
    lines = [
        f"# Phase 5 Expert Adaptation — {run_id}",
        "",
        "## Targets",
        "- Test **F1-toxic** > 0.75",
        "- |Train F1-toxic − Test F1-toxic| < 5 pp (0.05)",
        "",
        "## Holdout test (tuned thresholds on validation)",
        "",
        "| Model | F1-toxic (test) | F1-toxic (train) | Toxic gap (pp) | Threshold | Gap OK |",
        "|-------|-------------------|--------------------|----------------|-----------|--------|",
    ]

    for key, label in (
        ("transformer", "Toxic-BERT"),
        ("logistic_regression", "LR-TFIDF (250 feat)"),
        ("ensemble", "Hybrid 0.7/0.3"),
    ):
        m = metrics.get(key)
        if not m:
            continue
        gap_pp = _toxic_gap_pp(m)
        gap_cell = "—" if gap_pp is None else gap_pp
        gap_ok = "✅" if _toxic_gap_ok(m, gap_pp) else "⚠️"
        lines.append(
            f"| {label} | {m.get('f1_toxic', '—')} | {m.get('f1_toxic_train', '—')} | "
            f"{gap_cell} | {m.get('threshold', '—')} | {gap_ok} |"
        )

    # ``or {}`` also covers an explicit ``"augmentation": None`` entry.
    aug = metrics.get("augmentation") or {}
    lines.extend(
        [
            "",
            "## Augmentation",
            f"- Pivot language: {aug.get('pivot_lang', '—')}",
            f"- Train size: {aug.get('train_size_before', '—')} → "
            f"{aug.get('train_size_after', '—')} "
            f"(+{aug.get('added_samples', 0)})",
            "",
            "## Verdict",
        ]
    )

    # The verdict reuses the same gap helpers as the table so the two
    # sections of one report can never disagree.
    verdicts = []
    for key, label in (
        ("transformer", "Toxic-BERT"),
        ("ensemble", "Hybrid"),
    ):
        m = metrics.get(key)
        if not m:
            continue
        gap_pp = _toxic_gap_pp(m)
        if _toxic_gap_ok(m, gap_pp):
            verdicts.append(f"**{label}** toxic gap < 5 pp ✅")
        else:
            shown_gap = "?" if gap_pp is None else gap_pp
            verdicts.append(f"**{label}** toxic gap {shown_gap} pp ⚠️")
    lines.append(
        "; ".join(verdicts) if verdicts else "No model metrics recorded."
    )

    lines.extend(["", f"- JSON: `reports/expert/expert_run_{run_id}.json`", ""])
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text("\n".join(lines), encoding="utf-8")
|