File size: 3,047 Bytes
"""Briefing-aligned report: Golden Baseline (gap <1%) + Final Hybrid (gap <5%, F1≥0.80)."""
from __future__ import annotations
from pathlib import Path
def _to_float(value):
    """Return *value* as a float, or ``None`` when it is absent or not numeric."""
    if value is None:
        return None
    try:
        return float(value)
    except (TypeError, ValueError):
        return None


def write_golden_baseline_report(metrics: dict, path: Path) -> None:
    """Render the Golden Baseline markdown report and write it to *path*.

    The report has three tables (golden baseline, performance squeeze,
    hybrid safety net) followed by an overall verdict line.

    Args:
        metrics: Run metrics. Keys read at the top level are ``run_id``,
            ``target_f1_weighted`` (default 0.80), ``max_train_test_gap_pp``
            (default 5.0), ``baseline_gap_target_pp`` (default 1.0) and the
            section dicts ``golden_baseline``, ``performance_squeeze`` and
            ``hybrid_safety_net``. A section that is absent or ``None`` is
            treated as empty.
        path: Destination file. Missing parent directories are created and
            the file is written as UTF-8.

    Raises:
        TypeError, ValueError: If one of the three threshold values is
            present but cannot be converted to ``float``.

    Missing or non-numeric gap / F1 values are rendered as ``—`` and are
    counted as *not* meeting their target, so an incomplete run can never
    be reported as passing and no placeholder number appears in the table.
    """
    missing = "—"

    def shown(value):
        # Display the raw metric as recorded; only absent values get the dash.
        return missing if value is None else value

    run_id = metrics.get("run_id", "unknown")
    target = float(metrics.get("target_f1_weighted", 0.80))
    max_gap_pp = float(metrics.get("max_train_test_gap_pp", 5.0))
    baseline_gap_target_pp = float(metrics.get("baseline_gap_target_pp", 1.0))

    # `or {}` also covers a section explicitly stored as None.
    base = metrics.get("golden_baseline") or {}
    squeeze = metrics.get("performance_squeeze") or {}
    hybrid = metrics.get("hybrid_safety_net") or {}

    base_gap = base.get("train_test_gap_pp")
    hybrid_gap = hybrid.get("train_test_gap_pp")
    hybrid_f1 = hybrid.get("f1_weighted")

    # Compare on coerced numbers so string-valued metrics do not raise.
    base_gap_num = _to_float(base_gap)
    hybrid_gap_num = _to_float(hybrid_gap)
    hybrid_f1_num = _to_float(hybrid_f1)

    base_ok = base_gap_num is not None and base_gap_num < baseline_gap_target_pp
    hybrid_gap_ok = hybrid_gap_num is not None and hybrid_gap_num < max_gap_pp
    hybrid_f1_ok = hybrid_f1_num is not None and hybrid_f1_num >= target
    hybrid_ok = hybrid_gap_ok and hybrid_f1_ok

    table_header = [
        "| Metric | Value | Target |",
        "|--------|-------|--------|",
    ]

    lines = [
        f"# Golden Baseline Strategy — {run_id}",
        "",
        "Two-step briefing alignment: **Esencial** frozen expert baseline, then **Experto** squeeze + hybrid.",
        "",
        "## Step 1 — Golden Baseline (Esencial)",
        "",
        *table_header,
        f"| F1 weighted (test) | **{shown(base.get('f1_weighted'))}** | ~0.72 (pretrained expert) |",
        f"| Train–test gap (pp) | **{shown(base_gap)}** | < {baseline_gap_target_pp}% {'✅' if base_ok else '⚠️'} |",
        "| Fine-tuning | None (all layers frozen) | — |",
        f"| Threshold | {shown(base.get('threshold'))} | val-tuned |",
        "",
        "## Step 2 — Performance Squeeze (Experto)",
        "",
        *table_header,
        f"| F1 weighted (test) | **{shown(squeeze.get('f1_weighted'))}** | ≥ {target} |",
        f"| Train–test gap (pp) | **{shown(squeeze.get('train_test_gap_pp'))}** | ≤ 4.9% |",
        f"| R-Drop | {squeeze.get('rdrop_enabled', False)} | enabled |",
        f"| Layers trained | last {shown(squeeze.get('freeze_mode'))} | 2 + head |",
        "",
        "## Step 3 — Hybrid Safety Net (Final)",
        "",
        *table_header,
        f"| F1 weighted (test) | **{shown(hybrid_f1)}** | ≥ {target} {'✅' if hybrid_f1_ok else '⚠️'} |",
        f"| Train–test gap (pp) | **{shown(hybrid_gap)}** | < {max_gap_pp}% {'✅' if hybrid_gap_ok else '⚠️'} |",
        f"| Weights | BERT {shown(hybrid.get('bert_weight'))} / LR {shown(hybrid.get('lr_weight'))} | anchor |",
        "| LR regularization | C=0.001, max_features=200 | stability |",
        "",
        f"### Overall: {'✅ Briefing targets met' if base_ok and hybrid_ok else '⚠️ Review gaps / F1'}",
        "",
        f"- JSON: `reports/golden_baseline/golden_baseline_run_{run_id}.json`",
        "",
    ]

    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text("\n".join(lines), encoding="utf-8")
|