File size: 3,047 Bytes
46cc63a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
"""Briefing-aligned report: Golden Baseline (gap <1%) + Final Hybrid (gap <5%, F1≥0.80)."""

from __future__ import annotations

from pathlib import Path


def write_golden_baseline_report(metrics: dict, path: Path) -> None:
    """Write the Golden Baseline Markdown report for one run to *path*.

    The report contains three tables (Golden Baseline, Performance Squeeze,
    Hybrid Safety Net), an overall verdict line, and a pointer to the run's
    JSON file.

    Args:
        metrics: Run metrics. Keys read (all optional): ``run_id``,
            ``target_f1_weighted`` (default 0.80), ``max_train_test_gap_pp``
            (default 5.0), ``baseline_gap_target_pp`` (default 1.0), and the
            sub-dicts ``golden_baseline``, ``performance_squeeze`` and
            ``hybrid_safety_net``. A key that is absent or ``None`` is
            treated as "not provided".
        path: Destination file. Missing parent directories are created and
            the file is written as UTF-8.

    A metric that is not provided is rendered as ``—`` and never counts as
    meeting its target, so the overall verdict stays at "review".
    """
    placeholder = "—"

    def _threshold(key, default):
        # A null threshold falls back to the default instead of crashing float().
        raw = metrics.get(key)
        return default if raw is None else float(raw)

    def _section(key):
        # Tolerate a section that is absent or explicitly null.
        return metrics.get(key) or {}

    def _show(section, key):
        # Display value for a table cell; the raw value is kept unformatted.
        value = section.get(key)
        return placeholder if value is None else value

    def _mark(ok):
        return "✅" if ok else "⚠️"

    run_id = metrics.get("run_id")
    if run_id is None:
        run_id = "unknown"
    target = _threshold("target_f1_weighted", 0.80)
    max_gap_pp = _threshold("max_train_test_gap_pp", 5.0)
    baseline_gap_target_pp = _threshold("baseline_gap_target_pp", 1.0)

    base = _section("golden_baseline")
    squeeze = _section("performance_squeeze")
    hybrid = _section("hybrid_safety_net")

    base_gap = base.get("train_test_gap_pp")
    hybrid_gap = hybrid.get("train_test_gap_pp")
    hybrid_f1 = hybrid.get("f1_weighted")

    # A missing measurement can never satisfy its target.
    base_ok = base_gap is not None and base_gap < baseline_gap_target_pp
    hybrid_gap_ok = hybrid_gap is not None and hybrid_gap < max_gap_pp
    hybrid_f1_ok = hybrid_f1 is not None and hybrid_f1 >= target
    hybrid_ok = hybrid_gap_ok and hybrid_f1_ok

    table_header = ["| Metric | Value | Target |", "|--------|-------|--------|"]

    lines = [
        f"# Golden Baseline Strategy — {run_id}",
        "",
        "Two-step briefing alignment: **Esencial** frozen expert baseline, then **Experto** squeeze + hybrid.",
        "",
        "## Step 1 — Golden Baseline (Esencial)",
        "",
        *table_header,
        f"| F1 weighted (test) | **{_show(base, 'f1_weighted')}** | ~0.72 (pretrained expert) |",
        f"| Train–test gap (pp) | **{_show(base, 'train_test_gap_pp')}** | < {baseline_gap_target_pp}% {_mark(base_ok)} |",
        "| Fine-tuning | None (all layers frozen) | — |",
        f"| Threshold | {_show(base, 'threshold')} | val-tuned |",
        "",
        "## Step 2 — Performance Squeeze (Experto)",
        "",
        *table_header,
        f"| F1 weighted (test) | **{_show(squeeze, 'f1_weighted')}** | ≥ {target} |",
        f"| Train–test gap (pp) | **{_show(squeeze, 'train_test_gap_pp')}** | ≤ 4.9% |",
        f"| R-Drop | {squeeze.get('rdrop_enabled', False)} | enabled |",
        f"| Layers trained | last {_show(squeeze, 'freeze_mode')} | 2 + head |",
        "",
        "## Step 3 — Hybrid Safety Net (Final)",
        "",
        *table_header,
        f"| F1 weighted (test) | **{_show(hybrid, 'f1_weighted')}** | ≥ {target} {_mark(hybrid_f1_ok)} |",
        f"| Train–test gap (pp) | **{_show(hybrid, 'train_test_gap_pp')}** | < {max_gap_pp}% {_mark(hybrid_gap_ok)} |",
        f"| Weights | BERT {_show(hybrid, 'bert_weight')} / LR {_show(hybrid, 'lr_weight')} | anchor |",
        "| LR regularization | C=0.001, max_features=200 | stability |",
        "",
        f"### Overall: {'✅ Briefing targets met' if base_ok and hybrid_ok else '⚠️ Review gaps / F1'}",
        "",
        f"- JSON: `reports/golden_baseline/golden_baseline_run_{run_id}.json`",
        "",
    ]
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text("\n".join(lines), encoding="utf-8")