File size: 3,081 Bytes
46cc63a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
"""
Integrated markdown report for stable production runs.
"""

from __future__ import annotations

from pathlib import Path
from typing import Any


# Maximum allowed |train − test| F1 gap, in percentage points (see "Targets").
_GAP_TARGET_PP = 5


def _gap_cells(model_metrics: dict) -> tuple[Any, str]:
    """Return the (gap, status) table cells for one model's metrics.

    Reads ``train_test_gap_pp`` when it is present and not ``None``;
    otherwise derives the value from ``train_test_gap`` multiplied by 100.
    When neither is available both cells are ``"—"``, so a missing
    measurement is never reported as a passing gap of 0.
    """
    gap_pp = model_metrics.get("train_test_gap_pp")
    if gap_pp is None:
        gap_fraction = model_metrics.get("train_test_gap")
        if gap_fraction is None:
            return "—", "—"
        # Rounding strips binary float noise such as 7.000000000000001.
        gap_pp = round(gap_fraction * 100, 4)
    # The target is on the absolute gap: a test score far above the train
    # score is as much a stability warning as the reverse.
    status = "✅" if abs(gap_pp) < _GAP_TARGET_PP else "⚠️"
    return gap_pp, status


def write_integrated_report(metrics: dict, path: Path) -> None:
    """Write a human-readable markdown report with scores and gaps.

    Args:
        metrics: Run metrics. Keys read here: ``run_id``; the per-model
            entries ``distilbert``, ``logistic_regression`` and ``ensemble``;
            ``lr_gap_search``; the cross-validation entries
            ``cv_logistic_regression`` and ``cv_distilbert``; and
            ``augmentation``. Every key is optional; a missing or empty
            entry is skipped or rendered as ``—``.
        path: Destination file. Missing parent directories are created and
            the file is written as UTF-8.
    """
    run_id = metrics.get("run_id", "unknown")
    lines = [
        f"# Stable Production Run — {run_id}",
        "",
        "## Targets",
        "- Test F1 (weighted) > 0.80",
        "- |Train F1 − Test/Val F1| < 5 pp (0.05)",
        "",
        "## Holdout test (final models)",
        "",
        "| Model | F1 (test) | F1 (train) | Train−Test gap (pp) | ROC-AUC | Gap OK |",
        "|-------|-----------|------------|---------------------|---------|--------|",
    ]

    for key, label in (
        ("distilbert", "DistilBERT"),
        ("logistic_regression", "LR-TFIDF"),
        ("ensemble", "Hybrid ensemble"),
    ):
        m = metrics.get(key)
        if not m:
            continue
        gap_pp, gap_ok = _gap_cells(m)
        lines.append(
            f"| {label} | {m.get('f1_weighted', '—')} | {m.get('f1_train', '—')} | "
            f"{gap_pp} | {m.get('roc_auc', '—')} | {gap_ok} |"
        )

    lr_gap = metrics.get("lr_gap_search")
    if lr_gap:
        lines.extend(
            [
                "",
                f"**LR gap search:** C={lr_gap.get('C')}, max_features={lr_gap.get('max_features')}, "
                f"min_df={lr_gap.get('min_df')} — gap {lr_gap.get('train_test_gap_pp')} pp "
                f"({'OK' if lr_gap.get('gap_ok') else 'above target'})",
            ]
        )

    lines.extend(["", "## Stratified 5-fold CV (train+val pool)", ""])

    for cv_key, title in (
        ("cv_logistic_regression", "LR-TFIDF"),
        ("cv_distilbert", "DistilBERT"),
    ):
        cv = metrics.get(cv_key)
        if not cv:
            continue
        stable = "✅" if cv.get("stable_across_folds") else "⚠️"
        lines.extend(
            [
                f"### {title} {stable}",
                "",
                f"- F1 mean ± std: **{cv.get('f1_mean')} ± {cv.get('f1_std')}** "
                f"(min {cv.get('f1_min')}, max {cv.get('f1_max')})",
                f"- Fold gap mean: {cv.get('gap_mean')} (max {cv.get('gap_max')})",
                f"- ROC-AUC mean: {cv.get('roc_auc_mean')}",
                "",
            ]
        )

    # `or {}` also covers an explicit None stored under "augmentation".
    augmentation = metrics.get("augmentation") or {}
    lines.extend(
        [
            "## Augmentation",
            "",
            f"- Enabled: {augmentation.get('enabled', True)}",
            f"- Train size after aug: {augmentation.get('train_size_after', '—')}",
            "",
            "## Artifacts",
            "",
            f"- JSON: `reports/stable/stable_run_{run_id}.json`",
            f"- CSV: `reports/stable/stable_summary_{run_id}.csv`",
            "",
        ]
    )

    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text("\n".join(lines), encoding="utf-8")