File size: 6,618 Bytes
990ad35
 
 
 
 
 
 
 
 
 
 
 
 
c271c72
990ad35
 
c271c72
 
 
 
 
 
 
e411cee
 
d317049
 
 
 
e411cee
4c79e2a
 
e411cee
4c79e2a
d317049
4c79e2a
d317049
 
c271c72
990ad35
 
 
 
 
 
 
 
 
 
d317049
 
 
 
 
 
 
 
 
 
 
 
e74897d
 
d317049
 
 
 
 
 
 
 
 
 
 
4c79e2a
 
 
 
 
 
e411cee
 
4c79e2a
 
 
 
 
 
 
 
e411cee
 
4c79e2a
4d13aee
 
e411cee
 
 
 
4c79e2a
 
 
e411cee
 
 
4c79e2a
d317049
e411cee
 
 
 
d317049
990ad35
 
c271c72
 
4d13aee
 
e411cee
 
 
 
 
 
 
 
990ad35
 
e411cee
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
"""
Shared TFT quality-gate helper.

Single source of truth for the deployment thresholds used both by:
  - the API (`/api/models/tft/summary`, `backend/app/main.py`)
  - the CI script (`backend/scripts/tft_quality_gate.py`)

Lives under the `app` package so the HF production container (which copies
`backend/app/` but does NOT copy `backend/scripts/`) can import it.
"""

from __future__ import annotations

from typing import List, Optional, Tuple


def evaluate_quality_gate(
    da: float,
    sharpe: float,
    vr: float,
    tail_capture: Optional[float] = None,
    quantile_crossing_rate: Optional[float] = None,
    median_sort_gap_max: Optional[float] = None,
    pi80_width: Optional[float] = None,
    pi96_width: Optional[float] = None,
    weekly_directional_accuracy: Optional[float] = None,
    weekly_magnitude_ratio: Optional[float] = None,
    weekly_tail_capture_rate: Optional[float] = None,
    weekly_pi80_coverage: Optional[float] = None,
    weekly_pi80_width: Optional[float] = None,
    weekly_pi80_width_ratio: Optional[float] = None,
    weekly_pi96_coverage: Optional[float] = None,
    weekly_pi96_width: Optional[float] = None,
    weekly_pi96_width_ratio: Optional[float] = None,
    weekly_quantile_crossing_rate: Optional[float] = None,
    weekly_sorted_quantile_crossing_rate: Optional[float] = None,
    weekly_median_sort_gap_max: Optional[float] = None,
    weekly_sample_count: Optional[int] = None,
) -> Tuple[bool, List[str]]:
    """
    Evaluate TFT-ASRO metrics against deployment thresholds.

    Every breach is collected as a human-readable reason; the function never
    raises for a threshold violation, so callers always receive the complete
    list of problems in a single pass (including quantile-crossing and
    median-sort-gap breaches).

    Args:
        da: Accepted for interface compatibility; not read by this function.
        sharpe: Fails when below -0.30.
        vr: Accepted for interface compatibility; not read by this function.
        tail_capture: Optional; fails when below 0.35.
        quantile_crossing_rate: Required; fails when missing or above 0.001.
        median_sort_gap_max: Optional; fails when above 0.001.
        pi80_width / pi96_width: Optional; fail when negative.
        weekly_directional_accuracy: Required; minimum is 0.51 when
            ``weekly_sample_count`` is non-zero and below 80, otherwise 0.53.
        weekly_magnitude_ratio: Required; must lie in [0.65, 1.35]. A value
            above 3.0 adds a second "explosion" reason.
        weekly_tail_capture_rate: Required; fails when below 0.45.
        weekly_pi80_coverage: Required; must lie in [0.74, 0.86].
        weekly_pi80_width_ratio: Required; flagged as over-wide only when it
            exceeds 2.0 while the PI80 coverage is also above 0.86.
        weekly_pi80_width / weekly_pi96_width: Optional; fail when negative.
        weekly_pi96_coverage: Required; only its presence is checked.
        weekly_pi96_width_ratio: Required; fails when above 3.0.
        weekly_quantile_crossing_rate: Required; fails when above 0.001.
        weekly_sorted_quantile_crossing_rate: Required; fails when above 0.001.
        weekly_median_sort_gap_max: Optional; fails when above 0.001.
        weekly_sample_count: Optional; selects the weekly DA minimum.

    Returns:
        (passed, reasons) — passed is True when no threshold is violated;
        otherwise reasons contains a human-readable explanation for each
        breach. Thresholds align with the Sprint-1 quality gate defined in
        docs/reports/tft-asro-sprint1-kapsamli-iyilestirme-*.md.
    """
    reasons: List[str] = []

    # A missing or zero sample count falls back to the stricter 0.53 minimum.
    sample_count = int(weekly_sample_count or 0)
    min_weekly_da = 0.51 if sample_count and sample_count < 80 else 0.53

    # --- Weekly point-forecast quality -------------------------------------
    if weekly_directional_accuracy is None:
        reasons.append("Missing weekly_directional_accuracy")
    elif weekly_directional_accuracy < min_weekly_da:
        reasons.append(f"WeeklyDA={weekly_directional_accuracy:.4f} < {min_weekly_da:.2f}")

    if weekly_magnitude_ratio is None:
        reasons.append("Missing weekly_magnitude_ratio")
    elif weekly_magnitude_ratio < 0.65 or weekly_magnitude_ratio > 1.35:
        reasons.append(f"WeeklyMagnitudeRatio={weekly_magnitude_ratio:.4f} outside [0.65, 1.35]")
        # An explosion is reported in addition to the out-of-band reason.
        if weekly_magnitude_ratio > 3.0:
            reasons.append(f"WeeklyMagnitudeExplosion={weekly_magnitude_ratio:.4f} > 3.0")

    if weekly_tail_capture_rate is None:
        reasons.append("Missing weekly_tail_capture_rate")
    elif weekly_tail_capture_rate < 0.45:
        reasons.append(f"WeeklyTailCapture={weekly_tail_capture_rate:.4f} < 0.45")

    # --- Weekly prediction intervals ---------------------------------------
    if weekly_pi80_coverage is None:
        reasons.append("Missing weekly_pi80_coverage")
    elif weekly_pi80_coverage < 0.74 or weekly_pi80_coverage > 0.86:
        reasons.append(f"WeeklyPI80={weekly_pi80_coverage:.4f} outside [0.74, 0.86]")

    if weekly_pi80_width_ratio is None:
        reasons.append("Missing weekly_pi80_width_ratio")
    elif weekly_pi80_width_ratio > 2.0 and weekly_pi80_coverage is not None and weekly_pi80_coverage > 0.86:
        reasons.append(
            f"WeeklyPI80Overwide={weekly_pi80_width_ratio:.4f} with coverage={weekly_pi80_coverage:.4f}"
        )
    if weekly_pi80_width is not None and weekly_pi80_width < 0.0:
        reasons.append(f"WeeklyPI80Width={weekly_pi80_width:.4f} < 0.0")

    if weekly_pi96_coverage is None:
        reasons.append("Missing weekly_pi96_coverage")

    if weekly_pi96_width_ratio is None:
        reasons.append("Missing weekly_pi96_width_ratio")
    elif weekly_pi96_width_ratio > 3.0:
        reasons.append(f"WeeklyPI96WidthRatio={weekly_pi96_width_ratio:.4f} > 3.0")
    if weekly_pi96_width is not None and weekly_pi96_width < 0.0:
        reasons.append(f"WeeklyPI96Width={weekly_pi96_width:.4f} < 0.0")

    # --- Weekly quantile consistency ---------------------------------------
    # These are reported as reasons (not raised) so the gate keeps its
    # (passed, reasons) contract and earlier findings are not discarded.
    if weekly_quantile_crossing_rate is None:
        reasons.append("Missing weekly_quantile_crossing_rate")
    elif weekly_quantile_crossing_rate > 0.001:
        reasons.append(
            f"WeeklyPublicQuantileCrossing={weekly_quantile_crossing_rate:.4f} > 0.001"
        )

    if weekly_sorted_quantile_crossing_rate is None:
        reasons.append("Missing weekly_sorted_quantile_crossing_rate")
    elif weekly_sorted_quantile_crossing_rate > 0.001:
        reasons.append(
            f"WeeklyOrderedQuantileCrossing={weekly_sorted_quantile_crossing_rate:.4f} > 0.001"
        )

    if weekly_median_sort_gap_max is not None and weekly_median_sort_gap_max > 0.001:
        reasons.append(
            f"WeeklyOrderedMedianSortGapMax={weekly_median_sort_gap_max:.4f} > 0.001"
        )

    # --- Non-weekly metrics ------------------------------------------------
    if sharpe < -0.30:
        reasons.append(f"Sharpe={sharpe:.4f} < -0.30")
    if tail_capture is not None and tail_capture < 0.35:
        reasons.append(f"TailCapture={tail_capture:.4f} < 0.35")
    if quantile_crossing_rate is None:
        reasons.append("Missing quantile_crossing_rate")
    elif quantile_crossing_rate > 0.001:
        reasons.append(f"PublicQuantileCrossing={quantile_crossing_rate:.4f} > 0.001")
    if median_sort_gap_max is not None and median_sort_gap_max > 0.001:
        reasons.append(f"OrderedMedianSortGapMax={median_sort_gap_max:.4f} > 0.001")
    if pi80_width is not None and pi80_width < 0.0:
        reasons.append(f"PI80Width={pi80_width:.4f} < 0.0")
    if pi96_width is not None and pi96_width < 0.0:
        reasons.append(f"PI96Width={pi96_width:.4f} < 0.0")

    return not reasons, reasons


def evaluate_quality_gate_warnings(
    vr: float,
    mae_vs_naive_zero: Optional[float] = None,
    weekly_mae_vs_naive_zero: Optional[float] = None,
) -> List[str]:
    """
    Collect advisory (non-failing) stabilization warnings.

    Args:
        vr: Warned about when above 2.5 (overdispersed) or below 0.4
            (underdispersed).
        mae_vs_naive_zero: Optional; warned about when above 1.25.
        weekly_mae_vs_naive_zero: Optional; warned about when above 1.25.

    Returns:
        Warning messages in a fixed order: VR first, then the overall MAE
        ratio, then the weekly MAE ratio. Empty when nothing is flagged.
    """
    overdispersed = vr > 2.5
    underdispersed = vr < 0.4
    # A ratio of None means "not supplied" and never produces a warning.
    mae_flagged = mae_vs_naive_zero is not None and mae_vs_naive_zero > 1.25
    weekly_mae_flagged = (
        weekly_mae_vs_naive_zero is not None and weekly_mae_vs_naive_zero > 1.25
    )

    messages: List[str] = []
    if overdispersed:
        messages.append(f"VR={vr:.4f} > 2.5 - model overdispersed")
    if underdispersed:
        messages.append(f"VR={vr:.4f} < 0.4 - model underdispersed")
    if mae_flagged:
        messages.append(
            f"MAEvsNaiveZero={mae_vs_naive_zero:.4f} > 1.25 - worse than warning baseline"
        )
    if weekly_mae_flagged:
        messages.append(
            f"WeeklyMAEvsNaiveZero={weekly_mae_vs_naive_zero:.4f} > 1.25 - worse than warning baseline"
        )
    return messages