"""
Shared TFT quality-gate helper.
Single source of truth for the deployment thresholds used both by:
- the API (`/api/models/tft/summary`, `backend/app/main.py`)
- the CI script (`backend/scripts/tft_quality_gate.py`)
Lives under the `app` package so the HF production container (which copies
`backend/app/` but does NOT copy `backend/scripts/`) can import it.
"""
from __future__ import annotations
from typing import List, Optional, Tuple
def evaluate_quality_gate(
    da: float,
    sharpe: float,
    vr: float,
    tail_capture: Optional[float] = None,
    quantile_crossing_rate: Optional[float] = None,
    median_sort_gap_max: Optional[float] = None,
    pi80_width: Optional[float] = None,
    pi96_width: Optional[float] = None,
    weekly_directional_accuracy: Optional[float] = None,
    weekly_magnitude_ratio: Optional[float] = None,
    weekly_tail_capture_rate: Optional[float] = None,
    weekly_pi80_coverage: Optional[float] = None,
    weekly_pi80_width: Optional[float] = None,
    weekly_pi80_width_ratio: Optional[float] = None,
    weekly_pi96_coverage: Optional[float] = None,
    weekly_pi96_width: Optional[float] = None,
    weekly_pi96_width_ratio: Optional[float] = None,
    weekly_quantile_crossing_rate: Optional[float] = None,
    weekly_sorted_quantile_crossing_rate: Optional[float] = None,
    weekly_median_sort_gap_max: Optional[float] = None,
    weekly_sample_count: Optional[int] = None,
) -> Tuple[bool, List[str]]:
    """
    Evaluate TFT-ASRO metrics against deployment thresholds.

    Every violated threshold is recorded as one entry in ``reasons``; the
    function never raises on a breach, so callers always receive the complete
    list of failures instead of only the first one.

    Args:
        da: Accepted for interface compatibility; not checked in this function.
        sharpe: Fails when below -0.30.
        vr: Accepted for interface compatibility; not checked in this function.
        tail_capture: Optional; fails when below 0.35.
        quantile_crossing_rate: Required; fails when above 0.001.
        median_sort_gap_max: Optional; fails when above 0.001.
        pi80_width: Optional; fails when negative.
        pi96_width: Optional; fails when negative.
        weekly_directional_accuracy: Required; fails when below 0.53, or
            below 0.51 when ``weekly_sample_count`` is between 1 and 79.
        weekly_magnitude_ratio: Required; fails outside [0.65, 1.35], with an
            additional "explosion" reason when above 3.0.
        weekly_tail_capture_rate: Required; fails when below 0.45.
        weekly_pi80_coverage: Required; fails outside [0.74, 0.86].
        weekly_pi80_width: Optional; fails when negative.
        weekly_pi80_width_ratio: Required; fails when above 2.0 while
            ``weekly_pi80_coverage`` is also above 0.86.
        weekly_pi96_coverage: Required; only its presence is checked.
        weekly_pi96_width: Optional; fails when negative.
        weekly_pi96_width_ratio: Required; fails when above 3.0.
        weekly_quantile_crossing_rate: Required; fails when above 0.001.
        weekly_sorted_quantile_crossing_rate: Required; fails when above 0.001.
        weekly_median_sort_gap_max: Optional; fails when above 0.001.
        weekly_sample_count: Optional; a missing or zero count selects the
            stricter 0.53 weekly directional-accuracy threshold.

    Returns:
        (passed, reasons) — passed is True when no threshold is violated;
        otherwise reasons contains a human-readable explanation for each
        breach. Thresholds align with the Sprint-1 quality gate defined in
        docs/reports/tft-asro-sprint1-kapsamli-iyilestirme-*.md.
    """
    reasons: list[str] = []

    # Small weekly samples (1..79) get a relaxed directional-accuracy floor;
    # an unknown or zero sample count keeps the stricter 0.53 floor.
    sample_count = int(weekly_sample_count or 0)
    min_weekly_da = 0.51 if sample_count and sample_count < 80 else 0.53

    # --- Weekly point-forecast checks ---
    if weekly_directional_accuracy is None:
        reasons.append("Missing weekly_directional_accuracy")
    elif weekly_directional_accuracy < min_weekly_da:
        reasons.append(
            f"WeeklyDA={weekly_directional_accuracy:.4f} < {min_weekly_da:.2f}"
        )

    if weekly_magnitude_ratio is None:
        reasons.append("Missing weekly_magnitude_ratio")
    elif weekly_magnitude_ratio < 0.65 or weekly_magnitude_ratio > 1.35:
        reasons.append(
            f"WeeklyMagnitudeRatio={weekly_magnitude_ratio:.4f} outside [0.65, 1.35]"
        )
        # Nested under the non-None branch so a missing ratio never reaches
        # a `None > 3.0` comparison; anything above 3.0 is also above 1.35.
        if weekly_magnitude_ratio > 3.0:
            reasons.append(
                f"WeeklyMagnitudeExplosion={weekly_magnitude_ratio:.4f} > 3.0"
            )

    if weekly_tail_capture_rate is None:
        reasons.append("Missing weekly_tail_capture_rate")
    elif weekly_tail_capture_rate < 0.45:
        reasons.append(f"WeeklyTailCapture={weekly_tail_capture_rate:.4f} < 0.45")

    # --- Weekly 80% prediction-interval checks ---
    if weekly_pi80_coverage is None:
        reasons.append("Missing weekly_pi80_coverage")
    elif weekly_pi80_coverage < 0.74 or weekly_pi80_coverage > 0.86:
        reasons.append(
            f"WeeklyPI80={weekly_pi80_coverage:.4f} outside [0.74, 0.86]"
        )

    if weekly_pi80_width_ratio is None:
        reasons.append("Missing weekly_pi80_width_ratio")
    elif (
        weekly_pi80_width_ratio > 2.0
        and weekly_pi80_coverage is not None
        and weekly_pi80_coverage > 0.86
    ):
        # A wide interval is only flagged when it also over-covers.
        reasons.append(
            f"WeeklyPI80Overwide={weekly_pi80_width_ratio:.4f} with coverage={weekly_pi80_coverage:.4f}"
        )

    if weekly_pi80_width is not None and weekly_pi80_width < 0.0:
        reasons.append(f"WeeklyPI80Width={weekly_pi80_width:.4f} < 0.0")

    # --- Weekly 96% prediction-interval checks ---
    if weekly_pi96_coverage is None:
        reasons.append("Missing weekly_pi96_coverage")

    if weekly_pi96_width_ratio is None:
        reasons.append("Missing weekly_pi96_width_ratio")
    elif weekly_pi96_width_ratio > 3.0:
        reasons.append(f"WeeklyPI96WidthRatio={weekly_pi96_width_ratio:.4f} > 3.0")

    if weekly_pi96_width is not None and weekly_pi96_width < 0.0:
        reasons.append(f"WeeklyPI96Width={weekly_pi96_width:.4f} < 0.0")

    # --- Weekly quantile-ordering checks ---
    # These are reported as reasons like every other breach (not raised), so
    # the caller still receives the (passed, reasons) tuple.
    if weekly_quantile_crossing_rate is None:
        reasons.append("Missing weekly_quantile_crossing_rate")
    elif weekly_quantile_crossing_rate > 0.001:
        reasons.append(
            f"WeeklyPublicQuantileCrossing={weekly_quantile_crossing_rate:.4f} > 0.001"
        )

    if weekly_sorted_quantile_crossing_rate is None:
        reasons.append("Missing weekly_sorted_quantile_crossing_rate")
    elif weekly_sorted_quantile_crossing_rate > 0.001:
        reasons.append(
            f"WeeklyOrderedQuantileCrossing={weekly_sorted_quantile_crossing_rate:.4f} > 0.001"
        )

    if weekly_median_sort_gap_max is not None and weekly_median_sort_gap_max > 0.001:
        reasons.append(
            f"WeeklyOrderedMedianSortGapMax={weekly_median_sort_gap_max:.4f} > 0.001"
        )

    # --- Non-weekly checks ---
    if sharpe < -0.30:
        reasons.append(f"Sharpe={sharpe:.4f} < -0.30")

    if tail_capture is not None and tail_capture < 0.35:
        reasons.append(f"TailCapture={tail_capture:.4f} < 0.35")

    if quantile_crossing_rate is None:
        reasons.append("Missing quantile_crossing_rate")
    elif quantile_crossing_rate > 0.001:
        reasons.append(
            f"PublicQuantileCrossing={quantile_crossing_rate:.4f} > 0.001"
        )

    if median_sort_gap_max is not None and median_sort_gap_max > 0.001:
        reasons.append(
            f"OrderedMedianSortGapMax={median_sort_gap_max:.4f} > 0.001"
        )

    if pi80_width is not None and pi80_width < 0.0:
        reasons.append(f"PI80Width={pi80_width:.4f} < 0.0")

    if pi96_width is not None and pi96_width < 0.0:
        reasons.append(f"PI96Width={pi96_width:.4f} < 0.0")

    return len(reasons) == 0, reasons
def evaluate_quality_gate_warnings(
    vr: float,
    mae_vs_naive_zero: Optional[float] = None,
    weekly_mae_vs_naive_zero: Optional[float] = None,
) -> List[str]:
    """Collect advisory stabilization warnings that do not fail promotion yet.

    Args:
        vr: Warns when above 2.5 (overdispersed) or below 0.4 (underdispersed).
        mae_vs_naive_zero: Optional; warns when above 1.25.
        weekly_mae_vs_naive_zero: Optional; warns when above 1.25.

    Returns:
        The warning messages in check order; empty when nothing is flagged.
    """
    notes: list[str] = []

    # The two VR bounds cannot both be breached, so a single chain suffices.
    if vr > 2.5:
        notes.append(f"VR={vr:.4f} > 2.5 - model overdispersed")
    elif vr < 0.4:
        notes.append(f"VR={vr:.4f} < 0.4 - model underdispersed")

    # Both MAE ratios share the same limit and message shape.
    baseline_checks = (
        ("MAEvsNaiveZero", mae_vs_naive_zero),
        ("WeeklyMAEvsNaiveZero", weekly_mae_vs_naive_zero),
    )
    for label, ratio in baseline_checks:
        if ratio is None:
            continue
        if ratio > 1.25:
            notes.append(
                f"{label}={ratio:.4f} > 1.25 - worse than warning baseline"
            )

    return notes
|