2026_MLB_Model / analytics /confidence.py
Syntrex's picture
Promote strikeout v2 and harden telemetry models
50dc123
raw
history blame
4.99 kB
from __future__ import annotations
from typing import Any
def _safe_float(value: Any) -> float | None:
try:
if value is None:
return None
text = str(value).strip().lower()
if text in {"", "nan", "none"}:
return None
return float(value)
except Exception:
return None
def _coerce_probability(value: Any) -> float | None:
    """Return ``value`` as a float, or ``None`` if it is missing or not numeric."""
    if value is None:
        return None
    try:
        return float(value)
    except (TypeError, ValueError, OverflowError):
        return None


def compute_confidence(model_outputs: dict[str, Any], game_row: dict[str, Any]) -> dict[str, Any]:
    """Build the confidence payload for one prediction.

    Two paths:

    * **Pass-through** - when ``model_outputs["confidence_score"]`` is not
      ``None`` the upstream confidence fields are normalised and returned
      as-is. ``confidence_score_display`` takes precedence over
      ``confidence_score`` for the reported value, and the bucket falls back
      to ``"medium"`` when neither bucket field is usable.
    * **Legacy heuristic** - otherwise a 0-100 score is computed starting
      from 50 and adjusted by these drivers:

      - batter / pitcher identity present
      - EV90 present
      - probability sanity (hit, HR, TB2+ each inside a plausible range)
      - live state completeness (status, outs, balls/strikes)
      - bullpen certainty (starter-stays and bullpen-entry probabilities)

    Args:
        model_outputs: Mapping of model output fields for the matchup.
        game_row: Mapping describing the current game state.

    Returns:
        A dict with the keys ``confidence``, ``confidence_bucket``,
        ``confidence_reasons``, ``confidence_score_raw``,
        ``confidence_score_display``, ``confidence_source``,
        ``confidence_component_bonuses``, ``confidence_component_penalties``,
        ``confidence_primary_driver`` and ``confidence_summary_label``.

    Probability fields that cannot be converted to ``float`` are treated the
    same as missing values rather than raising.
    """
    # ---- Pass-through: an upstream confidence model already scored this row.
    if model_outputs.get("confidence_score") is not None:
        display_score = _safe_float(model_outputs.get("confidence_score_display"))
        canonical_score = (
            display_score
            if display_score is not None
            else _safe_float(model_outputs.get("confidence_score"))
        )
        bucket = str(
            model_outputs.get("confidence_bucket")
            or model_outputs.get("confidence_bucket_display")
            or "medium"
        ).strip().lower()
        return {
            "confidence": canonical_score,
            # A whitespace-only bucket strips to "" and falls back to "medium".
            "confidence_bucket": bucket if bucket else "medium",
            "confidence_reasons": list(model_outputs.get("confidence_reasons") or []),
            "confidence_score_raw": _safe_float(model_outputs.get("confidence_score_raw")),
            "confidence_score_display": canonical_score,
            "confidence_source": model_outputs.get("confidence_source"),
            "confidence_component_bonuses": list(model_outputs.get("confidence_component_bonuses") or []),
            "confidence_component_penalties": list(model_outputs.get("confidence_component_penalties") or []),
            "confidence_primary_driver": model_outputs.get("confidence_primary_driver"),
            "confidence_summary_label": model_outputs.get("confidence_summary_label"),
        }

    # ---- Legacy heuristic score.
    score = 50.0
    reasons: list[str] = []

    # Identity: a blank or missing name is penalised.
    batter_name = str(model_outputs.get("batter_name", "") or "").strip()
    pitcher_name = str(model_outputs.get("pitcher_name", "") or "").strip()

    if batter_name:
        score += 8
    else:
        score -= 12
        reasons.append("Missing batter identity")

    if pitcher_name:
        score += 8
    else:
        score -= 10
        reasons.append("Missing pitcher identity")

    # EV90 only needs to be present; its value is not inspected.
    if model_outputs.get("ev90") is not None:
        score += 12
    else:
        score -= 10
        reasons.append("Missing EV90")

    # Probability sanity: each value must parse and sit inside its range.
    # Missing, unparseable and NaN values all land in the penalty branch
    # (NaN fails both comparisons).
    sanity_checks = (
        ("hit_prob", 0.03, 0.70, "Hit probability unstable"),
        ("hr_prob", 0.001, 0.35, "HR probability unstable"),
        ("tb2p_prob", 0.01, 0.60, "TB probability unstable"),
    )
    for key, low, high, reason in sanity_checks:
        prob = _coerce_probability(model_outputs.get(key))
        if prob is not None and low <= prob <= high:
            score += 6
        else:
            score -= 8
            reasons.append(reason)

    # Live-state completeness. Status matching is by substring.
    status = str(game_row.get("status", "") or "").lower()
    if any(token in status for token in ("top", "bot", "mid", "live")):
        score += 8
    else:
        score -= 4
        reasons.append("Game state not strongly live")

    if game_row.get("outs") is not None:
        score += 3

    if game_row.get("balls") is not None and game_row.get("strikes") is not None:
        score += 4

    # Bullpen certainty. Values between the thresholds leave the score alone.
    stay_prob = _coerce_probability(model_outputs.get("starter_stays_next_batter_prob"))
    if stay_prob is not None:
        if 0.70 <= stay_prob <= 0.97:
            score += 6
        elif stay_prob < 0.45:
            score -= 5
            reasons.append("Pitcher continuity uncertain")

    bullpen_prob = _coerce_probability(model_outputs.get("bullpen_entry_prob"))
    if bullpen_prob is not None and bullpen_prob >= 0.35:
        score -= 4
        reasons.append("Bullpen transition uncertainty")

    # Clamp to the documented 0-100 range before bucketing.
    score = max(0.0, min(100.0, score))

    if score >= 80:
        bucket = "high"
    elif score >= 60:
        bucket = "medium"
    else:
        bucket = "low"

    return {
        "confidence": score,
        "confidence_bucket": bucket,
        "confidence_reasons": reasons,
        "confidence_score_raw": score,
        "confidence_score_display": score,
        "confidence_source": "legacy_live_confidence",
        "confidence_component_bonuses": [],
        "confidence_component_penalties": [],
        "confidence_primary_driver": None,
        "confidence_summary_label": None,
    }