Spaces:

Syntrex
/

2026_MLB_Model

Running

App Files Files

2026_MLB_Model / analytics /confidence.py

Syntrex

Promote strikeout v2 and harden telemetry models

50dc123 about 2 months ago

raw

history blame

4.99 kB

	from __future__ import annotations

	from typing import Any


	def _safe_float(value: Any) -> float \| None:
	try:
	if value is None:
	return None
	text = str(value).strip().lower()
	if text in {"", "nan", "none"}:
	return None
	return float(value)
	except Exception:
	return None


	def compute_confidence(model_outputs: dict[str, Any], game_row: dict[str, Any]) -> dict[str, Any]:
	    """
	    Build the confidence payload (score on a 0-100 scale) for one prediction.

	    If ``model_outputs`` already carries a non-None ``confidence_score``,
	    the upstream confidence fields are passed through, with numeric fields
	    coerced via ``_safe_float`` and the display score preferred over the
	    plain score. Otherwise a legacy heuristic starts at 50 and adds or
	    subtracts points for these drivers:
	    - batter / pitcher identity present
	    - EV90 present
	    - probability sanity
	    - live state completeness
	    - bullpen certainty

	    Args:
	        model_outputs: Mapping of model outputs. Keys read by the legacy
	            path: ``batter_name``, ``pitcher_name``, ``ev90``, ``hit_prob``,
	            ``hr_prob``, ``tb2p_prob``, ``starter_stays_next_batter_prob``
	            and ``bullpen_entry_prob``.
	        game_row: Mapping of game state; ``status``, ``outs``, ``balls``
	            and ``strikes`` are read.

	    Returns:
	        A dict with the keys ``confidence``, ``confidence_bucket``,
	        ``confidence_reasons``, ``confidence_score_raw``,
	        ``confidence_score_display``, ``confidence_source``,
	        ``confidence_component_bonuses``, ``confidence_component_penalties``,
	        ``confidence_primary_driver`` and ``confidence_summary_label``.
	        On the legacy path the score is clamped to [0, 100] and the bucket
	        is ``"high"`` (>= 80), ``"medium"`` (>= 60) or ``"low"``.
	    """
	    if model_outputs.get("confidence_score") is not None:
	        # An upstream model already produced a confidence; pass it through.
	        display_score = _safe_float(model_outputs.get("confidence_score_display"))
	        canonical_score = (
	            display_score
	            if display_score is not None
	            else _safe_float(model_outputs.get("confidence_score"))
	        )
	        bucket = str(
	            model_outputs.get("confidence_bucket")
	            or model_outputs.get("confidence_bucket_display")
	            or "medium"
	        ).strip().lower()
	        return {
	            "confidence": canonical_score,
	            # A whitespace-only bucket strips to "", so fall back again.
	            "confidence_bucket": bucket if bucket else "medium",
	            "confidence_reasons": list(model_outputs.get("confidence_reasons") or []),
	            "confidence_score_raw": _safe_float(model_outputs.get("confidence_score_raw")),
	            "confidence_score_display": canonical_score,
	            "confidence_source": model_outputs.get("confidence_source"),
	            "confidence_component_bonuses": list(model_outputs.get("confidence_component_bonuses") or []),
	            "confidence_component_penalties": list(model_outputs.get("confidence_component_penalties") or []),
	            "confidence_primary_driver": model_outputs.get("confidence_primary_driver"),
	            "confidence_summary_label": model_outputs.get("confidence_summary_label"),
	        }

	    score = 50.0
	    reasons: list[str] = []

	    batter_name = str(model_outputs.get("batter_name", "") or "").strip()
	    pitcher_name = str(model_outputs.get("pitcher_name", "") or "").strip()

	    if batter_name:
	        score += 8
	    else:
	        score -= 12
	        reasons.append("Missing batter identity")

	    if pitcher_name:
	        score += 8
	    else:
	        score -= 10
	        reasons.append("Missing pitcher identity")

	    if model_outputs.get("ev90") is not None:
	        score += 12
	    else:
	        score -= 10
	        reasons.append("Missing EV90")

	    # Probability sanity. A missing, non-numeric or out-of-range value is
	    # penalised as "unstable" instead of raising from float().
	    if _probability_in_range(model_outputs.get("hit_prob"), 0.03, 0.70):
	        score += 6
	    else:
	        score -= 8
	        reasons.append("Hit probability unstable")

	    if _probability_in_range(model_outputs.get("hr_prob"), 0.001, 0.35):
	        score += 6
	    else:
	        score -= 8
	        reasons.append("HR probability unstable")

	    if _probability_in_range(model_outputs.get("tb2p_prob"), 0.01, 0.60):
	        score += 6
	    else:
	        score -= 8
	        reasons.append("TB probability unstable")

	    # Live-state completeness: substring match on the status text.
	    status = str(game_row.get("status", "") or "").lower()
	    if any(token in status for token in ("top", "bot", "mid", "live")):
	        score += 8
	    else:
	        score -= 4
	        reasons.append("Game state not strongly live")

	    if game_row.get("outs") is not None:
	        score += 3

	    if game_row.get("balls") is not None and game_row.get("strikes") is not None:
	        score += 4

	    # Bullpen certainty. Unparseable values are ignored (no bonus and no
	    # penalty), matching the original best-effort behaviour.
	    starter_stays_next_batter_prob = model_outputs.get("starter_stays_next_batter_prob")
	    if starter_stays_next_batter_prob is not None:
	        try:
	            stay_prob = float(starter_stays_next_batter_prob)
	        except (TypeError, ValueError, OverflowError):
	            stay_prob = None
	        if stay_prob is not None:
	            if 0.70 <= stay_prob <= 0.97:
	                score += 6
	            elif stay_prob < 0.45:
	                score -= 5
	                reasons.append("Pitcher continuity uncertain")

	    bullpen_entry_prob = model_outputs.get("bullpen_entry_prob")
	    if bullpen_entry_prob is not None:
	        try:
	            bp = float(bullpen_entry_prob)
	        except (TypeError, ValueError, OverflowError):
	            bp = None
	        if bp is not None and bp >= 0.35:
	            score -= 4
	            reasons.append("Bullpen transition uncertainty")

	    score = max(0.0, min(100.0, score))

	    if score >= 80:
	        bucket = "high"
	    elif score >= 60:
	        bucket = "medium"
	    else:
	        bucket = "low"

	    return {
	        "confidence": score,
	        "confidence_bucket": bucket,
	        "confidence_reasons": reasons,
	        "confidence_score_raw": score,
	        "confidence_score_display": score,
	        "confidence_source": "legacy_live_confidence",
	        "confidence_component_bonuses": [],
	        "confidence_component_penalties": [],
	        "confidence_primary_driver": None,
	        "confidence_summary_label": None,
	    }


	def _probability_in_range(value: Any, low: float, high: float) -> bool:
	    """Return True when ``value`` parses as a float within ``[low, high]``.

	    ``None``, non-numeric input and NaN all yield False.
	    """
	    try:
	        return low <= float(value) <= high
	    except (TypeError, ValueError, OverflowError):
	        return False