Spaces:

Syntrex
/

2026_MLB_Model

Sleeping

App Files Files

Syntrex committed on Mar 12

Commit

b149c7a

verified ·

1 Parent(s): c57cbf4

Create pitcher_live_state_v2.py

Browse files

Files changed (1) hide show

models/pitcher_live_state_v2.py +310 -0

models/pitcher_live_state_v2.py ADDED Viewed

	@@ -0,0 +1,310 @@

from __future__ import annotations

import math
from typing import Any
+def _safe_float(value: Any, default: float | None = None) -> float | None:
+    try:
+        if value is None:
+            return default
+        text = str(value).strip().lower()
+        if text in {"", "nan", "none"}:
+            return default
+        return float(value)
+    except Exception:
+        return default
+def _safe_int(value: Any, default: int = 0) -> int:
+    try:
+        if value is None:
+            return default
+        text = str(value).strip().lower()
+        if text in {"", "nan", "none"}:
+            return default
+        return int(float(value))
+    except Exception:
+        return default
+def _clamp(value: float, low: float, high: float) -> float:
+    return max(low, min(high, value))
def build_pitcher_live_state_v2(
    pitcher_row: dict[str, Any],
    game_row: dict[str, Any],
    context: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """
    Strongest Phase 6 live-state scorer.

    Compares the live pitch telemetry in ``game_row`` with the baseline
    profile in ``pitcher_row`` and combines the differences with workload and
    count context into heuristic scores, each clamped to [0, 1]. All
    thresholds and weights are hard-coded heuristics.

    Produces:
    - drift metrics
    - fatigue / degradation score
    - trust-live score
    - adaptive baseline/live weights

    Args:
        pitcher_row: Baseline profile. Keys read: ``avg_release_speed``,
            ``avg_release_spin_rate``, ``avg_release_extension``,
            ``avg_pfx_x``, ``avg_pfx_z``, ``ev_allowed``,
            ``hard_hit_rate_allowed``, ``barrel_rate_allowed``.
        game_row: Live state. Keys read: ``pitch_velocity``,
            ``pitch_spin_rate``, ``pitch_extension``, ``pitch_pfx_x``,
            ``pitch_pfx_z``, ``outs``, ``balls``, ``strikes``,
            ``pitch_count``, ``inning``.
        context: Optional extras. Keys read: ``bullpen_entry_prob``
            (fallback 0.0), ``starter_stays_next_batter_prob`` and
            ``starter_stays_next_inning_prob`` (fallback 1.0).

    Missing or unparsable values are tolerated: a delta whose inputs are
    unavailable is reported as ``None`` and contributes nothing to any score.

    Returns:
        A dict with the five deltas (``velo_delta``, ``spin_delta``,
        ``extension_delta``, ``movement_delta_x``, ``movement_delta_z``),
        ``pitch_count``, ``times_through_order``, the 0.0/1.0 count
        indicators (``ahead_rate_live``, ``behind_rate_live``,
        ``full_count_rate_live``), ``first_pitch_strike_tendency_live``
        (always ``None``), the three pass-through probabilities, the scores
        (``drift_persistence_score``, ``evidence_quality_score``,
        ``fatigue_score``, ``degradation_score``, ``trust_live_score``),
        ``baseline_weight`` / ``live_weight`` (which sum to 1.0) and
        ``reason_tags``.
    """
    # Explicit None checks (rather than ``or {}``) so any mapping-like row
    # object is accepted without evaluating its truthiness.
    if pitcher_row is None:
        pitcher_row = {}
    if game_row is None:
        game_row = {}
    if context is None:
        context = {}

    def _delta(live: float | None, baseline: float | None) -> float | None:
        """Return ``live - baseline``, or None when either side is missing."""
        if baseline is None or live is None:
            return None
        return live - baseline

    def _probability(key: str, fallback: float) -> float:
        """Read a probability from ``context``; keep a genuine 0.0 intact."""
        parsed = _safe_float(context.get(key), fallback)
        # Do NOT use ``parsed or fallback``: 0.0 is falsy and would be
        # replaced by the fallback (turning "certain to leave" into 1.0).
        return fallback if parsed is None else parsed

    # --- baseline profile ---
    avg_release_speed = _safe_float(pitcher_row.get("avg_release_speed"))
    avg_release_spin_rate = _safe_float(pitcher_row.get("avg_release_spin_rate"))
    avg_release_extension = _safe_float(pitcher_row.get("avg_release_extension"))
    avg_pfx_x = _safe_float(pitcher_row.get("avg_pfx_x"))
    avg_pfx_z = _safe_float(pitcher_row.get("avg_pfx_z"))
    ev_allowed = _safe_float(pitcher_row.get("ev_allowed"))
    hard_hit_rate_allowed = _safe_float(pitcher_row.get("hard_hit_rate_allowed"))
    barrel_rate_allowed = _safe_float(pitcher_row.get("barrel_rate_allowed"))

    # --- live telemetry ---
    live_velocity = _safe_float(game_row.get("pitch_velocity"))
    live_spin = _safe_float(game_row.get("pitch_spin_rate"))
    live_extension = _safe_float(game_row.get("pitch_extension"))
    live_pfx_x = _safe_float(game_row.get("pitch_pfx_x"))
    live_pfx_z = _safe_float(game_row.get("pitch_pfx_z"))

    # --- live baseball context ---
    outs = _safe_int(game_row.get("outs"), 0)
    balls = _safe_int(game_row.get("balls"), 0)
    strikes = _safe_int(game_row.get("strikes"), 0)
    bullpen_entry_prob = _probability("bullpen_entry_prob", 0.0)
    starter_stays_next_batter_prob = _probability("starter_stays_next_batter_prob", 1.0)
    starter_stays_next_inning_prob = _probability("starter_stays_next_inning_prob", 1.0)

    pitch_count = _safe_int(game_row.get("pitch_count"), 0)
    # if no real pitch count is present, build a weak proxy from inning/outs/count
    # (15 pitches per completed inning, 5 per out, plus the current count)
    if pitch_count <= 0:
        inning_number = _safe_int(game_row.get("inning"), 1)
        pitch_count = max(0, (inning_number - 1) * 15 + outs * 5 + balls + strikes)

    # Times through the order is approximated purely from pitch count.
    times_through_order = 1
    if pitch_count >= 75:
        times_through_order = 3
    elif pitch_count >= 35:
        times_through_order = 2

    # --- drift calculations (live minus baseline; negative means a drop) ---
    velo_delta = _delta(live_velocity, avg_release_speed)
    spin_delta = _delta(live_spin, avg_release_spin_rate)
    extension_delta = _delta(live_extension, avg_release_extension)
    movement_delta_x = _delta(live_pfx_x, avg_pfx_x)
    movement_delta_z = _delta(live_pfx_z, avg_pfx_z)

    # --- count-profile tendency proxy ---
    # Current exact count is only for current batter.
    # But repeated pitcher-state patterns can be approximated from live count stress.
    ahead_indicator = 1.0 if strikes >= 2 and balls <= 1 else 0.0
    behind_indicator = 1.0 if balls >= 2 and strikes <= 1 else 0.0
    full_count_indicator = 1.0 if balls == 3 and strikes == 2 else 0.0

    # --- evidence quality ---
    # Larger absolute deviations (in either direction) and more available
    # signals raise the score.
    evidence_quality_score = 0.0
    reason_tags: list[str] = []
    signal_count = 0
    if velo_delta is not None:
        signal_count += 1
        if abs(velo_delta) >= 0.8:
            evidence_quality_score += 0.18
        if abs(velo_delta) >= 1.4:
            evidence_quality_score += 0.10
    if spin_delta is not None:
        signal_count += 1
        if abs(spin_delta) >= 100:
            evidence_quality_score += 0.16
        if abs(spin_delta) >= 175:
            evidence_quality_score += 0.08
    if extension_delta is not None:
        signal_count += 1
        if abs(extension_delta) >= 0.20:
            evidence_quality_score += 0.12
        if abs(extension_delta) >= 0.35:
            evidence_quality_score += 0.06
    # NOTE(review): movement only counts as a signal once it crosses the
    # threshold, unlike velocity/spin/extension which count whenever present.
    # Kept as-is; confirm this asymmetry is intended.
    if movement_delta_x is not None and abs(movement_delta_x) >= 2.0:
        signal_count += 1
        evidence_quality_score += 0.06
    if movement_delta_z is not None and abs(movement_delta_z) >= 2.0:
        signal_count += 1
        evidence_quality_score += 0.06
    if signal_count >= 3:
        evidence_quality_score += 0.10
    # pitch count / TTO make live evidence more trustworthy
    if pitch_count >= 35:
        evidence_quality_score += 0.08
    if pitch_count >= 70:
        evidence_quality_score += 0.10
    if times_through_order >= 3:
        evidence_quality_score += 0.10
    evidence_quality_score = _clamp(evidence_quality_score, 0.0, 1.0)

    # --- drift persistence proxy ---
    # Until true rolling pitch memory exists, use multi-signal agreement + fatigue context.
    drift_persistence_score = 0.0
    negative_drift_signals = 0
    if velo_delta is not None and velo_delta <= -0.8:
        negative_drift_signals += 1
    if spin_delta is not None and spin_delta <= -100:
        negative_drift_signals += 1
    if extension_delta is not None and extension_delta <= -0.20:
        negative_drift_signals += 1
    if movement_delta_z is not None and movement_delta_z <= -1.5:
        negative_drift_signals += 1
    drift_persistence_score += 0.15 * negative_drift_signals
    # Bonus when several independent signals agree on a decline.
    if negative_drift_signals >= 2:
        drift_persistence_score += 0.15
    if negative_drift_signals >= 3:
        drift_persistence_score += 0.10
    if pitch_count >= 70:
        drift_persistence_score += 0.10
    if times_through_order >= 3:
        drift_persistence_score += 0.10
    drift_persistence_score = _clamp(drift_persistence_score, 0.0, 1.0)

    # --- fatigue score ---
    # Tiers are cumulative: crossing a higher threshold adds on top of the
    # lower one. Only the first tier of each drop emits a reason tag.
    fatigue_score = 0.0
    if pitch_count >= 35:
        fatigue_score += 0.10
    if pitch_count >= 60:
        fatigue_score += 0.15
    if pitch_count >= 85:
        fatigue_score += 0.20
    if times_through_order >= 2:
        fatigue_score += 0.08
    if times_through_order >= 3:
        fatigue_score += 0.12
    if velo_delta is not None and velo_delta <= -0.8:
        fatigue_score += 0.12
        reason_tags.append("velo_drop")
    if velo_delta is not None and velo_delta <= -1.5:
        fatigue_score += 0.10
    if spin_delta is not None and spin_delta <= -100:
        fatigue_score += 0.08
        reason_tags.append("spin_drop")
    if spin_delta is not None and spin_delta <= -180:
        fatigue_score += 0.08
    if extension_delta is not None and extension_delta <= -0.20:
        fatigue_score += 0.06
        reason_tags.append("extension_drop")
    if extension_delta is not None and extension_delta <= -0.35:
        fatigue_score += 0.05
    if bullpen_entry_prob >= 0.40:
        fatigue_score += 0.06
    if bullpen_entry_prob >= 0.60:
        fatigue_score += 0.06
    fatigue_score = _clamp(fatigue_score, 0.0, 1.0)

    # --- degradation score ---
    # Baseline contact quality allowed + current count stress + a share of fatigue.
    degradation_score = 0.0
    if ev_allowed is not None and ev_allowed >= 90.0:
        degradation_score += 0.08
    if ev_allowed is not None and ev_allowed >= 91.5:
        degradation_score += 0.08
    if hard_hit_rate_allowed is not None and hard_hit_rate_allowed >= 0.40:
        degradation_score += 0.08
    if hard_hit_rate_allowed is not None and hard_hit_rate_allowed >= 0.46:
        degradation_score += 0.08
    if barrel_rate_allowed is not None and barrel_rate_allowed >= 0.08:
        degradation_score += 0.08
    if barrel_rate_allowed is not None and barrel_rate_allowed >= 0.11:
        degradation_score += 0.10
    if behind_indicator >= 1.0:
        degradation_score += 0.06
        reason_tags.append("behind_in_count")
    if full_count_indicator >= 1.0:
        degradation_score += 0.06
        reason_tags.append("full_count")
    degradation_score += fatigue_score * 0.35
    degradation_score = _clamp(degradation_score, 0.0, 1.0)

    # --- trust-live score ---
    # Weighted blend of the four component scores (weights sum to 1.0).
    trust_live_score = (
        evidence_quality_score * 0.45
        + drift_persistence_score * 0.30
        + fatigue_score * 0.15
        + degradation_score * 0.10
    )
    trust_live_score = _clamp(trust_live_score, 0.0, 1.0)

    # --- adaptive blend regimes ---
    # The more the live read is trusted, the more weight shifts away from the baseline.
    if trust_live_score < 0.30:
        baseline_weight = 0.80
        live_weight = 0.20
    elif trust_live_score < 0.55:
        baseline_weight = 0.60
        live_weight = 0.40
    elif trust_live_score < 0.80:
        baseline_weight = 0.40
        live_weight = 0.60
    else:
        baseline_weight = 0.25
        live_weight = 0.75

    return {
        "velo_delta": velo_delta,
        "spin_delta": spin_delta,
        "extension_delta": extension_delta,
        "movement_delta_x": movement_delta_x,
        "movement_delta_z": movement_delta_z,
        "pitch_count": pitch_count,
        "times_through_order": times_through_order,
        "ahead_rate_live": ahead_indicator,
        "behind_rate_live": behind_indicator,
        "full_count_rate_live": full_count_indicator,
        "first_pitch_strike_tendency_live": None,
        "bullpen_entry_prob": bullpen_entry_prob,
        "starter_stays_next_batter_prob": starter_stays_next_batter_prob,
        "starter_stays_next_inning_prob": starter_stays_next_inning_prob,
        "drift_persistence_score": drift_persistence_score,
        "evidence_quality_score": evidence_quality_score,
        "fatigue_score": fatigue_score,
        "degradation_score": degradation_score,
        "trust_live_score": trust_live_score,
        "baseline_weight": baseline_weight,
        "live_weight": live_weight,
        "reason_tags": reason_tags[:6],
    }