2026_MLB_Model / models /pitch_sequence_model.py
Syntrex's picture
Phase A-I audit: park factors, platoon adj, pitch sequencing, simulation overhaul
01d9b69
raw
history blame
11.4 kB
from __future__ import annotations
import logging
from typing import Any
logger = logging.getLogger(__name__)
def build_sequence_features(
    game_row: dict[str, Any],
    pitcher_row: dict[str, Any],
    batter_row: dict[str, Any],
    pitcher_family_zone_row: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Build the flat feature dict consumed by the pitch-sequence predictor.

    Args:
        game_row: Live game state. Keys read: ``balls``, ``strikes``, ``outs``,
            ``pitch_type``, ``pitch_velocity``, ``pitch_spin_rate``,
            ``pitch_extension`` and ``runner_on_1b/2b/3b``.
        pitcher_row: Pitcher baselines. Keys read: ``avg_release_speed``,
            ``avg_release_spin_rate``, ``avg_release_extension``,
            ``ev_allowed`` and ``p_throws``.
        batter_row: Batter profile. Keys read: ``ev90``, ``barrel_rate``,
            ``hard_hit_rate`` and ``batter_stand``.
        pitcher_family_zone_row: Optional per-pitcher usage profile providing
            ``fastball_usage_rate``, ``breaking_usage_rate`` and
            ``offspeed_usage_rate``. ``None`` is treated as an empty profile.

    Returns:
        A dict of features. Counts are ints (0 when missing or unparseable),
        runner flags are real booleans, telemetry values and their deltas
        against the pitcher's baseline are finite floats or ``None``, and
        handedness codes are stripped, upper-cased strings defaulting to
        ``"R"``. Batter quality metrics, ``ev_allowed`` and the usage rates
        are passed through exactly as found in the input rows (no coercion).
    """
    import math  # local import keeps this block self-contained

    # Textual spellings of "no value" (None, float NaN, pandas-style NA).
    missing_text = {"", "nan", "none", "<na>"}

    def _is_missing(value: Any) -> bool:
        """Return True when *value* is None or prints as a missing marker."""
        return value is None or str(value).strip().lower() in missing_text

    def _safe_int(value: Any, default: int = 0) -> int:
        """Coerce *value* to int (truncating), or return *default*."""
        if _is_missing(value):
            return default
        try:
            return int(float(value))
        except (TypeError, ValueError, OverflowError):
            return default

    def _safe_float(value: Any, default: float | None = None) -> float | None:
        """Coerce *value* to a finite float, or return *default*."""
        if _is_missing(value):
            return default
        try:
            number = float(value)
        except (TypeError, ValueError, OverflowError):
            return default
        # +/-inf would turn every baseline delta into inf/NaN downstream.
        return number if math.isfinite(number) else default

    def _safe_flag(value: Any) -> bool:
        """Coerce a runner-on-base marker to a real boolean.

        Plain ``bool()`` reports True for NaN and for strings such as "0" or
        "False", which would invent baserunners out of empty cells.
        """
        if _is_missing(value):
            return False
        if isinstance(value, str):
            return value.strip().lower() not in {"0", "0.0", "false", "no"}
        try:
            return bool(value)
        except (TypeError, ValueError):
            return False

    def _hand(value: Any) -> str:
        """Normalise a handedness code; missing values default to "R"."""
        return "R" if _is_missing(value) else str(value).strip().upper()

    def _delta(observed: float | None, baseline: float | None) -> float | None:
        """Return observed - baseline, or None when either side is unknown."""
        if observed is None or baseline is None:
            return None
        return observed - baseline

    balls = _safe_int(game_row.get("balls"), 0)
    strikes = _safe_int(game_row.get("strikes"), 0)
    outs = _safe_int(game_row.get("outs"), 0)

    raw_pitch_type = game_row.get("pitch_type")
    # A NaN cell must become "" rather than the literal text "nan".
    last_pitch_type = (
        "" if _is_missing(raw_pitch_type) else str(raw_pitch_type or "").strip().lower()
    )

    avg_release_speed = _safe_float(pitcher_row.get("avg_release_speed"))
    avg_release_spin_rate = _safe_float(pitcher_row.get("avg_release_spin_rate"))
    avg_release_extension = _safe_float(pitcher_row.get("avg_release_extension"))

    last_pitch_velocity = _safe_float(game_row.get("pitch_velocity"))
    last_pitch_spin_rate = _safe_float(game_row.get("pitch_spin_rate"))
    last_pitch_extension = _safe_float(game_row.get("pitch_extension"))

    # D1: Player-specific pitch usage rates from pitcher's family-zone profiles
    pf_zone = pitcher_family_zone_row or {}

    return {
        "balls": balls,
        "strikes": strikes,
        "outs": outs,
        "runner_on_1b": _safe_flag(game_row.get("runner_on_1b", False)),
        "runner_on_2b": _safe_flag(game_row.get("runner_on_2b", False)),
        "runner_on_3b": _safe_flag(game_row.get("runner_on_3b", False)),
        "pitcher_avg_release_speed": avg_release_speed,
        "pitcher_avg_release_spin_rate": avg_release_spin_rate,
        "pitcher_avg_release_extension": avg_release_extension,
        "pitcher_ev_allowed": pitcher_row.get("ev_allowed"),
        "batter_ev90": batter_row.get("ev90"),
        "batter_barrel_rate": batter_row.get("barrel_rate"),
        "batter_hard_hit_rate": batter_row.get("hard_hit_rate"),
        "last_pitch_type": last_pitch_type,
        "last_pitch_velocity": last_pitch_velocity,
        "last_pitch_spin_rate": last_pitch_spin_rate,
        "last_pitch_extension": last_pitch_extension,
        "velo_delta_from_baseline": _delta(last_pitch_velocity, avg_release_speed),
        "spin_delta_from_baseline": _delta(last_pitch_spin_rate, avg_release_spin_rate),
        "extension_delta_from_baseline": _delta(last_pitch_extension, avg_release_extension),
        # E3: Handedness
        "batter_stand": _hand(batter_row.get("batter_stand", "R")),
        "p_throws": _hand(pitcher_row.get("p_throws", "R")),
        # D1: Player-specific usage rates
        "pitcher_fastball_usage_rate": pf_zone.get("fastball_usage_rate"),
        "pitcher_breaking_usage_rate": pf_zone.get("breaking_usage_rate"),
        "pitcher_offspeed_usage_rate": pf_zone.get("offspeed_usage_rate"),
    }
def _normalize_probs(fastball_prob: float, breaking_prob: float, offspeed_prob: float) -> tuple[float, float, float]:
total = fastball_prob + breaking_prob + offspeed_prob
if total <= 0:
return 0.48, 0.32, 0.20
return (
fastball_prob / total,
breaking_prob / total,
offspeed_prob / total,
)
# F1: Count-conditioned baseline mix, keyed by (balls, strikes) and holding
# (fastball, breaking, offspeed). Built once instead of on every call.
_COUNT_STATE_TABLE = {
    (0, 0): (0.54, 0.28, 0.18),
    (1, 0): (0.55, 0.27, 0.18),
    (2, 0): (0.62, 0.22, 0.16),
    (3, 0): (0.75, 0.12, 0.13),
    (0, 1): (0.52, 0.30, 0.18),
    (1, 1): (0.52, 0.30, 0.18),
    (2, 1): (0.56, 0.26, 0.18),
    (3, 1): (0.58, 0.24, 0.18),
    (0, 2): (0.42, 0.40, 0.18),
    (1, 2): (0.44, 0.36, 0.20),
    (2, 2): (0.48, 0.32, 0.20),
    (3, 2): (0.52, 0.28, 0.20),
}
# Used when the (clamped) count is not in the table, e.g. a negative count.
_DEFAULT_COUNT_MIX = (0.54, 0.28, 0.18)


def _coerce_count(value: Any) -> int:
    """Return *value* as an int, or 0 when it is missing or unparseable."""
    for convert in (int, lambda raw: int(float(raw))):
        try:
            return convert(value)
        except (TypeError, ValueError, OverflowError):
            # None, NaN, inf, "" or text like "2.0" for the plain int() pass.
            continue
    return 0


def _float_or_none(value: Any, label: str) -> float | None:
    """Return ``float(value)``; on failure log at debug level and return None.

    *label* names the adjustment being skipped so the log line identifies it.
    """
    if value is None:
        return None
    try:
        return float(value)
    except (TypeError, ValueError, OverflowError) as exc:
        logger.debug("[sequence_model] %s skipped: %s", label, exc)
        return None


def predict_next_pitch_distribution(feature_row: dict[str, Any]) -> dict[str, Any]:
    """
    Pitch sequencing model v2.1
    Count-aware, sequence-aware, and lightly live-telemetry-aware.

    Starts from a count-conditioned pitch-family mix, optionally blends it
    50/50 with the pitcher's own usage rates, applies small additive
    adjustments (previous pitch, batter quality, live velocity/spin/extension
    deltas, handedness) and normalises the result. A separate heart/shadow/
    chase zone distribution is derived from the count, batter hard-hit rate
    and the two-out/runner-in-scoring-position situation.

    Args:
        feature_row: Feature dict. Every key is optional; missing, ``None`` or
            unparseable values simply skip the corresponding adjustment
            (counts fall back to 0, handedness to ``"R"``).

    Returns:
        ``{"fastball_prob", "breaking_prob", "offspeed_prob", "zone_probs"}``
        where ``zone_probs`` maps ``"heart"``, ``"shadow"`` and ``"chase"`` to
        shares that sum to 1.
    """
    # Counts may arrive as None/NaN/strings; never let that abort a prediction.
    balls = _coerce_count(feature_row.get("balls", 0))
    strikes = _coerce_count(feature_row.get("strikes", 0))
    outs = _coerce_count(feature_row.get("outs", 0))
    last_pitch_type = str(feature_row.get("last_pitch_type", "") or "").lower()

    # F1: Count-conditioned pitch distribution (counts clamped to 3 balls / 2 strikes)
    count_key = (min(balls, 3), min(strikes, 2))
    cs_fb, cs_br, cs_os = _COUNT_STATE_TABLE.get(count_key, _DEFAULT_COUNT_MIX)
    fastball_prob, breaking_prob, offspeed_prob = cs_fb, cs_br, cs_os

    # D1: Blend with player-specific usage rates if available (50/50)
    player_rates = (
        feature_row.get("pitcher_fastball_usage_rate"),
        feature_row.get("pitcher_breaking_usage_rate"),
        feature_row.get("pitcher_offspeed_usage_rate"),
    )
    if all(rate is not None for rate in player_rates):
        try:
            p_fb, p_br, p_os = (float(rate) for rate in player_rates)
        except (TypeError, ValueError, OverflowError) as exc:
            logger.debug("[sequence_model] player usage rate blend skipped: %s", exc)
        else:
            p_total = p_fb + p_br + p_os
            # Comparisons are False for NaN, so NaN/negative/infinite rates
            # leave the count-based mix untouched instead of corrupting it.
            rates_usable = (
                p_fb >= 0 and p_br >= 0 and p_os >= 0 and 0 < p_total < float("inf")
            )
            if rates_usable:
                p_fb /= p_total
                p_br /= p_total
                p_os /= p_total
                fastball_prob = (cs_fb + p_fb) / 2
                breaking_prob = (cs_br + p_br) / 2
                offspeed_prob = (cs_os + p_os) / 2

    # Previous-pitch memory (substring match on the lower-cased pitch name).
    # NOTE(review): short codes such as "FF"/"SL" would match none of these
    # branches -- confirm what the upstream feed supplies.
    if "slider" in last_pitch_type or "curve" in last_pitch_type:
        fastball_prob += 0.04
        breaking_prob -= 0.03
        offspeed_prob -= 0.01
    elif "change" in last_pitch_type or "split" in last_pitch_type:
        fastball_prob += 0.03
        offspeed_prob -= 0.02
    elif "fastball" in last_pitch_type or "sinker" in last_pitch_type or "cutter" in last_pitch_type:
        fastball_prob -= 0.03
        breaking_prob += 0.02
        offspeed_prob += 0.01

    # Better hitters tend to see fewer heart-zone fastballs in advantage counts
    batter_ev90 = _float_or_none(feature_row.get("batter_ev90"), "EV90 adjustment")
    if batter_ev90 is not None and batter_ev90 >= 102 and balls >= 2:
        fastball_prob -= 0.03
        breaking_prob += 0.02
        offspeed_prob += 0.01

    batter_barrel_rate = _float_or_none(
        feature_row.get("batter_barrel_rate"), "barrel_rate adjustment"
    )
    if batter_barrel_rate is not None and batter_barrel_rate >= 0.10 and strikes < 2:
        fastball_prob -= 0.02
        breaking_prob += 0.01
        offspeed_prob += 0.01

    # Live overlay: if velo is down, reduce fastball confidence slightly
    velo_delta = _float_or_none(
        feature_row.get("velo_delta_from_baseline"), "live velo overlay"
    )
    if velo_delta is not None:
        if velo_delta <= -1.0:
            fastball_prob -= 0.03
            breaking_prob += 0.02
            offspeed_prob += 0.01
        elif velo_delta >= 1.0:
            fastball_prob += 0.02
            breaking_prob -= 0.01
            offspeed_prob -= 0.01

    # Live overlay: if spin is down, slightly reduce breaking-ball trust
    spin_delta = _float_or_none(
        feature_row.get("spin_delta_from_baseline"), "live spin overlay"
    )
    if spin_delta is not None and spin_delta <= -120:
        breaking_prob -= 0.02
        fastball_prob += 0.01
        offspeed_prob += 0.01

    # Live overlay: shorter extension can imply less deception
    extension_delta = _float_or_none(
        feature_row.get("extension_delta_from_baseline"), "live extension overlay"
    )
    if extension_delta is not None and extension_delta <= -0.25:
        fastball_prob -= 0.01
        offspeed_prob += 0.01

    # E3: Handedness-aware pitch tendency adjustment. Codes are normalised so
    # "l", " R " etc. still match; anything other than L/R gets no adjustment.
    batter_stand = str(feature_row.get("batter_stand", "R") or "R").strip().upper()
    p_throws = str(feature_row.get("p_throws", "R") or "R").strip().upper()
    if p_throws == "R" and batter_stand == "L":
        # RHP vs LHB: sliders break away from LHB
        fastball_prob -= 0.02
        breaking_prob += 0.03
    elif p_throws == "L" and batter_stand == "R":
        # LHP vs RHB: changeup advantage
        fastball_prob -= 0.02
        offspeed_prob += 0.04
    elif p_throws == batter_stand and p_throws in ("L", "R"):
        # Same-hand matchup: breaking balls break away
        fastball_prob -= 0.01
        breaking_prob += 0.02

    # The adjustments above are not all zero-sum, so rescale to sum to 1.
    fastball_prob, breaking_prob, offspeed_prob = _normalize_probs(
        fastball_prob, breaking_prob, offspeed_prob
    )

    # Zone distribution: every adjustment below is zero-sum across the three zones.
    heart = 0.22
    shadow = 0.43
    chase = 0.35
    if balls >= 2:
        heart += 0.04
        chase -= 0.03
        shadow -= 0.01
    if strikes >= 2:
        chase += 0.06
        heart -= 0.03
        shadow -= 0.03
    if balls == 3 and strikes == 2:
        heart += 0.05
        chase -= 0.03
        shadow -= 0.02

    # Logged like the other adjustments rather than silently swallowed.
    batter_hard_hit_rate = _float_or_none(
        feature_row.get("batter_hard_hit_rate"), "hard_hit_rate zone adjustment"
    )
    if batter_hard_hit_rate is not None and batter_hard_hit_rate >= 0.42:
        heart -= 0.02
        shadow += 0.01
        chase += 0.01

    if outs == 2 and (feature_row.get("runner_on_2b") or feature_row.get("runner_on_3b")):
        shadow += 0.02
        heart -= 0.01
        chase -= 0.01

    zone_total = heart + shadow + chase
    heart /= zone_total
    shadow /= zone_total
    chase /= zone_total

    return {
        "fastball_prob": fastball_prob,
        "breaking_prob": breaking_prob,
        "offspeed_prob": offspeed_prob,
        "zone_probs": {
            "heart": heart,
            "shadow": shadow,
            "chase": chase,
        },
    }