Spaces:

Syntrex
/

2026_MLB_Model

Running

App Files Files

2026_MLB_Model / models /pitch_sequence_model.py

Syntrex

Phase A-I audit: park factors, platoon adj, pitch sequencing, simulation overhaul

01d9b69 2 months ago

raw

history blame

11.4 kB

	from __future__ import annotations

	import logging
	from typing import Any

	logger = logging.getLogger(__name__)


	def build_sequence_features(
	    game_row: dict[str, Any],
	    pitcher_row: dict[str, Any],
	    batter_row: dict[str, Any],
	    pitcher_family_zone_row: dict[str, Any] | None = None,
	) -> dict[str, Any]:
	    """Assemble the flat feature row consumed by the pitch-sequence model.

	    Args:
	        game_row: Live game state. Keys read: ``balls``, ``strikes``, ``outs``,
	            ``runner_on_1b``/``2b``/``3b``, ``pitch_type``, ``pitch_velocity``,
	            ``pitch_spin_rate`` and ``pitch_extension``.
	        pitcher_row: Pitcher baselines. Keys read: ``avg_release_speed``,
	            ``avg_release_spin_rate``, ``avg_release_extension``,
	            ``ev_allowed`` and ``p_throws``.
	        batter_row: Batter profile. Keys read: ``ev90``, ``barrel_rate``,
	            ``hard_hit_rate`` and ``batter_stand``.
	        pitcher_family_zone_row: Optional mapping holding
	            ``fastball_usage_rate``, ``breaking_usage_rate`` and
	            ``offspeed_usage_rate``; ``None`` is treated as an empty mapping.

	    Returns:
	        A dict of features. Count fields are ints (0 when missing or
	        unparseable), runner flags are real ``bool`` values, telemetry values
	        and their deltas are ``float`` or ``None``, and the remaining profile
	        values are passed through exactly as found in the input rows.
	    """
	    # Textual spellings of "no value" (str(float("nan")) == "nan", etc.).
	    missing_text = {"", "nan", "none", "<na>", "nat"}
	    # Textual spellings that must read as False; bool("0") would be True.
	    false_text = {"false", "f", "no", "n", "0", "0.0"}
	    conversion_errors = (TypeError, ValueError, OverflowError)

	    def _safe_int(value: Any, default: int = 0) -> int:
	        """Coerce ``value`` to int via float, or return ``default``."""
	        if value is None:
	            return default
	        try:
	            if str(value).strip().lower() in missing_text:
	                return default
	            return int(float(value))
	        except conversion_errors:
	            return default

	    def _safe_float(value: Any, default: float | None = None) -> float | None:
	        """Coerce ``value`` to float, or return ``default``."""
	        if value is None:
	            return default
	        try:
	            if str(value).strip().lower() in missing_text:
	                return default
	            return float(value)
	        except conversion_errors:
	            return default

	    def _safe_bool(value: Any, default: bool = False) -> bool:
	        """Coerce a flag to bool, treating NaN/blank markers as ``default``."""
	        if value is None:
	            return default
	        if isinstance(value, bool):
	            return value
	        text = str(value).strip().lower()
	        if text in missing_text:
	            return default
	        if text in false_text:
	            return False
	        try:
	            # Numeric flags (0/1, or any non-zero number) are decided by value.
	            return float(value) != 0.0
	        except conversion_errors:
	            pass
	        try:
	            return bool(value)
	        except conversion_errors:
	            # Some missing-value sentinels refuse truth-testing altogether.
	            return default

	    def _delta(observed: float | None, baseline: float | None) -> float | None:
	        """Return ``observed - baseline`` only when both sides are known."""
	        if observed is None or baseline is None:
	            return None
	        return observed - baseline

	    balls = _safe_int(game_row.get("balls"), 0)
	    strikes = _safe_int(game_row.get("strikes"), 0)
	    outs = _safe_int(game_row.get("outs"), 0)

	    last_pitch_type = str(game_row.get("pitch_type", "") or "").strip().lower()

	    avg_release_speed = _safe_float(pitcher_row.get("avg_release_speed"))
	    avg_release_spin_rate = _safe_float(pitcher_row.get("avg_release_spin_rate"))
	    avg_release_extension = _safe_float(pitcher_row.get("avg_release_extension"))

	    last_pitch_velocity = _safe_float(game_row.get("pitch_velocity"))
	    last_pitch_spin_rate = _safe_float(game_row.get("pitch_spin_rate"))
	    last_pitch_extension = _safe_float(game_row.get("pitch_extension"))

	    # D1: Player-specific pitch usage rates from pitcher's family-zone profiles
	    pf_zone = pitcher_family_zone_row or {}

	    return {
	        "balls": balls,
	        "strikes": strikes,
	        "outs": outs,
	        "runner_on_1b": _safe_bool(game_row.get("runner_on_1b", False)),
	        "runner_on_2b": _safe_bool(game_row.get("runner_on_2b", False)),
	        "runner_on_3b": _safe_bool(game_row.get("runner_on_3b", False)),
	        "pitcher_avg_release_speed": avg_release_speed,
	        "pitcher_avg_release_spin_rate": avg_release_spin_rate,
	        "pitcher_avg_release_extension": avg_release_extension,
	        "pitcher_ev_allowed": pitcher_row.get("ev_allowed"),
	        "batter_ev90": batter_row.get("ev90"),
	        "batter_barrel_rate": batter_row.get("barrel_rate"),
	        "batter_hard_hit_rate": batter_row.get("hard_hit_rate"),
	        "last_pitch_type": last_pitch_type,
	        "last_pitch_velocity": last_pitch_velocity,
	        "last_pitch_spin_rate": last_pitch_spin_rate,
	        "last_pitch_extension": last_pitch_extension,
	        "velo_delta_from_baseline": _delta(last_pitch_velocity, avg_release_speed),
	        "spin_delta_from_baseline": _delta(last_pitch_spin_rate, avg_release_spin_rate),
	        "extension_delta_from_baseline": _delta(last_pitch_extension, avg_release_extension),
	        # E3: Handedness
	        "batter_stand": str(batter_row.get("batter_stand", "R") or "R"),
	        "p_throws": str(pitcher_row.get("p_throws", "R") or "R"),
	        # D1: Player-specific usage rates
	        "pitcher_fastball_usage_rate": pf_zone.get("fastball_usage_rate"),
	        "pitcher_breaking_usage_rate": pf_zone.get("breaking_usage_rate"),
	        "pitcher_offspeed_usage_rate": pf_zone.get("offspeed_usage_rate"),
	    }


	def _normalize_probs(fastball_prob: float, breaking_prob: float, offspeed_prob: float) -> tuple[float, float, float]:
	total = fastball_prob + breaking_prob + offspeed_prob
	if total <= 0:
	return 0.48, 0.32, 0.20
	return (
	fastball_prob / total,
	breaking_prob / total,
	offspeed_prob / total,
	)


	def predict_next_pitch_distribution(feature_row: dict[str, Any]) -> dict[str, Any]:
	    """
	    Pitch sequencing model v2.1
	    Count-aware, sequence-aware, and lightly live-telemetry-aware.

	    Starts from a count-conditioned (fastball, breaking, offspeed) prior,
	    optionally blends it 50/50 with the pitcher's own usage rates, then applies
	    small additive nudges for the previous pitch, batter quality, live
	    velocity/spin/extension deltas and handedness before normalizing. A
	    separate heart/shadow/chase zone split is adjusted by count, batter
	    hard-hit rate and two-out runners in scoring position.

	    Args:
	        feature_row: Feature mapping with the keys produced by
	            ``build_sequence_features``. Missing, ``None`` or unparseable
	            numeric values are skipped (counts fall back to 0) rather than
	            raising.

	    Returns:
	        ``{"fastball_prob", "breaking_prob", "offspeed_prob", "zone_probs"}``
	        where ``zone_probs`` maps ``heart``/``shadow``/``chase`` to floats.
	        Both the pitch-family values and the zone values are normalized to
	        sum to 1.
	    """
	    conversion_errors = (TypeError, ValueError, OverflowError)

	    def _count(key: str) -> int:
	        """Read an integer count; bad or missing values become 0."""
	        try:
	            return int(float(feature_row.get(key, 0)))
	        except conversion_errors as exc:
	            logger.debug("[sequence_model] %s count defaulted to 0: %s", key, exc)
	            return 0

	    def _number(key: str, label: str) -> float | None:
	        """Read an optional float; log and return None when it cannot be parsed."""
	        raw = feature_row.get(key)
	        if raw is None:
	            return None
	        try:
	            return float(raw)
	        except conversion_errors as exc:
	            logger.debug("[sequence_model] %s skipped: %s", label, exc)
	            return None

	    balls = _count("balls")
	    strikes = _count("strikes")
	    outs = _count("outs")
	    last_pitch_type = str(feature_row.get("last_pitch_type", "") or "").lower()

	    # F1: Count-conditioned pitch distribution (full lookup table replaces crude adjustments)
	    count_state_table = {
	        (0, 0): (0.54, 0.28, 0.18),
	        (1, 0): (0.55, 0.27, 0.18),
	        (2, 0): (0.62, 0.22, 0.16),
	        (3, 0): (0.75, 0.12, 0.13),
	        (0, 1): (0.52, 0.30, 0.18),
	        (1, 1): (0.52, 0.30, 0.18),
	        (2, 1): (0.56, 0.26, 0.18),
	        (3, 1): (0.58, 0.24, 0.18),
	        (0, 2): (0.42, 0.40, 0.18),
	        (1, 2): (0.44, 0.36, 0.20),
	        (2, 2): (0.48, 0.32, 0.20),
	        (3, 2): (0.52, 0.28, 0.20),
	    }
	    count_key = (min(balls, 3), min(strikes, 2))
	    cs_fb, cs_br, cs_os = count_state_table.get(count_key, (0.54, 0.28, 0.18))
	    fastball_prob, breaking_prob, offspeed_prob = cs_fb, cs_br, cs_os

	    # D1: Blend with player-specific usage rates if available (50/50)
	    usage = (
	        _number("pitcher_fastball_usage_rate", "player usage rate blend"),
	        _number("pitcher_breaking_usage_rate", "player usage rate blend"),
	        _number("pitcher_offspeed_usage_rate", "player usage rate blend"),
	    )
	    # NaN fails the ">= 0" test, so NaN rates also leave the count prior untouched.
	    if all(rate is not None and rate >= 0 for rate in usage):
	        usage_total = sum(usage)
	        if 0 < usage_total < float("inf"):
	            fastball_prob = (cs_fb + usage[0] / usage_total) / 2
	            breaking_prob = (cs_br + usage[1] / usage_total) / 2
	            offspeed_prob = (cs_os + usage[2] / usage_total) / 2

	    # Previous-pitch memory
	    if "slider" in last_pitch_type or "curve" in last_pitch_type:
	        fastball_prob += 0.04
	        breaking_prob -= 0.03
	        offspeed_prob -= 0.01
	    elif "change" in last_pitch_type or "split" in last_pitch_type:
	        fastball_prob += 0.03
	        offspeed_prob -= 0.02
	    elif "fastball" in last_pitch_type or "sinker" in last_pitch_type or "cutter" in last_pitch_type:
	        fastball_prob -= 0.03
	        breaking_prob += 0.02
	        offspeed_prob += 0.01

	    # Better hitters tend to see fewer heart-zone fastballs in advantage counts
	    batter_ev90 = _number("batter_ev90", "EV90 adjustment")
	    if batter_ev90 is not None and batter_ev90 >= 102 and balls >= 2:
	        fastball_prob -= 0.03
	        breaking_prob += 0.02
	        offspeed_prob += 0.01

	    batter_barrel_rate = _number("batter_barrel_rate", "barrel_rate adjustment")
	    if batter_barrel_rate is not None and batter_barrel_rate >= 0.10 and strikes < 2:
	        fastball_prob -= 0.02
	        breaking_prob += 0.01
	        offspeed_prob += 0.01

	    # Live overlay: if velo is down, reduce fastball confidence slightly
	    velo_delta = _number("velo_delta_from_baseline", "live velo overlay")
	    if velo_delta is not None:
	        if velo_delta <= -1.0:
	            fastball_prob -= 0.03
	            breaking_prob += 0.02
	            offspeed_prob += 0.01
	        elif velo_delta >= 1.0:
	            fastball_prob += 0.02
	            breaking_prob -= 0.01
	            offspeed_prob -= 0.01

	    # Live overlay: if spin is down, slightly reduce breaking-ball trust
	    spin_delta = _number("spin_delta_from_baseline", "live spin overlay")
	    if spin_delta is not None and spin_delta <= -120:
	        breaking_prob -= 0.02
	        fastball_prob += 0.01
	        offspeed_prob += 0.01

	    # Live overlay: shorter extension can imply less deception
	    extension_delta = _number("extension_delta_from_baseline", "live extension overlay")
	    if extension_delta is not None and extension_delta <= -0.25:
	        fastball_prob -= 0.01
	        offspeed_prob += 0.01

	    # E3: Handedness-aware pitch tendency adjustment
	    # Normalize case/whitespace so "r" or " L" still match; any other value
	    # (including an explicit None) matches no branch and applies no nudge.
	    batter_stand = str(feature_row.get("batter_stand", "R")).strip().upper()
	    p_throws = str(feature_row.get("p_throws", "R")).strip().upper()
	    if p_throws == "R" and batter_stand == "L":
	        # RHP vs LHB: sliders break away from LHB
	        fastball_prob -= 0.02
	        breaking_prob += 0.03
	    elif p_throws == "L" and batter_stand == "R":
	        # LHP vs RHB: changeup advantage
	        fastball_prob -= 0.02
	        offspeed_prob += 0.04
	    elif p_throws == batter_stand and p_throws in ("L", "R"):
	        # Same-hand matchup: breaking balls break away
	        fastball_prob -= 0.01
	        breaking_prob += 0.02

	    # Stacked subtractive nudges can push a small weight to (or marginally
	    # below) zero; clamp so the normalized output is never negative.
	    fastball_prob, breaking_prob, offspeed_prob = _normalize_probs(
	        max(fastball_prob, 0.0),
	        max(breaking_prob, 0.0),
	        max(offspeed_prob, 0.0),
	    )

	    heart = 0.22
	    shadow = 0.43
	    chase = 0.35

	    if balls >= 2:
	        heart += 0.04
	        chase -= 0.03
	        shadow -= 0.01

	    if strikes >= 2:
	        chase += 0.06
	        heart -= 0.03
	        shadow -= 0.03

	    if balls == 3 and strikes == 2:
	        heart += 0.05
	        chase -= 0.03
	        shadow -= 0.02

	    batter_hard_hit_rate = _number("batter_hard_hit_rate", "hard_hit_rate adjustment")
	    if batter_hard_hit_rate is not None and batter_hard_hit_rate >= 0.42:
	        heart -= 0.02
	        shadow += 0.01
	        chase += 0.01

	    if outs == 2 and (feature_row.get("runner_on_2b") or feature_row.get("runner_on_3b")):
	        shadow += 0.02
	        heart -= 0.01
	        chase -= 0.01

	    # Every zone nudge above is zero-sum, so this only removes float drift.
	    zone_total = heart + shadow + chase
	    heart /= zone_total
	    shadow /= zone_total
	    chase /= zone_total

	    return {
	        "fastball_prob": fastball_prob,
	        "breaking_prob": breaking_prob,
	        "offspeed_prob": offspeed_prob,
	        "zone_probs": {
	            "heart": heart,
	            "shadow": shadow,
	            "chase": chase,
	        },
	    }