from __future__ import annotations

import logging
import math
from typing import Any

logger = logging.getLogger(__name__)

# Textual markers (after strip + lower-casing) that mean "no value".
_MISSING_TEXT = frozenset({"", "nan", "none"})

# Errors that int()/float() raise for values that cannot be coerced.
_COERCION_ERRORS = (TypeError, ValueError, OverflowError)

# F1: Count-conditioned pitch distribution.
# Key: (balls capped at 3, strikes capped at 2).
# Value: (fastball, breaking, offspeed) baseline probabilities.
_COUNT_STATE_TABLE = {
    (0, 0): (0.54, 0.28, 0.18),
    (1, 0): (0.55, 0.27, 0.18),
    (2, 0): (0.62, 0.22, 0.16),
    (3, 0): (0.75, 0.12, 0.13),
    (0, 1): (0.52, 0.30, 0.18),
    (1, 1): (0.52, 0.30, 0.18),
    (2, 1): (0.56, 0.26, 0.18),
    (3, 1): (0.58, 0.24, 0.18),
    (0, 2): (0.42, 0.40, 0.18),
    (1, 2): (0.44, 0.36, 0.20),
    (2, 2): (0.48, 0.32, 0.20),
    (3, 2): (0.52, 0.28, 0.20),
}

# Used when the count key is not in the table (e.g. a negative count).
_DEFAULT_COUNT_PROBS = (0.54, 0.28, 0.18)

# Returned by _normalize_probs when nothing usable is left to normalise.
_FALLBACK_PITCH_PROBS = (0.48, 0.32, 0.20)

_USAGE_RATE_KEYS = (
    "pitcher_fastball_usage_rate",
    "pitcher_breaking_usage_rate",
    "pitcher_offspeed_usage_rate",
)


def _is_missing(value: Any) -> bool:
    """Return True for None and for values whose text form marks 'no value'."""
    return value is None or str(value).strip().lower() in _MISSING_TEXT


def _safe_int(value: Any, default: int = 0) -> int:
    """Coerce *value* to int via float; return *default* if missing or invalid."""
    try:
        if _is_missing(value):
            return default
        return int(float(value))
    except _COERCION_ERRORS:
        return default


def _safe_float(value: Any, default: float | None = None) -> float | None:
    """Coerce *value* to float; return *default* if missing or invalid."""
    try:
        if _is_missing(value):
            return default
        return float(value)
    except _COERCION_ERRORS:
        return default


def _safe_flag(value: Any) -> bool:
    """Truthiness of *value*, except that missing markers (None, NaN) are False.

    ``bool(float("nan"))`` is True, so a plain ``bool()`` would report a
    missing runner flag as a runner on base.
    """
    if _is_missing(value):
        return False
    return bool(value)


def _delta(observed: float | None, baseline: float | None) -> float | None:
    """Return ``observed - baseline``, or None when either side is unknown."""
    if observed is None or baseline is None:
        return None
    return observed - baseline


def build_sequence_features(
    game_row: dict[str, Any],
    pitcher_row: dict[str, Any],
    batter_row: dict[str, Any],
    pitcher_family_zone_row: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Assemble the feature row consumed by ``predict_next_pitch_distribution``.

    Args:
        game_row: Live state; read for ``balls``, ``strikes``, ``outs``,
            ``pitch_type``, ``pitch_velocity``, ``pitch_spin_rate``,
            ``pitch_extension`` and the ``runner_on_*`` flags.
        pitcher_row: Pitcher baselines; read for ``avg_release_speed``,
            ``avg_release_spin_rate``, ``avg_release_extension``,
            ``ev_allowed`` and ``p_throws``.
        batter_row: Batter profile; read for ``ev90``, ``barrel_rate``,
            ``hard_hit_rate`` and ``batter_stand``.
        pitcher_family_zone_row: Optional row holding the pitcher's
            ``*_usage_rate`` values; treated as empty when None.

    Returns:
        A flat dict of features. Counts are ints (0 when missing), runner
        flags are bools, telemetry values are floats or None, and each
        ``*_delta_from_baseline`` is None unless both the last-pitch reading
        and the pitcher's baseline are available. Profile values
        (``ev_allowed``, ``ev90``, rates) are passed through uncoerced.
    """
    avg_release_speed = _safe_float(pitcher_row.get("avg_release_speed"))
    avg_release_spin_rate = _safe_float(pitcher_row.get("avg_release_spin_rate"))
    avg_release_extension = _safe_float(pitcher_row.get("avg_release_extension"))

    last_pitch_velocity = _safe_float(game_row.get("pitch_velocity"))
    last_pitch_spin_rate = _safe_float(game_row.get("pitch_spin_rate"))
    last_pitch_extension = _safe_float(game_row.get("pitch_extension"))

    # D1: Player-specific pitch usage rates from the pitcher's family-zone profile.
    pf_zone = pitcher_family_zone_row or {}

    return {
        "balls": _safe_int(game_row.get("balls"), 0),
        "strikes": _safe_int(game_row.get("strikes"), 0),
        "outs": _safe_int(game_row.get("outs"), 0),
        "runner_on_1b": _safe_flag(game_row.get("runner_on_1b", False)),
        "runner_on_2b": _safe_flag(game_row.get("runner_on_2b", False)),
        "runner_on_3b": _safe_flag(game_row.get("runner_on_3b", False)),
        "pitcher_avg_release_speed": avg_release_speed,
        "pitcher_avg_release_spin_rate": avg_release_spin_rate,
        "pitcher_avg_release_extension": avg_release_extension,
        "pitcher_ev_allowed": pitcher_row.get("ev_allowed"),
        "batter_ev90": batter_row.get("ev90"),
        "batter_barrel_rate": batter_row.get("barrel_rate"),
        "batter_hard_hit_rate": batter_row.get("hard_hit_rate"),
        "last_pitch_type": str(game_row.get("pitch_type", "") or "").strip().lower(),
        "last_pitch_velocity": last_pitch_velocity,
        "last_pitch_spin_rate": last_pitch_spin_rate,
        "last_pitch_extension": last_pitch_extension,
        "velo_delta_from_baseline": _delta(last_pitch_velocity, avg_release_speed),
        "spin_delta_from_baseline": _delta(last_pitch_spin_rate, avg_release_spin_rate),
        "extension_delta_from_baseline": _delta(
            last_pitch_extension, avg_release_extension
        ),
        # E3: Handedness (defaults to "R" when absent or empty)
        "batter_stand": str(batter_row.get("batter_stand", "R") or "R"),
        "p_throws": str(pitcher_row.get("p_throws", "R") or "R"),
        # D1: Player-specific usage rates
        "pitcher_fastball_usage_rate": pf_zone.get("fastball_usage_rate"),
        "pitcher_breaking_usage_rate": pf_zone.get("breaking_usage_rate"),
        "pitcher_offspeed_usage_rate": pf_zone.get("offspeed_usage_rate"),
    }


def _normalize_probs(
    fastball_prob: float, breaking_prob: float, offspeed_prob: float
) -> tuple[float, float, float]:
    """Scale three pitch-family weights so they sum to 1.

    Negative or NaN weights are clamped to 0 first; stacked additive
    adjustments can push a small component just below zero, and a negative
    value must not be reported as a probability. If no positive, finite
    mass remains, the fixed fallback split (0.48, 0.32, 0.20) is returned.
    """
    # ``w > 0`` is False for NaN, so NaN is clamped to 0 as well.
    clamped = [
        w if w > 0 else 0.0 for w in (fastball_prob, breaking_prob, offspeed_prob)
    ]
    total = sum(clamped)
    if total <= 0 or not math.isfinite(total):
        return _FALLBACK_PITCH_PROBS
    return (clamped[0] / total, clamped[1] / total, clamped[2] / total)


def _feature_float(feature_row: dict[str, Any], key: str, label: str) -> float | None:
    """Read ``feature_row[key]`` as a float; None when absent or unconvertible.

    A conversion failure is logged at debug level under *label* so the
    skipped adjustment can be traced.
    """
    raw = feature_row.get(key)
    if raw is None:
        return None
    try:
        return float(raw)
    except _COERCION_ERRORS as exc:
        logger.debug("[sequence_model] %s skipped: %s", label, exc)
        return None


def _count_and_usage_probs(
    feature_row: dict[str, Any], balls: int, strikes: int
) -> tuple[float, float, float]:
    """Baseline (fastball, breaking, offspeed) from count, blended with usage.

    D1: when all three pitcher usage rates are present, finite,
    non-negative and sum to a positive total, they are normalised and
    averaged 50/50 with the count-state baseline. Otherwise the count-state
    baseline is returned unchanged.
    """
    count_probs = _COUNT_STATE_TABLE.get(
        (min(balls, 3), min(strikes, 2)), _DEFAULT_COUNT_PROBS
    )
    raw_rates = [feature_row.get(key) for key in _USAGE_RATE_KEYS]
    if any(rate is None for rate in raw_rates):
        return count_probs
    try:
        rates = [float(rate) for rate in raw_rates]
    except _COERCION_ERRORS as exc:
        logger.debug("[sequence_model] player usage rate blend skipped: %s", exc)
        return count_probs
    total = sum(rates)
    usable = (
        all(math.isfinite(rate) and rate >= 0 for rate in rates)
        and total > 0
        and math.isfinite(total)
    )
    if not usable:
        return count_probs
    fb, br, offs = ((base + rate / total) / 2 for base, rate in zip(count_probs, rates))
    return fb, br, offs


def _zone_probs(
    feature_row: dict[str, Any], balls: int, strikes: int, outs: int
) -> dict[str, float]:
    """Return normalised heart / shadow / chase location probabilities."""
    heart, shadow, chase = 0.22, 0.43, 0.35

    if balls >= 2:
        heart += 0.04
        chase -= 0.03
        shadow -= 0.01
    if strikes >= 2:
        chase += 0.06
        heart -= 0.03
        shadow -= 0.03
    if balls == 3 and strikes == 2:
        heart += 0.05
        chase -= 0.03
        shadow -= 0.02

    hard_hit_rate = _feature_float(
        feature_row, "batter_hard_hit_rate", "hard_hit_rate adjustment"
    )
    if hard_hit_rate is not None and hard_hit_rate >= 0.42:
        heart -= 0.02
        shadow += 0.01
        chase += 0.01

    runner_in_scoring_position = _safe_flag(
        feature_row.get("runner_on_2b")
    ) or _safe_flag(feature_row.get("runner_on_3b"))
    if outs == 2 and runner_in_scoring_position:
        shadow += 0.02
        heart -= 0.01
        chase -= 0.01

    zone_total = heart + shadow + chase
    return {
        "heart": heart / zone_total,
        "shadow": shadow / zone_total,
        "chase": chase / zone_total,
    }


def predict_next_pitch_distribution(feature_row: dict[str, Any]) -> dict[str, Any]:
    """Pitch sequencing model v2.1.

    Count-aware, sequence-aware, and lightly live-telemetry-aware.

    Starts from a count-conditioned baseline (optionally blended with the
    pitcher's own usage rates), applies small additive adjustments for the
    previous pitch, batter quality, live velocity/spin/extension deltas and
    handedness, then renormalises.

    Args:
        feature_row: Feature dict such as the one produced by
            ``build_sequence_features``. Every key is optional; missing,
            None or unparseable values skip the corresponding adjustment
            instead of raising.

    Returns:
        ``{"fastball_prob", "breaking_prob", "offspeed_prob", "zone_probs"}``
        where the three pitch-family probabilities are non-negative and sum
        to 1, and ``zone_probs`` maps ``heart`` / ``shadow`` / ``chase`` to
        values that sum to 1.
    """
    # Lenient coercion: a present-but-None (or "nan") count must not crash.
    balls = _safe_int(feature_row.get("balls"), 0)
    strikes = _safe_int(feature_row.get("strikes"), 0)
    outs = _safe_int(feature_row.get("outs"), 0)
    last_pitch_type = str(feature_row.get("last_pitch_type", "") or "").lower()

    # F1 + D1: count-state baseline, blended with player usage when usable.
    fastball_prob, breaking_prob, offspeed_prob = _count_and_usage_probs(
        feature_row, balls, strikes
    )

    # Previous-pitch memory
    if "slider" in last_pitch_type or "curve" in last_pitch_type:
        fastball_prob += 0.04
        breaking_prob -= 0.03
        offspeed_prob -= 0.01
    elif "change" in last_pitch_type or "split" in last_pitch_type:
        fastball_prob += 0.03
        offspeed_prob -= 0.02
    elif any(tag in last_pitch_type for tag in ("fastball", "sinker", "cutter")):
        fastball_prob -= 0.03
        breaking_prob += 0.02
        offspeed_prob += 0.01

    # Better hitters tend to see fewer heart-zone fastballs in advantage counts
    batter_ev90 = _feature_float(feature_row, "batter_ev90", "EV90 adjustment")
    if batter_ev90 is not None and batter_ev90 >= 102 and balls >= 2:
        fastball_prob -= 0.03
        breaking_prob += 0.02
        offspeed_prob += 0.01

    barrel_rate = _feature_float(
        feature_row, "batter_barrel_rate", "barrel_rate adjustment"
    )
    if barrel_rate is not None and barrel_rate >= 0.10 and strikes < 2:
        fastball_prob -= 0.02
        breaking_prob += 0.01
        offspeed_prob += 0.01

    # Live overlay: if velo is down, reduce fastball confidence slightly
    velo_delta = _feature_float(
        feature_row, "velo_delta_from_baseline", "live velo overlay"
    )
    if velo_delta is not None:
        if velo_delta <= -1.0:
            fastball_prob -= 0.03
            breaking_prob += 0.02
            offspeed_prob += 0.01
        elif velo_delta >= 1.0:
            fastball_prob += 0.02
            breaking_prob -= 0.01
            offspeed_prob -= 0.01

    # Live overlay: if spin is down, slightly reduce breaking-ball trust
    spin_delta = _feature_float(
        feature_row, "spin_delta_from_baseline", "live spin overlay"
    )
    if spin_delta is not None and spin_delta <= -120:
        breaking_prob -= 0.02
        fastball_prob += 0.01
        offspeed_prob += 0.01

    # Live overlay: shorter extension can imply less deception
    extension_delta = _feature_float(
        feature_row, "extension_delta_from_baseline", "live extension overlay"
    )
    if extension_delta is not None and extension_delta <= -0.25:
        fastball_prob -= 0.01
        offspeed_prob += 0.01

    # E3: Handedness-aware pitch tendency adjustment
    batter_stand = feature_row.get("batter_stand", "R")
    p_throws = feature_row.get("p_throws", "R")
    if p_throws == "R" and batter_stand == "L":
        # RHP vs LHB: sliders break away from LHB
        fastball_prob -= 0.02
        breaking_prob += 0.03
    elif p_throws == "L" and batter_stand == "R":
        # LHP vs RHB: changeup advantage
        fastball_prob -= 0.02
        offspeed_prob += 0.04
    elif p_throws == batter_stand and p_throws in ("L", "R"):
        # Same-hand matchup: breaking balls break away
        fastball_prob -= 0.01
        breaking_prob += 0.02

    fastball_prob, breaking_prob, offspeed_prob = _normalize_probs(
        fastball_prob, breaking_prob, offspeed_prob
    )

    return {
        "fastball_prob": fastball_prob,
        "breaking_prob": breaking_prob,
        "offspeed_prob": offspeed_prob,
        "zone_probs": _zone_probs(feature_row, balls, strikes, outs),
    }