Spaces:
Running
Running
| from __future__ import annotations | |
| import logging | |
| from typing import Any | |
| logger = logging.getLogger(__name__) | |
def build_sequence_features(
    game_row: dict[str, Any],
    pitcher_row: dict[str, Any],
    batter_row: dict[str, Any],
    pitcher_family_zone_row: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Assemble the flat feature dict consumed by the next-pitch model.

    Args:
        game_row: Live game state. Keys read: ``balls``, ``strikes``,
            ``outs``, ``pitch_type``, ``pitch_velocity``, ``pitch_spin_rate``,
            ``pitch_extension``, ``runner_on_1b``/``2b``/``3b``.
        pitcher_row: Pitcher baseline. Keys read: ``avg_release_speed``,
            ``avg_release_spin_rate``, ``avg_release_extension``,
            ``ev_allowed``, ``p_throws``.
        batter_row: Batter profile. Keys read: ``ev90``, ``barrel_rate``,
            ``hard_hit_rate``, ``batter_stand``.
        pitcher_family_zone_row: Optional pitcher usage profile. Keys read:
            ``fastball_usage_rate``, ``breaking_usage_rate``,
            ``offspeed_usage_rate``. ``None`` is treated as an empty dict.

    Returns:
        A dict of features. Counts are ints (0 when missing or unparseable),
        telemetry values and deltas are floats or ``None``, runner flags are
        bools, handedness codes are stripped upper-case strings defaulting to
        ``"R"``. ``pitcher_ev_allowed``, the ``batter_*`` metrics and the
        usage rates are passed through exactly as found in the input rows.
    """
    # Spellings that mean "no value" once stringified (e.g. a float NaN).
    missing_tokens = {"", "nan", "none"}
    # Additional spellings that mean "no runner" for the base-occupancy flags.
    false_tokens = missing_tokens | {"false", "0", "0.0"}

    def _safe_int(value: Any, default: int = 0) -> int:
        """Coerce ``value`` to int (via float, so "2.0" works) or return ``default``."""
        if value is None:
            return default
        try:
            if str(value).strip().lower() in missing_tokens:
                return default
            return int(float(value))
        except (TypeError, ValueError, OverflowError):
            return default

    def _safe_float(value: Any, default: float | None = None) -> float | None:
        """Coerce ``value`` to float or return ``default`` when missing/unparseable."""
        if value is None:
            return default
        try:
            if str(value).strip().lower() in missing_tokens:
                return default
            return float(value)
        except (TypeError, ValueError, OverflowError):
            return default

    def _safe_flag(value: Any) -> bool:
        """Truthiness that does not mistake NaN, "nan", "0" or "false" for True."""
        if value is None:
            return False
        if str(value).strip().lower() in false_tokens:
            return False
        try:
            return bool(value)
        except (TypeError, ValueError):
            # Objects with ambiguous truthiness are treated as "no runner".
            return False

    def _safe_hand(value: Any) -> str:
        """Normalise a handedness code to stripped upper-case, defaulting to "R"."""
        code = str(value or "R").strip().upper()
        return "R" if code.lower() in missing_tokens else code

    def _delta(observed: float | None, baseline: float | None) -> float | None:
        """Return ``observed - baseline``, or None when either side is unknown."""
        if observed is None or baseline is None:
            return None
        return observed - baseline

    balls = _safe_int(game_row.get("balls"), 0)
    strikes = _safe_int(game_row.get("strikes"), 0)
    outs = _safe_int(game_row.get("outs"), 0)
    last_pitch_type = str(game_row.get("pitch_type", "") or "").strip().lower()

    # Pitcher baselines versus the most recent pitch's telemetry.
    avg_release_speed = _safe_float(pitcher_row.get("avg_release_speed"))
    avg_release_spin_rate = _safe_float(pitcher_row.get("avg_release_spin_rate"))
    avg_release_extension = _safe_float(pitcher_row.get("avg_release_extension"))
    last_pitch_velocity = _safe_float(game_row.get("pitch_velocity"))
    last_pitch_spin_rate = _safe_float(game_row.get("pitch_spin_rate"))
    last_pitch_extension = _safe_float(game_row.get("pitch_extension"))

    # D1: Player-specific pitch usage rates from pitcher's family-zone profiles
    pf_zone = pitcher_family_zone_row or {}

    return {
        "balls": balls,
        "strikes": strikes,
        "outs": outs,
        "runner_on_1b": _safe_flag(game_row.get("runner_on_1b", False)),
        "runner_on_2b": _safe_flag(game_row.get("runner_on_2b", False)),
        "runner_on_3b": _safe_flag(game_row.get("runner_on_3b", False)),
        "pitcher_avg_release_speed": avg_release_speed,
        "pitcher_avg_release_spin_rate": avg_release_spin_rate,
        "pitcher_avg_release_extension": avg_release_extension,
        "pitcher_ev_allowed": pitcher_row.get("ev_allowed"),
        "batter_ev90": batter_row.get("ev90"),
        "batter_barrel_rate": batter_row.get("barrel_rate"),
        "batter_hard_hit_rate": batter_row.get("hard_hit_rate"),
        "last_pitch_type": last_pitch_type,
        "last_pitch_velocity": last_pitch_velocity,
        "last_pitch_spin_rate": last_pitch_spin_rate,
        "last_pitch_extension": last_pitch_extension,
        "velo_delta_from_baseline": _delta(last_pitch_velocity, avg_release_speed),
        "spin_delta_from_baseline": _delta(last_pitch_spin_rate, avg_release_spin_rate),
        "extension_delta_from_baseline": _delta(last_pitch_extension, avg_release_extension),
        # E3: Handedness
        "batter_stand": _safe_hand(batter_row.get("batter_stand", "R")),
        "p_throws": _safe_hand(pitcher_row.get("p_throws", "R")),
        # D1: Player-specific usage rates
        "pitcher_fastball_usage_rate": pf_zone.get("fastball_usage_rate"),
        "pitcher_breaking_usage_rate": pf_zone.get("breaking_usage_rate"),
        "pitcher_offspeed_usage_rate": pf_zone.get("offspeed_usage_rate"),
    }
| def _normalize_probs(fastball_prob: float, breaking_prob: float, offspeed_prob: float) -> tuple[float, float, float]: | |
| total = fastball_prob + breaking_prob + offspeed_prob | |
| if total <= 0: | |
| return 0.48, 0.32, 0.20 | |
| return ( | |
| fastball_prob / total, | |
| breaking_prob / total, | |
| offspeed_prob / total, | |
| ) | |
def predict_next_pitch_distribution(feature_row: dict[str, Any]) -> dict[str, Any]:
    """
    Pitch sequencing model v2.1

    Count-aware, sequence-aware, and lightly live-telemetry-aware.

    Starts from a count-conditioned (balls, strikes) pitch-class table,
    optionally blends it 50/50 with the pitcher's own usage rates, then applies
    small additive adjustments for the previous pitch, batter quality, live
    velocity/spin/extension deltas and handedness before normalising. A
    separate heart/shadow/chase zone distribution is derived from the count,
    the batter's hard-hit rate and the base/out state.

    Args:
        feature_row: Feature dict keyed like the output of
            ``build_sequence_features``. Missing or unparseable counts are
            treated as 0; missing or unparseable numeric features simply skip
            their adjustment (logged at debug level).

    Returns:
        ``{"fastball_prob", "breaking_prob", "offspeed_prob", "zone_probs"}``
        where ``zone_probs`` holds ``heart``, ``shadow`` and ``chase``. The
        three zone values sum to 1, and the three pitch-class values are the
        output of ``_normalize_probs``.
    """
    conversion_errors = (TypeError, ValueError, OverflowError)

    def _count(key: str) -> int:
        """Read an integer count, treating None/NaN/garbage as 0 instead of raising."""
        raw = feature_row.get(key, 0)
        try:
            # Going through float accepts values such as 2.0 or "2.0".
            return int(float(raw))
        except conversion_errors:
            logger.debug("[sequence_model] %s unreadable (%r); using 0", key, raw)
            return 0

    def _metric(key: str, label: str) -> float | None:
        """Read an optional numeric feature; log and return None when unparseable."""
        raw = feature_row.get(key)
        if raw is None:
            return None
        try:
            return float(raw)
        except conversion_errors as exc:
            logger.debug("[sequence_model] %s skipped: %s", label, exc)
            return None

    balls = _count("balls")
    strikes = _count("strikes")
    outs = _count("outs")
    last_pitch_type = str(feature_row.get("last_pitch_type", "") or "").lower()

    # F1: Count-conditioned pitch distribution (full lookup table replaces crude adjustments)
    # Keys are (balls, strikes); values are (fastball, breaking, offspeed).
    count_state_table = {
        (0, 0): (0.54, 0.28, 0.18),
        (1, 0): (0.55, 0.27, 0.18),
        (2, 0): (0.62, 0.22, 0.16),
        (3, 0): (0.75, 0.12, 0.13),
        (0, 1): (0.52, 0.30, 0.18),
        (1, 1): (0.52, 0.30, 0.18),
        (2, 1): (0.56, 0.26, 0.18),
        (3, 1): (0.58, 0.24, 0.18),
        (0, 2): (0.42, 0.40, 0.18),
        (1, 2): (0.44, 0.36, 0.20),
        (2, 2): (0.48, 0.32, 0.20),
        (3, 2): (0.52, 0.28, 0.20),
    }
    # Counts above 3-2 are capped; anything else off the table (e.g. negative
    # counts) falls back to the 0-0 row.
    count_key = (min(balls, 3), min(strikes, 2))
    cs_fb, cs_br, cs_os = count_state_table.get(count_key, (0.54, 0.28, 0.18))

    # D1: Blend with player-specific usage rates if available (50/50)
    fastball_prob, breaking_prob, offspeed_prob = cs_fb, cs_br, cs_os
    player_fb = feature_row.get("pitcher_fastball_usage_rate")
    player_br = feature_row.get("pitcher_breaking_usage_rate")
    player_os = feature_row.get("pitcher_offspeed_usage_rate")
    if player_fb is not None and player_br is not None and player_os is not None:
        try:
            p_fb, p_br, p_os = float(player_fb), float(player_br), float(player_os)
        except conversion_errors as exc:
            logger.debug("[sequence_model] player usage rate blend skipped: %s", exc)
        else:
            p_total = p_fb + p_br + p_os
            # Only blend with a sane usage profile: finite positive total and no
            # negative component (a NaN component makes p_total NaN and fails here).
            if 0 < p_total < float("inf") and min(p_fb, p_br, p_os) >= 0:
                fastball_prob = (cs_fb + p_fb / p_total) / 2
                breaking_prob = (cs_br + p_br / p_total) / 2
                offspeed_prob = (cs_os + p_os / p_total) / 2

    # Previous-pitch memory
    if "slider" in last_pitch_type or "curve" in last_pitch_type:
        fastball_prob += 0.04
        breaking_prob -= 0.03
        offspeed_prob -= 0.01
    elif "change" in last_pitch_type or "split" in last_pitch_type:
        fastball_prob += 0.03
        offspeed_prob -= 0.02
    elif "fastball" in last_pitch_type or "sinker" in last_pitch_type or "cutter" in last_pitch_type:
        fastball_prob -= 0.03
        breaking_prob += 0.02
        offspeed_prob += 0.01

    # Better hitters tend to see fewer heart-zone fastballs in advantage counts
    batter_ev90 = _metric("batter_ev90", "EV90 adjustment")
    if batter_ev90 is not None and batter_ev90 >= 102 and balls >= 2:
        fastball_prob -= 0.03
        breaking_prob += 0.02
        offspeed_prob += 0.01

    batter_barrel_rate = _metric("batter_barrel_rate", "barrel_rate adjustment")
    if batter_barrel_rate is not None and batter_barrel_rate >= 0.10 and strikes < 2:
        fastball_prob -= 0.02
        breaking_prob += 0.01
        offspeed_prob += 0.01

    # Live overlay: if velo is down, reduce fastball confidence slightly
    velo_delta = _metric("velo_delta_from_baseline", "live velo overlay")
    if velo_delta is not None:
        if velo_delta <= -1.0:
            fastball_prob -= 0.03
            breaking_prob += 0.02
            offspeed_prob += 0.01
        elif velo_delta >= 1.0:
            fastball_prob += 0.02
            breaking_prob -= 0.01
            offspeed_prob -= 0.01

    # Live overlay: if spin is down, slightly reduce breaking-ball trust
    spin_delta = _metric("spin_delta_from_baseline", "live spin overlay")
    if spin_delta is not None and spin_delta <= -120:
        breaking_prob -= 0.02
        fastball_prob += 0.01
        offspeed_prob += 0.01

    # Live overlay: shorter extension can imply less deception
    extension_delta = _metric("extension_delta_from_baseline", "live extension overlay")
    if extension_delta is not None and extension_delta <= -0.25:
        fastball_prob -= 0.01
        offspeed_prob += 0.01

    # E3: Handedness-aware pitch tendency adjustment
    # Codes are normalised so "r" or " L " still match; any other code is ignored.
    batter_stand = str(feature_row.get("batter_stand", "R")).strip().upper()
    p_throws = str(feature_row.get("p_throws", "R")).strip().upper()
    if p_throws == "R" and batter_stand == "L":
        # RHP vs LHB: sliders break away from LHB
        fastball_prob -= 0.02
        breaking_prob += 0.03
    elif p_throws == "L" and batter_stand == "R":
        # LHP vs RHB: changeup advantage
        fastball_prob -= 0.02
        offspeed_prob += 0.04
    elif p_throws == batter_stand and p_throws in ("L", "R"):
        # Same-hand matchup: breaking balls break away
        fastball_prob -= 0.01
        breaking_prob += 0.02

    # The stacked subtractions above can push a class marginally below zero;
    # clamp first so the normalised output never carries a negative probability.
    fastball_prob, breaking_prob, offspeed_prob = _normalize_probs(
        max(fastball_prob, 0.0),
        max(breaking_prob, 0.0),
        max(offspeed_prob, 0.0),
    )

    # Zone distribution: baseline weights, then count-driven shifts.
    heart = 0.22
    shadow = 0.43
    chase = 0.35
    if balls >= 2:
        heart += 0.04
        chase -= 0.03
        shadow -= 0.01
    if strikes >= 2:
        chase += 0.06
        heart -= 0.03
        shadow -= 0.03
    if balls == 3 and strikes == 2:
        heart += 0.05
        chase -= 0.03
        shadow -= 0.02

    batter_hard_hit_rate = _metric("batter_hard_hit_rate", "hard_hit_rate adjustment")
    if batter_hard_hit_rate is not None and batter_hard_hit_rate >= 0.42:
        heart -= 0.02
        shadow += 0.01
        chase += 0.01

    # Two outs with a runner on second or third: shift weight toward the shadow zone.
    if outs == 2 and (feature_row.get("runner_on_2b") or feature_row.get("runner_on_3b")):
        shadow += 0.02
        heart -= 0.01
        chase -= 0.01

    zone_total = heart + shadow + chase
    heart /= zone_total
    shadow /= zone_total
    chase /= zone_total

    return {
        "fastball_prob": fastball_prob,
        "breaking_prob": breaking_prob,
        "offspeed_prob": offspeed_prob,
        "zone_probs": {
            "heart": heart,
            "shadow": shadow,
            "chase": chase,
        },
    }