Spaces:
Sleeping
Sleeping
| from __future__ import annotations | |
| import numpy as np | |
| import pandas as pd | |
def _coerce_float(value: object) -> float | None:
    """Return *value* as a float, or None when it is missing or not numeric."""
    if value is None:
        return None
    try:
        return float(value)
    except (TypeError, ValueError, OverflowError):
        # Best-effort: an unparsable stat simply disables its adjustment.
        return None


def _clamp_probability(value: float, name: str) -> float:
    """Clamp *value* into [0, 1]; reject NaN, which cannot be clamped."""
    prob = float(value)
    if np.isnan(prob):
        raise ValueError(f"{name} must not be NaN")
    return min(1.0, max(0.0, prob))


def _hit_type_probs(batter_row: dict | None) -> np.ndarray:
    """Return normalized [single, double, triple] odds for a non-HR hit."""
    # Baseline split of non-HR hits (the original comment describes it as an
    # empirical MLB 2024 baseline).
    single_p, double_p, triple_p = 0.62, 0.33, 0.05

    if batter_row is not None:
        pull_rate = _coerce_float(batter_row.get("pull_rate"))
        air_ball_rate = _coerce_float(batter_row.get("air_ball_rate"))
        avg_launch_angle = _coerce_float(batter_row.get("avg_launch_angle"))

        # High pull + high air-ball rate: shift singles toward doubles.
        if (
            pull_rate is not None
            and air_ball_rate is not None
            and pull_rate >= 0.45
            and air_ball_rate >= 0.45
        ):
            single_p -= 0.04
            double_p += 0.04

        # Fly-ball tendency: small boost to doubles and triples.
        if avg_launch_angle is not None and avg_launch_angle >= 18:
            single_p -= 0.02
            double_p += 0.01
            triple_p += 0.01

    probs = np.array([single_p, double_p, triple_p])
    return probs / probs.sum()


def simulate_batter_outcomes(
    hit_prob: float,
    hr_prob: float,
    n_sims: int = 10000,
    batter_row: dict | None = None,
    *,
    seed: int | np.random.Generator | None = None,
) -> pd.DataFrame:
    """Simulate ``n_sims`` independent outcomes for one batter.

    Each simulation is exactly one of: home run, non-HR hit (single, double
    or triple), or no hit. A home run counts as a hit, so ``hit_prob`` is the
    overall hit probability *including* home runs.

    A single uniform draw ``u`` per simulation is partitioned as
    ``u < hr_prob`` -> home run, ``hr_prob <= u < hit_prob`` -> non-HR hit.
    This keeps the outcomes mutually exclusive while making the marginal
    rates match the inputs: P(hr) == hr_prob and P(hit) == hit_prob.
    (Drawing the two events independently and masking afterwards would
    under-count hits by ``hr_prob * (hit_prob - hr_prob)``.)

    Args:
        hit_prob: Probability of any hit, home runs included. Clamped to
            [0, 1]; if it is below ``hr_prob`` it is raised to ``hr_prob``.
        hr_prob: Probability of a home run. Clamped to [0, 1].
        n_sims: Number of simulations (rows in the result).
        batter_row: Optional mapping with ``pull_rate``, ``air_ball_rate``
            and ``avg_launch_angle``; used to tilt the single/double/triple
            split. Missing or non-numeric values are ignored.
        seed: Forwarded to ``np.random.default_rng``. ``None`` (the default)
            uses fresh entropy on every call; pass an int or a ``Generator``
            for reproducible output.

    Returns:
        DataFrame with integer columns ``hit`` (0/1), ``hr`` (0/1) and
        ``total_bases`` (0, 1, 2, 3 or 4).

    Raises:
        ValueError: If ``hit_prob`` or ``hr_prob`` is NaN.
    """
    rng = np.random.default_rng(seed)

    hr_p = _clamp_probability(hr_prob, "hr_prob")
    # A home run is a hit, so the overall hit rate can never be below hr_p.
    hit_p = max(_clamp_probability(hit_prob, "hit_prob"), hr_p)

    draws = rng.random(n_sims)  # uniform on [0, 1)
    is_hr = draws < hr_p
    is_other_hit = ~is_hr & (draws < hit_p)

    hits = (is_hr | is_other_hit).astype(int)
    hrs = is_hr.astype(int)

    total_bases = np.zeros(n_sims, dtype=int)
    total_bases[is_hr] = 4

    other_hit_count = int(is_other_hit.sum())
    if other_hit_count > 0:
        total_bases[is_other_hit] = rng.choice(
            [1, 2, 3],
            size=other_hit_count,
            p=_hit_type_probs(batter_row),
        )

    return pd.DataFrame(
        {
            "hit": hits,
            "hr": hrs,
            "total_bases": total_bases,
        }
    )