from __future__ import annotations

import numpy as np
import pandas as pd

# A4: Baseline split of non-HR hits into single / double / triple
# (empirical MLB 2024 baseline).
_BASE_HIT_TYPE_PROBS = (0.62, 0.33, 0.05)


def _as_float(value: object) -> float | None:
    """Return *value* as a float, or ``None`` if it is missing or not numeric."""
    if value is None:
        return None
    try:
        return float(value)
    except (TypeError, ValueError, OverflowError):
        # Malformed batter fields are ignored on purpose: the adjustment that
        # depends on them is simply skipped (best-effort behaviour).
        return None


def _clamp_probability(value: float, name: str) -> float:
    """Coerce *value* to a float in [0, 1]; raise ``ValueError`` on NaN."""
    prob = float(value)
    if np.isnan(prob):
        # min()/max() clamping is order-dependent with NaN and could silently
        # turn it into 1.0, so reject it explicitly.
        raise ValueError(f"{name} must not be NaN")
    return min(1.0, max(0.0, prob))


def _hit_type_distribution(batter_row: dict | None) -> np.ndarray:
    """Return normalized [single, double, triple] probabilities for non-HR hits."""
    single_p, double_p, triple_p = _BASE_HIT_TYPE_PROBS

    if batter_row is not None:
        pull_rate = _as_float(batter_row.get("pull_rate"))
        air_ball_rate = _as_float(batter_row.get("air_ball_rate"))
        avg_launch_angle = _as_float(batter_row.get("avg_launch_angle"))

        # High pull + high air ball -> more doubles.
        if (
            pull_rate is not None
            and air_ball_rate is not None
            and pull_rate >= 0.45
            and air_ball_rate >= 0.45
        ):
            single_p -= 0.04
            double_p += 0.04

        # Fly ball tendency -> slight 2B/3B boost.
        if avg_launch_angle is not None and avg_launch_angle >= 18:
            single_p -= 0.02
            double_p += 0.01
            triple_p += 0.01

    probs = np.array([single_p, double_p, triple_p])
    return probs / probs.sum()


def simulate_batter_outcomes(
    hit_prob: float,
    hr_prob: float,
    n_sims: int = 10000,
    batter_row: dict | None = None,
    *,
    seed: int | np.random.Generator | None = None,
) -> pd.DataFrame:
    """Simulate per-plate-appearance hit / HR / total-base outcomes.

    Each simulated plate appearance is first tested for a home run; if it is
    not a home run it is tested for a non-HR hit, whose type (single, double,
    triple) is drawn from a batter-adjusted distribution.

    Args:
        hit_prob: Probability of any hit, home runs included. Clamped to
            [0, 1].
        hr_prob: Probability of a home run. Clamped to [0, 1].
        n_sims: Number of plate appearances to simulate.
        batter_row: Optional mapping that may provide ``"pull_rate"``,
            ``"air_ball_rate"`` and ``"avg_launch_angle"``. Missing or
            non-numeric values leave the baseline hit-type split unchanged.
        seed: Optional seed (or ``numpy.random.Generator``) forwarded to
            ``numpy.random.default_rng``. ``None`` (the default) draws fresh
            entropy on every call.

    Returns:
        A DataFrame with ``n_sims`` rows and integer columns ``"hit"``
        (0/1), ``"hr"`` (0/1) and ``"total_bases"`` (0, 1, 2, 3 or 4).

    Raises:
        ValueError: If ``hit_prob`` or ``hr_prob`` is NaN.
    """
    # A2: fresh seed each run unless the caller pins one.
    rng = np.random.default_rng(seed)

    hr_p = _clamp_probability(hr_prob, "hr_prob")
    hit_p = _clamp_probability(hit_prob, "hit_prob")

    # A3: Hierarchical sampling - HR first, then hit (mutually exclusive).
    # The second draw only counts for PAs that were not home runs, so it must
    # use the conditional probability P(non-HR hit | no HR). Using the plain
    # difference hit_p - hr_p here would shrink the realised hit rate by a
    # further factor of (1 - hr_p).
    if hr_p < 1.0:
        non_hr_hit_p = min(1.0, max(0.0, hit_p - hr_p) / (1.0 - hr_p))
    else:
        non_hr_hit_p = 0.0

    is_hr = rng.binomial(1, hr_p, size=n_sims).astype(bool)
    is_non_hr_hit = rng.binomial(1, non_hr_hit_p, size=n_sims).astype(bool) & ~is_hr

    hits = (is_hr | is_non_hr_hit).astype(int)
    hrs = is_hr.astype(int)

    total_bases = np.zeros(n_sims, dtype=int)
    total_bases[is_hr] = 4  # HRs take priority.

    # Non-HR hits are distributed by the batter-specific type distribution.
    hit_count = int(is_non_hr_hit.sum())
    if hit_count > 0:
        hit_type_probs = _hit_type_distribution(batter_row)
        total_bases[is_non_hr_hit] = rng.choice(
            [1, 2, 3], size=hit_count, p=hit_type_probs
        )

    return pd.DataFrame(
        {
            "hit": hits,
            "hr": hrs,
            "total_bases": total_bases,
        }
    )