Spaces:
Running
Running
| from __future__ import annotations | |
| import os | |
| import sys | |
| import types | |
| import unittest | |
| from unittest.mock import patch | |
| import pandas as pd | |
| sys.path.insert(0, os.path.dirname(os.path.dirname(__file__))) | |
| from models.hr_probability_engine import build_hr_probability_result | |
| def _sample_statcast_df() -> pd.DataFrame: | |
| batter_rows = [ | |
| { | |
| "player_name": "Slugger Sam", | |
| "launch_speed": 102 + i, | |
| "launch_angle": 24 + (i % 4), | |
| "estimated_woba_using_speedangle": 0.390, | |
| "stand": "L", | |
| "game_date": f"2026-03-{10 + i:02d}", | |
| "game_pk": 100 + i, | |
| "events": "home_run" if i % 4 == 0 else "single", | |
| "description": "hit_into_play", | |
| "pitch_name": "Slider", | |
| "plate_x": 0.1, | |
| "plate_z": 2.6, | |
| } | |
| for i in range(10) | |
| ] | |
| pitcher_rows = [ | |
| { | |
| "player_name": "Pitcher Pete", | |
| "launch_speed": 89 + (i % 3), | |
| "launch_angle": 14 + (i % 5), | |
| "release_speed": 94 + (i % 2), | |
| "release_spin_rate": 2250 + i * 5, | |
| "release_extension": 6.1, | |
| "pfx_x": 0.8, | |
| "pfx_z": 1.2, | |
| "estimated_woba_using_speedangle": 0.305, | |
| "p_throws": "R", | |
| "game_date": f"2026-03-{10 + i:02d}", | |
| "game_pk": 200 + i, | |
| "events": "field_out", | |
| "description": "swinging_strike" if i % 3 == 0 else "called_strike", | |
| "pitch_name": "Four-Seam Fastball", | |
| "pitch_type": "FF", | |
| "plate_x": 0.2, | |
| "plate_z": 2.8, | |
| "release_pos_x": -1.2, | |
| "release_pos_y": 54.0, | |
| "release_pos_z": 5.8, | |
| "vx0": 5.0, | |
| "vz0": -4.0, | |
| "ax": -10.0, | |
| "az": -20.0, | |
| } | |
| for i in range(12) | |
| ] | |
| return pd.DataFrame(batter_rows + pitcher_rows) | |
class TestHrProbabilityEngine(unittest.TestCase):
    """Tests for ``build_hr_probability_result`` with its helpers stubbed out.

    Every test replaces the engine module's adjustment helpers with fixed
    return values (see ``_run_engine``) and then checks which fields and
    layer names appear in the returned result mapping.
    """

    # Import path of the module whose helper functions are patched.
    _ENGINE = "models.hr_probability_engine"

    def setUp(self) -> None:
        """Create the combined fixture frame and per-player slices of it."""
        self.statcast_df = _sample_statcast_df()
        self.batter_df = self.statcast_df[
            self.statcast_df["player_name"] == "Slugger Sam"
        ].reset_index(drop=True)
        self.pitcher_df = self.statcast_df[
            self.statcast_df["player_name"] == "Pitcher Pete"
        ].reset_index(drop=True)

    def _run_engine(
        self,
        *,
        pitcher_adj: float = 0.0,
        env_adj: dict[str, float] | None = None,
        rolling_adj: dict[str, float] | None = None,
        **engine_kwargs,
    ):
        """Call ``build_hr_probability_result`` with the shared stubs active.

        The pitcher, environment, trajectory and rolling helpers of the
        engine module are patched only for the duration of the engine call.

        Args:
            pitcher_adj: Value returned under ``"hr_adj"`` by the stubbed
                ``compute_pitcher_adjustment``.
            env_adj: Return value of the stubbed
                ``compute_environment_adjustment``; all-zero boosts if omitted.
            rolling_adj: Return value of the stubbed
                ``compute_upcoming_rolling_adjustment``; a zero rolling
                adjustment if omitted.
            **engine_kwargs: Forwarded unchanged to
                ``build_hr_probability_result``.

        Returns:
            Whatever ``build_hr_probability_result`` returns.
        """
        if env_adj is None:
            env_adj = {
                "env_hr_boost": 0.0,
                "park_hr_boost": 0.0,
                "weather_hr_boost": 0.0,
            }
        if rolling_adj is None:
            rolling_adj = {"rolling_hr_adjustment": 0.0}

        engine = self._ENGINE
        with patch(f"{engine}.compute_pitcher_adjustment", return_value={"hr_adj": pitcher_adj}), \
                patch(f"{engine}.compute_environment_adjustment", return_value=env_adj), \
                patch(f"{engine}.build_trajectory_features", return_value={}), \
                patch(f"{engine}.compute_trajectory_adjustment", return_value={"hr_adj": 0.0}), \
                patch(f"{engine}.compute_upcoming_rolling_adjustment", return_value=rolling_adj):
            return build_hr_probability_result(**engine_kwargs)

    def test_baseline_parity_when_context_is_absent(self) -> None:
        """With all stubs at zero and no pitcher, adjusted equals baseline."""
        result = self._run_engine(
            statcast_df=self.statcast_df,
            batter_name="Slugger Sam",
            mode="pregame",
        )
        self.assertIsNotNone(result["baseline_hr_prob"])
        self.assertAlmostEqual(result["baseline_hr_prob"], result["adjusted_hr_prob"], places=6)
        self.assertIn("live_pitch_telemetry", result["skipped_layers"])

    def test_pitcher_context_moves_probability(self) -> None:
        """A positive stubbed pitcher adjustment raises the adjusted probability."""
        result = self._run_engine(
            pitcher_adj=0.02,
            statcast_df=self.statcast_df,
            batter_name="Slugger Sam",
            pitcher_name="Pitcher Pete",
            mode="pregame",
        )
        self.assertGreater(result["adjusted_hr_prob"], result["baseline_hr_prob"])
        self.assertAlmostEqual(result["pregame_pitcher_context_adj"], 0.02, places=6)
        self.assertIn("pitcher", result["applied_layers"])

    def test_low_sample_pitcher_adjustment_is_shrunk(self) -> None:
        """A pitcher feature row with sample_size 10 yields a reduced adjustment."""
        low_sample_row = {"sample_size": 10, "p_throws": "R"}
        with patch(f"{self._ENGINE}.build_pitcher_feature_row", return_value=low_sample_row):
            result = self._run_engine(
                pitcher_adj=0.02,
                rolling_adj={"rolling_hr_adjustment": 0.0, "pitcher_rolling_confidence": 0.0},
                batter_statcast_df=self.batter_df,
                batter_name="Slugger Sam",
                pitcher_statcast_df=self.pitcher_df,
                pitcher_name="Pitcher Pete",
                mode="pregame",
            )
        self.assertLess(float(result["pitcher_hr_adjustment"]), 0.02)
        self.assertLess(float(result["pitcher_reliability"]), 0.2)

    def test_matchup_layers_are_applied_in_pregame_mode(self) -> None:
        """Zone, family-zone and arsenal layers are applied when their modules exist."""
        # Stand-in modules: feature builders return empty rows, matchup
        # functions return fixed non-zero boosts.
        fake_batter_zone = types.SimpleNamespace(
            build_batter_zone_feature_row=lambda *args, **kwargs: {}
        )
        fake_pitcher_zone = types.SimpleNamespace(
            build_pitcher_zone_feature_row=lambda *args, **kwargs: {}
        )
        fake_zone_matchup = types.SimpleNamespace(
            compute_zone_matchup_adjustment=lambda *args, **kwargs: {"hr_zone_boost": 0.20}
        )
        fake_family_store = types.SimpleNamespace(
            build_batter_family_zone_feature_row=lambda *args, **kwargs: {},
            build_pitcher_family_zone_feature_row=lambda *args, **kwargs: {},
        )
        fake_matchup_model = types.SimpleNamespace(
            compute_family_zone_matchup_adjustment=lambda *args, **kwargs: {"family_zone_hr_boost": 0.10}
        )
        fake_batter_arsenal = types.SimpleNamespace(
            build_batter_arsenal_feature_row=lambda *args, **kwargs: {}
        )
        fake_pitcher_arsenal = types.SimpleNamespace(
            build_pitcher_arsenal_feature_row=lambda *args, **kwargs: {}
        )
        fake_arsenal_matchup = types.SimpleNamespace(
            compute_arsenal_matchup_adjustment=lambda *args, **kwargs: {"arsenal_hr_boost": 0.10}
        )
        fake_modules = {
            "models.batter_zone_model": fake_batter_zone,
            "models.pitcher_zone_model": fake_pitcher_zone,
            "models.zone_matchup_model": fake_zone_matchup,
            "models.family_zone_profile_store": fake_family_store,
            "models.matchup_model": fake_matchup_model,
            "models.batter_arsenal_model": fake_batter_arsenal,
            "models.pitcher_arsenal_model": fake_pitcher_arsenal,
            "models.arsenal_matchup_model": fake_arsenal_matchup,
        }
        # Registering the stand-ins in sys.modules makes any import of these
        # names during the engine call resolve to them.
        with patch.dict(sys.modules, fake_modules):
            result = self._run_engine(
                statcast_df=self.statcast_df,
                batter_name="Slugger Sam",
                pitcher_name="Pitcher Pete",
                mode="pregame",
            )
        self.assertNotEqual(result["zone_hr_adjustment"], 0.0)
        self.assertNotEqual(result["family_zone_hr_adjustment"], 0.0)
        self.assertNotEqual(result["arsenal_hr_adjustment"], 0.0)
        self.assertIn("zone", result["applied_layers"])
        self.assertIn("family_zone", result["applied_layers"])
        self.assertIn("arsenal", result["applied_layers"])

    def test_environment_layers_are_recorded(self) -> None:
        """Stubbed environment boosts are surfaced in the result unchanged."""
        result = self._run_engine(
            env_adj={"env_hr_boost": 0.03, "park_hr_boost": 0.01, "weather_hr_boost": 0.02},
            statcast_df=self.statcast_df,
            batter_name="Slugger Sam",
            mode="pregame",
            game_row={"venue": "Dodger Stadium"},
            weather_row={"temperature_f": 88, "wind_speed_mph": 12, "wind_direction_deg": 180},
        )
        self.assertAlmostEqual(result["env_hr_adjustment"], 0.03, places=6)
        self.assertAlmostEqual(result["park_hr_adjustment"], 0.01, places=6)
        self.assertAlmostEqual(result["weather_hr_adjustment"], 0.02, places=6)
        self.assertIn("environment", result["applied_layers"])

    def test_engine_returns_raw_calibrated_confidence_and_opportunity_fields(self) -> None:
        """Raw/calibrated probabilities, confidence and opportunity fields are set."""
        result = self._run_engine(
            pitcher_adj=0.01,
            env_adj={"env_hr_boost": 0.01, "park_hr_boost": 0.01, "weather_hr_boost": 0.0},
            rolling_adj={"rolling_hr_adjustment": 0.01, "pitcher_rolling_confidence": 0.9},
            batter_statcast_df=self.batter_df,
            batter_name="Slugger Sam",
            pitcher_statcast_df=self.pitcher_df,
            pitcher_name="Pitcher Pete",
            game_row={
                "venue": "Yankee Stadium",
                "lineup_slot": 3,
                "lineup_slot_source": "projected",
                "team_total": 4.8,
                "team_total_source": "projected",
            },
            mode="pregame",
        )
        self.assertIsNotNone(result["raw_hr_prob"])
        self.assertIsNotNone(result["calibrated_hr_prob"])
        self.assertAlmostEqual(result["pregame_hr_prob"], result["calibrated_hr_prob"], places=6)
        # Two bound checks instead of one assertTrue so that a failure
        # message shows the offending confidence value.
        confidence = float(result["confidence_score"])
        self.assertGreaterEqual(confidence, 1.0)
        self.assertLessEqual(confidence, 100.0)
        self.assertEqual(result["lineup_slot_used"], 3)
        self.assertEqual(result["team_total_used"], 4.8)
        self.assertIn("opportunity", result["applied_layers"])
        self.assertNotEqual(result["raw_hr_prob"], result["calibrated_hr_prob"])
        self.assertEqual(result["projected_home_pitcher"], "")
        self.assertEqual(result["hr_model_tier"], "partial_telemetry")
        self.assertTrue(bool(result["modeled_row_available"]))

    def test_separate_batter_and_pitcher_dataframes_are_supported(self) -> None:
        """The engine accepts per-player frames instead of one combined frame."""
        result = self._run_engine(
            pitcher_adj=0.01,
            batter_statcast_df=self.batter_df,
            batter_name="Slugger Sam",
            pitcher_statcast_df=self.pitcher_df,
            pitcher_name="Pitcher Pete",
            mode="pregame",
        )
        self.assertIsNotNone(result["baseline_hr_prob"])
        self.assertGreater(result["adjusted_hr_prob"], result["baseline_hr_prob"])
        self.assertEqual(result["pitcher_name"], "Pitcher Pete")

    def test_engine_tracks_projected_starter_fields_when_supplied(self) -> None:
        """Projected-starter fields given in ``game_row`` are echoed in the result."""
        result = self._run_engine(
            pitcher_adj=0.01,
            batter_statcast_df=self.batter_df,
            batter_name="Slugger Sam",
            pitcher_statcast_df=self.pitcher_df,
            pitcher_name="Pitcher Pete",
            game_row={
                "projected_home_pitcher": "Pitcher Pete",
                "projected_away_pitcher": "Other Arm",
                "projected_starter_available": True,
                "projected_starter_match_status": "matched_projected_home",
            },
            mode="pregame",
        )
        self.assertEqual(result["projected_home_pitcher"], "Pitcher Pete")
        self.assertEqual(result["projected_away_pitcher"], "Other Arm")
        self.assertTrue(bool(result["projected_starter_available"]))
        self.assertEqual(result["projected_starter_match_status"], "matched_projected_home")
        self.assertTrue(bool(result["modeled_row_available"]))
# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()