"""Unit tests for ``models.hr_probability_engine.build_hr_probability_result``.

Each test replaces the engine's collaborator functions with canned return
values (see ``_stubbed_engine``) and then asserts on the keys of the result
mapping returned by ``build_hr_probability_result``.
"""

from __future__ import annotations

import contextlib
import os
import sys
import types
import unittest
from typing import Any, Iterator
from unittest.mock import patch

import pandas as pd

# Make the project root importable when this file is executed directly.
# abspath() guards against a relative ``__file__`` (e.g. "test_x.py"), for
# which the double dirname() would otherwise collapse to "".
_PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, _PROJECT_ROOT)

from models.hr_probability_engine import build_hr_probability_result  # noqa: E402

# Dotted path of the module whose collaborators are patched in every test.
_ENGINE = "models.hr_probability_engine"


def _neutral_stubs() -> dict[str, Any]:
    """Return fresh zero-valued canned results for the patched collaborators.

    A new mapping (with new nested dicts) is built on every call so that a
    return value handed to one test can never be observed by another.
    """
    return {
        "compute_pitcher_adjustment": {"hr_adj": 0.0},
        "compute_environment_adjustment": {
            "env_hr_boost": 0.0,
            "park_hr_boost": 0.0,
            "weather_hr_boost": 0.0,
        },
        "build_trajectory_features": {},
        "compute_trajectory_adjustment": {"hr_adj": 0.0},
        "compute_upcoming_rolling_adjustment": {"rolling_hr_adjustment": 0.0},
    }


@contextlib.contextmanager
def _stubbed_engine(**overrides: Any) -> Iterator[None]:
    """Patch engine collaborators with canned return values for one test.

    Args:
        **overrides: Maps an attribute name of ``models.hr_probability_engine``
            to the value its mock should return. Names present in
            ``_neutral_stubs()`` replace the zero-valued default; any other
            name is patched in addition to the defaults.

    Yields:
        None. All patches are active inside the ``with`` body and are undone
        when it exits, including on error.
    """
    canned_results = {**_neutral_stubs(), **overrides}
    with contextlib.ExitStack() as stack:
        for attr_name, canned in canned_results.items():
            stack.enter_context(
                patch(f"{_ENGINE}.{attr_name}", return_value=canned)
            )
        yield


def _sample_statcast_df() -> pd.DataFrame:
    """Build a small synthetic frame: 10 batter rows followed by 12 pitcher rows.

    Batter rows carry ``player_name == "Slugger Sam"`` and pitcher rows carry
    ``player_name == "Pitcher Pete"``; the pitcher rows have additional
    release/movement columns that the batter rows lack.
    """
    batter_rows = [
        {
            "player_name": "Slugger Sam",
            "launch_speed": 102 + i,
            "launch_angle": 24 + (i % 4),
            "estimated_woba_using_speedangle": 0.390,
            "stand": "L",
            "game_date": f"2026-03-{10 + i:02d}",
            "game_pk": 100 + i,
            # Every fourth batted ball is a home run.
            "events": "home_run" if i % 4 == 0 else "single",
            "description": "hit_into_play",
            "pitch_name": "Slider",
            "plate_x": 0.1,
            "plate_z": 2.6,
        }
        for i in range(10)
    ]
    pitcher_rows = [
        {
            "player_name": "Pitcher Pete",
            "launch_speed": 89 + (i % 3),
            "launch_angle": 14 + (i % 5),
            "release_speed": 94 + (i % 2),
            "release_spin_rate": 2250 + i * 5,
            "release_extension": 6.1,
            "pfx_x": 0.8,
            "pfx_z": 1.2,
            "estimated_woba_using_speedangle": 0.305,
            "p_throws": "R",
            "game_date": f"2026-03-{10 + i:02d}",
            "game_pk": 200 + i,
            "events": "field_out",
            "description": "swinging_strike" if i % 3 == 0 else "called_strike",
            "pitch_name": "Four-Seam Fastball",
            "pitch_type": "FF",
            "plate_x": 0.2,
            "plate_z": 2.8,
            "release_pos_x": -1.2,
            "release_pos_y": 54.0,
            "release_pos_z": 5.8,
            "vx0": 5.0,
            "vz0": -4.0,
            "ax": -10.0,
            "az": -20.0,
        }
        for i in range(12)
    ]
    return pd.DataFrame(batter_rows + pitcher_rows)


class TestHrProbabilityEngine(unittest.TestCase):
    """Exercise ``build_hr_probability_result`` with stubbed collaborators."""

    def setUp(self) -> None:
        # One combined frame plus per-player slices, so tests can call the
        # engine either with ``statcast_df`` or with separate
        # ``batter_statcast_df`` / ``pitcher_statcast_df`` arguments.
        self.statcast_df = _sample_statcast_df()
        player_names = self.statcast_df["player_name"]
        self.batter_df = self.statcast_df[
            player_names == "Slugger Sam"
        ].reset_index(drop=True)
        self.pitcher_df = self.statcast_df[
            player_names == "Pitcher Pete"
        ].reset_index(drop=True)

    def test_baseline_parity_when_context_is_absent(self) -> None:
        # With every stub returning zero and no pitcher/game context supplied,
        # the adjusted probability must equal the baseline.
        with _stubbed_engine():
            result = build_hr_probability_result(
                statcast_df=self.statcast_df,
                batter_name="Slugger Sam",
                mode="pregame",
            )

        self.assertIsNotNone(result["baseline_hr_prob"])
        self.assertAlmostEqual(
            result["baseline_hr_prob"], result["adjusted_hr_prob"], places=6
        )
        self.assertIn("live_pitch_telemetry", result["skipped_layers"])

    def test_pitcher_context_moves_probability(self) -> None:
        # A positive stubbed pitcher adjustment must raise the adjusted value
        # above the baseline and be reported as an applied layer.
        with _stubbed_engine(compute_pitcher_adjustment={"hr_adj": 0.02}):
            result = build_hr_probability_result(
                statcast_df=self.statcast_df,
                batter_name="Slugger Sam",
                pitcher_name="Pitcher Pete",
                mode="pregame",
            )

        self.assertGreater(result["adjusted_hr_prob"], result["baseline_hr_prob"])
        self.assertAlmostEqual(
            result["pregame_pitcher_context_adj"], 0.02, places=6
        )
        self.assertIn("pitcher", result["applied_layers"])

    def test_low_sample_pitcher_adjustment_is_shrunk(self) -> None:
        # The pitcher feature row is stubbed with sample_size=10 and the
        # rolling confidence with 0.0; the reported adjustment must come out
        # below the raw stubbed 0.02 and reliability below 0.2.
        with _stubbed_engine(
            build_pitcher_feature_row={"sample_size": 10, "p_throws": "R"},
            compute_pitcher_adjustment={"hr_adj": 0.02},
            compute_upcoming_rolling_adjustment={
                "rolling_hr_adjustment": 0.0,
                "pitcher_rolling_confidence": 0.0,
            },
        ):
            result = build_hr_probability_result(
                batter_statcast_df=self.batter_df,
                batter_name="Slugger Sam",
                pitcher_statcast_df=self.pitcher_df,
                pitcher_name="Pitcher Pete",
                mode="pregame",
            )

        self.assertLess(float(result["pitcher_hr_adjustment"]), 0.02)
        self.assertLess(float(result["pitcher_reliability"]), 0.2)

    def test_matchup_layers_are_applied_in_pregame_mode(self) -> None:
        # Stand-in modules registered in sys.modules under the names of the
        # zone / family-zone / arsenal model modules. Feature builders return
        # empty rows; matchup functions return fixed non-zero boosts.
        fake_modules = {
            "models.batter_zone_model": types.SimpleNamespace(
                build_batter_zone_feature_row=lambda *args, **kwargs: {}
            ),
            "models.pitcher_zone_model": types.SimpleNamespace(
                build_pitcher_zone_feature_row=lambda *args, **kwargs: {}
            ),
            "models.zone_matchup_model": types.SimpleNamespace(
                compute_zone_matchup_adjustment=lambda *args, **kwargs: {
                    "hr_zone_boost": 0.20
                }
            ),
            "models.family_zone_profile_store": types.SimpleNamespace(
                build_batter_family_zone_feature_row=lambda *args, **kwargs: {},
                build_pitcher_family_zone_feature_row=lambda *args, **kwargs: {},
            ),
            "models.matchup_model": types.SimpleNamespace(
                compute_family_zone_matchup_adjustment=lambda *args, **kwargs: {
                    "family_zone_hr_boost": 0.10
                }
            ),
            "models.batter_arsenal_model": types.SimpleNamespace(
                build_batter_arsenal_feature_row=lambda *args, **kwargs: {}
            ),
            "models.pitcher_arsenal_model": types.SimpleNamespace(
                build_pitcher_arsenal_feature_row=lambda *args, **kwargs: {}
            ),
            "models.arsenal_matchup_model": types.SimpleNamespace(
                compute_arsenal_matchup_adjustment=lambda *args, **kwargs: {
                    "arsenal_hr_boost": 0.10
                }
            ),
        }

        with _stubbed_engine(), patch.dict(sys.modules, fake_modules):
            result = build_hr_probability_result(
                statcast_df=self.statcast_df,
                batter_name="Slugger Sam",
                pitcher_name="Pitcher Pete",
                mode="pregame",
            )

        self.assertNotEqual(result["zone_hr_adjustment"], 0.0)
        self.assertNotEqual(result["family_zone_hr_adjustment"], 0.0)
        self.assertNotEqual(result["arsenal_hr_adjustment"], 0.0)
        self.assertIn("zone", result["applied_layers"])
        self.assertIn("family_zone", result["applied_layers"])
        self.assertIn("arsenal", result["applied_layers"])

    def test_environment_layers_are_recorded(self) -> None:
        # The three stubbed environment boosts must be surfaced unchanged
        # under their corresponding *_hr_adjustment result keys.
        with _stubbed_engine(
            compute_environment_adjustment={
                "env_hr_boost": 0.03,
                "park_hr_boost": 0.01,
                "weather_hr_boost": 0.02,
            },
        ):
            result = build_hr_probability_result(
                statcast_df=self.statcast_df,
                batter_name="Slugger Sam",
                mode="pregame",
                game_row={"venue": "Dodger Stadium"},
                weather_row={
                    "temperature_f": 88,
                    "wind_speed_mph": 12,
                    "wind_direction_deg": 180,
                },
            )

        self.assertAlmostEqual(result["env_hr_adjustment"], 0.03, places=6)
        self.assertAlmostEqual(result["park_hr_adjustment"], 0.01, places=6)
        self.assertAlmostEqual(result["weather_hr_adjustment"], 0.02, places=6)
        self.assertIn("environment", result["applied_layers"])

    def test_engine_returns_raw_calibrated_confidence_and_opportunity_fields(
        self,
    ) -> None:
        # Checks the raw/calibrated probabilities, the confidence score range
        # and the lineup/team-total fields echoed back from ``game_row``.
        with _stubbed_engine(
            compute_pitcher_adjustment={"hr_adj": 0.01},
            compute_environment_adjustment={
                "env_hr_boost": 0.01,
                "park_hr_boost": 0.01,
                "weather_hr_boost": 0.0,
            },
            compute_upcoming_rolling_adjustment={
                "rolling_hr_adjustment": 0.01,
                "pitcher_rolling_confidence": 0.9,
            },
        ):
            result = build_hr_probability_result(
                batter_statcast_df=self.batter_df,
                batter_name="Slugger Sam",
                pitcher_statcast_df=self.pitcher_df,
                pitcher_name="Pitcher Pete",
                game_row={
                    "venue": "Yankee Stadium",
                    "lineup_slot": 3,
                    "lineup_slot_source": "projected",
                    "team_total": 4.8,
                    "team_total_source": "projected",
                },
                mode="pregame",
            )

        self.assertIsNotNone(result["raw_hr_prob"])
        self.assertIsNotNone(result["calibrated_hr_prob"])
        self.assertAlmostEqual(
            result["pregame_hr_prob"], result["calibrated_hr_prob"], places=6
        )
        # Two bound checks instead of assertTrue(lo <= x <= hi) so a failure
        # reports the offending value.
        confidence = float(result["confidence_score"])
        self.assertGreaterEqual(confidence, 1.0)
        self.assertLessEqual(confidence, 100.0)
        self.assertEqual(result["lineup_slot_used"], 3)
        self.assertEqual(result["team_total_used"], 4.8)
        self.assertIn("opportunity", result["applied_layers"])
        self.assertNotEqual(result["raw_hr_prob"], result["calibrated_hr_prob"])
        # No projected starter was supplied in game_row for this test.
        self.assertEqual(result["projected_home_pitcher"], "")
        self.assertEqual(result["hr_model_tier"], "partial_telemetry")
        self.assertTrue(bool(result["modeled_row_available"]))

    def test_separate_batter_and_pitcher_dataframes_are_supported(self) -> None:
        # Same expectations as the combined-frame pitcher test, but the data
        # is passed as separate batter and pitcher frames.
        with _stubbed_engine(compute_pitcher_adjustment={"hr_adj": 0.01}):
            result = build_hr_probability_result(
                batter_statcast_df=self.batter_df,
                batter_name="Slugger Sam",
                pitcher_statcast_df=self.pitcher_df,
                pitcher_name="Pitcher Pete",
                mode="pregame",
            )

        self.assertIsNotNone(result["baseline_hr_prob"])
        self.assertGreater(result["adjusted_hr_prob"], result["baseline_hr_prob"])
        self.assertEqual(result["pitcher_name"], "Pitcher Pete")

    def test_engine_tracks_projected_starter_fields_when_supplied(self) -> None:
        # Projected-starter fields given in ``game_row`` must be echoed back
        # in the result.
        with _stubbed_engine(compute_pitcher_adjustment={"hr_adj": 0.01}):
            result = build_hr_probability_result(
                batter_statcast_df=self.batter_df,
                batter_name="Slugger Sam",
                pitcher_statcast_df=self.pitcher_df,
                pitcher_name="Pitcher Pete",
                game_row={
                    "projected_home_pitcher": "Pitcher Pete",
                    "projected_away_pitcher": "Other Arm",
                    "projected_starter_available": True,
                    "projected_starter_match_status": "matched_projected_home",
                },
                mode="pregame",
            )

        self.assertEqual(result["projected_home_pitcher"], "Pitcher Pete")
        self.assertEqual(result["projected_away_pitcher"], "Other Arm")
        self.assertTrue(bool(result["projected_starter_available"]))
        self.assertEqual(
            result["projected_starter_match_status"], "matched_projected_home"
        )
        self.assertTrue(bool(result["modeled_row_available"]))


if __name__ == "__main__":
    unittest.main()