2026_MLB_Model / tests /test_hr_probability_engine.py
Syntrex's picture
Promote strikeout v2 and harden telemetry models
50dc123
raw
history blame
15.4 kB
from __future__ import annotations
import os
import sys
import types
import unittest
from unittest.mock import patch
import pandas as pd
sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))
from models.hr_probability_engine import build_hr_probability_result
def _sample_statcast_df() -> pd.DataFrame:
batter_rows = [
{
"player_name": "Slugger Sam",
"launch_speed": 102 + i,
"launch_angle": 24 + (i % 4),
"estimated_woba_using_speedangle": 0.390,
"stand": "L",
"game_date": f"2026-03-{10 + i:02d}",
"game_pk": 100 + i,
"events": "home_run" if i % 4 == 0 else "single",
"description": "hit_into_play",
"pitch_name": "Slider",
"plate_x": 0.1,
"plate_z": 2.6,
}
for i in range(10)
]
pitcher_rows = [
{
"player_name": "Pitcher Pete",
"launch_speed": 89 + (i % 3),
"launch_angle": 14 + (i % 5),
"release_speed": 94 + (i % 2),
"release_spin_rate": 2250 + i * 5,
"release_extension": 6.1,
"pfx_x": 0.8,
"pfx_z": 1.2,
"estimated_woba_using_speedangle": 0.305,
"p_throws": "R",
"game_date": f"2026-03-{10 + i:02d}",
"game_pk": 200 + i,
"events": "field_out",
"description": "swinging_strike" if i % 3 == 0 else "called_strike",
"pitch_name": "Four-Seam Fastball",
"pitch_type": "FF",
"plate_x": 0.2,
"plate_z": 2.8,
"release_pos_x": -1.2,
"release_pos_y": 54.0,
"release_pos_z": 5.8,
"vx0": 5.0,
"vz0": -4.0,
"ax": -10.0,
"az": -20.0,
}
for i in range(12)
]
return pd.DataFrame(batter_rows + pitcher_rows)
class TestHrProbabilityEngine(unittest.TestCase):
    """Exercise ``build_hr_probability_result`` with its layer functions stubbed.

    Each test replaces layer functions in ``models.hr_probability_engine``
    with mocks that return fixed dictionaries, then asserts on the fields of
    the result the engine builds from those values and the synthetic fixture.
    """

    # Module whose functions ``_stub_engine_layers`` patches.
    _ENGINE_MODULE = "models.hr_probability_engine"

    def setUp(self) -> None:
        """Build the combined fixture frame and one slice per player."""
        self.statcast_df = _sample_statcast_df()
        self.batter_df = self.statcast_df[
            self.statcast_df["player_name"] == "Slugger Sam"
        ].reset_index(drop=True)
        self.pitcher_df = self.statcast_df[
            self.statcast_df["player_name"] == "Pitcher Pete"
        ].reset_index(drop=True)

    def _stub_engine_layers(self, **overrides: object) -> None:
        """Patch the engine's layer functions for the rest of the current test.

        Five functions are always replaced by mocks returning zero or empty
        adjustments.  Each keyword in ``overrides`` names a function in the
        engine module and gives the value its mock should return, either
        replacing one of the defaults or patching an additional function.

        The patches are undone through ``addCleanup``, so they stay active
        until the test finishes.
        """
        # Built fresh on every call so no dictionary is shared between tests.
        return_values = {
            "compute_pitcher_adjustment": {"hr_adj": 0.0},
            "compute_environment_adjustment": {
                "env_hr_boost": 0.0,
                "park_hr_boost": 0.0,
                "weather_hr_boost": 0.0,
            },
            "build_trajectory_features": {},
            "compute_trajectory_adjustment": {"hr_adj": 0.0},
            "compute_upcoming_rolling_adjustment": {"rolling_hr_adjustment": 0.0},
        }
        return_values.update(overrides)
        for func_name, return_value in return_values.items():
            patcher = patch(
                f"{self._ENGINE_MODULE}.{func_name}",
                return_value=return_value,
            )
            patcher.start()
            # Register the undo right after a successful start so earlier
            # patches are still removed if a later one fails to apply.
            self.addCleanup(patcher.stop)

    def test_baseline_parity_when_context_is_absent(self) -> None:
        """With all stubs neutral and no pitcher, adjusted equals baseline."""
        self._stub_engine_layers()
        result = build_hr_probability_result(
            statcast_df=self.statcast_df,
            batter_name="Slugger Sam",
            mode="pregame",
        )
        self.assertIsNotNone(result["baseline_hr_prob"])
        self.assertAlmostEqual(result["baseline_hr_prob"], result["adjusted_hr_prob"], places=6)
        self.assertIn("live_pitch_telemetry", result["skipped_layers"])

    def test_pitcher_context_moves_probability(self) -> None:
        """A positive stubbed pitcher adjustment raises the adjusted probability."""
        self._stub_engine_layers(compute_pitcher_adjustment={"hr_adj": 0.02})
        result = build_hr_probability_result(
            statcast_df=self.statcast_df,
            batter_name="Slugger Sam",
            pitcher_name="Pitcher Pete",
            mode="pregame",
        )
        self.assertGreater(result["adjusted_hr_prob"], result["baseline_hr_prob"])
        self.assertAlmostEqual(result["pregame_pitcher_context_adj"], 0.02, places=6)
        self.assertIn("pitcher", result["applied_layers"])

    def test_low_sample_pitcher_adjustment_is_shrunk(self) -> None:
        """A pitcher feature row with sample_size 10 yields a reduced adjustment."""
        self._stub_engine_layers(
            build_pitcher_feature_row={"sample_size": 10, "p_throws": "R"},
            compute_pitcher_adjustment={"hr_adj": 0.02},
            compute_upcoming_rolling_adjustment={
                "rolling_hr_adjustment": 0.0,
                "pitcher_rolling_confidence": 0.0,
            },
        )
        result = build_hr_probability_result(
            batter_statcast_df=self.batter_df,
            batter_name="Slugger Sam",
            pitcher_statcast_df=self.pitcher_df,
            pitcher_name="Pitcher Pete",
            mode="pregame",
        )
        self.assertLess(float(result["pitcher_hr_adjustment"]), 0.02)
        self.assertLess(float(result["pitcher_reliability"]), 0.2)

    def test_matchup_layers_are_applied_in_pregame_mode(self) -> None:
        """Zone, family-zone and arsenal boosts are reported as applied layers."""
        # Stand-in modules placed in ``sys.modules`` for the duration of the
        # engine call.  NOTE(review): presumably the engine imports these
        # modules at call time -- confirm against the engine source.
        fake_batter_zone = types.SimpleNamespace(
            build_batter_zone_feature_row=lambda *args, **kwargs: {}
        )
        fake_pitcher_zone = types.SimpleNamespace(
            build_pitcher_zone_feature_row=lambda *args, **kwargs: {}
        )
        fake_zone_matchup = types.SimpleNamespace(
            compute_zone_matchup_adjustment=lambda *args, **kwargs: {"hr_zone_boost": 0.20}
        )
        fake_family_store = types.SimpleNamespace(
            build_batter_family_zone_feature_row=lambda *args, **kwargs: {},
            build_pitcher_family_zone_feature_row=lambda *args, **kwargs: {},
        )
        fake_matchup_model = types.SimpleNamespace(
            compute_family_zone_matchup_adjustment=lambda *args, **kwargs: {"family_zone_hr_boost": 0.10}
        )
        fake_batter_arsenal = types.SimpleNamespace(
            build_batter_arsenal_feature_row=lambda *args, **kwargs: {}
        )
        fake_pitcher_arsenal = types.SimpleNamespace(
            build_pitcher_arsenal_feature_row=lambda *args, **kwargs: {}
        )
        fake_arsenal_matchup = types.SimpleNamespace(
            compute_arsenal_matchup_adjustment=lambda *args, **kwargs: {"arsenal_hr_boost": 0.10}
        )
        self._stub_engine_layers()
        with patch.dict(
            sys.modules,
            {
                "models.batter_zone_model": fake_batter_zone,
                "models.pitcher_zone_model": fake_pitcher_zone,
                "models.zone_matchup_model": fake_zone_matchup,
                "models.family_zone_profile_store": fake_family_store,
                "models.matchup_model": fake_matchup_model,
                "models.batter_arsenal_model": fake_batter_arsenal,
                "models.pitcher_arsenal_model": fake_pitcher_arsenal,
                "models.arsenal_matchup_model": fake_arsenal_matchup,
            },
        ):
            result = build_hr_probability_result(
                statcast_df=self.statcast_df,
                batter_name="Slugger Sam",
                pitcher_name="Pitcher Pete",
                mode="pregame",
            )
        self.assertNotEqual(result["zone_hr_adjustment"], 0.0)
        self.assertNotEqual(result["family_zone_hr_adjustment"], 0.0)
        self.assertNotEqual(result["arsenal_hr_adjustment"], 0.0)
        self.assertIn("zone", result["applied_layers"])
        self.assertIn("family_zone", result["applied_layers"])
        self.assertIn("arsenal", result["applied_layers"])

    def test_environment_layers_are_recorded(self) -> None:
        """Stubbed environment, park and weather boosts appear in the result."""
        self._stub_engine_layers(
            compute_environment_adjustment={
                "env_hr_boost": 0.03,
                "park_hr_boost": 0.01,
                "weather_hr_boost": 0.02,
            },
        )
        result = build_hr_probability_result(
            statcast_df=self.statcast_df,
            batter_name="Slugger Sam",
            mode="pregame",
            game_row={"venue": "Dodger Stadium"},
            weather_row={"temperature_f": 88, "wind_speed_mph": 12, "wind_direction_deg": 180},
        )
        self.assertAlmostEqual(result["env_hr_adjustment"], 0.03, places=6)
        self.assertAlmostEqual(result["park_hr_adjustment"], 0.01, places=6)
        self.assertAlmostEqual(result["weather_hr_adjustment"], 0.02, places=6)
        self.assertIn("environment", result["applied_layers"])

    def test_engine_returns_raw_calibrated_confidence_and_opportunity_fields(self) -> None:
        """Raw/calibrated probabilities, confidence and opportunity fields are set."""
        self._stub_engine_layers(
            compute_pitcher_adjustment={"hr_adj": 0.01},
            compute_environment_adjustment={
                "env_hr_boost": 0.01,
                "park_hr_boost": 0.01,
                "weather_hr_boost": 0.0,
            },
            compute_upcoming_rolling_adjustment={
                "rolling_hr_adjustment": 0.01,
                "pitcher_rolling_confidence": 0.9,
            },
        )
        result = build_hr_probability_result(
            batter_statcast_df=self.batter_df,
            batter_name="Slugger Sam",
            pitcher_statcast_df=self.pitcher_df,
            pitcher_name="Pitcher Pete",
            game_row={
                "venue": "Yankee Stadium",
                "lineup_slot": 3,
                "lineup_slot_source": "projected",
                "team_total": 4.8,
                "team_total_source": "projected",
            },
            mode="pregame",
        )
        self.assertIsNotNone(result["raw_hr_prob"])
        self.assertIsNotNone(result["calibrated_hr_prob"])
        self.assertAlmostEqual(result["pregame_hr_prob"], result["calibrated_hr_prob"], places=6)
        # Separate bound checks so a failure reports the offending score.
        confidence = float(result["confidence_score"])
        self.assertGreaterEqual(confidence, 1.0)
        self.assertLessEqual(confidence, 100.0)
        self.assertEqual(result["lineup_slot_used"], 3)
        self.assertEqual(result["team_total_used"], 4.8)
        self.assertIn("opportunity", result["applied_layers"])
        self.assertNotEqual(result["raw_hr_prob"], result["calibrated_hr_prob"])
        # No projected starters were supplied in ``game_row``.
        self.assertEqual(result["projected_home_pitcher"], "")
        self.assertEqual(result["hr_model_tier"], "partial_telemetry")
        self.assertTrue(bool(result["modeled_row_available"]))

    def test_separate_batter_and_pitcher_dataframes_are_supported(self) -> None:
        """Per-player frames can be passed instead of one combined frame."""
        self._stub_engine_layers(compute_pitcher_adjustment={"hr_adj": 0.01})
        result = build_hr_probability_result(
            batter_statcast_df=self.batter_df,
            batter_name="Slugger Sam",
            pitcher_statcast_df=self.pitcher_df,
            pitcher_name="Pitcher Pete",
            mode="pregame",
        )
        self.assertIsNotNone(result["baseline_hr_prob"])
        self.assertGreater(result["adjusted_hr_prob"], result["baseline_hr_prob"])
        self.assertEqual(result["pitcher_name"], "Pitcher Pete")

    def test_engine_tracks_projected_starter_fields_when_supplied(self) -> None:
        """Projected-starter fields from ``game_row`` are echoed in the result."""
        self._stub_engine_layers(compute_pitcher_adjustment={"hr_adj": 0.01})
        result = build_hr_probability_result(
            batter_statcast_df=self.batter_df,
            batter_name="Slugger Sam",
            pitcher_statcast_df=self.pitcher_df,
            pitcher_name="Pitcher Pete",
            game_row={
                "projected_home_pitcher": "Pitcher Pete",
                "projected_away_pitcher": "Other Arm",
                "projected_starter_available": True,
                "projected_starter_match_status": "matched_projected_home",
            },
            mode="pregame",
        )
        self.assertEqual(result["projected_home_pitcher"], "Pitcher Pete")
        self.assertEqual(result["projected_away_pitcher"], "Other Arm")
        self.assertTrue(bool(result["projected_starter_available"]))
        self.assertEqual(result["projected_starter_match_status"], "matched_projected_home")
        self.assertTrue(bool(result["modeled_row_available"]))
# Run this module's tests when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()