"""Fetch Statcast search results as CSV and normalize their columns."""
from __future__ import annotations

from datetime import datetime
from io import StringIO

import pandas as pd
import requests

from config.settings import STATCAST_SEARCH_URL

HEADERS = {
    "User-Agent": "Mozilla/5.0",
    "Accept-Language": "en-US,en;q=0.9",
}

# Raw columns kept by normalize_statcast, in output order.
_KEPT_COLUMNS = (
    "player_name",
    "batter",
    "pitcher",
    "pitch_type",
    "pitch_name",
    "release_speed",
    "release_spin_rate",
    "release_pos_x",
    "release_pos_z",
    "plate_x",
    "plate_z",
    "pfx_x",
    "pfx_z",
    "launch_speed",
    "launch_angle",
    "estimated_ba_using_speedangle",
    "estimated_woba_using_speedangle",
    "spray_angle",
    "hc_x",
    "hc_y",
    "bb_type",
    "events",
    "description",
    "stand",
    "p_throws",
    "home_team",
    "away_team",
    "inning_topbot",
    "team",
    "batter_team",
    "team_name",
    "game_date",
    "game_pk",
    "inning",
    "outs_when_up",
    "balls",
    "strikes",
    "bat_score",
    "fld_score",
    "post_bat_score",
    "post_fld_score",
)

# The only kept columns whose name changes; every other column keeps its name.
_RENAMED_COLUMNS = {
    "estimated_ba_using_speedangle": "xba",
    "estimated_woba_using_speedangle": "xwoba",
    "stand": "batter_stand",
    "p_throws": "pitcher_hand",
}

# Normalized column names that are coerced to numeric dtype.
_NUMERIC_COLUMNS = (
    "release_speed",
    "release_spin_rate",
    "release_pos_x",
    "release_pos_z",
    "plate_x",
    "plate_z",
    "pfx_x",
    "pfx_z",
    "launch_speed",
    "launch_angle",
    "xba",
    "xwoba",
    "inning",
    "outs_when_up",
    "balls",
    "strikes",
    "bat_score",
    "fld_score",
    "post_bat_score",
    "post_fld_score",
)


def _query_statcast(
    start_date: str,
    end_date: str,
    season: str,
    player_type: str = "batter",
) -> pd.DataFrame:
    """Run one Statcast search request and parse the response as CSV.

    Args:
        start_date: Sent as the ``game_date_gt`` query parameter.
        end_date: Sent as the ``game_date_lt`` query parameter.
        season: Season value; sent as ``hfSea`` with a trailing ``|``.
        player_type: Sent as the ``player_type`` query parameter.

    Returns:
        The parsed response, or an empty DataFrame when the body is blank
        or looks like markup rather than CSV.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    params = {
        "all": "true",
        "hfPT": "",
        "hfAB": "",
        "hfBBT": "",
        "hfPR": "",
        "hfZ": "",
        "stadium": "",
        "hfBBL": "",
        "hfNewZones": "",
        "hfGT": "R|",
        "hfC": "",
        "hfSea": f"{season}|",
        "hfSit": "",
        "player_type": player_type,
        "hfOuts": "",
        "opponent": "",
        "pitcher_throws": "",
        "batter_stands": "",
        "hfSA": "",
        "game_date_gt": start_date,
        "game_date_lt": end_date,
        "team": "",
        "position": "",
        "hfRO": "",
        "home_road": "",
        "hfFlag": "",
        "metric_1": "",
        "hfInn": "",
        "min_pitches": "0",
        "min_results": "0",
        "group_by": "name",
        "sort_col": "pitches",
        "player_event_sort": "api_h_launch_speed",
        "sort_order": "desc",
        "min_abs": "0",
        "type": "details",
    }
    response = requests.get(
        STATCAST_SEARCH_URL,
        params=params,
        headers=HEADERS,
        timeout=180,
    )
    response.raise_for_status()

    text = response.text.strip()
    # NOTE(review): the source was corrupted from the `startswith("` literal
    # through the header of the next function. The markup guard, the
    # empty-frame return and the CSV parse below are reconstructed from what
    # survived (the `StringIO`/`pd` imports and the return annotation) --
    # confirm against version control.
    if not text or text.startswith("<"):
        return pd.DataFrame()
    # low_memory=False infers each column's dtype from the whole payload
    # rather than chunk by chunk.
    return pd.read_csv(StringIO(text), low_memory=False)


# NOTE(review): this function's name and parameter list were lost in the same
# corruption; they are inferred from the surviving body and from the
# `fetch_statcast_range_pitcher` sibling -- confirm against callers.
def fetch_statcast_range(start_date: str, end_date: str) -> pd.DataFrame:
    """Fetch Statcast data for the given date range (MLB only).

    player_name = batter.

    Args:
        start_date: First date of the range, formatted ``YYYY-MM-DD``.
        end_date: Last date of the range; passed through to the query as-is.

    Returns:
        The raw query result (see ``_query_statcast``).

    Raises:
        ValueError: If ``start_date`` is not formatted ``YYYY-MM-DD``.
    """
    # The season filter is taken from start_date's year only.
    season = str(datetime.strptime(start_date, "%Y-%m-%d").year)
    return _query_statcast(start_date, end_date, season=season, player_type="batter")


def fetch_statcast_range_pitcher(start_date: str, end_date: str) -> pd.DataFrame:
    """Fetch pitcher-perspective Statcast for the given date range.

    player_name = pitcher.

    Args:
        start_date: First date of the range, formatted ``YYYY-MM-DD``.
        end_date: Last date of the range; passed through to the query as-is.

    Returns:
        The raw query result (see ``_query_statcast``).

    Raises:
        ValueError: If ``start_date`` is not formatted ``YYYY-MM-DD``.
    """
    # The season filter is taken from start_date's year only.
    season = str(datetime.strptime(start_date, "%Y-%m-%d").year)
    return _query_statcast(start_date, end_date, season=season, player_type="pitcher")


def normalize_statcast(df: pd.DataFrame) -> pd.DataFrame:
    """Project a raw Statcast frame onto the known columns and fix dtypes.

    Keeps the columns listed in ``_KEPT_COLUMNS`` that are present in ``df``
    (in that listed order), applies ``_RENAMED_COLUMNS``, re-adds ``p_throws``
    and ``stand`` as copies of their renamed columns, coerces the columns in
    ``_NUMERIC_COLUMNS`` to numeric and parses ``game_date`` as datetime.
    Values that cannot be converted become NaN / NaT.

    Args:
        df: Raw frame as returned by the fetch functions.

    Returns:
        A new normalized frame, or ``df`` itself (unchanged) when it is empty.
    """
    if df.empty:
        return df

    present = [col for col in _KEPT_COLUMNS if col in df.columns]
    out = df[present].copy().rename(columns=_RENAMED_COLUMNS)

    # Keep the original raw names available alongside the renamed columns.
    if "pitcher_hand" in out.columns and "p_throws" not in out.columns:
        out["p_throws"] = out["pitcher_hand"]
    if "batter_stand" in out.columns and "stand" not in out.columns:
        out["stand"] = out["batter_stand"]

    for col in _NUMERIC_COLUMNS:
        if col in out.columns:
            out[col] = pd.to_numeric(out[col], errors="coerce")

    if "game_date" in out.columns:
        out["game_date"] = pd.to_datetime(out["game_date"], errors="coerce")

    return out