Spaces:
Running
Running
| from __future__ import annotations | |
| from datetime import datetime | |
| from io import StringIO | |
| import pandas as pd | |
| import requests | |
| from config.settings import STATCAST_SEARCH_URL | |
# HTTP headers attached to every Statcast search request issued by this module.
HEADERS = {
    # Generic browser-style agent string.
    "User-Agent": "Mozilla/5.0",
    # Language preference: US English first, then any English.
    "Accept-Language": "en-US,en;q=0.9",
}
def _query_statcast(
    start_date: str,
    end_date: str,
    season: str,
    player_type: str = "batter",
) -> pd.DataFrame:
    """Run one Statcast search request and return the CSV payload as a DataFrame.

    Issues a GET to ``STATCAST_SEARCH_URL`` using the module-level ``HEADERS``
    and a fixed set of search parameters; only the date bounds, the season
    filter and the player type vary between calls.

    Args:
        start_date: Sent as ``game_date_gt``. Callers in this module pass
            ``YYYY-MM-DD`` strings.
        end_date: Sent as ``game_date_lt``.
        season: Sent as ``hfSea`` with a trailing ``|`` appended.
        player_type: Sent as ``player_type`` (``"batter"`` or ``"pitcher"``
            in this module).

    Returns:
        The parsed CSV. An empty ``DataFrame`` is returned when the response
        body is blank, looks like an HTML page, or cannot be parsed as CSV.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: For connection problems or timeouts.
    """
    import logging

    # Parameter names, values and order are kept exactly as the endpoint
    # receives them today; blank entries are sent as empty filters.
    params = {
        "all": "true",
        "hfPT": "",
        "hfAB": "",
        "hfBBT": "",
        "hfPR": "",
        "hfZ": "",
        "stadium": "",
        "hfBBL": "",
        "hfNewZones": "",
        "hfGT": "R|",  # NOTE(review): presumably "regular season" -- confirm
        "hfC": "",
        "hfSea": f"{season}|",
        "hfSit": "",
        "player_type": player_type,
        "hfOuts": "",
        "opponent": "",
        "pitcher_throws": "",
        "batter_stands": "",
        "hfSA": "",
        "game_date_gt": start_date,
        "game_date_lt": end_date,
        "team": "",
        "position": "",
        "hfRO": "",
        "home_road": "",
        "hfFlag": "",
        "metric_1": "",
        "hfInn": "",
        "min_pitches": "0",
        "min_results": "0",
        "group_by": "name",
        "sort_col": "pitches",
        "player_event_sort": "api_h_launch_speed",
        "sort_order": "desc",
        "min_abs": "0",
        "type": "details",
    }

    response = requests.get(
        STATCAST_SEARCH_URL,
        params=params,
        headers=HEADERS,
        timeout=180,
    )
    response.raise_for_status()

    body = response.text.strip()
    if not body:
        return pd.DataFrame()

    # A 200 response can still carry an HTML page instead of CSV. Match the
    # doctype/root tag case-insensitively (and past a possible BOM) so such a
    # page is never handed to the CSV parser and turned into garbage columns.
    leading = body[:64].lstrip("\ufeff").lower()
    if leading.startswith(("<!doctype html", "<html")):
        return pd.DataFrame()

    try:
        return pd.read_csv(StringIO(body))
    except Exception:
        # Deliberately best-effort: callers expect an empty frame rather than
        # an exception for an unparseable payload. Log it so the failure is
        # visible instead of being silently swallowed.
        logging.getLogger(__name__).warning(
            "Could not parse Statcast response as CSV (%s to %s, player_type=%s)",
            start_date,
            end_date,
            player_type,
            exc_info=True,
        )
        return pd.DataFrame()
def _season_filter(start_date: str, end_date: str) -> str:
    """Return the season years spanned by the date range, joined with ``|``.

    For a range inside one calendar year this is just that year, matching the
    value that used to be derived from ``start_date`` alone.
    """
    first_year = datetime.strptime(start_date, "%Y-%m-%d").year
    try:
        last_year = datetime.strptime(end_date, "%Y-%m-%d").year
    except (TypeError, ValueError):
        # end_date was never parsed before; keep passing an unparseable value
        # through to the request untouched instead of raising a new error here.
        last_year = first_year
    # NOTE(review): assumes the endpoint accepts several seasons in hfSea,
    # e.g. "2023|2024|" -- confirm against the Statcast search UI.
    return "|".join(
        str(year) for year in range(first_year, max(first_year, last_year) + 1)
    )


def fetch_statcast_range(start_date: str, end_date: str) -> pd.DataFrame:
    """Fetch batter-perspective Statcast data for a date range (MLB only).

    ``player_name`` in the result refers to the batter.

    Args:
        start_date: First date of the range, ``YYYY-MM-DD``.
        end_date: Last date of the range, ``YYYY-MM-DD``.

    Returns:
        Whatever ``_query_statcast`` returns for the range; the season filter
        covers every calendar year between the two dates, so a range that
        crosses a year boundary no longer drops the later year's games.

    Raises:
        ValueError: If ``start_date`` is not in ``YYYY-MM-DD`` format.
    """
    return _query_statcast(
        start_date,
        end_date,
        season=_season_filter(start_date, end_date),
        player_type="batter",
    )


def fetch_statcast_range_pitcher(start_date: str, end_date: str) -> pd.DataFrame:
    """Fetch pitcher-perspective Statcast data for a date range.

    ``player_name`` in the result refers to the pitcher.

    Args:
        start_date: First date of the range, ``YYYY-MM-DD``.
        end_date: Last date of the range, ``YYYY-MM-DD``.

    Returns:
        Whatever ``_query_statcast`` returns for the range; the season filter
        covers every calendar year between the two dates.

    Raises:
        ValueError: If ``start_date`` is not in ``YYYY-MM-DD`` format.
    """
    return _query_statcast(
        start_date,
        end_date,
        season=_season_filter(start_date, end_date),
        player_type="pitcher",
    )
# Raw Statcast columns that survive normalization, in output order.
_STATCAST_KEEP_COLUMNS = (
    "player_name",
    "batter",
    "pitcher",
    "pitch_type",
    "pitch_name",
    "release_speed",
    "release_spin_rate",
    "release_pos_x",
    "release_pos_z",
    "plate_x",
    "plate_z",
    "pfx_x",
    "pfx_z",
    "launch_speed",
    "launch_angle",
    "estimated_ba_using_speedangle",
    "estimated_woba_using_speedangle",
    "spray_angle",
    "hc_x",
    "hc_y",
    "bb_type",
    "events",
    "description",
    "stand",
    "p_throws",
    "home_team",
    "away_team",
    "inning_topbot",
    "team",
    "batter_team",
    "team_name",
    "game_date",
    "game_pk",
    "inning",
    "outs_when_up",
    "balls",
    "strikes",
    "bat_score",
    "fld_score",
    "post_bat_score",
    "post_fld_score",
)

# The only columns whose name actually changes; everything else keeps its name.
_STATCAST_RENAMES = {
    "estimated_ba_using_speedangle": "xba",
    "estimated_woba_using_speedangle": "xwoba",
    "stand": "batter_stand",
    "p_throws": "pitcher_hand",
}

# (renamed column, original name) pairs re-exposed under the original name so
# code using either spelling keeps working.
_STATCAST_LEGACY_ALIASES = (
    ("pitcher_hand", "p_throws"),
    ("batter_stand", "stand"),
)

# Columns coerced to numbers after renaming; unparseable values become NaN.
_STATCAST_NUMERIC_COLUMNS = (
    "release_speed",
    "release_spin_rate",
    "release_pos_x",
    "release_pos_z",
    "plate_x",
    "plate_z",
    "pfx_x",
    "pfx_z",
    "launch_speed",
    "launch_angle",
    "xba",
    "xwoba",
    "inning",
    "outs_when_up",
    "balls",
    "strikes",
    "bat_score",
    "fld_score",
    "post_bat_score",
    "post_fld_score",
)


def normalize_statcast(df: pd.DataFrame) -> pd.DataFrame:
    """Reduce a raw Statcast frame to the known columns with consistent names and dtypes.

    Steps, in order:

    1. Keep only the recognised columns that are present in ``df``.
    2. Rename the expected-stat and handedness columns (``xba``, ``xwoba``,
       ``batter_stand``, ``pitcher_hand``).
    3. Add ``p_throws`` / ``stand`` back as copies of the renamed handedness
       columns.
    4. Coerce the numeric columns with ``errors="coerce"`` and parse
       ``game_date`` as datetimes, also with ``errors="coerce"``.

    Empty frames go through the same steps, so a header-only result ends up
    with the same column names as a populated one instead of being returned
    with raw names.

    Args:
        df: Frame as returned by the Statcast fetch functions.

    Returns:
        A new DataFrame; ``df`` itself is never modified or returned.
    """
    present = [name for name in _STATCAST_KEEP_COLUMNS if name in df.columns]
    # Explicit copy so the column assignments below never touch the caller's frame.
    out = df[present].copy().rename(columns=_STATCAST_RENAMES)

    for renamed, legacy in _STATCAST_LEGACY_ALIASES:
        if renamed in out.columns and legacy not in out.columns:
            out[legacy] = out[renamed]

    for name in _STATCAST_NUMERIC_COLUMNS:
        if name in out.columns:
            out[name] = pd.to_numeric(out[name], errors="coerce")

    if "game_date" in out.columns:
        out["game_date"] = pd.to_datetime(out["game_date"], errors="coerce")

    return out