from __future__ import annotations
from datetime import datetime
from io import StringIO
import pandas as pd
import requests
from config.settings import STATCAST_SEARCH_URL
# Request headers passed to requests.get() by _query_statcast.
# NOTE(review): presumably browser-like so the endpoint serves the CSV export
# to a scripted client — confirm whether both entries are actually required.
HEADERS = {
    "User-Agent": "Mozilla/5.0",
    "Accept-Language": "en-US,en;q=0.9",
}
def _query_statcast(start_date: str, end_date: str, season: str, player_type: str = "batter") -> pd.DataFrame:
    """Run one Statcast search request and parse the response body as CSV.

    Args:
        start_date: Sent as the ``game_date_gt`` query parameter.
        end_date: Sent as the ``game_date_lt`` query parameter.
        season: Sent as ``hfSea`` with a trailing ``|`` appended.
        player_type: Sent as the ``player_type`` query parameter.

    Returns:
        The parsed response, or an empty DataFrame when the body is blank,
        looks like an HTML page, or cannot be parsed as CSV.

    Raises:
        requests.HTTPError: If the response status is an error
            (``raise_for_status``). Other ``requests`` exceptions raised by
            the GET itself are not caught here.
    """
    import logging

    log = logging.getLogger(__name__)

    params = {
        "all": "true",
        "hfPT": "",
        "hfAB": "",
        "hfBBT": "",
        "hfPR": "",
        "hfZ": "",
        "stadium": "",
        "hfBBL": "",
        "hfNewZones": "",
        "hfGT": "R|",
        "hfC": "",
        "hfSea": f"{season}|",
        "hfSit": "",
        "player_type": player_type,
        "hfOuts": "",
        "opponent": "",
        "pitcher_throws": "",
        "batter_stands": "",
        "hfSA": "",
        "game_date_gt": start_date,
        "game_date_lt": end_date,
        "team": "",
        "position": "",
        "hfRO": "",
        "home_road": "",
        "hfFlag": "",
        "metric_1": "",
        "hfInn": "",
        "min_pitches": "0",
        "min_results": "0",
        "group_by": "name",
        "sort_col": "pitches",
        "player_event_sort": "api_h_launch_speed",
        "sort_order": "desc",
        "min_abs": "0",
        "type": "details",
    }
    response = requests.get(
        STATCAST_SEARCH_URL,
        params=params,
        headers=HEADERS,
        timeout=180,
    )
    response.raise_for_status()

    text = response.text.strip()
    if not text:
        return pd.DataFrame()

    # Compare case-insensitively and also accept a bare <html> root: a
    # lowercase doctype would otherwise be handed to the CSV parser.
    if text[:32].lower().startswith(("<!doctype html", "<html")):
        log.warning(
            "Statcast search returned an HTML page instead of CSV (%s to %s)",
            start_date,
            end_date,
        )
        return pd.DataFrame()

    try:
        return pd.read_csv(StringIO(text))
    except Exception:
        # Deliberately best-effort: callers get an empty frame rather than a
        # crash, but the failure is logged instead of vanishing silently.
        log.exception(
            "Could not parse Statcast response as CSV (%s to %s)",
            start_date,
            end_date,
        )
        return pd.DataFrame()
def fetch_statcast_range(start_date: str, end_date: str) -> pd.DataFrame:
    """Fetch Statcast data for the given date range (MLB only). player_name = batter.

    Args:
        start_date: First date of the range, formatted ``YYYY-MM-DD``.
        end_date: Last date of the range; forwarded to the query as given.

    Returns:
        Whatever ``_query_statcast`` returns for ``player_type="batter"``.

    Raises:
        ValueError: If ``start_date`` is not in ``YYYY-MM-DD`` form.
    """
    first_year = datetime.strptime(start_date, "%Y-%m-%d").year
    try:
        last_year = datetime.strptime(end_date, "%Y-%m-%d").year
    except (TypeError, ValueError):
        # end_date was never validated here; keep forwarding it untouched and
        # fall back to the single-season behaviour.
        last_year = first_year

    # Send every season the range touches, not just the start year, so a range
    # crossing a calendar year keeps its later rows. _query_statcast appends
    # the trailing "|", so "2023|2024" is sent as "2023|2024|".
    season = "|".join(str(year) for year in range(first_year, max(first_year, last_year) + 1))
    return _query_statcast(start_date, end_date, season=season, player_type="batter")
def fetch_statcast_range_pitcher(start_date: str, end_date: str) -> pd.DataFrame:
    """Fetch pitcher-perspective Statcast for the given date range. player_name = pitcher.

    Args:
        start_date: First date of the range, formatted ``YYYY-MM-DD``.
        end_date: Last date of the range; forwarded to the query as given.

    Returns:
        Whatever ``_query_statcast`` returns for ``player_type="pitcher"``.

    Raises:
        ValueError: If ``start_date`` is not in ``YYYY-MM-DD`` form.
    """
    first_year = datetime.strptime(start_date, "%Y-%m-%d").year
    try:
        last_year = datetime.strptime(end_date, "%Y-%m-%d").year
    except (TypeError, ValueError):
        # end_date was never validated here; keep forwarding it untouched and
        # fall back to the single-season behaviour.
        last_year = first_year

    # Send every season the range touches, not just the start year, so a range
    # crossing a calendar year keeps its later rows. _query_statcast appends
    # the trailing "|", so "2023|2024" is sent as "2023|2024|".
    season = "|".join(str(year) for year in range(first_year, max(first_year, last_year) + 1))
    return _query_statcast(start_date, end_date, season=season, player_type="pitcher")
def normalize_statcast(df: pd.DataFrame) -> pd.DataFrame:
    """Project a raw Statcast frame onto the columns and names used downstream.

    Keeps only the known columns that are present in ``df`` (in the fixed
    order below), renames four of them, re-adds ``p_throws`` / ``stand`` as
    copies of their renamed columns, coerces the measurement and count columns
    to numeric, and parses ``game_date`` as datetime. Values that cannot be
    converted become NaN / NaT.

    Empty frames go through the same projection, so a header-only result
    comes back with the normalized column names rather than the raw ones.

    Args:
        df: Raw Statcast data; any subset of the known columns may be present.

    Returns:
        A new DataFrame; ``df`` itself is never returned or modified.
    """
    # Columns that survive normalization, in output order.
    wanted = (
        "player_name",
        "batter",
        "pitcher",
        "pitch_type",
        "pitch_name",
        "release_speed",
        "release_spin_rate",
        "release_pos_x",
        "release_pos_z",
        "plate_x",
        "plate_z",
        "pfx_x",
        "pfx_z",
        "launch_speed",
        "launch_angle",
        "estimated_ba_using_speedangle",
        "estimated_woba_using_speedangle",
        "spray_angle",
        "hc_x",
        "hc_y",
        "bb_type",
        "events",
        "description",
        "stand",
        "p_throws",
        "home_team",
        "away_team",
        "inning_topbot",
        "team",
        "batter_team",
        "team_name",
        "game_date",
        "game_pk",
        "inning",
        "outs_when_up",
        "balls",
        "strikes",
        "bat_score",
        "fld_score",
        "post_bat_score",
        "post_fld_score",
    )
    # Only these four change name; every other kept column passes through.
    renames = {
        "estimated_ba_using_speedangle": "xba",
        "estimated_woba_using_speedangle": "xwoba",
        "stand": "batter_stand",
        "p_throws": "pitcher_hand",
    }
    numeric_cols = (
        "release_speed",
        "release_spin_rate",
        "release_pos_x",
        "release_pos_z",
        "plate_x",
        "plate_z",
        "pfx_x",
        "pfx_z",
        "launch_speed",
        "launch_angle",
        "xba",
        "xwoba",
        "inning",
        "outs_when_up",
        "balls",
        "strikes",
        "bat_score",
        "fld_score",
        "post_bat_score",
        "post_fld_score",
    )

    present = [col for col in wanted if col in df.columns]
    out = df[present].copy().rename(columns=renames)

    # Keep the original Statcast names available next to the renamed ones.
    if "pitcher_hand" in out.columns:
        out["p_throws"] = out["pitcher_hand"]
    if "batter_stand" in out.columns:
        out["stand"] = out["batter_stand"]

    for col in numeric_cols:
        if col in out.columns:
            out[col] = pd.to_numeric(out[col], errors="coerce")

    if "game_date" in out.columns:
        out["game_date"] = pd.to_datetime(out["game_date"], errors="coerce")

    return out
|