| import gradio as gr |
| import requests |
| import re |
| import unicodedata |
| from datetime import datetime |
| from zoneinfo import ZoneInfo |
| from playwright.sync_api import sync_playwright |
|
|
# --- MLB Stats API endpoints -------------------------------------------------
MLB_SCHEDULE_URL = "https://statsapi.mlb.com/api/v1/schedule"
# Filled in with ``game_pk`` via str.format.
MLB_LIVE_URL = "https://statsapi.mlb.com/api/v1.1/game/{game_pk}/feed/live"

# --- Baseball Savant page templates (placeholders filled via str.format) -----
_SAVANT_HOST = "https://baseballsavant.mlb.com"
_SAVANT_PLAYER_PAGE = _SAVANT_HOST + "/savant-player/{slug}"

SAVANT_PREVIEW_URL = (
    _SAVANT_HOST
    + "/preview?game_pk={game_pk}&game_date={game_date}&date={game_date}"
)
SAVANT_HITTER_X_URL = _SAVANT_PLAYER_PAGE + "?stats=statcast-r-hitting-mlb"
SAVANT_HITTER_SPLIT_URL = (
    _SAVANT_PLAYER_PAGE + "?stats=splits-r-hitting-mlb&season={season}"
)
SAVANT_PITCHER_X_URL = _SAVANT_PLAYER_PAGE + "?stats=statcast-r-pitching-mlb"

# --- CSS selectors for the tables read off each Savant player page -----------
HITTER_X_TABLE_SELECTOR = "#statcast_glance_batter > table"
HITTER_PLATOON_TABLE_SELECTOR = "#date-platoon-mlb > table"
PITCHER_X_TABLE_SELECTOR = "#statcast_stats_pitching > table"

# --- Sample-size cutoffs -----------------------------------------------------
# A 2026 row at or above the cutoff is used on its own; below it, the 2026 and
# 2025 rows are combined as a sample-size-weighted average.
PA_THRESHOLD_HITTER = 50
PITCH_THRESHOLD_PITCHER = 500
|
|
|
|
def today_pacific() -> str:
    """Return the current date in the America/Los_Angeles zone as ``YYYY-MM-DD``."""
    pacific_now = datetime.now(tz=ZoneInfo("America/Los_Angeles"))
    return pacific_now.strftime("%Y-%m-%d")
|
|
|
|
def normalize_name(name: str) -> str:
    """Reduce a player name to a comparison key.

    The key is accent-free, lower-case, stripped of punctuation, has its
    whitespace collapsed to single spaces, and drops generational suffix
    tokens (jr, sr, ii, iii, iv, v). A falsy ``name`` yields ``""``.
    """
    if not name:
        return ""

    # NFKD splits accented letters into base letter + combining mark, so
    # dropping the combining marks leaves the unaccented letter behind.
    decomposed = unicodedata.normalize("NFKD", name)
    unaccented = "".join(
        ch for ch in decomposed if unicodedata.combining(ch) == 0
    )

    # Remove everything that is neither a word character nor whitespace.
    cleaned = re.sub(r"[^\w\s]", "", unaccented.lower().strip())

    generational_suffixes = ("jr", "sr", "ii", "iii", "iv", "v")
    return " ".join(
        token for token in cleaned.split() if token not in generational_suffixes
    )
|
|
|
|
def get_json(url: str, params=None) -> dict:
    """GET ``url`` (with optional query ``params``) and return the decoded JSON body.

    The request uses a 25 second timeout; ``raise_for_status()`` is called on
    the response before the body is decoded.
    """
    response = requests.get(url, params=params, timeout=25)
    response.raise_for_status()
    payload = response.json()
    return payload
|
|
|
|
def safe_int(text: str) -> int:
    """Parse an integer from scraped cell text, returning 0 when it cannot be parsed.

    Surrounding whitespace and thousands separators (",") are removed before
    parsing. ``None`` and the empty string both yield 0.
    """
    cleaned = (text or "").strip().replace(",", "")
    try:
        return int(cleaned)
    except ValueError:
        # int() applied to a str can only fail with ValueError, so catch just
        # that instead of hiding unrelated errors behind a blanket handler.
        return 0
|
|
|
|
def safe_float(text: str) -> float:
    """Parse a float from scraped cell text, returning 0.0 when it cannot be parsed.

    Surrounding whitespace, percent signs and thousands separators (",") are
    removed before parsing. ``None`` and the empty string both yield 0.0.
    """
    cleaned = (text or "").strip().replace("%", "").replace(",", "")
    try:
        return float(cleaned)
    except ValueError:
        # float() applied to a str can only fail with ValueError, so catch
        # just that instead of hiding unrelated errors.
        return 0.0
|
|
|
|
def get_games():
    """Build the dropdown update listing the games scheduled for today (Pacific).

    Queries the schedule endpoint with ``sportId=1`` and today's Pacific date.
    Each choice is a ``(label, gamePk)`` pair where the label reads
    ``"<away> @ <home> | <detailedState>"``. The first game is preselected;
    with no games the dropdown is emptied and its value cleared.
    """
    schedule = get_json(
        MLB_SCHEDULE_URL,
        params={"sportId": 1, "date": today_pacific()},
    )

    def as_choice(game):
        # Missing team names fall back to the generic "Away"/"Home" labels.
        teams = game.get("teams", {})
        away_name = teams.get("away", {}).get("team", {}).get("name", "Away")
        home_name = teams.get("home", {}).get("team", {}).get("name", "Home")
        state = game.get("status", {}).get("detailedState", "")
        return f"{away_name} @ {home_name} | {state}", game.get("gamePk")

    choices = []
    for day in schedule.get("dates", []):
        choices.extend(as_choice(game) for game in day.get("games", []))

    if choices:
        return gr.update(choices=choices, value=choices[0][1])
    return gr.update(choices=[], value=None)
|
|
|
|
def fetch_game_players(game_pk: int):
    """Fetch the live feed for ``game_pk`` and list the players to scrape.

    For each side (away, then home) the first nine IDs of the boxscore
    ``batters`` list are emitted as hitters numbered 1-9, followed by the
    side's probable pitcher when both its id and full name are present.

    NOTE(review): this treats the first nine ``batters`` entries as the
    batting order — confirm that matches the feed's semantics.

    Returns:
        ``(players, meta)`` where ``players`` is a list of dicts with keys
        type, order, side, team, player_id, name, hand, norm_name, and
        ``meta`` is ``{"away_team": ..., "home_team": ...}``.
    """
    feed = get_json(MLB_LIVE_URL.format(game_pk=game_pk))

    game_data = feed.get("gameData", {})
    boxscore_teams = feed.get("liveData", {}).get("boxscore", {}).get("teams", {})
    bio_by_id = game_data.get("players", {})
    probables = game_data.get("probablePitchers", {})

    team_names = {
        "away": game_data.get("teams", {}).get("away", {}).get("name", "Away"),
        "home": game_data.get("teams", {}).get("home", {}).get("name", "Home"),
    }

    def make_entry(kind, order, side, pid, full_name, hand):
        # One shared shape for hitters and pitchers.
        return {
            "type": kind,
            "order": order,
            "side": side,
            "team": team_names[side],
            "player_id": pid,
            "name": full_name,
            "hand": hand,
            "norm_name": normalize_name(full_name),
        }

    roster = []
    for side in ("away", "home"):
        side_box = boxscore_teams.get(side, {})
        box_players = side_box.get("players", {})

        for slot, batter_id in enumerate(side_box.get("batters", [])[:9], start=1):
            key = f"ID{batter_id}"
            full_name = box_players.get(key, {}).get("person", {}).get("fullName", "")
            bats = bio_by_id.get(key, {}).get("batSide", {}).get("code", "")
            roster.append(make_entry("hitter", slot, side, batter_id, full_name, bats))

        starter = probables.get(side, {})
        starter_id = starter.get("id")
        starter_name = starter.get("fullName", "")
        if starter_name and starter_id:
            throws = (
                bio_by_id.get(f"ID{starter_id}", {}).get("pitchHand", {}).get("code", "")
            )
            roster.append(
                make_entry("pitcher", None, side, starter_id, starter_name, throws)
            )

    meta = {"away_team": team_names["away"], "home_team": team_names["home"]}
    return roster, meta
|
|
|
|
def scrape_savant_player_links_for_game(page, game_pk: int, game_date: str):
    """Collect the Savant player links shown on a game's preview page.

    Navigates ``page`` to the preview URL, waits a fixed five seconds, then
    reads every anchor whose href contains ``/savant-player/``. Anchors with
    no visible text or no parsable slug are skipped, and only the first anchor
    per normalized name is kept.

    Returns:
        A list of ``{"name", "norm_name", "slug"}`` dicts in page order.
    """
    page.goto(
        SAVANT_PREVIEW_URL.format(game_pk=game_pk, game_date=game_date),
        wait_until="domcontentloaded",
        timeout=60000,
    )
    page.wait_for_timeout(5000)

    anchors = page.locator('a[href*="/savant-player/"]')
    slug_pattern = re.compile(r"/savant-player/([a-z0-9-]+)")

    # Insertion-ordered dict: keeps the first row per normalized name, in page order.
    first_by_norm = {}
    for position in range(anchors.count()):
        anchor = anchors.nth(position)
        href = anchor.get_attribute("href") or ""
        display_name = anchor.inner_text().strip()
        if not display_name:
            continue

        found = slug_pattern.search(href)
        if found is None:
            continue

        norm = normalize_name(display_name)
        if norm not in first_by_norm:
            first_by_norm[norm] = {
                "name": display_name,
                "norm_name": norm,
                "slug": found.group(1),
            }

    return list(first_by_norm.values())
|
|
|
|
def match_players_to_savant(players, savant_rows):
    """Attach a Savant slug to each player by normalized-name lookup.

    Returns a new list of player dict copies, each extended with ``matched``
    (bool) and ``savant_slug`` (the slug, or ``None`` when no Savant row shares
    the player's ``norm_name``). The input dicts are not modified.
    """
    # A later row with the same normalized name replaces an earlier one.
    row_by_norm = {}
    for savant_row in savant_rows:
        row_by_norm[savant_row["norm_name"]] = savant_row

    def annotate(player):
        hit = row_by_norm.get(player["norm_name"])
        return {
            **player,
            "matched": bool(hit),
            "savant_slug": hit["slug"] if hit else None,
        }

    return [annotate(player) for player in players]
|
|
|
|
def parse_hitter_x_stats_year_table(page):
    """Read the per-season hitter table on the currently loaded Savant page.

    Waits up to 30 seconds for the table, then a further 800 ms. Rows with
    fewer than 19 cells, or whose first cell is not all digits, are skipped.

    Returns:
        ``{season: stats}`` where ``stats`` has keys season, pa, xwoba, xba,
        xslg, hard_hit_pct, barrel_pct, k_pct, bb_pct, read from fixed column
        positions.
    """
    page.wait_for_selector(HITTER_X_TABLE_SELECTOR, timeout=30000)
    page.wait_for_timeout(800)

    # (output key, zero-based column index) for the float-valued columns.
    float_columns = (
        ("xwoba", 14),
        ("xba", 11),
        ("xslg", 12),
        ("hard_hit_pct", 16),
        ("barrel_pct", 5),
        ("k_pct", 17),
        ("bb_pct", 18),
    )

    by_season = {}
    table_rows = page.locator(f"{HITTER_X_TABLE_SELECTOR} tbody tr")
    for row_index in range(table_rows.count()):
        tds = table_rows.nth(row_index).locator("td")
        width = tds.count()
        if width < 19:
            continue

        texts = [tds.nth(col).inner_text().strip() for col in range(width)]
        season_text = texts[0]
        if season_text.isdigit():
            season = int(season_text)
            record = {"season": season, "pa": safe_int(texts[2])}
            for field, col in float_columns:
                record[field] = safe_float(texts[col])
            by_season[season] = record

    return by_season
|
|
|
|
def parse_hitter_platoon_rows(page, season: int):
    """Read the first two rows of the platoon-split table on the loaded page.

    Waits up to 30 seconds for the table, then a further 800 ms. Rows with
    fewer than 20 cells are skipped. A row whose split label contains
    ``"Left"`` is stored under ``"vs_L"``; any other row under ``"vs_R"``.

    Returns:
        A dict with up to two entries, each holding season, split, pa, ops.
    """
    page.wait_for_selector(HITTER_PLATOON_TABLE_SELECTOR, timeout=30000)
    page.wait_for_timeout(800)

    table_rows = page.locator(f"{HITTER_PLATOON_TABLE_SELECTOR} tbody tr")
    splits = {}

    for row_index in range(min(table_rows.count(), 2)):
        tds = table_rows.nth(row_index).locator("td")
        if tds.count() >= 20:
            label = tds.nth(2).inner_text().strip()
            if "Left" in label:
                key = "vs_L"
            else:
                key = "vs_R"

            splits[key] = {
                "season": season,
                "split": label,
                "pa": safe_int(tds.nth(3).inner_text()),
                "ops": safe_float(tds.nth(19).inner_text()),
            }

    return splits
|
|
|
|
def combine_hitter_split_rows(row_2026, row_2025):
    """Pick or blend one platoon split across the 2026 and 2025 seasons.

    * Both rows missing/empty -> ``{}``.
    * 2026 PA at or above ``PA_THRESHOLD_HITTER`` -> the 2026 row, tagged
      ``source="2026_only"``.
    * No 2025 row -> the 2026 row, tagged ``source="2026_only_no_2025"``.
    * Otherwise -> a PA-weighted OPS over both seasons tagged
      ``source="2026_plus_2025"``, or ``{}`` when the combined PA is zero.
    """
    current = row_2026 or {}
    prior = row_2025 or {}

    if not (current or prior):
        return {}

    if current and current.get("pa", 0) >= PA_THRESHOLD_HITTER:
        return {**current, "source": "2026_only"}

    if not prior:
        # ``current`` is non-empty here; the both-empty case returned above.
        return {**current, "source": "2026_only_no_2025"}

    pa_now = current.get("pa", 0)
    pa_prev = prior.get("pa", 0)
    combined_pa = pa_now + pa_prev
    if combined_pa == 0:
        return {}

    blended_ops = (
        (current.get("ops", 0.0) * pa_now) + (prior.get("ops", 0.0) * pa_prev)
    ) / combined_pa

    return {
        "split": current["split"] if current else prior["split"],
        "pa_2026": pa_now,
        "pa_2025": pa_prev,
        "pa_total_used": combined_pa,
        "ops": blended_ops,
        "source": "2026_plus_2025",
    }
|
|
|
|
def blend_hitter_overall_xstats(x2026, x2025):
    """Pick or blend a hitter's overall expected stats across 2026 and 2025.

    * Both rows missing/empty -> ``{}``.
    * 2026 PA at or above ``PA_THRESHOLD_HITTER`` -> the 2026 row, tagged
      ``source="2026_only"``.
    * No 2025 row -> the 2026 row, tagged ``source="2026_only_no_2025"``.
    * Otherwise -> PA-weighted averages of every rate stat, tagged
      ``source="2026_plus_2025"``.

    Returns ``{}`` when the combined PA is zero (for example a 2025 row whose
    PA cell failed to parse and no 2026 row) rather than dividing by zero.
    """
    if not x2026 and not x2025:
        return {}

    if x2026 and x2026.get("pa", 0) >= PA_THRESHOLD_HITTER:
        out = dict(x2026)
        out["source"] = "2026_only"
        return out

    if not x2025:
        # x2026 is non-empty here; the both-empty case returned above.
        out = dict(x2026)
        out["source"] = "2026_only_no_2025"
        return out

    pa_26 = x2026.get("pa", 0) if x2026 else 0
    pa_25 = x2025.get("pa", 0)
    total = pa_26 + pa_25
    if total == 0:
        # Nothing to weight by; same guard as combine_hitter_split_rows.
        return {}

    def weighted(key):
        v26 = x2026.get(key, 0.0) if x2026 else 0.0
        v25 = x2025.get(key, 0.0)
        return ((v26 * pa_26) + (v25 * pa_25)) / total

    blended = {
        "pa_2026": pa_26,
        "pa_2025": pa_25,
        "pa_total_used": total,
    }
    for key in (
        "xwoba",
        "xba",
        "xslg",
        "hard_hit_pct",
        "barrel_pct",
        "k_pct",
        "bb_pct",
    ):
        blended[key] = weighted(key)
    blended["source"] = "2026_plus_2025"
    return blended
|
|
|
|
def parse_pitcher_xstats(page):
    """Read the per-season pitcher table on the currently loaded Savant page.

    Waits up to 30 seconds for the table, then a further 800 ms. Rows with
    fewer than 21 cells, or whose first cell is not all digits, are skipped.

    NOTE(review): columns 2-18 use the same positions as the hitter table
    parser — confirm they match the live pitcher table layout.

    Returns:
        ``{season: stats}`` where ``stats`` has keys season, pitches, xwoba,
        xba, xslg, hard_hit_pct, barrel_pct, k_pct, bb_pct, era, xera.
    """
    page.wait_for_selector(PITCHER_X_TABLE_SELECTOR, timeout=30000)
    page.wait_for_timeout(800)

    # (output key, zero-based column index) for the float-valued columns.
    float_columns = (
        ("xwoba", 14),
        ("xba", 11),
        ("xslg", 12),
        ("hard_hit_pct", 16),
        ("barrel_pct", 5),
        ("k_pct", 17),
        ("bb_pct", 18),
        ("era", 19),
        ("xera", 20),
    )

    by_season = {}
    table_rows = page.locator(f"{PITCHER_X_TABLE_SELECTOR} tbody tr")
    for row_index in range(table_rows.count()):
        tds = table_rows.nth(row_index).locator("td")
        width = tds.count()
        if width < 21:
            continue

        texts = [tds.nth(col).inner_text().strip() for col in range(width)]
        season_text = texts[0]
        if season_text.isdigit():
            season = int(season_text)
            record = {"season": season, "pitches": safe_int(texts[2])}
            for field, col in float_columns:
                record[field] = safe_float(texts[col])
            by_season[season] = record

    return by_season
|
|
|
|
def blend_pitcher_xstats(x2026, x2025):
    """Pick or blend a pitcher's expected stats across 2026 and 2025.

    * Both rows missing/empty -> ``{}``.
    * 2026 pitch count at or above ``PITCH_THRESHOLD_PITCHER`` -> the 2026
      row, tagged ``source="2026_only"``.
    * No 2025 row -> the 2026 row, tagged ``source="2026_only_no_2025"``.
    * Otherwise -> pitch-count-weighted averages of every stat, tagged
      ``source="2026_plus_2025"``.

    Returns ``{}`` when the combined pitch count is zero (for example a 2025
    row whose pitch cell failed to parse and no 2026 row) rather than
    dividing by zero.
    """
    if not x2026 and not x2025:
        return {}

    if x2026 and x2026.get("pitches", 0) >= PITCH_THRESHOLD_PITCHER:
        out = dict(x2026)
        out["source"] = "2026_only"
        return out

    if not x2025:
        # x2026 is non-empty here; the both-empty case returned above.
        out = dict(x2026)
        out["source"] = "2026_only_no_2025"
        return out

    p26 = x2026.get("pitches", 0) if x2026 else 0
    p25 = x2025.get("pitches", 0)
    total = p26 + p25
    if total == 0:
        # Nothing to weight by; avoid ZeroDivisionError.
        return {}

    def weighted(key):
        v26 = x2026.get(key, 0.0) if x2026 else 0.0
        v25 = x2025.get(key, 0.0)
        return ((v26 * p26) + (v25 * p25)) / total

    blended = {
        "pitches_2026": p26,
        "pitches_2025": p25,
        "pitches_total_used": total,
    }
    for key in (
        "xwoba",
        "xba",
        "xslg",
        "hard_hit_pct",
        "barrel_pct",
        "k_pct",
        "bb_pct",
        "era",
        "xera",
    ):
        blended[key] = weighted(key)
    blended["source"] = "2026_plus_2025"
    return blended
|
|
|
|
def scrape_hitter_savant_data(page, player_slug: str):
    """Scrape one hitter's Savant pages and reduce them to the stats used.

    Visits, in order, the expected-stats page and the 2026 and 2025 split
    pages, pausing 2.2 seconds after each navigation before parsing.

    Returns:
        ``{"overall_used", "vs_L_used", "vs_R_used"}`` — the blended overall
        expected stats and the combined splits versus left- and right-handers.
    """

    def open_and_settle(url):
        page.goto(url, wait_until="domcontentloaded", timeout=60000)
        page.wait_for_timeout(2200)

    open_and_settle(SAVANT_HITTER_X_URL.format(slug=player_slug))
    by_season = parse_hitter_x_stats_year_table(page)

    platoon = {}
    for season in (2026, 2025):
        open_and_settle(
            SAVANT_HITTER_SPLIT_URL.format(slug=player_slug, season=season)
        )
        platoon[season] = parse_hitter_platoon_rows(page, season)

    overall = blend_hitter_overall_xstats(
        by_season.get(2026, {}), by_season.get(2025, {})
    )
    versus_left = combine_hitter_split_rows(
        platoon[2026].get("vs_L"), platoon[2025].get("vs_L")
    )
    versus_right = combine_hitter_split_rows(
        platoon[2026].get("vs_R"), platoon[2025].get("vs_R")
    )

    return {
        "overall_used": overall,
        "vs_L_used": versus_left,
        "vs_R_used": versus_right,
    }
|
|
|
|
def scrape_pitcher_savant_data(page, player_slug: str):
    """Scrape one pitcher's Savant expected-stats page.

    Navigates ``page`` to the pitcher page, pauses 2.5 seconds, parses the
    per-season table and blends the 2026 and 2025 rows.

    Returns:
        ``{"overall_used": <blended stats dict>}``.
    """
    page.goto(
        SAVANT_PITCHER_X_URL.format(slug=player_slug),
        wait_until="domcontentloaded",
        timeout=60000,
    )
    page.wait_for_timeout(2500)

    by_season = parse_pitcher_xstats(page)
    blended = blend_pitcher_xstats(
        by_season.get(2026, {}), by_season.get(2025, {})
    )
    return {"overall_used": blended}
|
|
|
|
def compact_player_block(player, data):
    """Format one player's scraped stats as a single ``" | "``-separated line.

    Pitchers get an ``SP <team>`` line with xwOBA, xERA, K%, BB%, HH% and
    Barrel%. Hitters get a lineup-numbered line with xwOBA, xSLG, HH%, K%, BB%
    and OPS versus left- and right-handers. Any stat missing from ``data`` is
    shown as zero.
    """
    overall = data.get("overall_used", {})

    def stat(label, source, key, digits):
        # e.g. "xwOBA 0.312"; a missing key renders as zero.
        return f"{label} {source.get(key, 0):.{digits}f}"

    identity = f"{player['name']} ({player['hand']})"

    if player["type"] == "pitcher":
        segments = [
            f"SP {player['team']}",
            identity,
            stat("xwOBA", overall, "xwoba", 3),
            stat("xERA", overall, "xera", 2),
            stat("K%", overall, "k_pct", 1),
            stat("BB%", overall, "bb_pct", 1),
            stat("HH%", overall, "hard_hit_pct", 1),
            stat("Barrel%", overall, "barrel_pct", 1),
        ]
    else:
        segments = [
            f"{player['order']:>2}. {identity}",
            stat("xwOBA", overall, "xwoba", 3),
            stat("xSLG", overall, "xslg", 3),
            stat("HH%", overall, "hard_hit_pct", 1),
            stat("K%", overall, "k_pct", 1),
            stat("BB%", overall, "bb_pct", 1),
            stat("OPS vL", data.get("vs_L_used", {}), "ops", 3),
            stat("OPS vR", data.get("vs_R_used", {}), "ops", 3),
        ]

    return " | ".join(segments)
|
|
|
|
def build_final_report(meta, matched, scraped_map, started_at):
    """Assemble the final plain-text report for one game.

    Layout: matchup header, today's Pacific date, build time in whole seconds
    since ``started_at``, both starting-pitcher lines, then the away and home
    lineups. A player with no (or empty) entry in ``scraped_map`` — which is
    keyed by player name — gets a "No Savant match" line.
    """
    elapsed = int((datetime.now() - started_at).total_seconds())

    starter_line = {"away": None, "home": None}
    lineup_lines = {"away": [], "home": []}

    for player in matched:
        stats = scraped_map.get(player["name"], {})
        if stats:
            line = compact_player_block(player, stats)
        else:
            line = (
                f"{player['type'].upper()} | {player['team']} | {player['name']} ({player['hand']}) | No Savant match"
            )

        # Any side other than "away" is treated as home.
        side = "away" if player["side"] == "away" else "home"
        if player["type"] == "pitcher":
            starter_line[side] = line
        else:
            lineup_lines[side].append(line)

    report = [
        f"{meta['away_team']} @ {meta['home_team']}",
        f"Date (PT): {today_pacific()}",
        f"Build time: {elapsed}s",
        "",
        starter_line["away"] or "Away SP not found",
        starter_line["home"] or "Home SP not found",
        "",
        f"{meta['away_team']} lineup",
    ]
    report.extend(lineup_lines["away"])
    report.append("")
    report.append(f"{meta['home_team']} lineup")
    report.extend(lineup_lines["home"])

    return "\n".join(report)
|
|
|
|
def run_selected_game(game_pk):
    """Stream progress text, then the final report, for the selected game.

    This is a generator: each yielded string replaces the contents of the
    output box. Progress yields show the last 12 progress lines.

    Flow: fetch the game's players from the live feed, open a headless
    Chromium page, map players to Savant slugs via the game preview page,
    scrape each matched player, and finally yield the assembled report.

    Error handling:
      * A failure while scraping a single player is recorded and the run
        continues; such players are listed after the final report.
      * Any other exception ends the run with a single ``"Error: ..."`` yield.
      * The browser is closed in a ``finally`` block, so it is released even
        when scraping raises or the consumer stops iterating early.
    """
    if not game_pk:
        yield "No game selected."
        return

    started_at = datetime.now()
    game_date = today_pacific()

    def seconds_elapsed():
        return int((datetime.now() - started_at).total_seconds())

    try:
        yield f"Loading selected game...\nDate (PT): {game_date}"

        players, meta = fetch_game_players(game_pk)
        yield (
            f"Loading selected game...\n"
            f"{meta['away_team']} @ {meta['home_team']}\n"
            f"Found {len(players)} MLB players to process."
        )

        failures = []

        with sync_playwright() as p:
            browser = p.chromium.launch(
                headless=True,
                args=["--no-sandbox", "--disable-dev-shm-usage"],
            )
            try:
                page = browser.new_page()

                savant_rows = scrape_savant_player_links_for_game(page, game_pk, game_date)
                matched = match_players_to_savant(players, savant_rows)

                total = len(matched)
                scraped_map = {}
                progress_lines = [
                    f"{meta['away_team']} @ {meta['home_team']}",
                    f"Matched {sum(1 for x in matched if x['matched'])}/{total} players to Savant.",
                    "",
                ]

                for idx, player in enumerate(matched, start=1):
                    if not player["matched"] or not player["savant_slug"]:
                        progress_lines.append(
                            f"[{idx}/{total}] {player['type']} {player['name']} -> no Savant match"
                        )
                        yield "\n".join(progress_lines[-12:])
                        continue

                    progress_lines.append(
                        f"[{idx}/{total}] loading {player['type']} {player['name']}..."
                    )
                    yield "\n".join(progress_lines[-12:])

                    if player["type"] == "hitter":
                        scrape = scrape_hitter_savant_data
                    else:
                        scrape = scrape_pitcher_savant_data

                    try:
                        scraped_map[player["name"]] = scrape(page, player["savant_slug"])
                    except Exception as exc:
                        # Per-player boundary: one missing table or timeout
                        # must not discard everything scraped so far.
                        failures.append(
                            f"{player['type']} {player['name']}: {exc!r}"
                        )
                        progress_lines.append(
                            f"[{idx}/{total}] failed {player['name']}"
                        )
                    else:
                        progress_lines.append(
                            f"[{idx}/{total}] done {player['name']}"
                        )

                    # Measured after the scrape so the reassurance reflects
                    # the time actually spent so far.
                    if seconds_elapsed() >= 60:
                        progress_lines.append(
                            "Still working normally — full lineup scraping just takes a bit."
                        )

                    yield "\n".join(progress_lines[-12:])
            finally:
                browser.close()

        final_report = build_final_report(meta, matched, scraped_map, started_at)
        if failures:
            # These players have no scraped data, so the report body lists
            # them as "No Savant match"; spell out the real reason here.
            final_report += "\n\nScrape failures:\n" + "\n".join(failures)
        yield final_report

    except Exception as e:
        yield f"Error: {repr(e)}"
|
|
|
|
# Gradio UI: a games dropdown, a run button, and a text box that receives the
# strings yielded by run_selected_game.
with gr.Blocks() as app:
    gr.Markdown("## MLB Savant Quick Runner")
    gr.Markdown("Today's games auto-load using Pacific time. Click Run for the selected game.")

    dropdown = gr.Dropdown(choices=[], label="Today's Games")
    button = gr.Button("Run Selected Game")
    output = gr.Textbox(lines=30, label="Output")

    # Fill the dropdown when the page loads; run the scrape on button click.
    app.load(fn=get_games, outputs=dropdown)
    button.click(fn=run_selected_game, inputs=dropdown, outputs=output)

# Enable Gradio's request queue for this app.
app.queue()

if __name__ == "__main__":
    app.launch(server_port=7860, server_name="0.0.0.0")