from __future__ import annotations from datetime import datetime from typing import Any import pandas as pd import requests HEADERS = { "User-Agent": "Mozilla/5.0", "Accept-Language": "en-US,en;q=0.9", } SCORES_API_URL = "https://statsapi.mlb.com/api/v1/schedule" SPORT_IDS = [1] # MLB only TEAM_NORMALIZATION = { "Chinese Taipei": "Chinese Taipei", "Czech Republic": "Czechia", "South Korea": "Korea", "USA": "United States", "U.S.A.": "United States", } def _normalize_team_name(name: Any) -> str: text = str(name or "").strip() if not text: return "" return TEAM_NORMALIZATION.get(text, text) def _normalize_status(abstract_state: str, detailed_state: str, inning_state: str, current_inning: Any) -> str: abstract = str(abstract_state or "").strip().lower() detailed = str(detailed_state or "").strip() inning = str(inning_state or "").strip() inning_num = current_inning if abstract == "final": return "Final" if abstract == "live": if inning and inning_num not in (None, ""): return f"{inning} {inning_num}" return detailed or "Live" if abstract == "preview": return "Scheduled" if str(detailed).strip(): return str(detailed).strip() return "" def _safe_int(value: Any) -> int | None: try: if value is None: return None text = str(value).strip().lower() if text in {"", "nan", "none"}: return None return int(float(value)) except Exception: return None def _fetch_scores_for_sport_id(date_str: str, sport_id: int) -> pd.DataFrame: params = { "sportId": sport_id, "date": date_str, "hydrate": "linescore,broadcasts", } response = requests.get(SCORES_API_URL, headers=HEADERS, params=params, timeout=30) response.raise_for_status() payload = response.json() dates = payload.get("dates", []) or [] rows: list[dict[str, Any]] = [] for date_block in dates: games = date_block.get("games", []) or [] for game in games: game_pk = game.get("gamePk") teams = game.get("teams", {}) or {} away = teams.get("away", {}) or {} home = teams.get("home", {}) or {} away_team_info = (away.get("team", {}) or {}) home_team_info = (home.get("team", {}) or {}) away_team = _normalize_team_name(away_team_info.get("name")) home_team = _normalize_team_name(home_team_info.get("name")) status_info = game.get("status", {}) or {} abstract_state = status_info.get("abstractGameState", "") detailed_state = status_info.get("detailedState", "") linescore = game.get("linescore", {}) or {} inning_state = linescore.get("inningState", "") current_inning = linescore.get("currentInning") status = _normalize_status( abstract_state=abstract_state, detailed_state=detailed_state, inning_state=inning_state, current_inning=current_inning, ) away_score = _safe_int(away.get("score")) home_score = _safe_int(home.get("score")) linescore_teams = linescore.get("teams", {}) or {} away_ls = linescore_teams.get("away", {}) or {} home_ls = linescore_teams.get("home", {}) or {} if away_score is None: away_score = _safe_int(away_ls.get("runs")) if home_score is None: home_score = _safe_int(home_ls.get("runs")) away_hits = _safe_int(away_ls.get("hits")) home_hits = _safe_int(home_ls.get("hits")) away_errors = _safe_int(away_ls.get("errors")) home_errors = _safe_int(home_ls.get("errors")) game_datetime = game.get("gameDate", "") start_time_et = "" if game_datetime: try: ts = pd.to_datetime(game_datetime, utc=True).tz_convert("America/New_York") start_time_et = ts.strftime("%-I:%M %p ET") except Exception: start_time_et = "" broadcasts = game.get("broadcasts", []) or [] tv = "" if broadcasts: names = [] for b in broadcasts: name = str((b.get("name") or "")).strip() if name and name not in names: names.append(name) tv = ", ".join(names) if away_team and home_team: rows.append( { "score_fetch_time": datetime.utcnow(), "game_date": date_str, "game_pk": str(game_pk) if game_pk is not None else "", "away_team": away_team, "home_team": home_team, "away_score": away_score, "home_score": home_score, "away_hits": away_hits, "home_hits": home_hits, "away_errors": away_errors, "home_errors": home_errors, "status": status, "start_time_et": start_time_et, "tv": tv, "sport_id": sport_id, } ) if not rows: return pd.DataFrame() df = pd.DataFrame(rows) df = df.drop_duplicates(subset=["game_pk", "away_team", "home_team", "status"], keep="last") return df.reset_index(drop=True) def fetch_scores_for_date(date_str: str) -> pd.DataFrame: parts: list[pd.DataFrame] = [] for sport_id in SPORT_IDS: try: sport_df = _fetch_scores_for_sport_id(date_str, sport_id) if sport_df is not None and not sport_df.empty: parts.append(sport_df) except Exception: pass if not parts: return pd.DataFrame( columns=[ "score_fetch_time", "game_date", "game_pk", "away_team", "home_team", "away_score", "home_score", "away_hits", "home_hits", "away_errors", "home_errors", "status", "start_time_et", "tv", "sport_id", ] ) df = pd.concat(parts, ignore_index=True) df = df.drop_duplicates(subset=["game_pk", "away_team", "home_team"], keep="last") return df.reset_index(drop=True)