Spaces:
Sleeping
Sleeping
| from __future__ import annotations | |
| from datetime import datetime | |
| from typing import Any | |
| import pandas as pd | |
| import requests | |
# Request headers sent with every schedule API call.
HEADERS: dict[str, str] = {
    "User-Agent": "Mozilla/5.0",
    "Accept-Language": "en-US,en;q=0.9",
}

# Schedule endpoint queried for scores.
SCORES_API_URL: str = "https://statsapi.mlb.com/api/v1/schedule"

# Values passed as the ``sportId`` query parameter; 1 = MLB only.
SPORT_IDS: list[int] = [1]

# Maps team names as they may arrive from the API to the display name used
# in the output; names without an entry are passed through unchanged.
TEAM_NORMALIZATION: dict[str, str] = {
    "Chinese Taipei": "Chinese Taipei",
    "Czech Republic": "Czechia",
    "South Korea": "Korea",
    "USA": "United States",
    "U.S.A.": "United States",
}
| def _normalize_team_name(name: Any) -> str: | |
| text = str(name or "").strip() | |
| if not text: | |
| return "" | |
| return TEAM_NORMALIZATION.get(text, text) | |
| def _normalize_status(abstract_state: str, detailed_state: str, inning_state: str, current_inning: Any) -> str: | |
| abstract = str(abstract_state or "").strip().lower() | |
| detailed = str(detailed_state or "").strip() | |
| inning = str(inning_state or "").strip() | |
| inning_num = current_inning | |
| if abstract == "final": | |
| return "Final" | |
| if abstract == "live": | |
| if inning and inning_num not in (None, ""): | |
| return f"{inning} {inning_num}" | |
| return detailed or "Live" | |
| if abstract == "preview": | |
| return "Scheduled" | |
| if str(detailed).strip(): | |
| return str(detailed).strip() | |
| return "" | |
| def _safe_int(value: Any) -> int | None: | |
| try: | |
| if value is None: | |
| return None | |
| text = str(value).strip().lower() | |
| if text in {"", "nan", "none"}: | |
| return None | |
| return int(float(value)) | |
| except Exception: | |
| return None | |
def _format_start_time_et(game_datetime: Any) -> str:
    """Format a game timestamp as ``"H:MM AM/PM ET"`` in US Eastern time.

    Returns an empty string when the value is empty or cannot be parsed.
    """
    if not game_datetime:
        return ""
    try:
        ts = pd.to_datetime(game_datetime, utc=True).tz_convert("America/New_York")
        if pd.isna(ts):
            return ""
        # Built by hand instead of strftime("%-I:%M %p ET"): the "-" flag is a
        # glibc extension that raises ValueError on Windows, which would
        # silently blank every start time there.
        hour12 = ts.hour % 12 or 12
        meridiem = "AM" if ts.hour < 12 else "PM"
        return f"{hour12}:{ts.minute:02d} {meridiem} ET"
    except Exception:
        # Best effort: a malformed timestamp must not drop the whole game row.
        return ""


def _broadcast_names(broadcasts: Any) -> str:
    """Join the distinct, non-empty broadcast names in first-seen order."""
    cleaned = (str(b.get("name") or "").strip() for b in broadcasts or [])
    # dict.fromkeys de-duplicates while preserving insertion order.
    return ", ".join(dict.fromkeys(name for name in cleaned if name))


def _game_to_row(
    game: dict[str, Any],
    date_str: str,
    sport_id: int,
    fetched_at: datetime,
) -> dict[str, Any] | None:
    """Flatten one ``games`` entry into an output row.

    Returns ``None`` when either team name is missing, so the caller can skip
    the game.
    """
    teams = game.get("teams") or {}
    away = teams.get("away") or {}
    home = teams.get("home") or {}

    away_team = _normalize_team_name((away.get("team") or {}).get("name"))
    home_team = _normalize_team_name((home.get("team") or {}).get("name"))
    if not (away_team and home_team):
        return None

    status_info = game.get("status") or {}
    linescore = game.get("linescore") or {}
    status = _normalize_status(
        abstract_state=status_info.get("abstractGameState", ""),
        detailed_state=status_info.get("detailedState", ""),
        inning_state=linescore.get("inningState", ""),
        current_inning=linescore.get("currentInning"),
    )

    linescore_teams = linescore.get("teams") or {}
    away_ls = linescore_teams.get("away") or {}
    home_ls = linescore_teams.get("home") or {}

    # Prefer the score on the team entry; fall back to the linescore runs.
    away_score = _safe_int(away.get("score"))
    if away_score is None:
        away_score = _safe_int(away_ls.get("runs"))
    home_score = _safe_int(home.get("score"))
    if home_score is None:
        home_score = _safe_int(home_ls.get("runs"))

    game_pk = game.get("gamePk")
    return {
        "score_fetch_time": fetched_at,
        "game_date": date_str,
        "game_pk": str(game_pk) if game_pk is not None else "",
        "away_team": away_team,
        "home_team": home_team,
        "away_score": away_score,
        "home_score": home_score,
        "away_hits": _safe_int(away_ls.get("hits")),
        "home_hits": _safe_int(home_ls.get("hits")),
        "away_errors": _safe_int(away_ls.get("errors")),
        "home_errors": _safe_int(home_ls.get("errors")),
        "status": status,
        "start_time_et": _format_start_time_et(game.get("gameDate", "")),
        "tv": _broadcast_names(game.get("broadcasts")),
        "sport_id": sport_id,
    }


def _fetch_scores_for_sport_id(date_str: str, sport_id: int) -> pd.DataFrame:
    """Fetch one day's games for one sport id and return them as a DataFrame.

    Issues a GET to ``SCORES_API_URL`` with the ``linescore`` and
    ``broadcasts`` hydrations and flattens every game that has both team
    names into one row.

    Args:
        date_str: Value passed through as the ``date`` query parameter and
            stored in the ``game_date`` column.
        sport_id: Value passed through as the ``sportId`` query parameter and
            stored in the ``sport_id`` column.

    Returns:
        One row per game, de-duplicated on
        ``(game_pk, away_team, home_team, status)``. An empty, column-less
        DataFrame when the response contains no usable games.

    Raises:
        requests.HTTPError: If the response status is an error
            (via ``raise_for_status``). Other ``requests`` exceptions and
            JSON decoding errors propagate as well.
    """
    # Local import so this block does not depend on a change to the file's
    # import section.
    from datetime import timezone

    params = {
        "sportId": sport_id,
        "date": date_str,
        "hydrate": "linescore,broadcasts",
    }
    response = requests.get(SCORES_API_URL, headers=HEADERS, params=params, timeout=30)
    response.raise_for_status()
    payload = response.json()

    # One naive-UTC timestamp for the whole fetch: the same kind of value
    # datetime.utcnow() produced, without the deprecated call and without
    # per-row drift.
    fetched_at = datetime.now(timezone.utc).replace(tzinfo=None)

    rows: list[dict[str, Any]] = []
    for date_block in payload.get("dates") or []:
        for game in date_block.get("games") or []:
            row = _game_to_row(game, date_str, sport_id, fetched_at)
            if row is not None:
                rows.append(row)

    if not rows:
        return pd.DataFrame()

    df = pd.DataFrame(rows)
    df = df.drop_duplicates(subset=["game_pk", "away_team", "home_team", "status"], keep="last")
    return df.reset_index(drop=True)
def fetch_scores_for_date(date_str: str) -> pd.DataFrame:
    """Fetch scores for *date_str* across every id in ``SPORT_IDS``.

    Each sport id is fetched independently. A failure for one id is logged
    and skipped rather than raised, so a partial result is still returned.

    Args:
        date_str: Date string forwarded to ``_fetch_scores_for_sport_id``.

    Returns:
        The combined rows, de-duplicated on
        ``(game_pk, away_team, home_team)`` keeping the last occurrence.
        When nothing was fetched, an empty DataFrame that still carries the
        full set of output columns.
    """
    # Local import so this block does not depend on a change to the file's
    # import section.
    import logging

    columns = [
        "score_fetch_time",
        "game_date",
        "game_pk",
        "away_team",
        "home_team",
        "away_score",
        "home_score",
        "away_hits",
        "home_hits",
        "away_errors",
        "home_errors",
        "status",
        "start_time_et",
        "tv",
        "sport_id",
    ]

    parts: list[pd.DataFrame] = []
    for sport_id in SPORT_IDS:
        try:
            sport_df = _fetch_scores_for_sport_id(date_str, sport_id)
        except Exception:
            # Still best effort, but no longer silent: without a log line a
            # network or API failure looks exactly like "no games today".
            logging.getLogger(__name__).warning(
                "Failed to fetch scores for sportId=%s on %s",
                sport_id,
                date_str,
                exc_info=True,
            )
            continue
        if sport_df is not None and not sport_df.empty:
            parts.append(sport_df)

    if not parts:
        return pd.DataFrame(columns=columns)

    df = pd.concat(parts, ignore_index=True)
    df = df.drop_duplicates(subset=["game_pk", "away_team", "home_team"], keep="last")
    return df.reset_index(drop=True)