from __future__ import annotations import concurrent.futures import logging as _logging from typing import Any import pandas as pd import requests _diag_log = _logging.getLogger(__name__) from config.settings import ODDS_API_KEY from data.market_provider_base import MarketProviderBase from data.odds_name_map import map_odds_name_to_model_name ODDS_API_BASE = "https://api.the-odds-api.com/v4/sports" # --------------------------------------------------------------------------- # Provider strategy (Batch 14) # Active v1: The Odds API → DraftKings, FanDuel, BetMGM, Caesars (williamhill_us) # Sharp feed: Pinnacle — planned as separate PinnacleProvider class with its own # API key; register it in live_prop_odds.py when ready # Deferred: Bet365, Circa (unclear API availability on The Odds API) # Enterprise: ENABLE_ENTERPRISE_PROVIDER flag in config/settings.py # --------------------------------------------------------------------------- SUPPORTED_BOOKS = { "draftkings", "fanduel", "betmgm", "williamhill_us", # Caesars } SUPPORTED_MARKETS = { "batter_home_runs", "batter_hits", "batter_total_bases", "pitcher_strikeouts", "pitcher_strikeouts_alternate", } MARKET_NAME_MAP = { "batter_home_runs": "hr", "batter_hits": "hit", "batter_total_bases": "tb", "pitcher_strikeouts": "k", "pitcher_strikeouts_alternate": "k", } BOOK_KEY_MAP = { "draftkings": "DraftKings", "fanduel": "FanDuel", "betmgm": "BetMGM", "williamhill_us": "Caesars", } _MAX_EVENTS = 15 _MAX_PARALLEL_ODDS_WORKERS = 8 # concurrent per-event HTTP calls _TOTAL_ODDS_FETCH_TIMEOUT_S = 45 # wall-clock cap for all parallel fetches TEAM_NAME_ALIASES = { "usa": "united states", "united states": "united states", "japan": "japan", "korea": "korea", "south korea": "korea", "chinese taipei": "chinese taipei", "taiwan": "chinese taipei", "czech republic": "czechia", "czechia": "czechia", "dominican republic": "dominican republic", "puerto rico": "puerto rico", "great britain": "great britain", "netherlands": "netherlands", "venezuela": "venezuela", "mexico": "mexico", "canada": "canada", "colombia": "colombia", "cuba": "cuba", "panama": "panama", "brazil": "brazil", "italy": "italy", "australia": "australia", "china": "china", "nicaragua": "nicaragua", "israel": "israel", } def _canon_team(name: str) -> str: text = str(name or "").strip().lower() return TEAM_NAME_ALIASES.get(text, text) def _safe_float(value: Any) -> float | None: try: if value is None: return None text = str(value).strip().lower() if text in {"", "nan", "none"}: return None return float(value) except Exception: return None def _fetch_event_odds( event: dict, books: list[str], market_keys: list[str], provider_name: str, ) -> tuple[list[dict[str, Any]], bool]: """ Fetch and parse odds for a single event. Returns (rows, is_rate_limited). Designed to be called from a thread pool. """ event_id = str(event.get("id", "") or "") away_team = str(event.get("away_team", "") or "") home_team = str(event.get("home_team", "") or "") commence_time = str(event.get("commence_time", "") or "") odds_url = f"{ODDS_API_BASE}/baseball_mlb/events/{event_id}/odds" odds_params = { "apiKey": ODDS_API_KEY, "regions": "us", "markets": ",".join(market_keys), "bookmakers": ",".join(books), "oddsFormat": "american", "dateFormat": "iso", } _diag_log.info( "[upcoming_hr_props] Step2 event_id=%s %s@%s", event_id, away_team, home_team, ) try: r2 = requests.get(odds_url, params=odds_params, timeout=30) _diag_log.warning( "[upcoming_hr_props] Step2 HTTP %s | remaining=%s | event_id=%s %s@%s", r2.status_code, r2.headers.get("x-requests-remaining", "?"), event_id, away_team, home_team, ) r2.raise_for_status() except (requests.HTTPError, requests.RequestException) as exc: _is_429 = ( isinstance(exc, requests.HTTPError) and exc.response is not None and exc.response.status_code == 429 ) _diag_log.warning( "[upcoming_hr_props] event %s@%s odds failed (429=%s): %s", away_team, home_team, _is_429, exc, ) return [], _is_429 event_data = r2.json() bookmakers = ( event_data.get("bookmakers", []) if isinstance(event_data, dict) else [] ) rows: list[dict[str, Any]] = [] for bookmaker in bookmakers: book_key = str(bookmaker.get("key", "") or "") book_name = BOOK_KEY_MAP.get(book_key, book_key) for market in bookmaker.get("markets", []) or []: market_key = str(market.get("key", "") or "") if market_key not in market_keys: continue market_name = MARKET_NAME_MAP.get(market_key, market_key) for outcome in market.get("outcomes", []) or []: player_name_raw = str( outcome.get("description", "") or outcome.get("name", "") or "" ).strip() if not player_name_raw: continue price = outcome.get("price") if price is None: continue rows.append( { "provider": provider_name, "event_id": event_id, "commence_time": commence_time, "away_team": away_team, "home_team": home_team, "sportsbook": book_name, "sportsbook_key": book_key, "market_key": market_key, "market": market_name, "player_name_raw": player_name_raw, "selection_label": str(outcome.get("name", "") or "").strip(), "player_name": map_odds_name_to_model_name(player_name_raw), "odds_american": int(price), "line": _safe_float(outcome.get("point")), } ) _diag_log.warning( "[upcoming_hr_props] %s@%s rows=%d", away_team, home_team, len(rows), ) return rows, False class TheOddsAPIProvider(MarketProviderBase): provider_name = "theoddsapi" def fetch_live_prop_odds( self, game_context: dict[str, Any], sportsbooks: list[str] | None = None, markets: list[str] | None = None, ) -> pd.DataFrame: if not ODDS_API_KEY: return pd.DataFrame() sportsbooks = sportsbooks or ["draftkings", "fanduel", "betmgm"] markets = markets or ["batter_home_runs", "batter_hits", "batter_total_bases"] books = [b for b in sportsbooks if b in SUPPORTED_BOOKS] mkts = [m for m in markets if m in SUPPORTED_MARKETS] if not books or not mkts: return pd.DataFrame() away_key = _canon_team(game_context.get("away_team", "")) home_key = _canon_team(game_context.get("home_team", "")) requested_books = sportsbooks or ["draftkings", "fanduel", "betmgm"] books = [b for b in requested_books if b in SUPPORTED_BOOKS] if not books: _diag_log.warning( "[upcoming_hr_props] no supported requested books from %s", requested_books, ) return pd.DataFrame() from datetime import datetime, timezone, timedelta now = datetime.now(timezone.utc) events_url = f"{ODDS_API_BASE}/baseball_mlb/events" events_params = { "apiKey": ODDS_API_KEY, "dateFormat": "iso", "commenceTimeFrom": (now - timedelta(hours=6)).strftime("%Y-%m-%dT%H:%M:%SZ"), "commenceTimeTo": (now + timedelta(days=1)).strftime("%Y-%m-%dT%H:%M:%SZ"), } try: r1 = requests.get(events_url, params=events_params, timeout=30) r1.raise_for_status() except requests.HTTPError as exc: body = (exc.response.text[:300] if exc.response is not None else "") raise RuntimeError( f"Odds API events list HTTP {exc.response.status_code}: {body}" ) from exc except requests.RequestException as exc: raise RuntimeError(f"Odds API events network error: {exc}") from exc events = r1.json() # Find the event matching this game's teams event_id = None away_team_orig = "" home_team_orig = "" commence_time = "" for ev in events: ev_away = _canon_team(ev.get("away_team", "")) ev_home = _canon_team(ev.get("home_team", "")) if ev_away == away_key and ev_home == home_key: event_id = str(ev.get("id", "") or "") away_team_orig = str(ev.get("away_team", "") or "") home_team_orig = str(ev.get("home_team", "") or "") commence_time = str(ev.get("commence_time", "") or "") break if not event_id: _diag_log.info( "[live_prop_odds] no matching event for %s@%s in %d events", away_key, home_key, len(events), ) return pd.DataFrame() odds_url = f"{ODDS_API_BASE}/baseball_mlb/events/{event_id}/odds" odds_params = { "apiKey": ODDS_API_KEY, "regions": "us", "markets": ",".join(mkts), "bookmakers": ",".join(books), "oddsFormat": "american", "dateFormat": "iso", } try: r2 = requests.get(odds_url, params=odds_params, timeout=30) r2.raise_for_status() except requests.HTTPError as exc: body = (exc.response.text[:300] if exc.response is not None else "") raise RuntimeError( f"Odds API event odds HTTP {exc.response.status_code}: {body}" ) from exc except requests.RequestException as exc: raise RuntimeError(f"Odds API event odds network error: {exc}") from exc event_data = r2.json() bookmakers = ( event_data.get("bookmakers", []) if isinstance(event_data, dict) else [] ) rows: list[dict[str, Any]] = [] for bookmaker in bookmakers: book_key = str(bookmaker.get("key", "") or "") book_name = BOOK_KEY_MAP.get(book_key, book_key) for market in bookmaker.get("markets", []) or []: market_key = str(market.get("key", "") or "") market_name = MARKET_NAME_MAP.get(market_key, market_key) for outcome in market.get("outcomes", []) or []: player_name_raw = str( outcome.get("description", "") or outcome.get("name", "") or "" ).strip() if not player_name_raw: continue price = outcome.get("price") if price is None: continue rows.append( { "provider": self.provider_name, "event_id": event_id, "commence_time": commence_time, "away_team": away_team_orig, "home_team": home_team_orig, "sportsbook": book_name, "sportsbook_key": book_key, "market_key": market_key, "market": market_name, "player_name_raw": player_name_raw, "selection_label": str(outcome.get("name", "") or "").strip(), "player_name": map_odds_name_to_model_name(player_name_raw), "odds_american": int(price), "line": _safe_float(outcome.get("point")), } ) return pd.DataFrame(rows) def fetch_all_upcoming_hr_props( self, sportsbooks: list[str] | None = None, markets: list[str] | None = None, ) -> pd.DataFrame: """ Fetch HR props for ALL upcoming MLB events in a single API call. Unlike fetch_live_prop_odds(), this applies no game-level team filter — every event in the payload is included. Upcoming supported props for all upcoming MLB events. """ if not ODDS_API_KEY: _diag_log.warning("[upcoming_hr_props] ODDS_API_KEY is empty — aborting") return pd.DataFrame() requested_markets = markets or ["batter_home_runs"] market_keys = [m for m in requested_markets if m in SUPPORTED_MARKETS] if not market_keys: _diag_log.warning( "[upcoming_hr_props] no supported requested markets from %s", requested_markets, ) return pd.DataFrame() from datetime import datetime, timezone, timedelta now = datetime.now(timezone.utc) events_url = f"{ODDS_API_BASE}/baseball_mlb/events" events_params = { "apiKey": ODDS_API_KEY, "dateFormat": "iso", "commenceTimeFrom": now.strftime("%Y-%m-%dT%H:%M:%SZ"), "commenceTimeTo": (now + timedelta(days=7)).strftime("%Y-%m-%dT%H:%M:%SZ"), } _diag_log.info( "[upcoming_hr_props] Step1 GET %s params=%s", events_url, {k: (v if k != "apiKey" else v[:6] + "...") for k, v in events_params.items()}, ) try: r1 = requests.get(events_url, params=events_params, timeout=30) _diag_log.warning( "[upcoming_hr_props] events HTTP %s | remaining=%s", r1.status_code, r1.headers.get("x-requests-remaining", "?"), ) r1.raise_for_status() except requests.HTTPError as exc: body = (exc.response.text[:300] if exc.response is not None else "") raise RuntimeError( f"Odds API events list HTTP {exc.response.status_code}: {body}" ) from exc except requests.RequestException as exc: raise RuntimeError(f"Odds API events network error: {exc}") from exc events = r1.json() _diag_log.warning( "[upcoming_hr_props] events found=%d (cap=%d)", len(events), _MAX_EVENTS ) events = events[:_MAX_EVENTS] requested_books = sportsbooks or ["draftkings", "fanduel", "betmgm"] books = [b for b in requested_books if b in SUPPORTED_BOOKS] if not books: _diag_log.warning( "[upcoming_hr_props] no supported requested books from %s", requested_books, ) return pd.DataFrame() # Deduplicate events seen_ids: set[str] = set() valid_events: list[dict] = [] for event in events: event_id = str(event.get("id", "") or "") if event_id and event_id not in seen_ids: seen_ids.add(event_id) valid_events.append(event) _diag_log.warning( "[upcoming_hr_props] fetching odds for %d events in parallel (max_workers=%d, timeout=%ds)", len(valid_events), _MAX_PARALLEL_ODDS_WORKERS, _TOTAL_ODDS_FETCH_TIMEOUT_S, ) rows: list[dict[str, Any]] = [] _events_attempted = len(valid_events) _events_rate_limited = 0 _events_timed_out = 0 with concurrent.futures.ThreadPoolExecutor( max_workers=_MAX_PARALLEL_ODDS_WORKERS ) as executor: future_to_event = { executor.submit( _fetch_event_odds, event, books, market_keys, self.provider_name ): event for event in valid_events } done, not_done = concurrent.futures.wait( future_to_event, timeout=_TOTAL_ODDS_FETCH_TIMEOUT_S, ) for future in not_done: future.cancel() ev = future_to_event[future] _diag_log.warning( "[upcoming_hr_props] event %s@%s timed out after %ds", ev.get("away_team", "?"), ev.get("home_team", "?"), _TOTAL_ODDS_FETCH_TIMEOUT_S, ) _events_timed_out += 1 for future in done: try: event_rows, is_429 = future.result() rows.extend(event_rows) if is_429: _events_rate_limited += 1 except Exception as exc: ev = future_to_event[future] _diag_log.warning( "[upcoming_hr_props] event %s@%s raised: %s", ev.get("away_team", "?"), ev.get("home_team", "?"), exc, ) _diag_log.warning( "[upcoming_hr_props] SUMMARY books=%s markets=%s events_returned=%d events_attempted=%d " "events_rate_limited=%d events_timed_out=%d total_rows=%d", books, market_keys, len(events), _events_attempted, _events_rate_limited, _events_timed_out, len(rows), ) return pd.DataFrame(rows) def fetch_upcoming_market_coverage_probe( self, sportsbooks: list[str] | None = None, markets: list[str] | None = None, max_events: int = 5, ) -> pd.DataFrame: if not ODDS_API_KEY: return pd.DataFrame() requested_books = sportsbooks or ["draftkings", "fanduel", "betmgm", "williamhill_us"] books = [b for b in requested_books if b in SUPPORTED_BOOKS] probe_markets = markets or [ "batter_home_runs", "batter_hits", "pitcher_strikeouts", ] probe_markets = [m for m in probe_markets if m] if not books or not probe_markets: return pd.DataFrame() from datetime import datetime, timezone, timedelta now = datetime.now(timezone.utc) events_url = f"{ODDS_API_BASE}/baseball_mlb/events" events_params = { "apiKey": ODDS_API_KEY, "dateFormat": "iso", "commenceTimeFrom": now.strftime("%Y-%m-%dT%H:%M:%SZ"), "commenceTimeTo": (now + timedelta(days=7)).strftime("%Y-%m-%dT%H:%M:%SZ"), } try: r1 = requests.get(events_url, params=events_params, timeout=30) r1.raise_for_status() events = r1.json()[: max(1, int(max_events))] except Exception as exc: _diag_log.warning("[coverage_probe] events fetch failed: %s", exc) return pd.DataFrame() rows: list[dict[str, Any]] = [] for event in events: event_id = str(event.get("id", "") or "") away_team = str(event.get("away_team", "") or "") home_team = str(event.get("home_team", "") or "") commence_time = str(event.get("commence_time", "") or "") if not event_id: continue for market_key in probe_markets: for book_key in books: odds_url = f"{ODDS_API_BASE}/baseball_mlb/events/{event_id}/odds" odds_params = { "apiKey": ODDS_API_KEY, "regions": "us", "markets": market_key, "bookmakers": book_key, "oddsFormat": "american", "dateFormat": "iso", } bookmaker_count = 0 outcome_count = 0 response_status = None error_text = "" returned_books: list[str] = [] try: r2 = requests.get(odds_url, params=odds_params, timeout=30) response_status = r2.status_code r2.raise_for_status() event_data = r2.json() bookmakers = ( event_data.get("bookmakers", []) if isinstance(event_data, dict) else [] ) bookmaker_count = len(bookmakers) returned_books = [ str(bookmaker.get("key", "") or "") for bookmaker in bookmakers ] outcome_count = sum( len(market.get("outcomes", []) or []) for bookmaker in bookmakers for market in bookmaker.get("markets", []) or [] ) except requests.HTTPError as exc: response_status = exc.response.status_code if exc.response is not None else None error_text = f"http_{response_status}" except requests.RequestException as exc: error_text = str(exc) except Exception as exc: error_text = str(exc) rows.append( { "provider": self.provider_name, "event_id": event_id, "away_team": away_team, "home_team": home_team, "commence_time": commence_time, "sportsbook_key": book_key, "sportsbook": BOOK_KEY_MAP.get(book_key, book_key), "market_key": market_key, "bookmakers_returned": bookmaker_count, "outcomes_returned": outcome_count, "returned_books": "|".join(returned_books), "has_data": bookmaker_count > 0 and outcome_count > 0, "response_status": response_status, "error": error_text, } ) return pd.DataFrame(rows)