Spaces:
Sleeping
Sleeping
| from __future__ import annotations | |
| import re | |
| from typing import Any | |
| import pandas as pd | |
| from config.settings import DEFAULT_UPCOMING_PROP_MARKETS, ENABLE_ENTERPRISE_PROVIDER | |
| from data.provider_enterprise import EnterpriseMarketProvider | |
| from data.provider_scrape import ScrapeFallbackProvider | |
| from data.provider_theoddsapi import TheOddsAPIProvider | |
| from utils.logger import logger | |
def normalize_prop_odds(raw_df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``raw_df`` with the canonical prop-odds columns filled in.

    Missing canonical columns are created (as ``None``), then ``market_family``
    and ``selection_side`` are inferred row by row, and the derived fields
    (``threshold``, ``market_variant``, ``display_label``, ``is_primary_line``,
    ``is_modeled``, ``selection_scope``, ``player_event_market_key``) are
    populated per market family ("hr", "k", "no_hr", everything else).

    Missing values (``None``/NaN) in text fields are treated as empty text, so
    a NaN ``market_family`` or ``selection_side`` triggers inference instead of
    being stored as the literal string ``"nan"``.

    Args:
        raw_df: Provider rows. An empty frame is returned unchanged (same object).

    Returns:
        A new DataFrame; ``raw_df`` itself is not modified.
    """
    if raw_df.empty:
        return raw_df

    out = raw_df.copy()
    required_cols = [
        "provider",
        "event_id",
        "commence_time",
        "away_team",
        "home_team",
        "sportsbook",
        "sportsbook_key",
        "market",
        "player_name_raw",
        "player_name",
        "odds_american",
        "line",
        "selection_label",
        "selection_scope",
        "selection_side",
        "market_family",
        "market_variant",
        "threshold",
        "display_label",
        "is_primary_line",
        "is_modeled",
        "player_event_market_key",
    ]
    for col in required_cols:
        if col not in out.columns:
            out[col] = None

    def _text(val) -> str:
        """Stripped text for ``val``; ``""`` for None, NaN and other falsy values."""
        try:
            if val is None or pd.isna(val) or not val:
                return ""
        except (TypeError, ValueError):
            # Array-like cells have no single truth value; fall back to str().
            pass
        return str(val).strip()

    def _safe_float(val):
        """``float(val)``, or ``None`` when the value is missing or not numeric."""
        try:
            if pd.isna(val):
                return None
            return float(val)
        except (TypeError, ValueError, OverflowError):
            return None

    def _format_line(val) -> str:
        """Render a betting line without a redundant ``.0`` (10 -> "10", 7.5 -> "7.5")."""
        number = _safe_float(val)
        if number is None:
            return _text(val)
        if number.is_integer():
            return str(int(number))
        return str(number)

    def _infer_threshold(row: pd.Series) -> int | None:
        """Resolve the "N+" threshold: explicit value, then labels, then the line."""
        existing = row.get("threshold")
        if pd.notna(existing):
            try:
                return int(existing)
            except (TypeError, ValueError, OverflowError):
                pass
        selection_label = _text(row.get("selection_label"))
        if selection_label:
            match = re.search(r"(\d+)\s*\+", selection_label)
            if match:
                return int(match.group(1))
        market_text = " ".join(
            [
                _text(row.get("market_key")),
                _text(row.get("market")),
                _text(row.get("player_name_raw")),
            ]
        )
        match = re.search(r"(\d+)\s*\+", market_text)
        if match:
            return int(match.group(1))
        line = _safe_float(row.get("line"))
        if line is None:
            return 1
        if line >= 0:
            # An over/under line of 0.5 means "1+", 1.5 means "2+", and so on.
            return max(1, int(line) + 1)
        return 1

    def _infer_market_family(row: pd.Series) -> str:
        """Pick the market family from the explicit value, ``market`` or ``market_key``."""
        existing = _text(row.get("market_family")).lower()
        if existing:
            return existing
        market = _text(row.get("market")).lower()
        if market in {"hr", "hit", "tb", "k", "no_hr"}:
            return market
        market_key = _text(row.get("market_key")).lower()
        if "home_run" in market_key:
            if "no" in market_key:
                return "no_hr"
            return "hr"
        if "strikeout" in market_key:
            return "k"
        if "hits" in market_key:
            return "hit"
        if "total_bases" in market_key:
            return "tb"
        return market or market_key or "unknown"

    def _infer_selection_side(row: pd.Series) -> str | None:
        """Use the explicit side, else derive over/under from the selection label."""
        existing = _text(row.get("selection_side")).lower()
        if existing:
            return existing
        label = _text(row.get("selection_label")).lower()
        if label.startswith("over"):
            return "over"
        if label.startswith("under"):
            return "under"
        return None

    def _strikeout_label(row: pd.Series) -> str:
        """Build e.g. "Over 6.5 K"; empty parts are left out of the label."""
        side = _text(row.get("selection_side")).capitalize()
        line_text = _format_line(row.get("line"))
        return " ".join(part for part in (side, line_text, "K") if part)

    out["market_family"] = out.apply(_infer_market_family, axis=1)
    out["selection_side"] = out.apply(_infer_selection_side, axis=1)

    hr_mask = out["market_family"].eq("hr")
    k_mask = out["market_family"].eq("k")
    no_hr_mask = out["market_family"].eq("no_hr")

    # Thresholds: inferred for HR rows, the numeric line for strikeout rows,
    # and whatever was already present for everything else.
    out.loc[hr_mask, "threshold"] = out.loc[hr_mask].apply(_infer_threshold, axis=1)
    out.loc[~hr_mask, "threshold"] = out.loc[~hr_mask, "threshold"].where(
        out.loc[~hr_mask, "threshold"].notna(),
        None,
    )
    out.loc[k_mask, "threshold"] = out.loc[k_mask, "line"].apply(_safe_float)

    # Home-run rows: only the 1+ threshold is the primary, modeled line.
    out.loc[hr_mask, "market_variant"] = out.loc[hr_mask, "threshold"].apply(
        lambda v: "primary" if pd.notna(v) and int(v) == 1 else "alternate"
    )
    out.loc[hr_mask, "display_label"] = out.loc[hr_mask, "threshold"].apply(
        lambda v: f"{int(v)}+ HR" if pd.notna(v) else "HR"
    )
    out.loc[hr_mask, "is_primary_line"] = out.loc[hr_mask, "threshold"].apply(
        lambda v: bool(pd.notna(v) and int(v) == 1)
    )
    out.loc[hr_mask, "is_modeled"] = out.loc[hr_mask, "threshold"].apply(
        lambda v: bool(pd.notna(v) and int(v) == 1)
    )
    out.loc[~hr_mask, "market_variant"] = out.loc[~hr_mask, "market_variant"].where(
        out.loc[~hr_mask, "market_variant"].notna(),
        "standard",
    )

    # Strikeout rows.
    if "market_key" in out.columns:
        k_alt_mask = k_mask & out["market_key"].astype(str).str.strip().eq("pitcher_strikeouts_alternate")
        out.loc[k_alt_mask, "market_variant"] = "alternate"
    out.loc[k_mask, "selection_scope"] = out.loc[k_mask, "selection_scope"].where(
        out.loc[k_mask, "selection_scope"].notna(),
        "pitcher",
    )
    out.loc[k_mask, "display_label"] = out.loc[k_mask].apply(_strikeout_label, axis=1)
    out.loc[k_mask, "is_primary_line"] = out.loc[k_mask, "market_variant"].apply(
        lambda v: v != "alternate"
    )
    out.loc[k_mask, "is_modeled"] = out.loc[k_mask, "selection_side"].isin(["over", "under"])

    # "No home run" rows.
    out.loc[no_hr_mask, "selection_scope"] = out.loc[no_hr_mask, "selection_scope"].where(
        out.loc[no_hr_mask, "selection_scope"].notna(),
        "game",
    )
    out.loc[no_hr_mask, "display_label"] = out.loc[no_hr_mask, "display_label"].where(
        out.loc[no_hr_mask, "display_label"].notna(),
        "No Home Run",
    )
    out.loc[no_hr_mask, "is_primary_line"] = True

    # Every other family: default the label to the upper-cased family name.
    other_mask = ~(hr_mask | k_mask | no_hr_mask)
    out.loc[other_mask, "display_label"] = out.loc[other_mask, "display_label"].where(
        out.loc[other_mask, "display_label"].notna(),
        out.loc[other_mask, "market_family"].astype(str).str.upper(),
    )

    # Defaults for non-HR rows that still have no flags.
    out.loc[~hr_mask, "is_primary_line"] = out.loc[~hr_mask, "is_primary_line"].where(
        out.loc[~hr_mask, "is_primary_line"].notna(),
        True,
    )
    out.loc[~hr_mask, "is_modeled"] = out.loc[~hr_mask, "is_modeled"].where(
        out.loc[~hr_mask, "is_modeled"].notna(),
        False,
    )

    # Composite key: event | player | family | threshold | side | line.
    out["player_event_market_key"] = (
        out["event_id"].fillna("").astype(str).str.strip()
        + "|"
        + out["player_name"].fillna("").astype(str).str.strip().str.lower()
        + "|"
        + out["market_family"].fillna("").astype(str).str.strip().str.lower()
        + "|"
        + out["threshold"].fillna("").astype(str).str.strip()
        + "|"
        + out["selection_side"].fillna("").astype(str).str.strip().str.lower()
        + "|"
        + out["line"].fillna("").astype(str).str.strip()
    )
    return out
| def _normalize_requested_books(sportsbooks: list[str] | None) -> list[str]: | |
| default_books = ["draftkings", "fanduel", "betmgm", "williamhill_us"] | |
| books = sportsbooks or default_books | |
| normalized: list[str] = [] | |
| for book in books: | |
| book_key = str(book or "").strip().lower() | |
| if book_key and book_key not in normalized: | |
| normalized.append(book_key) | |
| return normalized | |
| def _normalize_requested_markets(markets: list[str] | None) -> list[str]: | |
| normalized: list[str] = [] | |
| for market in (markets or DEFAULT_UPCOMING_PROP_MARKETS): | |
| market_key = str(market or "").strip().lower() | |
| if market_key and market_key not in normalized: | |
| normalized.append(market_key) | |
| return normalized | |
def _requested_market_families(markets: list[str] | None) -> list[str]:
    """Map requested market keys to distinct market families, in first-seen order.

    A key is matched by substring; keys that match none of the known
    fragments contribute nothing.
    """
    # Checked in this order; the first fragment found in the key wins.
    fragment_to_family = (
        ("home_run", "hr"),
        ("strikeout", "k"),
        ("hits", "hit"),
        ("total_bases", "tb"),
    )
    seen: list[str] = []
    for key in _normalize_requested_markets(markets):
        matched = next(
            (family for fragment, family in fragment_to_family if fragment in key),
            "",
        )
        if matched and matched not in seen:
            seen.append(matched)
    return seen
| def _event_signature(df: pd.DataFrame) -> pd.Series: | |
| away = df.get("away_team", pd.Series(index=df.index, dtype="object")).fillna("").astype(str).str.strip().str.lower() | |
| home = df.get("home_team", pd.Series(index=df.index, dtype="object")).fillna("").astype(str).str.strip().str.lower() | |
| commence = df.get("commence_time", pd.Series(index=df.index, dtype="object")).fillna("").astype(str).str.strip().str.lower() | |
| return away + "|" + home + "|" + commence | |
def _dedupe_props(df: pd.DataFrame) -> pd.DataFrame:
    """Normalize ``df`` and keep one row per prop identity, preferring higher odds.

    Rows are identical when they share event signature, player, sportsbook key,
    market family, threshold, selection side and line. Among duplicates the row
    with the largest integer ``odds_american`` is kept; rows without odds rank
    last. An empty frame is returned unchanged.
    """
    if df.empty:
        return df

    identity_cols = [
        "_event_signature",
        "player_name",
        "sportsbook_key",
        "market_family",
        "threshold",
        "selection_side",
        "line",
    ]
    working = normalize_prop_odds(df)
    # Missing odds get a sentinel so they sort behind every real price.
    working["_odds_score"] = working["odds_american"].apply(
        lambda price: -9999 if pd.isna(price) else int(price)
    )
    working["_event_signature"] = _event_signature(working)

    ranked = working.sort_values("_odds_score", ascending=False)
    unique_rows = ranked.drop_duplicates(subset=identity_cols, keep="first")
    trimmed = unique_rows.drop(columns=["_odds_score", "_event_signature"])
    return trimmed.reset_index(drop=True)
def _build_provider_summary(*frames: pd.DataFrame) -> pd.DataFrame:
    """Count rows, distinct events and distinct players per provider and sportsbook.

    ``None`` and empty frames are skipped. With nothing left to summarise, an
    empty frame carrying the summary column names is returned.
    """
    prepared = []
    for candidate in frames:
        if candidate is not None and not candidate.empty:
            normalized = normalize_prop_odds(candidate)
            normalized["_event_signature"] = _event_signature(normalized)
            prepared.append(normalized)

    if not prepared:
        return pd.DataFrame(
            columns=["provider", "sportsbook", "rows", "unique_events", "unique_players"]
        )

    group_keys = ["provider", "sportsbook"]
    combined = pd.concat(prepared, ignore_index=True)
    counts = combined.groupby(group_keys, dropna=False).agg(
        rows=("player_name", "size"),
        unique_events=("_event_signature", "nunique"),
        unique_players=("player_name", "nunique"),
    )
    ordered = counts.reset_index().sort_values(group_keys, na_position="last")
    return ordered.reset_index(drop=True)
def _build_market_book_summary(df: pd.DataFrame) -> pd.DataFrame:
    """Count rows, distinct events and distinct players per market family and sportsbook.

    A ``None`` or empty input yields an empty frame carrying the summary
    column names.
    """
    if df is None or df.empty:
        return pd.DataFrame(
            columns=["market_family", "sportsbook", "rows", "unique_events", "unique_players"]
        )

    group_keys = ["market_family", "sportsbook"]
    normalized = normalize_prop_odds(df)
    normalized["_event_signature"] = _event_signature(normalized)
    counts = normalized.groupby(group_keys, dropna=False).agg(
        rows=("player_name", "size"),
        unique_events=("_event_signature", "nunique"),
        unique_players=("player_name", "nunique"),
    )
    ordered = counts.reset_index().sort_values(group_keys, na_position="last")
    return ordered.reset_index(drop=True)
def _filter_market_family(df: pd.DataFrame, market_family: str) -> pd.DataFrame:
    """Return the normalized rows of ``df`` whose market family equals ``market_family``.

    The comparison ignores case and surrounding whitespace. A ``None`` or empty
    input yields an empty, column-less frame.
    """
    if df is None or df.empty:
        return pd.DataFrame()

    normalized = normalize_prop_odds(df)
    if "market_family" not in normalized.columns:
        return pd.DataFrame()

    wanted = str(market_family or "").strip().lower()
    observed = normalized["market_family"].astype(str).str.strip().str.lower()
    return normalized[observed == wanted].copy()
def _build_missing_books_by_market(
    df: pd.DataFrame,
    *,
    requested_books: list[str],
    requested_markets: list[str],
) -> pd.DataFrame:
    """List, per requested market family, which requested books have rows and which do not.

    Returns one row per family with comma-joined ``available_books`` and
    ``missing_books`` plus their counts. When no requested market maps to a
    known family, an empty frame with those columns is returned.
    """
    families = _requested_market_families(requested_markets)
    if not families:
        return pd.DataFrame(
            columns=["market_family", "available_books", "missing_books", "available_count", "missing_count"]
        )

    normalized = pd.DataFrame() if df is None else normalize_prop_odds(df)
    wanted_books = {str(book).strip().lower() for book in requested_books if str(book).strip()}

    def _rows_for(family: str) -> pd.DataFrame:
        if normalized.empty:
            return pd.DataFrame()
        family_values = normalized.get("market_family", pd.Series(dtype="object")).astype(str).str.lower()
        return normalized[family_values == family].copy()

    def _books_in(frame: pd.DataFrame) -> list[str]:
        keys = frame.get("sportsbook_key", pd.Series(dtype="object")).dropna().astype(str).tolist()
        return sorted({str(key).strip().lower() for key in keys if str(key).strip()})

    records: list[dict[str, Any]] = []
    for family in families:
        present = _books_in(_rows_for(family))
        absent = sorted(wanted_books - set(present))
        records.append(
            {
                "market_family": family,
                "available_books": ", ".join(present),
                "missing_books": ", ".join(absent),
                "available_count": len(present),
                "missing_count": len(absent),
            }
        )
    return pd.DataFrame(records)
def _build_missing_event_books_by_market(
    df: pd.DataFrame,
    *,
    requested_books: list[str],
    requested_markets: list[str],
) -> pd.DataFrame:
    """List, per event and requested market family, the available and missing books.

    Events are grouped by whichever of ``event_id``, ``away_team``,
    ``home_team`` and ``commence_time`` exist after normalization. The result
    is sorted by market family, then event id. A ``None``/empty input, or
    markets that map to no known family, yield an empty frame with the
    expected columns.
    """
    families = _requested_market_families(requested_markets)
    if df is None or df.empty or not families:
        return pd.DataFrame(
            columns=["event_id", "away_team", "home_team", "commence_time", "market_family", "available_books", "missing_books", "available_count", "missing_count"]
        )

    normalized = normalize_prop_odds(df)
    event_cols = [
        name
        for name in ["event_id", "away_team", "home_team", "commence_time"]
        if name in normalized.columns
    ]
    if not event_cols:
        return pd.DataFrame()

    wanted_books = {str(book).strip().lower() for book in requested_books if str(book).strip()}

    def _books_in(frame: pd.DataFrame) -> list[str]:
        keys = frame.get("sportsbook_key", pd.Series(dtype="object")).dropna().astype(str).tolist()
        return sorted({str(key).strip().lower() for key in keys if str(key).strip()})

    records: list[dict[str, Any]] = []
    for group_key, event_rows in normalized.groupby(event_cols, dropna=False):
        # A single grouping column may yield a scalar key rather than a tuple.
        key_values = group_key if isinstance(group_key, tuple) else (group_key,)
        event_fields = dict(zip(event_cols, key_values))
        family_values = event_rows["market_family"].astype(str).str.lower()
        for family in families:
            present = _books_in(event_rows[family_values == family].copy())
            absent = sorted(wanted_books - set(present))
            records.append(
                {
                    **event_fields,
                    "market_family": family,
                    "available_books": ", ".join(present),
                    "missing_books": ", ".join(absent),
                    "available_count": len(present),
                    "missing_count": len(absent),
                }
            )

    report = pd.DataFrame(records)
    ordered = report.sort_values(["market_family", "event_id"], na_position="last")
    return ordered.reset_index(drop=True)
def _build_hr_snapshot_completeness(
    merged_df: pd.DataFrame,
    *,
    requested_books: list[str],
) -> dict[str, Any]:
    """Summarise which requested books have home-run rows in ``merged_df``.

    Returns a dict with the sorted requested/present/missing book lists, their
    counts, an ``is_complete`` flag (no requested book missing), and the
    row/event counts of the home-run subset.
    """
    hr_rows = _filter_market_family(merged_df, "hr")

    wanted = sorted({str(book).strip().lower() for book in requested_books if str(book).strip()})
    book_keys = hr_rows.get("sportsbook_key", pd.Series(dtype="object")).dropna().astype(str).tolist()
    found = sorted({str(key).strip().lower() for key in book_keys if str(key).strip()})
    absent = sorted(set(wanted) - set(found))

    if not hr_rows.empty and "event_id" in hr_rows.columns:
        event_total = int(hr_rows["event_id"].nunique())
    else:
        event_total = 0

    return {
        "market_family": "hr",
        "requested_books": wanted,
        "present_books": found,
        "missing_books": absent,
        "requested_count": len(wanted),
        "present_count": len(found),
        "missing_count": len(absent),
        "is_complete": len(absent) == 0,
        "row_count": int(len(hr_rows)),
        "event_count": event_total,
    }
def _book_event_hr_coverage_needs(
    api_df: pd.DataFrame,
    *,
    requested_books: list[str],
) -> dict[str, list[str]]:
    """Work out which events still lack DraftKings home-run rows in ``api_df``.

    Returns:
        ``{}`` when DraftKings was not requested, or when every event already
        has DraftKings home-run rows; ``{"draftkings": []}`` when there is no
        usable API data at all; otherwise ``{"draftkings": [event ids...]}``
        with the sorted ids of events that have no DraftKings home-run row.
    """
    wanted = {str(book).strip().lower() for book in requested_books if str(book).strip()}
    if "draftkings" not in wanted:
        return {}

    no_api_data = api_df is None or api_df.empty or "event_id" not in api_df.columns
    if no_api_data:
        return {"draftkings": []}

    normalized = normalize_prop_odds(api_df)
    raw_ids = normalized.get("event_id", pd.Series(dtype="object")).dropna().astype(str).tolist()
    known_events = [str(value).strip() for value in raw_ids if str(value).strip()]
    if not known_events:
        return {"draftkings": []}

    is_hr = normalized.get("market_family", pd.Series(dtype="object")).astype(str).str.lower() == "hr"
    hr_rows = normalized[is_hr].copy()
    is_dk = hr_rows.get("sportsbook_key", pd.Series(dtype="object")).astype(str).str.lower() == "draftkings"
    dk_ids = hr_rows.loc[is_dk, "event_id"].dropna().astype(str).tolist()
    covered = {str(value).strip() for value in dk_ids if str(value).strip()}

    uncovered = sorted(set(known_events) - covered)
    return {"draftkings": uncovered} if uncovered else {}
def _merge_primary_and_fallback(
    primary_df: pd.DataFrame,
    fallback_df: pd.DataFrame,
    requested_books: list[str],
) -> pd.DataFrame:
    """Combine primary rows with the fallback rows that the primary does not cover.

    Both inputs are de-duplicated first. A fallback row is kept only when no
    primary row shares its event signature, player, sportsbook key, market
    family, selection side and line (text parts compared case-insensitively).
    ``requested_books`` is accepted but not used.
    """
    del requested_books

    primary = _dedupe_props(primary_df)
    fallback = _dedupe_props(fallback_df)
    if primary.empty:
        return fallback
    if fallback.empty:
        return primary

    def _identity_keys(frame: pd.DataFrame) -> list[tuple]:
        def _lowered(column: str) -> pd.Series:
            return frame[column].fillna("").astype(str).str.lower()

        return list(
            zip(
                frame["_event_signature"],
                _lowered("player_name"),
                _lowered("sportsbook_key"),
                _lowered("market_family"),
                _lowered("selection_side"),
                frame["line"].fillna("").astype(str),
            )
        )

    primary["_event_signature"] = _event_signature(primary)
    fallback["_event_signature"] = _event_signature(fallback)

    covered = set(_identity_keys(primary))
    is_new = pd.Series(
        [key not in covered for key in _identity_keys(fallback)],
        index=fallback.index,
    )
    additions = fallback[is_new].copy()

    primary_clean = primary.drop(columns=["_event_signature"])
    if additions.empty:
        return primary_clean.reset_index(drop=True)

    combined = pd.concat(
        [primary_clean, additions.drop(columns=["_event_signature"])],
        ignore_index=True,
    )
    return _dedupe_props(combined)
def fetch_all_upcoming_hr_props_bundle(
    sportsbooks: list[str] | None = None,
    markets: list[str] | None = None,
) -> dict[str, Any]:
    """Fetch upcoming props from The Odds API, top up with the scraper, and report coverage.

    The Odds API is queried for the requested books and markets. The scraper
    is only called for books that ``_book_event_hr_coverage_needs`` reports as
    lacking coverage, and only for the ``batter_home_runs`` market. Provider
    exceptions are logged and treated as "no rows".

    Args:
        sportsbooks: Sportsbook keys; normalized, with defaults when omitted.
        markets: Market keys; normalized, with defaults when omitted.

    Returns:
        A dict mixing value types, which is why it is annotated as
        ``dict[str, Any]``:

        * DataFrames: ``odds_api_raw``, ``scraper_raw``, ``merged_props_feed``,
          the ``coverage_summary*`` tables and the ``missing_*`` tables.
        * plain dicts: ``hr_snapshot_completeness`` and the five
          ``adapter_*_by_book`` mappings copied from the scraper metadata.
        * ints: ``scraper_candidate_count``, ``scraper_added_count``,
          ``scraper_duplicate_reject_count``.
    """
    requested_books = _normalize_requested_books(sportsbooks)
    requested_markets = _normalize_requested_markets(markets)

    odds_api_provider = TheOddsAPIProvider()
    scraper_provider = ScrapeFallbackProvider()
    odds_api_raw = pd.DataFrame()
    scraper_raw = pd.DataFrame()

    # Primary source: a failure leaves an empty frame rather than aborting.
    try:
        odds_api_raw = odds_api_provider.fetch_all_upcoming_hr_props(
            sportsbooks=requested_books,
            markets=requested_markets,
        )
    except Exception as exc:
        logger.warning(
            "[fetch_all_upcoming_hr_props_bundle] theoddsapi failure: %s",
            exc,
            exc_info=True,
        )
    api_normalized = normalize_prop_odds(odds_api_raw)

    # Default metadata, used when the scraper is skipped or fails.
    scraper_meta: dict[str, Any] = {
        "adapter_status_by_book": {},
        "adapter_error_by_book": {},
        "adapter_rows_by_book": {},
        "adapter_last_attempted_at_by_book": {},
        "adapter_retry_after_by_book": {},
    }
    supplemental_needs = _book_event_hr_coverage_needs(
        api_normalized,
        requested_books=requested_books,
    )
    supplemental_books = sorted(supplemental_needs.keys())

    # Supplemental source: only called for books that need it.
    try:
        if supplemental_books:
            scraper_raw, scraper_meta = scraper_provider.fetch_all_upcoming_hr_props_with_meta(
                sportsbooks=supplemental_books,
                markets=["batter_home_runs"],
            )
        else:
            scraper_raw = pd.DataFrame()
    except Exception as exc:
        logger.warning(
            "[fetch_all_upcoming_hr_props_bundle] scraper failure: %s",
            exc,
            exc_info=True,
        )
        scraper_raw = pd.DataFrame()
    scraper_normalized = normalize_prop_odds(scraper_raw)

    # When specific events are known to lack DraftKings rows, keep scraped
    # DraftKings rows for those events only; other books pass through.
    if not scraper_normalized.empty and supplemental_needs.get("draftkings"):
        dk_missing_events = {str(event_id).strip() for event_id in supplemental_needs["draftkings"] if str(event_id).strip()}
        scraper_normalized = scraper_normalized[
            ~scraper_normalized.get("sportsbook_key", pd.Series(dtype="object")).astype(str).str.lower().eq("draftkings")
            | scraper_normalized.get("event_id", pd.Series(dtype="object")).astype(str).isin(dk_missing_events)
        ].copy()

    merged = _merge_primary_and_fallback(
        primary_df=api_normalized,
        fallback_df=scraper_normalized,
        requested_books=requested_books,
    )
    if not merged.empty:
        merged = merged.copy()
        merged["row_source_type"] = merged.get("provider", pd.Series(dtype="object")).apply(
            lambda value: "api_primary" if str(value or "").strip().lower() == "theoddsapi" else "supplemental_hr"
        )

    # Rows in the merged feed that came from the scraper.
    scraper_added = merged[
        merged.get("provider", pd.Series(dtype="object")).astype(str).str.lower() == "scrape_fallback"
    ].copy() if not merged.empty else pd.DataFrame()

    summary = _build_provider_summary(api_normalized, scraper_normalized, merged)
    coverage_summary_api = _build_market_book_summary(api_normalized)
    coverage_summary_scraper_added = _build_market_book_summary(scraper_added)
    coverage_summary_final = _build_market_book_summary(merged)
    coverage_summary_hr_api = _build_market_book_summary(_filter_market_family(api_normalized, "hr"))
    coverage_summary_hr_supplemental = _build_market_book_summary(_filter_market_family(scraper_added, "hr"))
    coverage_summary_hr_final = _build_market_book_summary(_filter_market_family(merged, "hr"))

    missing_books_by_market = _build_missing_books_by_market(
        merged,
        requested_books=requested_books,
        requested_markets=requested_markets,
    )
    missing_event_books_by_market = _build_missing_event_books_by_market(
        merged,
        requested_books=requested_books,
        requested_markets=requested_markets,
    )
    missing_hr_books_global = missing_books_by_market[
        missing_books_by_market["market_family"].astype(str).str.lower() == "hr"
    ].copy() if not missing_books_by_market.empty else pd.DataFrame()
    missing_hr_books_by_event = missing_event_books_by_market[
        missing_event_books_by_market["market_family"].astype(str).str.lower() == "hr"
    ].copy() if not missing_event_books_by_market.empty else pd.DataFrame()

    hr_snapshot_completeness = _build_hr_snapshot_completeness(
        merged,
        requested_books=requested_books,
    )
    # Stamp the snapshot-level HR coverage onto every merged row.
    if not merged.empty:
        merged["coverage_completion_status"] = (
            "complete" if bool(hr_snapshot_completeness.get("is_complete")) else "partial"
        )
        merged["hr_books_requested"] = ", ".join(hr_snapshot_completeness.get("requested_books") or [])
        merged["hr_books_present"] = ", ".join(hr_snapshot_completeness.get("present_books") or [])
        merged["hr_books_missing"] = ", ".join(hr_snapshot_completeness.get("missing_books") or [])

    scraper_candidate_count = int(len(scraper_normalized))
    scraper_added_count = int(len(scraper_added))
    scraper_duplicate_reject_count = max(0, scraper_candidate_count - scraper_added_count)

    logger.warning(
        "[fetch_all_upcoming_hr_props_bundle] requested_books=%s requested_markets=%s api_rows=%d scraper_rows=%d scraper_added=%d duplicate_rejects=%d merged_rows=%d merged_books=%s",
        requested_books,
        requested_markets,
        len(api_normalized),
        len(scraper_normalized),
        scraper_added_count,
        scraper_duplicate_reject_count,
        len(merged),
        sorted(merged["sportsbook"].dropna().unique().tolist()) if not merged.empty else [],
    )

    return {
        "odds_api_raw": api_normalized.reset_index(drop=True),
        "scraper_raw": scraper_normalized.reset_index(drop=True),
        "merged_props_feed": merged.reset_index(drop=True),
        "coverage_summary": summary,
        "coverage_summary_api": coverage_summary_api.reset_index(drop=True),
        "coverage_summary_scraper_added": coverage_summary_scraper_added.reset_index(drop=True),
        "coverage_summary_final": coverage_summary_final.reset_index(drop=True),
        "coverage_summary_hr_api": coverage_summary_hr_api.reset_index(drop=True),
        "coverage_summary_hr_supplemental": coverage_summary_hr_supplemental.reset_index(drop=True),
        "coverage_summary_hr_final": coverage_summary_hr_final.reset_index(drop=True),
        "missing_books_by_market": missing_books_by_market.reset_index(drop=True),
        "missing_event_books_by_market": missing_event_books_by_market.reset_index(drop=True),
        "missing_hr_books_global": missing_hr_books_global.reset_index(drop=True),
        "missing_hr_books_by_event": missing_hr_books_by_event.reset_index(drop=True),
        "hr_snapshot_completeness": hr_snapshot_completeness,
        "adapter_status_by_book": dict(scraper_meta.get("adapter_status_by_book") or {}),
        "adapter_error_by_book": dict(scraper_meta.get("adapter_error_by_book") or {}),
        "adapter_rows_by_book": dict(scraper_meta.get("adapter_rows_by_book") or {}),
        "adapter_last_attempted_at_by_book": dict(scraper_meta.get("adapter_last_attempted_at_by_book") or {}),
        "adapter_retry_after_by_book": dict(scraper_meta.get("adapter_retry_after_by_book") or {}),
        "scraper_candidate_count": scraper_candidate_count,
        "scraper_added_count": scraper_added_count,
        "scraper_duplicate_reject_count": scraper_duplicate_reject_count,
    }
def fetch_upcoming_props_coverage_probe(
    sportsbooks: list[str] | None = None,
    markets: list[str] | None = None,
    max_events: int = 5,
) -> dict[str, pd.DataFrame]:
    """Probe The Odds API for market coverage and summarise it per book and market.

    ``williamhill_us`` is always added to the probed books. A provider
    exception is logged and treated as an empty probe result.

    Returns:
        ``coverage_probe_raw`` (the provider rows) and
        ``coverage_probe_summary`` (events checked, events with data, and rows
        with at least one outcome, per sportsbook and market key).
    """
    probe_books = list(_normalize_requested_books(sportsbooks))
    if "williamhill_us" not in probe_books:
        probe_books.append("williamhill_us")

    provider = TheOddsAPIProvider()
    try:
        raw = provider.fetch_upcoming_market_coverage_probe(
            sportsbooks=probe_books,
            markets=markets,
            max_events=max_events,
        )
    except Exception as exc:
        logger.warning(
            "[fetch_upcoming_props_coverage_probe] failure: %s",
            exc,
            exc_info=True,
        )
        raw = pd.DataFrame()

    if not raw.empty:
        per_book_market = raw.groupby(["sportsbook", "market_key"], dropna=False).agg(
            events_checked=("event_id", "nunique"),
            events_with_data=("has_data", lambda s: int(pd.Series(s).astype(bool).sum())),
            rows_with_data=("outcomes_returned", lambda s: int((pd.to_numeric(s, errors="coerce").fillna(0) > 0).sum())),
        )
        summary = (
            per_book_market.reset_index()
            .sort_values(["market_key", "sportsbook"], na_position="last")
            .reset_index(drop=True)
        )
    else:
        summary = pd.DataFrame(
            columns=[
                "sportsbook",
                "market_key",
                "events_checked",
                "events_with_data",
                "rows_with_data",
            ]
        )

    return {
        "coverage_probe_raw": raw.reset_index(drop=True),
        "coverage_probe_summary": summary,
    }
def best_book_by_player_market(df: pd.DataFrame) -> pd.DataFrame:
    """Return, for each (player_name, market) pair, the row with the best American odds.

    "Best" is the numerically largest ``odds_american`` (for example +300 beats
    +250, and -110 beats -200). Ties go to the row that appears first in
    ``df``. Rows whose odds are missing or not numeric are ignored instead of
    raising, and a pair with no usable odds produces no output row.

    Args:
        df: Rows with ``player_name``, ``market`` and ``odds_american`` columns.

    Returns:
        One row per pair, ordered by the sorted group keys and re-indexed from
        zero. An empty ``df`` is returned unchanged; when no row has usable
        odds, an empty frame with the columns of ``df`` is returned.
    """
    if df.empty:
        return df

    # Positional labels keep the idxmax lookup unambiguous when the caller's
    # index contains repeated labels.
    working = df.reset_index(drop=True)
    scores = pd.to_numeric(working["odds_american"], errors="coerce")
    priced = working.loc[scores.notna()]
    if priced.empty:
        return pd.DataFrame(columns=df.columns)

    # idxmax returns the first label of the maximum, i.e. first row wins ties.
    best_labels = (
        scores.loc[priced.index]
        .groupby([priced["player_name"], priced["market"]], dropna=False)
        .idxmax()
    )
    return priced.loc[best_labels.to_numpy()].reset_index(drop=True)
def fetch_all_upcoming_hr_props(
    sportsbooks: list[str] | None = None,
    markets: list[str] | None = None,
) -> pd.DataFrame:
    """
    Return only the merged props feed for all upcoming MLB games.

    Thin wrapper over ``fetch_all_upcoming_hr_props_bundle``: The Odds API is
    the primary source and the scraper fallback only fills missing coverage.
    """
    return fetch_all_upcoming_hr_props_bundle(
        sportsbooks=sportsbooks,
        markets=markets,
    )["merged_props_feed"]
def fetch_live_prop_odds(
    game_context: dict,
    sportsbooks: list[str] | None = None,
    markets: list[str] | None = None,
) -> pd.DataFrame:
    """Collect live prop odds from every enabled provider and normalize them.

    The enterprise provider is queried first when ``ENABLE_ENTERPRISE_PROVIDER``
    is truthy, then The Odds API. A provider that raises is logged and
    skipped. Returns an empty frame when no provider produced rows.
    """
    sources = [EnterpriseMarketProvider()] if ENABLE_ENTERPRISE_PROVIDER else []
    sources.append(TheOddsAPIProvider())

    collected = []
    for source in sources:
        try:
            result = source.fetch_live_prop_odds(
                game_context=game_context,
                sportsbooks=sportsbooks,
                markets=markets,
            )
            # Kept inside the try so an unusable result is also logged and skipped.
            if not result.empty:
                collected.append(result)
        except Exception as exc:
            logger.warning("[fetch_live_prop_odds] provider failure: %s", exc)

    if collected:
        return normalize_prop_odds(pd.concat(collected, ignore_index=True))
    return pd.DataFrame()