2026_MLB_Model / data /live_prop_odds.py
Syntrex's picture
Odds tab: add pitcher strikeout alt lines with Standard/Alt Lines/Both view toggle
17be445
raw
history blame
31.5 kB
from __future__ import annotations
import re
from typing import Any
import pandas as pd
from config.settings import DEFAULT_UPCOMING_PROP_MARKETS, ENABLE_ENTERPRISE_PROVIDER
from data.provider_enterprise import EnterpriseMarketProvider
from data.provider_scrape import ScrapeFallbackProvider
from data.provider_theoddsapi import TheOddsAPIProvider
from utils.logger import logger
def normalize_prop_odds(raw_df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``raw_df`` with the standard prop-odds columns populated.

    Columns missing from the required schema are added as ``None``. The market
    family and selection side are inferred per row, and home-run ("hr"),
    strikeout ("k") and no-home-run ("no_hr") rows get their threshold,
    variant, display label and primary/modeled flags filled in. Finally
    ``player_event_market_key`` is composed from event, player, family,
    threshold, side and line.

    Missing cells (``None``/``NaN``/``pd.NA``) are treated as empty text, so a
    ``NaN`` left behind by e.g. ``pd.concat`` is never mistaken for the literal
    value ``"nan"``. Strikeout lines are rendered numerically ("10", "6.5").

    Args:
        raw_df: Provider rows. An empty frame is returned unchanged.

    Returns:
        A new normalized DataFrame, or ``raw_df`` itself when it is empty.
    """
    if raw_df.empty:
        return raw_df

    out = raw_df.copy()
    required_cols = [
        "provider",
        "event_id",
        "commence_time",
        "away_team",
        "home_team",
        "sportsbook",
        "sportsbook_key",
        "market",
        "player_name_raw",
        "player_name",
        "odds_american",
        "line",
        "selection_label",
        "selection_scope",
        "selection_side",
        "market_family",
        "market_variant",
        "threshold",
        "display_label",
        "is_primary_line",
        "is_modeled",
        "player_event_market_key",
    ]
    for col in required_cols:
        if col not in out.columns:
            out[col] = None

    def _safe_float(val):
        """Return ``val`` as a float, or None when it is missing or not numeric."""
        try:
            if pd.isna(val):
                return None
            return float(val)
        except (TypeError, ValueError, OverflowError):
            return None

    def _text(val) -> str:
        """Return ``val`` as stripped text; None/NaN/NA and falsy values give ''."""
        try:
            if val is None or pd.isna(val) or not val:
                return ""
        except (TypeError, ValueError):
            # Array-like cells have no single truth value; stringify them as-is.
            pass
        return str(val).strip()

    def _format_line(val) -> str:
        """Render a betting line without a trailing '.0' ("10", "6.5")."""
        number = _safe_float(val)
        if number is None:
            return _text(val)
        if number.is_integer():
            return str(int(number))
        return str(number)

    def _infer_threshold(row: pd.Series) -> int | None:
        """Pick the 'N+' threshold: explicit value, then labels, then the line."""
        existing = row.get("threshold")
        if pd.notna(existing):
            try:
                return int(existing)
            except (TypeError, ValueError, OverflowError):
                pass
        selection_label = _text(row.get("selection_label"))
        if selection_label:
            match = re.search(r"(\d+)\s*\+", selection_label)
            if match:
                return int(match.group(1))
        market_text = " ".join(
            [
                _text(row.get("market_key")),
                _text(row.get("market")),
                _text(row.get("player_name_raw")),
            ]
        )
        match = re.search(r"(\d+)\s*\+", market_text)
        if match:
            return int(match.group(1))
        line = _safe_float(row.get("line"))
        if line is None:
            return 1
        if line >= 0:
            # An "over 0.5" style line corresponds to a 1+ threshold, 1.5 to 2+, ...
            return max(1, int(line) + 1)
        return 1

    def _infer_market_family(row: pd.Series) -> str:
        """Return the existing family, else derive it from market / market_key."""
        existing = _text(row.get("market_family")).lower()
        if existing:
            return existing
        market = _text(row.get("market")).lower()
        if market in {"hr", "hit", "tb", "k", "no_hr"}:
            return market
        market_key = _text(row.get("market_key")).lower()
        if "home_run" in market_key:
            if "no" in market_key:
                return "no_hr"
            return "hr"
        if "strikeout" in market_key:
            return "k"
        if "hits" in market_key:
            return "hit"
        if "total_bases" in market_key:
            return "tb"
        return market or market_key or "unknown"

    def _infer_selection_side(row: pd.Series) -> str | None:
        """Return the existing side, else 'over'/'under' from the label prefix."""
        existing = _text(row.get("selection_side")).lower()
        if existing:
            return existing
        label = _text(row.get("selection_label")).lower()
        if label.startswith("over"):
            return "over"
        if label.startswith("under"):
            return "under"
        return None

    def _k_display_label(row: pd.Series) -> str:
        """Build the strikeout label, e.g. 'Over 6.5 K' (side omitted if unknown)."""
        side = _text(row.get("selection_side"))
        line_text = _format_line(row.get("line"))
        if side:
            return f"{side.capitalize()} {line_text} K"
        return f"{line_text} K"

    out["market_family"] = out.apply(_infer_market_family, axis=1)
    out["selection_side"] = out.apply(_infer_selection_side, axis=1)

    hr_mask = out["market_family"].eq("hr")
    k_mask = out["market_family"].eq("k")
    no_hr_mask = out["market_family"].eq("no_hr")

    # A row-wise apply over zero rows yields a DataFrame rather than a Series,
    # so only run it when at least one home-run row exists.
    if hr_mask.any():
        out.loc[hr_mask, "threshold"] = out.loc[hr_mask].apply(_infer_threshold, axis=1)
    out.loc[~hr_mask, "threshold"] = out.loc[~hr_mask, "threshold"].where(
        out.loc[~hr_mask, "threshold"].notna(),
        None,
    )
    # Strikeout rows use the numeric line itself as their threshold.
    out.loc[k_mask, "threshold"] = out.loc[k_mask, "line"].apply(_safe_float)

    # Home runs: only the 1+ threshold is the primary, modeled line.
    out.loc[hr_mask, "market_variant"] = out.loc[hr_mask, "threshold"].apply(
        lambda v: "primary" if pd.notna(v) and int(v) == 1 else "alternate"
    )
    out.loc[hr_mask, "display_label"] = out.loc[hr_mask, "threshold"].apply(
        lambda v: f"{int(v)}+ HR" if pd.notna(v) else "HR"
    )
    out.loc[hr_mask, "is_primary_line"] = out.loc[hr_mask, "threshold"].apply(
        lambda v: bool(pd.notna(v) and int(v) == 1)
    )
    out.loc[hr_mask, "is_modeled"] = out.loc[hr_mask, "threshold"].apply(
        lambda v: bool(pd.notna(v) and int(v) == 1)
    )

    out.loc[~hr_mask, "market_variant"] = out.loc[~hr_mask, "market_variant"].where(
        out.loc[~hr_mask, "market_variant"].notna(),
        "standard",
    )
    if "market_key" in out.columns:
        k_alt_mask = k_mask & out["market_key"].astype(str).str.strip().eq("pitcher_strikeouts_alternate")
        out.loc[k_alt_mask, "market_variant"] = "alternate"

    # Strikeouts: pitcher-scoped, labelled from side + line.
    out.loc[k_mask, "selection_scope"] = out.loc[k_mask, "selection_scope"].where(
        out.loc[k_mask, "selection_scope"].notna(),
        "pitcher",
    )
    if k_mask.any():
        out.loc[k_mask, "display_label"] = out.loc[k_mask].apply(_k_display_label, axis=1)
    out.loc[k_mask, "is_primary_line"] = out.loc[k_mask, "market_variant"].apply(
        lambda v: v != "alternate"
    )
    out.loc[k_mask, "is_modeled"] = out.loc[k_mask, "selection_side"].isin(["over", "under"])

    # No-home-run rows: game-scoped with a fixed label unless one was supplied.
    out.loc[no_hr_mask, "selection_scope"] = out.loc[no_hr_mask, "selection_scope"].where(
        out.loc[no_hr_mask, "selection_scope"].notna(),
        "game",
    )
    out.loc[no_hr_mask, "display_label"] = out.loc[no_hr_mask, "display_label"].where(
        out.loc[no_hr_mask, "display_label"].notna(),
        "No Home Run",
    )
    out.loc[no_hr_mask, "is_primary_line"] = True

    # Every other family falls back to its upper-cased family name as label.
    other_mask = ~(hr_mask | k_mask | no_hr_mask)
    out.loc[other_mask, "display_label"] = out.loc[other_mask, "display_label"].where(
        out.loc[other_mask, "display_label"].notna(),
        out.loc[other_mask, "market_family"].astype(str).str.upper(),
    )
    out.loc[~hr_mask, "is_primary_line"] = out.loc[~hr_mask, "is_primary_line"].where(
        out.loc[~hr_mask, "is_primary_line"].notna(),
        True,
    )
    out.loc[~hr_mask, "is_modeled"] = out.loc[~hr_mask, "is_modeled"].where(
        out.loc[~hr_mask, "is_modeled"].notna(),
        False,
    )

    out["player_event_market_key"] = (
        out["event_id"].fillna("").astype(str).str.strip()
        + "|"
        + out["player_name"].fillna("").astype(str).str.strip().str.lower()
        + "|"
        + out["market_family"].fillna("").astype(str).str.strip().str.lower()
        + "|"
        + out["threshold"].fillna("").astype(str).str.strip()
        + "|"
        + out["selection_side"].fillna("").astype(str).str.strip().str.lower()
        + "|"
        + out["line"].fillna("").astype(str).str.strip()
    )
    return out
def _normalize_requested_books(sportsbooks: list[str] | None) -> list[str]:
default_books = ["draftkings", "fanduel", "betmgm", "williamhill_us"]
books = sportsbooks or default_books
normalized: list[str] = []
for book in books:
book_key = str(book or "").strip().lower()
if book_key and book_key not in normalized:
normalized.append(book_key)
return normalized
def _normalize_requested_markets(markets: list[str] | None) -> list[str]:
normalized: list[str] = []
for market in (markets or DEFAULT_UPCOMING_PROP_MARKETS):
market_key = str(market or "").strip().lower()
if market_key and market_key not in normalized:
normalized.append(market_key)
return normalized
def _requested_market_families(markets: list[str] | None) -> list[str]:
    """Map requested market keys to their distinct market families.

    Families are returned in first-seen order. Market keys that match none of
    the known fragments contribute nothing.
    """
    # Checked in this order; the first fragment found in the key wins.
    fragment_to_family = (
        ("home_run", "hr"),
        ("strikeout", "k"),
        ("hits", "hit"),
        ("total_bases", "tb"),
    )
    found: list[str] = []
    for market_key in _normalize_requested_markets(markets):
        family = next(
            (name for fragment, name in fragment_to_family if fragment in market_key),
            "",
        )
        if family and family not in found:
            found.append(family)
    return found
def _event_signature(df: pd.DataFrame) -> pd.Series:
away = df.get("away_team", pd.Series(index=df.index, dtype="object")).fillna("").astype(str).str.strip().str.lower()
home = df.get("home_team", pd.Series(index=df.index, dtype="object")).fillna("").astype(str).str.strip().str.lower()
commence = df.get("commence_time", pd.Series(index=df.index, dtype="object")).fillna("").astype(str).str.strip().str.lower()
return away + "|" + home + "|" + commence
def _dedupe_props(df: pd.DataFrame) -> pd.DataFrame:
if df.empty:
return df
out = normalize_prop_odds(df)
out["_odds_score"] = out["odds_american"].apply(
lambda x: int(x) if pd.notna(x) else -9999
)
out["_event_signature"] = _event_signature(out)
out = (
out.sort_values("_odds_score", ascending=False)
.drop_duplicates(
subset=[
"_event_signature",
"player_name",
"sportsbook_key",
"market_family",
"threshold",
"selection_side",
"line",
],
keep="first",
)
.drop(columns=["_odds_score", "_event_signature"])
.reset_index(drop=True)
)
return out
def _build_provider_summary(*frames: pd.DataFrame) -> pd.DataFrame:
normalized_frames = []
for df in frames:
if df is None or df.empty:
continue
frame = normalize_prop_odds(df)
frame["_event_signature"] = _event_signature(frame)
normalized_frames.append(frame)
if not normalized_frames:
return pd.DataFrame(
columns=["provider", "sportsbook", "rows", "unique_events", "unique_players"]
)
summary_source = pd.concat(normalized_frames, ignore_index=True)
return (
summary_source.groupby(["provider", "sportsbook"], dropna=False)
.agg(
rows=("player_name", "size"),
unique_events=("_event_signature", "nunique"),
unique_players=("player_name", "nunique"),
)
.reset_index()
.sort_values(["provider", "sportsbook"], na_position="last")
.reset_index(drop=True)
)
def _build_market_book_summary(df: pd.DataFrame) -> pd.DataFrame:
if df is None or df.empty:
return pd.DataFrame(
columns=["market_family", "sportsbook", "rows", "unique_events", "unique_players"]
)
working = normalize_prop_odds(df)
working["_event_signature"] = _event_signature(working)
return (
working.groupby(["market_family", "sportsbook"], dropna=False)
.agg(
rows=("player_name", "size"),
unique_events=("_event_signature", "nunique"),
unique_players=("player_name", "nunique"),
)
.reset_index()
.sort_values(["market_family", "sportsbook"], na_position="last")
.reset_index(drop=True)
)
def _filter_market_family(df: pd.DataFrame, market_family: str) -> pd.DataFrame:
if df is None or df.empty:
return pd.DataFrame()
working = normalize_prop_odds(df)
if "market_family" not in working.columns:
return pd.DataFrame()
return working[
working["market_family"].astype(str).str.strip().str.lower() == str(market_family or "").strip().lower()
].copy()
def _build_missing_books_by_market(
    df: pd.DataFrame,
    *,
    requested_books: list[str],
    requested_markets: list[str],
) -> pd.DataFrame:
    """Report, per requested market family, which requested books have rows.

    Returns one row per family with comma-joined ``available_books`` and
    ``missing_books`` plus their counts. When no requested market maps to a
    known family, an empty frame with those columns is returned.
    """
    families = _requested_market_families(requested_markets)
    if not families:
        return pd.DataFrame(
            columns=["market_family", "available_books", "missing_books", "available_count", "missing_count"]
        )

    if df is None:
        normalized = pd.DataFrame()
    else:
        normalized = normalize_prop_odds(df)
    wanted_books = {str(book).strip().lower() for book in requested_books if str(book).strip()}

    report: list[dict[str, Any]] = []
    for family in families:
        if normalized.empty:
            family_rows = pd.DataFrame()
        else:
            family_rows = normalized[
                normalized.get("market_family", pd.Series(dtype="object")).astype(str).str.lower() == family
            ].copy()
        book_values = (
            family_rows.get("sportsbook_key", pd.Series(dtype="object")).dropna().astype(str).tolist()
        )
        present = sorted({str(value).strip().lower() for value in book_values if str(value).strip()})
        absent = sorted(wanted_books - set(present))
        report.append(
            {
                "market_family": family,
                "available_books": ", ".join(present),
                "missing_books": ", ".join(absent),
                "available_count": len(present),
                "missing_count": len(absent),
            }
        )
    return pd.DataFrame(report)
def _build_missing_event_books_by_market(
    df: pd.DataFrame,
    *,
    requested_books: list[str],
    requested_markets: list[str],
) -> pd.DataFrame:
    """Report, per event and requested market family, which books have rows.

    Returns one row per (event, family) with comma-joined ``available_books``
    and ``missing_books`` plus their counts, sorted by family then event id.
    A ``None``/empty input, or no recognizable market family, yields an empty
    frame with the report columns.
    """
    families = _requested_market_families(requested_markets)
    if df is None or df.empty or not families:
        return pd.DataFrame(
            columns=["event_id", "away_team", "home_team", "commence_time", "market_family", "available_books", "missing_books", "available_count", "missing_count"]
        )

    normalized = normalize_prop_odds(df)
    event_cols = [
        name
        for name in ["event_id", "away_team", "home_team", "commence_time"]
        if name in normalized.columns
    ]
    if not event_cols:
        return pd.DataFrame()

    wanted_books = {str(book).strip().lower() for book in requested_books if str(book).strip()}
    report: list[dict[str, Any]] = []
    for event_key, event_rows in normalized.groupby(event_cols, dropna=False):
        # A single grouping column may produce a scalar key; normalize to a tuple.
        if not isinstance(event_key, tuple):
            event_key = (event_key,)
        event_fields = dict(zip(event_cols, event_key))
        row_family = event_rows["market_family"].astype(str).str.lower()
        for family in families:
            family_rows = event_rows[row_family == family].copy()
            book_values = (
                family_rows.get("sportsbook_key", pd.Series(dtype="object")).dropna().astype(str).tolist()
            )
            present = sorted({str(value).strip().lower() for value in book_values if str(value).strip()})
            absent = sorted(wanted_books - set(present))
            report.append(
                {
                    **event_fields,
                    "market_family": family,
                    "available_books": ", ".join(present),
                    "missing_books": ", ".join(absent),
                    "available_count": len(present),
                    "missing_count": len(absent),
                }
            )

    result = pd.DataFrame(report)
    result = result.sort_values(["market_family", "event_id"], na_position="last")
    return result.reset_index(drop=True)
def _build_hr_snapshot_completeness(
    merged_df: pd.DataFrame,
    *,
    requested_books: list[str],
) -> dict[str, Any]:
    """Describe how completely the requested books cover the "hr" family.

    Returns a dict listing requested, present and missing book keys (sorted,
    lower-cased), their counts, an ``is_complete`` flag, and the number of
    home-run rows and distinct event ids in ``merged_df``.
    """
    hr_rows = _filter_market_family(merged_df, "hr")

    wanted = {str(book).strip().lower() for book in requested_books if str(book).strip()}
    book_values = hr_rows.get("sportsbook_key", pd.Series(dtype="object")).dropna().astype(str).tolist()
    seen = {str(value).strip().lower() for value in book_values if str(value).strip()}

    requested = sorted(wanted)
    present = sorted(seen)
    missing = sorted(wanted - seen)

    if hr_rows.empty or "event_id" not in hr_rows.columns:
        event_count = 0
    else:
        event_count = int(hr_rows["event_id"].nunique())

    return {
        "market_family": "hr",
        "requested_books": requested,
        "present_books": present,
        "missing_books": missing,
        "requested_count": len(requested),
        "present_count": len(present),
        "missing_count": len(missing),
        "is_complete": len(missing) == 0,
        "row_count": int(len(hr_rows)),
        "event_count": event_count,
    }
def _book_event_hr_coverage_needs(
api_df: pd.DataFrame,
*,
requested_books: list[str],
) -> dict[str, list[str]]:
requested = {str(book).strip().lower() for book in requested_books if str(book).strip()}
needs: dict[str, list[str]] = {}
if "draftkings" not in requested:
return needs
if api_df is None or api_df.empty or "event_id" not in api_df.columns:
return {"draftkings": []}
working = normalize_prop_odds(api_df)
all_event_ids = [
str(event_id).strip()
for event_id in working.get("event_id", pd.Series(dtype="object")).dropna().astype(str).tolist()
if str(event_id).strip()
]
if not all_event_ids:
return {"draftkings": []}
hr_rows = working[
working.get("market_family", pd.Series(dtype="object")).astype(str).str.lower() == "hr"
].copy()
dk_hr_events = {
str(event_id).strip()
for event_id in hr_rows.loc[
hr_rows.get("sportsbook_key", pd.Series(dtype="object")).astype(str).str.lower() == "draftkings",
"event_id",
].dropna().astype(str).tolist()
if str(event_id).strip()
}
missing_event_ids = sorted(set(all_event_ids) - dk_hr_events)
if missing_event_ids:
needs["draftkings"] = missing_event_ids
return needs
def _merge_primary_and_fallback(
    primary_df: pd.DataFrame,
    fallback_df: pd.DataFrame,
    requested_books: list[str],
) -> pd.DataFrame:
    """Combine primary rows with the fallback rows the primary does not cover.

    Both inputs are de-duplicated first. A fallback row is added only when no
    primary row shares its event signature, player, sportsbook key, market
    family, selection side and line. ``requested_books`` is accepted but not
    used.
    """
    del requested_books

    primary = _dedupe_props(primary_df)
    fallback = _dedupe_props(fallback_df)
    if primary.empty:
        return fallback
    if fallback.empty:
        return primary

    def _identity_parts(frame: pd.DataFrame) -> list[pd.Series]:
        # The six normalized fields that identify one priced selection.
        return [
            _event_signature(frame),
            frame["player_name"].fillna("").astype(str).str.lower(),
            frame["sportsbook_key"].fillna("").astype(str).str.lower(),
            frame["market_family"].fillna("").astype(str).str.lower(),
            frame["selection_side"].fillna("").astype(str).str.lower(),
            frame["line"].fillna("").astype(str),
        ]

    primary_keys = set(zip(*_identity_parts(primary)))
    is_new = pd.Series(
        [key not in primary_keys for key in zip(*_identity_parts(fallback))],
        index=fallback.index,
        dtype=bool,
    )
    fallback_fill = fallback[is_new].copy()

    if fallback_fill.empty:
        return primary.reset_index(drop=True)

    combined = pd.concat([primary, fallback_fill], ignore_index=True)
    return _dedupe_props(combined)
def fetch_all_upcoming_hr_props_bundle(
    sportsbooks: list[str] | None = None,
    markets: list[str] | None = None,
) -> dict[str, Any]:
    """Fetch upcoming props from The Odds API, supplement with the scraper, and
    return the merged feed together with coverage diagnostics.

    Args:
        sportsbooks: Requested sportsbook keys; normalized by
            ``_normalize_requested_books`` (defaults apply when empty).
        markets: Requested market keys; normalized by
            ``_normalize_requested_markets`` (defaults apply when empty).

    Returns:
        A dict holding the normalized API and scraper frames, the merged feed
        (``merged_props_feed``), several coverage/missing-book summary frames,
        the ``hr_snapshot_completeness`` dict, the scraper adapter metadata
        dicts, and integer scraper row counters.

    Provider exceptions are logged and treated as "no rows"; they are not
    re-raised from the two fetch calls below.
    """
    requested_books = _normalize_requested_books(sportsbooks)
    requested_markets = _normalize_requested_markets(markets)
    odds_api_provider = TheOddsAPIProvider()
    scraper_provider = ScrapeFallbackProvider()
    odds_api_raw = pd.DataFrame()
    scraper_raw = pd.DataFrame()
    # Primary source: a failure leaves odds_api_raw as the empty frame above.
    try:
        odds_api_raw = odds_api_provider.fetch_all_upcoming_hr_props(
            sportsbooks=requested_books,
            markets=requested_markets,
        )
    except Exception as exc:
        logger.warning(
            "[fetch_all_upcoming_hr_props_bundle] theoddsapi failure: %s",
            exc,
            exc_info=True,
        )
    api_normalized = normalize_prop_odds(odds_api_raw)
    # Default adapter metadata, used when the scraper is skipped or fails.
    scraper_meta: dict[str, Any] = {
        "adapter_status_by_book": {},
        "adapter_error_by_book": {},
        "adapter_rows_by_book": {},
        "adapter_last_attempted_at_by_book": {},
        "adapter_retry_after_by_book": {},
    }
    # Books (currently only "draftkings") whose home-run coverage is incomplete.
    supplemental_needs = _book_event_hr_coverage_needs(
        api_normalized,
        requested_books=requested_books,
    )
    supplemental_books = sorted(supplemental_needs.keys())
    # The scraper is only called for books that need supplementing, and only
    # for the home-run market.
    try:
        if supplemental_books:
            scraper_raw, scraper_meta = scraper_provider.fetch_all_upcoming_hr_props_with_meta(
                sportsbooks=supplemental_books,
                markets=["batter_home_runs"],
            )
        else:
            scraper_raw = pd.DataFrame()
    except Exception as exc:
        logger.warning(
            "[fetch_all_upcoming_hr_props_bundle] scraper failure: %s",
            exc,
            exc_info=True,
        )
        scraper_raw = pd.DataFrame()
    scraper_normalized = normalize_prop_odds(scraper_raw)
    # When specific events are listed as missing for DraftKings, keep scraped
    # DraftKings rows only for those events; other books' rows pass through.
    if not scraper_normalized.empty and supplemental_needs.get("draftkings"):
        dk_missing_events = {str(event_id).strip() for event_id in supplemental_needs["draftkings"] if str(event_id).strip()}
        scraper_normalized = scraper_normalized[
            ~scraper_normalized.get("sportsbook_key", pd.Series(dtype="object")).astype(str).str.lower().eq("draftkings")
            | scraper_normalized.get("event_id", pd.Series(dtype="object")).astype(str).isin(dk_missing_events)
        ].copy()
    merged = _merge_primary_and_fallback(
        primary_df=api_normalized,
        fallback_df=scraper_normalized,
        requested_books=requested_books,
    )
    # Tag each merged row by origin: provider "theoddsapi" vs. anything else.
    if not merged.empty:
        merged = merged.copy()
        merged["row_source_type"] = merged.get("provider", pd.Series(dtype="object")).apply(
            lambda value: "api_primary" if str(value or "").strip().lower() == "theoddsapi" else "supplemental_hr"
        )
    # Rows in the merged feed whose provider is "scrape_fallback".
    scraper_added = merged[
        merged.get("provider", pd.Series(dtype="object")).astype(str).str.lower() == "scrape_fallback"
    ].copy() if not merged.empty else pd.DataFrame()
    summary = _build_provider_summary(api_normalized, scraper_normalized, merged)
    coverage_summary_api = _build_market_book_summary(api_normalized)
    coverage_summary_scraper_added = _build_market_book_summary(scraper_added)
    coverage_summary_final = _build_market_book_summary(merged)
    coverage_summary_hr_api = _build_market_book_summary(_filter_market_family(api_normalized, "hr"))
    coverage_summary_hr_supplemental = _build_market_book_summary(_filter_market_family(scraper_added, "hr"))
    coverage_summary_hr_final = _build_market_book_summary(_filter_market_family(merged, "hr"))
    missing_books_by_market = _build_missing_books_by_market(
        merged,
        requested_books=requested_books,
        requested_markets=requested_markets,
    )
    missing_event_books_by_market = _build_missing_event_books_by_market(
        merged,
        requested_books=requested_books,
        requested_markets=requested_markets,
    )
    # Home-run-only slices of the two missing-book reports.
    missing_hr_books_global = missing_books_by_market[
        missing_books_by_market["market_family"].astype(str).str.lower() == "hr"
    ].copy() if not missing_books_by_market.empty else pd.DataFrame()
    missing_hr_books_by_event = missing_event_books_by_market[
        missing_event_books_by_market["market_family"].astype(str).str.lower() == "hr"
    ].copy() if not missing_event_books_by_market.empty else pd.DataFrame()
    hr_snapshot_completeness = _build_hr_snapshot_completeness(
        merged,
        requested_books=requested_books,
    )
    # Stamp the snapshot-level completeness onto every merged row.
    if not merged.empty:
        merged["coverage_completion_status"] = (
            "complete" if bool(hr_snapshot_completeness.get("is_complete")) else "partial"
        )
        merged["hr_books_requested"] = ", ".join(hr_snapshot_completeness.get("requested_books") or [])
        merged["hr_books_present"] = ", ".join(hr_snapshot_completeness.get("present_books") or [])
        merged["hr_books_missing"] = ", ".join(hr_snapshot_completeness.get("missing_books") or [])
    scraper_candidate_count = int(len(scraper_normalized))
    scraper_added_count = int(len(scraper_added))
    # Scraped rows that did not make it into the merged feed.
    scraper_duplicate_reject_count = max(0, scraper_candidate_count - scraper_added_count)
    # NOTE(review): this run summary is emitted at WARNING level — confirm that is intentional.
    logger.warning(
        "[fetch_all_upcoming_hr_props_bundle] requested_books=%s requested_markets=%s api_rows=%d scraper_rows=%d scraper_added=%d duplicate_rejects=%d merged_rows=%d merged_books=%s",
        requested_books,
        requested_markets,
        len(api_normalized),
        len(scraper_normalized),
        scraper_added_count,
        scraper_duplicate_reject_count,
        len(merged),
        sorted(merged["sportsbook"].dropna().unique().tolist()) if not merged.empty else [],
    )
    return {
        "odds_api_raw": api_normalized.reset_index(drop=True),
        "scraper_raw": scraper_normalized.reset_index(drop=True),
        "merged_props_feed": merged.reset_index(drop=True),
        "coverage_summary": summary,
        "coverage_summary_api": coverage_summary_api.reset_index(drop=True),
        "coverage_summary_scraper_added": coverage_summary_scraper_added.reset_index(drop=True),
        "coverage_summary_final": coverage_summary_final.reset_index(drop=True),
        "coverage_summary_hr_api": coverage_summary_hr_api.reset_index(drop=True),
        "coverage_summary_hr_supplemental": coverage_summary_hr_supplemental.reset_index(drop=True),
        "coverage_summary_hr_final": coverage_summary_hr_final.reset_index(drop=True),
        "missing_books_by_market": missing_books_by_market.reset_index(drop=True),
        "missing_event_books_by_market": missing_event_books_by_market.reset_index(drop=True),
        "missing_hr_books_global": missing_hr_books_global.reset_index(drop=True),
        "missing_hr_books_by_event": missing_hr_books_by_event.reset_index(drop=True),
        "hr_snapshot_completeness": hr_snapshot_completeness,
        "adapter_status_by_book": dict(scraper_meta.get("adapter_status_by_book") or {}),
        "adapter_error_by_book": dict(scraper_meta.get("adapter_error_by_book") or {}),
        "adapter_rows_by_book": dict(scraper_meta.get("adapter_rows_by_book") or {}),
        "adapter_last_attempted_at_by_book": dict(scraper_meta.get("adapter_last_attempted_at_by_book") or {}),
        "adapter_retry_after_by_book": dict(scraper_meta.get("adapter_retry_after_by_book") or {}),
        "scraper_candidate_count": scraper_candidate_count,
        "scraper_added_count": scraper_added_count,
        "scraper_duplicate_reject_count": scraper_duplicate_reject_count,
    }
def fetch_upcoming_props_coverage_probe(
    sportsbooks: list[str] | None = None,
    markets: list[str] | None = None,
    max_events: int = 5,
) -> dict[str, pd.DataFrame]:
    """Probe The Odds API for market coverage and summarize it per book/market.

    ``williamhill_us`` is always added to the probed books. A provider failure
    is logged and treated as an empty probe result.

    Returns:
        ``coverage_probe_raw`` (the provider rows) and ``coverage_probe_summary``
        (events checked, events with data and rows with data per
        sportsbook/market key).
    """
    probe_books = _normalize_requested_books(sportsbooks).copy()
    if "williamhill_us" not in probe_books:
        probe_books.append("williamhill_us")

    provider = TheOddsAPIProvider()
    try:
        raw = provider.fetch_upcoming_market_coverage_probe(
            sportsbooks=probe_books,
            markets=markets,
            max_events=max_events,
        )
    except Exception as exc:
        logger.warning(
            "[fetch_upcoming_props_coverage_probe] failure: %s",
            exc,
            exc_info=True,
        )
        raw = pd.DataFrame()

    def _count_truthy(values) -> int:
        return int(pd.Series(values).astype(bool).sum())

    def _count_positive(values) -> int:
        # Non-numeric outcome counts are coerced to 0 before the comparison.
        return int((pd.to_numeric(values, errors="coerce").fillna(0) > 0).sum())

    if raw.empty:
        summary = pd.DataFrame(
            columns=[
                "sportsbook",
                "market_key",
                "events_checked",
                "events_with_data",
                "rows_with_data",
            ]
        )
    else:
        per_book_market = raw.groupby(["sportsbook", "market_key"], dropna=False)
        counts = per_book_market.agg(
            events_checked=("event_id", "nunique"),
            events_with_data=("has_data", _count_truthy),
            rows_with_data=("outcomes_returned", _count_positive),
        )
        ordered = counts.reset_index().sort_values(["market_key", "sportsbook"], na_position="last")
        summary = ordered.reset_index(drop=True)

    return {
        "coverage_probe_raw": raw.reset_index(drop=True),
        "coverage_probe_summary": summary,
    }
def best_book_by_player_market(df: pd.DataFrame) -> pd.DataFrame:
    """Return, for each (player_name, market) pair, the row with the best odds.

    "Best" means the highest American odds value (+150 beats -110, and -105
    beats -110). Ties keep the first row encountered in the group. Rows whose
    ``odds_american`` is missing or not convertible to an integer are ignored,
    and a group with no usable odds contributes no row.

    Args:
        df: Prop rows with ``player_name``, ``market`` and ``odds_american``
            columns. An empty frame is returned unchanged.

    Returns:
        A new DataFrame with one row per group that has usable odds.
    """
    if df.empty:
        return df

    best_rows = []
    for _, group in df.groupby(["player_name", "market"], dropna=False):
        best_pos = None
        best_odds = None
        for pos, raw_odds in enumerate(group["odds_american"].tolist()):
            try:
                odds = int(raw_odds)
            except (TypeError, ValueError, OverflowError):
                # Missing (None/NaN) or malformed price: cannot be ranked.
                continue
            if best_odds is None or odds > best_odds:
                best_pos = pos
                best_odds = odds
        if best_pos is not None:
            # Select by position so repeated index labels still give one row.
            best_rows.append(group.iloc[best_pos].to_dict())
    return pd.DataFrame(best_rows)
def fetch_all_upcoming_hr_props(
    sportsbooks: list[str] | None = None,
    markets: list[str] | None = None,
) -> pd.DataFrame:
    """
    Return only the merged props feed from ``fetch_all_upcoming_hr_props_bundle``.

    The bundle uses The Odds API as the primary source and the scraper
    fallback to fill missing coverage.
    """
    return fetch_all_upcoming_hr_props_bundle(
        sportsbooks=sportsbooks,
        markets=markets,
    )["merged_props_feed"]
def fetch_live_prop_odds(
    game_context: dict,
    sportsbooks: list[str] | None = None,
    markets: list[str] | None = None,
) -> pd.DataFrame:
    """Fetch live prop odds for one game from every enabled provider.

    The enterprise provider is queried first when ``ENABLE_ENTERPRISE_PROVIDER``
    is set, followed by The Odds API. Non-empty results are concatenated and
    passed through ``normalize_prop_odds``.

    Args:
        game_context: Game description forwarded unchanged to each provider.
        sportsbooks: Optional sportsbook keys forwarded to each provider.
        markets: Optional market keys forwarded to each provider.

    Returns:
        The normalized combined rows, or an empty DataFrame when no provider
        returned data. A failing provider is logged and skipped.
    """
    providers = []
    if ENABLE_ENTERPRISE_PROVIDER:
        providers.append(EnterpriseMarketProvider())
    providers.append(TheOddsAPIProvider())

    frames = []
    for provider in providers:
        try:
            df = provider.fetch_live_prop_odds(
                game_context=game_context,
                sportsbooks=sportsbooks,
                markets=markets,
            )
            # A provider returning None simply has nothing to contribute.
            if df is not None and not df.empty:
                frames.append(df)
        except Exception as exc:
            # Best-effort: keep going with the remaining providers, but record
            # the traceback like the other fetchers in this module do.
            logger.warning(
                "[fetch_live_prop_odds] provider failure: %s",
                exc,
                exc_info=True,
            )

    if not frames:
        return pd.DataFrame()
    merged = pd.concat(frames, ignore_index=True)
    return normalize_prop_odds(merged)