Syntrex Claude Sonnet 4.6 committed on
Commit
0ff601a
·
1 Parent(s): 5a49085

Add multi-book scrape fallback for HR props + restore Odds API market key

Browse files

The Odds API indexes h2h first, player props later (24-48h before game
time). FanDuel/DK have props posted but Odds API hasn't crawled them yet.

Changes:
- provider_theoddsapi.py: restore markets=batter_home_runs (correct key),
remove DIAGNOSTIC block (served its purpose)
- provider_scrape.py (new): ScrapeFallbackProvider with independent
fetchers for DraftKings, FanDuel, BetMGM, and Caesars using their
semi-public JSON APIs. Each book's failure is caught independently.
DraftKings uses dynamic category discovery (survives ID changes).
- live_prop_odds.py: change from concat-all to stop-at-first-success
chaining. Register ScrapeFallbackProvider after TheOddsAPIProvider.
Scraper only runs when Odds API returns empty.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

data/live_prop_odds.py CHANGED
@@ -4,6 +4,7 @@ import pandas as pd
4
 
5
  from config.settings import ENABLE_ENTERPRISE_PROVIDER
6
  from data.provider_enterprise import EnterpriseMarketProvider
 
7
  from data.provider_theoddsapi import TheOddsAPIProvider
8
  from utils.logger import logger
9
 
@@ -76,8 +77,8 @@ def fetch_all_upcoming_hr_props(
76
  providers.append(EnterpriseMarketProvider())
77
 
78
  providers.append(TheOddsAPIProvider())
 
79
 
80
- frames = []
81
  for provider in providers:
82
  try:
83
  fetch_fn = getattr(provider, "fetch_all_upcoming_hr_props", None)
@@ -85,16 +86,12 @@ def fetch_all_upcoming_hr_props(
85
  continue
86
  df = fetch_fn(sportsbooks=sportsbooks)
87
  if not df.empty:
88
- frames.append(df)
89
  except Exception as e:
90
  logger.warning(f"[odds_provider_fetch] failure: {e}", exc_info=True)
91
  continue
92
 
93
- if not frames:
94
- return pd.DataFrame()
95
-
96
- merged = pd.concat(frames, ignore_index=True)
97
- return normalize_prop_odds(merged)
98
 
99
 
100
  def fetch_live_prop_odds(
 
4
 
5
  from config.settings import ENABLE_ENTERPRISE_PROVIDER
6
  from data.provider_enterprise import EnterpriseMarketProvider
7
+ from data.provider_scrape import ScrapeFallbackProvider
8
  from data.provider_theoddsapi import TheOddsAPIProvider
9
  from utils.logger import logger
10
 
 
77
  providers.append(EnterpriseMarketProvider())
78
 
79
  providers.append(TheOddsAPIProvider())
80
+ providers.append(ScrapeFallbackProvider()) # fallback if Odds API returns empty
81
 
 
82
  for provider in providers:
83
  try:
84
  fetch_fn = getattr(provider, "fetch_all_upcoming_hr_props", None)
 
86
  continue
87
  df = fetch_fn(sportsbooks=sportsbooks)
88
  if not df.empty:
89
+ return normalize_prop_odds(df) # stop at first provider that returns data
90
  except Exception as e:
91
  logger.warning(f"[odds_provider_fetch] failure: {e}", exc_info=True)
92
  continue
93
 
94
+ return pd.DataFrame()
 
 
 
 
95
 
96
 
97
  def fetch_live_prop_odds(
data/provider_scrape.py ADDED
@@ -0,0 +1,334 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ data/provider_scrape.py
3
+
4
+ Fallback scraper for HR props when The Odds API hasn't yet indexed player props.
5
+ Hits each book's semi-public JSON API directly using requests only (no browser).
6
+
7
+ Books: DraftKings, FanDuel, BetMGM, Caesars
8
+ Each book's fetch is independent — one failure does not block the others.
9
+ Results are concatenated across all books that respond successfully.
10
+ """
11
+ from __future__ import annotations
12
+
13
+ import logging
14
+ from typing import Any
15
+
16
+ import pandas as pd
17
+ import requests
18
+
19
+ from data.market_provider_base import MarketProviderBase
20
+ from data.odds_name_map import map_odds_name_to_model_name
21
+
22
+ _log = logging.getLogger(__name__)
23
+
24
+ _HEADERS = {
25
+ "User-Agent": (
26
+ "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
27
+ "AppleWebKit/537.36 (KHTML, like Gecko) "
28
+ "Chrome/120.0.0.0 Safari/537.36"
29
+ ),
30
+ "Accept": "application/json, text/plain, */*",
31
+ "Accept-Language": "en-US,en;q=0.9",
32
+ }
33
+
34
+ _DK_BASE = "https://sportsbook-nash.draftkings.com/api/odds/v1/league/84240"
35
+ _FD_BASE = "https://sbapi.il.sportsbook.fanduel.com/api"
36
+ _FD_AK = "FhMFpcPWXMeyZxOx"
37
+ _BETMGM_ACCESS_ID = "NmFjNjYzMmQtMmZlNS00MDQ3LWIzZjctNGMxMjhmNjNmNWVm"
38
+
39
+
40
def _make_row(
    provider_name: str,
    event_id: str,
    commence_time: str,
    away_team: str,
    home_team: str,
    sportsbook: str,
    sportsbook_key: str,
    player_name_raw: str,
    odds_american: int,
) -> dict[str, Any]:
    """Assemble one home-run prop record as a plain dict.

    The caller supplies the event, book and price fields verbatim. The
    market fields are constant for every row produced here: market_key
    ``batter_home_runs``, market ``hr`` and line ``0.5``.

    ``player_name`` is ``player_name_raw`` passed through
    ``map_odds_name_to_model_name``; the raw spelling is kept alongside it.

    Returns:
        A dict whose key order fixes the column order of any DataFrame
        built from a list of these rows.
    """
    model_name = map_odds_name_to_model_name(player_name_raw)
    row: dict[str, Any] = dict(
        provider=provider_name,
        event_id=event_id,
        commence_time=commence_time,
        away_team=away_team,
        home_team=home_team,
        sportsbook=sportsbook,
        sportsbook_key=sportsbook_key,
        market_key="batter_home_runs",
        market="hr",
        player_name_raw=player_name_raw,
        player_name=model_name,
        odds_american=odds_american,
        line=0.5,
    )
    return row
66
+
67
+
68
+ class ScrapeFallbackProvider(MarketProviderBase):
69
+ provider_name = "scrape_fallback"
70
+
71
def fetch_live_prop_odds(
    self,
    game_context,
    sportsbooks=None,
    markets=None,
) -> pd.DataFrame:
    """Return an empty DataFrame regardless of the arguments.

    ``game_context``, ``sportsbooks`` and ``markets`` are accepted but never
    read; this provider only produces data through
    ``fetch_all_upcoming_hr_props``.
    """
    no_rows = pd.DataFrame()
    return no_rows
78
+
79
def fetch_all_upcoming_hr_props(self, sportsbooks=None) -> pd.DataFrame:
    """Run every per-book scraper and stack whatever rows come back.

    The four fetchers run in a fixed order (DraftKings, FanDuel, BetMGM,
    Caesars). An exception from one book is logged and swallowed, so the
    remaining books still run. Non-empty frames are concatenated with a
    fresh index.

    ``sportsbooks`` is accepted but not used by this implementation; all
    four books are always attempted.

    Returns:
        The concatenated rows, or an empty DataFrame when no book
        produced any.
    """
    book_fetchers = (
        self._fetch_draftkings,
        self._fetch_fanduel,
        self._fetch_betmgm,
        self._fetch_caesars,
    )

    collected = []
    for fetcher in book_fetchers:
        label = fetcher.__name__
        try:
            book_df = fetcher()
            if book_df.empty:
                _log.warning("[scrape_fallback] %s returned 0 rows", label)
                continue
            collected.append(book_df)
            _log.warning(
                "[scrape_fallback] %s returned %d rows",
                label,
                len(book_df),
            )
        except Exception as exc:
            # Best-effort by design: one book failing must not block the rest.
            _log.warning("[scrape_fallback] %s failed: %s", label, exc)

    if collected:
        combined = pd.concat(collected, ignore_index=True)
    else:
        combined = pd.DataFrame()
    _log.warning("[scrape_fallback] SUMMARY total_rows=%d", len(combined))
    return combined
104
+
105
+ # ---------------------------------------------------------------------------
106
+ # DraftKings
107
+ # ---------------------------------------------------------------------------
108
+
109
def _fetch_draftkings(self) -> pd.DataFrame:
    """Fetch and parse the DraftKings home-run subcategory.

    The category and subcategory ids are looked up first via
    ``_discover_dk_hr_category``. If either id is missing or falsy, a
    warning is logged and an empty frame is returned without a second
    request.

    Raises:
        requests.HTTPError: from ``raise_for_status`` on a non-success
            response (after the status code has been logged).
    """
    category_id, subcategory_id = self._discover_dk_hr_category()
    if not (category_id and subcategory_id):
        _log.warning("[dk_scrape] HR subcategory not found in categories response")
        return pd.DataFrame()

    endpoint = f"{_DK_BASE}/categories/{category_id}/subcategories/{subcategory_id}"
    response = requests.get(endpoint, headers=_HEADERS, timeout=20)
    # Status is logged before raising so failures still show the HTTP code.
    _log.warning(
        "[dk_scrape] HTTP %s cat=%s sub=%s",
        response.status_code,
        category_id,
        subcategory_id,
    )
    response.raise_for_status()
    payload = response.json()
    return self._parse_dk(payload)
119
+
120
def _discover_dk_hr_category(self) -> tuple[int | None, int | None]:
    """Look up the DraftKings category/subcategory ids for home-run props.

    Requests the league's ``/categories`` listing and scans
    ``eventGroupSubcategories``. A category is considered when its
    lower-cased name contains "batter" or "player". Within it, the first
    subcategory whose lower-cased name contains "home run" or "anytime hr"
    wins.

    Returns:
        ``(categoryId, subcategoryId)`` of the first match, or
        ``(None, None)`` when nothing matches.

    Raises:
        requests.HTTPError: from ``raise_for_status`` on a non-success
            response.
    """
    response = requests.get(f"{_DK_BASE}/categories", headers=_HEADERS, timeout=15)
    response.raise_for_status()
    listing = response.json()

    for category in listing.get("eventGroupSubcategories", []):
        category_label = str(category.get("name", "") or "").lower()
        if not ("batter" in category_label or "player" in category_label):
            continue
        category_id = category.get("categoryId")
        for subcategory in category.get("subcategories", []):
            sub_label = str(subcategory.get("name", "") or "").lower()
            if any(marker in sub_label for marker in ("home run", "anytime hr")):
                return category_id, subcategory.get("subcategoryId")

    return None, None
132
+
133
+ def _parse_dk(self, data: dict) -> pd.DataFrame:
134
+ rows: list[dict[str, Any]] = []
135
+ event_group = data.get("eventGroup", {})
136
+ for offer_cat in event_group.get("offerCategories", []):
137
+ for sub_desc in offer_cat.get("offerSubcategoryDescriptors", []):
138
+ for offer_group in sub_desc.get("offerGroups", []):
139
+ event_id = str(offer_group.get("eventId", "") or "")
140
+ event_desc = str(offer_group.get("eventDescription", "") or "")
141
+ parts = [p.strip() for p in event_desc.replace(" vs ", " @ ").split(" @ ")]
142
+ away_team = parts[0] if len(parts) >= 2 else ""
143
+ home_team = parts[1] if len(parts) >= 2 else ""
144
+ commence_time = str(offer_group.get("startDate", "") or "")
145
+ for offer_list in offer_group.get("offers", []):
146
+ for offer in (offer_list if isinstance(offer_list, list) else [offer_list]):
147
+ player_name_raw = str(offer.get("label", "") or "").strip()
148
+ if not player_name_raw:
149
+ continue
150
+ for outcome in offer.get("outcomes", []):
151
+ price_str = str(outcome.get("oddsAmerican", "") or "").replace("+", "")
152
+ if not price_str:
153
+ continue
154
+ try:
155
+ price = int(price_str)
156
+ except ValueError:
157
+ continue
158
+ rows.append(_make_row(
159
+ self.provider_name, event_id, commence_time,
160
+ away_team, home_team,
161
+ "DraftKings", "draftkings",
162
+ player_name_raw, price,
163
+ ))
164
+ return pd.DataFrame(rows)
165
+
166
+ # ---------------------------------------------------------------------------
167
+ # FanDuel
168
+ # ---------------------------------------------------------------------------
169
+
170
def _fetch_fanduel(self) -> pd.DataFrame:
    """Fetch FanDuel's baseball league page and parse it for HR props.

    Issues a single GET to the ``content-managed-page`` endpoint with the
    module's ``_ak`` key, logs the HTTP status, and hands the decoded JSON
    to ``_parse_fd``.

    Raises:
        requests.HTTPError: from ``raise_for_status`` on a non-success
            response (after the status code has been logged).
    """
    endpoint = f"{_FD_BASE}/content-managed-page"
    query = (
        f"?page=SPORT_LEAGUE&countryCode=US&regionCode=IL"
        f"&channel=BASEBALL&lang=en-US&_ak={_FD_AK}"
    )
    response = requests.get(endpoint + query, headers=_HEADERS, timeout=20)
    _log.warning("[fd_scrape] HTTP %s", response.status_code)
    response.raise_for_status()
    payload = response.json()
    return self._parse_fd(payload)
180
+
181
+ def _parse_fd(self, data: dict) -> pd.DataFrame:
182
+ rows: list[dict[str, Any]] = []
183
+ attachments = data.get("attachments", {})
184
+ events = attachments.get("events", {})
185
+ markets = attachments.get("markets", {})
186
+ for _market_id, market in markets.items():
187
+ market_type = str(market.get("marketType", "") or "").upper()
188
+ if "HOME_RUN" not in market_type and "HR" not in market_type:
189
+ continue
190
+ event_id = str(market.get("eventId", "") or "")
191
+ event = events.get(str(event_id), {})
192
+ away_team = str(
193
+ event.get("awayTeam", {}).get("name", "")
194
+ or event.get("awayTeamName", "")
195
+ or ""
196
+ )
197
+ home_team = str(
198
+ event.get("homeTeam", {}).get("name", "")
199
+ or event.get("homeTeamName", "")
200
+ or ""
201
+ )
202
+ commence_time = str(event.get("openDate", "") or "")
203
+ for runner in market.get("runners", []):
204
+ player_name_raw = str(runner.get("runnerName", "") or "").strip()
205
+ if not player_name_raw:
206
+ continue
207
+ win_odds = (
208
+ runner.get("winRunnerOdds", {})
209
+ .get("americanDisplayOdds", {})
210
+ .get("americanOdds", "")
211
+ )
212
+ if not win_odds:
213
+ continue
214
+ try:
215
+ price = int(str(win_odds).replace("+", ""))
216
+ except ValueError:
217
+ continue
218
+ rows.append(_make_row(
219
+ self.provider_name, event_id, commence_time,
220
+ away_team, home_team,
221
+ "FanDuel", "fanduel",
222
+ player_name_raw, price,
223
+ ))
224
+ return pd.DataFrame(rows)
225
+
226
+ # ---------------------------------------------------------------------------
227
+ # BetMGM
228
+ # ---------------------------------------------------------------------------
229
+
230
def _fetch_betmgm(self) -> pd.DataFrame:
    """Fetch BetMGM's MLB events listing and parse it for HR props.

    Issues a single GET with the module's access id in the query string,
    logs the HTTP status, and hands the decoded JSON to ``_parse_betmgm``.

    Raises:
        requests.HTTPError: from ``raise_for_status`` on a non-success
            response (after the status code has been logged).
    """
    endpoint = "https://sports.nj.betmgm.com/en/sports/api/v2/leagues/baseball-mlb/events"
    query = f"?lang=en-us&x-bwin-accessid={_BETMGM_ACCESS_ID}"
    response = requests.get(endpoint + query, headers=_HEADERS, timeout=20)
    _log.warning("[betmgm_scrape] HTTP %s", response.status_code)
    response.raise_for_status()
    payload = response.json()
    return self._parse_betmgm(payload)
239
+
240
+ def _parse_betmgm(self, data: dict | list) -> pd.DataFrame:
241
+ rows: list[dict[str, Any]] = []
242
+ events = (
243
+ data
244
+ if isinstance(data, list)
245
+ else data.get("result", {}).get("dataList", data.get("events", []))
246
+ )
247
+ for event in events:
248
+ event_id = str(event.get("id", "") or "")
249
+ name_obj = event.get("name", {})
250
+ name = str(name_obj.get("value", "") if isinstance(name_obj, dict) else name_obj or "")
251
+ parts = [p.strip() for p in name.replace(" vs ", " @ ").split(" @ ")]
252
+ away_team = parts[0] if len(parts) >= 2 else ""
253
+ home_team = parts[1] if len(parts) >= 2 else ""
254
+ commence_time = str(event.get("startDate", "") or "")
255
+ for fixture in event.get("markets", []):
256
+ mkt_name_obj = fixture.get("name", {})
257
+ mkt_name = str(
258
+ mkt_name_obj.get("value", "") if isinstance(mkt_name_obj, dict) else mkt_name_obj or ""
259
+ ).lower()
260
+ if "home run" not in mkt_name and "hr" not in mkt_name:
261
+ continue
262
+ for selection in fixture.get("selections", []):
263
+ sel_name_obj = selection.get("name", {})
264
+ player_name_raw = str(
265
+ sel_name_obj.get("value", "") if isinstance(sel_name_obj, dict) else sel_name_obj or ""
266
+ ).strip()
267
+ if not player_name_raw:
268
+ continue
269
+ price_val = selection.get("price", {}).get("americanOdds")
270
+ if price_val is None:
271
+ continue
272
+ try:
273
+ price = int(price_val)
274
+ except (TypeError, ValueError):
275
+ continue
276
+ rows.append(_make_row(
277
+ self.provider_name, event_id, commence_time,
278
+ away_team, home_team,
279
+ "BetMGM", "betmgm",
280
+ player_name_raw, price,
281
+ ))
282
+ return pd.DataFrame(rows)
283
+
284
+ # ---------------------------------------------------------------------------
285
+ # Caesars
286
+ # ---------------------------------------------------------------------------
287
+
288
def _fetch_caesars(self) -> pd.DataFrame:
    """Fetch Caesars' MLB home-run player-props endpoint and parse it.

    Issues a single GET, logs the HTTP status, and hands the decoded JSON
    to ``_parse_caesars``.

    Raises:
        requests.HTTPError: from ``raise_for_status`` on a non-success
            response (after the status code has been logged).
    """
    host = "https://api.levelmgr.caesarssportsbook.com/api/v1"
    path = "/leagues/baseball-mlb/player-props/home-run"
    response = requests.get(host + path, headers=_HEADERS, timeout=20)
    _log.warning("[caesars_scrape] HTTP %s", response.status_code)
    response.raise_for_status()
    payload = response.json()
    return self._parse_caesars(payload)
297
+
298
+ def _parse_caesars(self, data: dict | list) -> pd.DataFrame:
299
+ rows: list[dict[str, Any]] = []
300
+ items = (
301
+ data
302
+ if isinstance(data, list)
303
+ else data.get("data", data.get("events", data.get("items", [])))
304
+ )
305
+ for item in items:
306
+ event_id = str(item.get("eventId", item.get("id", "")) or "")
307
+ away_team = str(item.get("awayTeamName", item.get("away_team", "")) or "")
308
+ home_team = str(item.get("homeTeamName", item.get("home_team", "")) or "")
309
+ commence_time = str(item.get("eventDate", item.get("startTime", "")) or "")
310
+ selections = item.get("participants", item.get("props", item.get("selections", [])))
311
+ for prop in selections:
312
+ player_name_raw = str(
313
+ prop.get("name", prop.get("participantName", prop.get("playerName", ""))) or ""
314
+ ).strip()
315
+ if not player_name_raw:
316
+ continue
317
+ price_val = (
318
+ prop.get("odds", {}).get("american")
319
+ if isinstance(prop.get("odds"), dict)
320
+ else prop.get("americanOdds", prop.get("price"))
321
+ )
322
+ if price_val is None:
323
+ continue
324
+ try:
325
+ price = int(str(price_val).replace("+", ""))
326
+ except (TypeError, ValueError):
327
+ continue
328
+ rows.append(_make_row(
329
+ self.provider_name, event_id, commence_time,
330
+ away_team, home_team,
331
+ "Caesars", "williamhill_us",
332
+ player_name_raw, price,
333
+ ))
334
+ return pd.DataFrame(rows)
data/provider_theoddsapi.py CHANGED
@@ -309,6 +309,7 @@ class TheOddsAPIProvider(MarketProviderBase):
309
  odds_params = {
310
  "apiKey": ODDS_API_KEY,
311
  "regions": "us",
 
312
  "oddsFormat": "american",
313
  "dateFormat": "iso",
314
  }
@@ -367,17 +368,6 @@ class TheOddsAPIProvider(MarketProviderBase):
367
  away_team, home_team, _data_type, _top_keys, _bk_keys,
368
  _markets_by_book, _outcome_counts,
369
  )
370
- if _events_attempted == 1:
371
- _all_markets = sorted({
372
- m.get("key") for b in bookmakers for m in b.get("markets", [])
373
- })
374
- _diag_log.warning(
375
- "[upcoming_hr_props] DIAGNOSTIC first_event=%s@%s books_returned=%s markets_available=%s",
376
- away_team, home_team,
377
- [b.get("key") for b in bookmakers],
378
- _all_markets,
379
- )
380
-
381
  _bookmakers_seen = 0
382
  _markets_seen = 0
383
  _outcomes_seen = 0
 
309
  odds_params = {
310
  "apiKey": ODDS_API_KEY,
311
  "regions": "us",
312
+ "markets": "batter_home_runs",
313
  "oddsFormat": "american",
314
  "dateFormat": "iso",
315
  }
 
368
  away_team, home_team, _data_type, _top_keys, _bk_keys,
369
  _markets_by_book, _outcome_counts,
370
  )
 
 
 
 
 
 
 
 
 
 
 
371
  _bookmakers_seen = 0
372
  _markets_seen = 0
373
  _outcomes_seen = 0