Syntrex committed on
Commit
0dfe795
·
1 Parent(s): 51b1a14

Remove Fangraphs starter fallback in favor of MLB and Odds API

Browse files
data/mlb_starters.py CHANGED
@@ -32,6 +32,21 @@ def _normalize_team(name: str) -> str:
32
  return text
33
 
34
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  def fetch_probable_starters_for_props() -> dict[tuple[str, str], dict[str, str | None]]:
36
  """
37
  Fetch probable starters for all MLB games in the next 7 days.
@@ -110,55 +125,199 @@ def fetch_probable_starters_for_props() -> dict[tuple[str, str], dict[str, str |
110
  return result
111
 
112
 
113
- def _first_rotation_pitcher(team_payload: dict[str, Any] | None) -> str | None:
114
- payload = dict(team_payload or {})
115
- rotation = payload.get("rotation") or []
116
- if not rotation:
117
- return None
118
- first = rotation[0]
119
- if isinstance(first, dict):
120
- pitcher_name = str(first.get("pitcher_name") or first.get("player_name") or "").strip()
121
- return pitcher_name or None
122
- pitcher_name = str(first or "").strip()
123
- return pitcher_name or None
124
-
 
 
 
 
 
 
 
 
125
 
126
- def build_fangraphs_starter_fallback_map(
127
- matchups: list[tuple[str, str]] | tuple[tuple[str, str], ...],
128
- projected_lineups: dict[str, dict[str, Any]] | None,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
129
  ) -> dict[tuple[str, str], dict[str, str | None]]:
 
 
 
 
 
 
 
 
 
 
 
 
 
130
  results: dict[tuple[str, str], dict[str, str | None]] = {}
131
- for away_team, home_team in list(matchups or []):
132
- away_raw = str(away_team or "").strip()
133
- home_raw = str(home_team or "").strip()
134
- away_norm = _normalize_team(away_raw)
135
- home_norm = _normalize_team(home_raw)
 
 
 
 
 
136
  if not away_norm or not home_norm:
137
  continue
138
 
139
- away_payload = (projected_lineups or {}).get(away_norm) or {}
140
- home_payload = (projected_lineups or {}).get(home_norm) or {}
141
- away_pitcher = _first_rotation_pitcher(away_payload)
142
- home_pitcher = _first_rotation_pitcher(home_payload)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
143
  results[(away_norm, home_norm)] = {
144
- "away_team_raw": away_raw,
145
- "home_team_raw": home_raw,
146
  "away_pitcher": away_pitcher,
147
  "home_pitcher": home_pitcher,
148
- "away_pitcher_source": "fangraphs_rotation_fallback" if away_pitcher else "unresolved",
149
- "home_pitcher_source": "fangraphs_rotation_fallback" if home_pitcher else "unresolved",
150
- "starter_cache_source": "fangraphs_rotation_fallback",
151
- "fallback_used": bool(away_pitcher or home_pitcher),
 
 
152
  }
153
  return results
154
 
155
 
156
- def merge_probable_starters_with_fallback(
157
  primary_starters: dict[tuple[str, str], dict[str, Any]] | None,
158
- fallback_starters: dict[tuple[str, str], dict[str, Any]] | None,
159
  ) -> dict[tuple[str, str], dict[str, str | None]]:
160
  primary = dict(primary_starters or {})
161
- fallback = dict(fallback_starters or {})
162
  merged: dict[tuple[str, str], dict[str, str | None]] = {}
163
  for key in sorted(set(primary.keys()) | set(fallback.keys())):
164
  primary_payload = dict(primary.get(key) or {})
@@ -175,13 +334,16 @@ def merge_probable_starters_with_fallback(
175
  or str(fallback_payload.get("home_pitcher_source") or "").strip()
176
  or "unresolved"
177
  )
178
- fallback_used = away_source.startswith("fangraphs_") or home_source.startswith("fangraphs_")
179
  if away_source.startswith("statsapi_") or home_source.startswith("statsapi_"):
180
- starter_cache_source = "statsapi_probable_pitcher" if not fallback_used else "statsapi_plus_fangraphs_fallback"
181
  elif fallback_used:
182
- starter_cache_source = "fangraphs_rotation_fallback"
 
 
 
183
  else:
184
- starter_cache_source = "unresolved"
185
  merged[key] = {
186
  "away_team_raw": str(primary_payload.get("away_team_raw") or fallback_payload.get("away_team_raw") or "").strip(),
187
  "home_team_raw": str(primary_payload.get("home_team_raw") or fallback_payload.get("home_team_raw") or "").strip(),
 
32
  return text
33
 
34
 
35
+ def _normalize_person(name: str) -> str:
36
+ text = str(name or "").strip().lower()
37
+ text = unicodedata.normalize("NFKD", text)
38
+ text = "".join(ch for ch in text if not unicodedata.combining(ch))
39
+ text = re.sub(r"[^a-z0-9 ]", "", text)
40
+ text = re.sub(r"\s+", " ", text).strip()
41
+ return text
42
+
43
+
44
+ def _names_match(left: str, right: str) -> bool:
45
+ left_norm = _normalize_person(left)
46
+ right_norm = _normalize_person(right)
47
+ return bool(left_norm and right_norm and left_norm == right_norm)
48
+
49
+
50
  def fetch_probable_starters_for_props() -> dict[tuple[str, str], dict[str, str | None]]:
51
  """
52
  Fetch probable starters for all MLB games in the next 7 days.
 
125
  return result
126
 
127
 
128
+ def _infer_pitcher_team_from_rows(
129
+ pitcher_name: str,
130
+ pitcher_statcast_df: pd.DataFrame | None,
131
+ away_team: str,
132
+ home_team: str,
133
+ ) -> str:
134
+ if (
135
+ not pitcher_name
136
+ or pitcher_statcast_df is None
137
+ or pitcher_statcast_df.empty
138
+ or "player_name" not in pitcher_statcast_df.columns
139
+ ):
140
+ return ""
141
+
142
+ target = _normalize_person(pitcher_name)
143
+ rows = pitcher_statcast_df[
144
+ pitcher_statcast_df["player_name"].astype(str).map(_normalize_person) == target
145
+ ].copy()
146
+ if rows.empty:
147
+ return ""
148
 
149
+ away_norm = _normalize_team(away_team)
150
+ home_norm = _normalize_team(home_team)
151
+ team_candidates: list[str] = []
152
+
153
+ if {"inning_topbot", "home_team", "away_team"}.issubset(rows.columns):
154
+ inning_half = rows["inning_topbot"].fillna("").astype(str).str.lower()
155
+ top_mask = inning_half.str.contains("top")
156
+ bottom_mask = inning_half.str.contains("bot|bottom")
157
+ if top_mask.any():
158
+ team_candidates.extend(
159
+ rows.loc[top_mask, "home_team"].dropna().astype(str).tolist()
160
+ )
161
+ if bottom_mask.any():
162
+ team_candidates.extend(
163
+ rows.loc[bottom_mask, "away_team"].dropna().astype(str).tolist()
164
+ )
165
+
166
+ for col in ("team", "pitcher_team", "team_name"):
167
+ if col in rows.columns:
168
+ team_candidates.extend(rows[col].dropna().astype(str).tolist())
169
+
170
+ normalized = [_normalize_team(value) for value in team_candidates if str(value).strip()]
171
+ if not normalized:
172
+ return ""
173
+ mode = pd.Series(normalized).mode()
174
+ inferred = str(mode.iloc[0]).strip() if not mode.empty else ""
175
+ if inferred == away_norm:
176
+ return away_team
177
+ if inferred == home_norm:
178
+ return home_team
179
+ return ""
180
+
181
+
182
+ def build_oddsapi_starter_fallback_map(
183
+ props_feed: pd.DataFrame | None,
184
+ primary_starters: dict[tuple[str, str], dict[str, Any]] | None = None,
185
+ pitcher_statcast_df: pd.DataFrame | None = None,
186
  ) -> dict[tuple[str, str], dict[str, str | None]]:
187
+ if props_feed is None or props_feed.empty:
188
+ return {}
189
+
190
+ working = props_feed.copy()
191
+ market_series = working.get("market_family", working.get("market", pd.Series(dtype="object", index=working.index)))
192
+ scope_series = working.get("selection_scope", pd.Series(dtype="object", index=working.index))
193
+ working = working[
194
+ market_series.fillna("").astype(str).str.strip().str.lower().eq("k")
195
+ & scope_series.fillna("").astype(str).str.strip().str.lower().eq("pitcher")
196
+ ].copy()
197
+ if working.empty:
198
+ return {}
199
+
200
  results: dict[tuple[str, str], dict[str, str | None]] = {}
201
+ primary = dict(primary_starters or {})
202
+ group_cols = [col for col in ("event_id", "away_team", "home_team") if col in working.columns]
203
+ if len(group_cols) < 3:
204
+ return {}
205
+
206
+ for _, event_df in working.groupby(group_cols, dropna=False):
207
+ away_team = str(event_df["away_team"].iloc[0] or "").strip()
208
+ home_team = str(event_df["home_team"].iloc[0] or "").strip()
209
+ away_norm = _normalize_team(away_team)
210
+ home_norm = _normalize_team(home_team)
211
  if not away_norm or not home_norm:
212
  continue
213
 
214
+ primary_payload = dict(primary.get((away_norm, home_norm)) or {})
215
+ away_pitcher = str(primary_payload.get("away_pitcher") or "").strip() or None
216
+ home_pitcher = str(primary_payload.get("home_pitcher") or "").strip() or None
217
+ away_source = str(primary_payload.get("away_pitcher_source") or "").strip() or "unresolved"
218
+ home_source = str(primary_payload.get("home_pitcher_source") or "").strip() or "unresolved"
219
+
220
+ candidate_names = [
221
+ str(name).strip()
222
+ for name in event_df.get("player_name_raw", pd.Series(dtype="object")).dropna().astype(str).tolist()
223
+ if str(name).strip()
224
+ ]
225
+ unique_candidates: list[str] = []
226
+ for candidate in candidate_names:
227
+ if not any(_names_match(candidate, existing) for existing in unique_candidates):
228
+ unique_candidates.append(candidate)
229
+
230
+ candidate_team_map: dict[str, str] = {}
231
+ for candidate in unique_candidates:
232
+ if away_pitcher and _names_match(candidate, away_pitcher):
233
+ candidate_team_map[candidate] = away_team
234
+ continue
235
+ if home_pitcher and _names_match(candidate, home_pitcher):
236
+ candidate_team_map[candidate] = home_team
237
+ continue
238
+ inferred_team = _infer_pitcher_team_from_rows(
239
+ pitcher_name=candidate,
240
+ pitcher_statcast_df=pitcher_statcast_df,
241
+ away_team=away_team,
242
+ home_team=home_team,
243
+ )
244
+ if inferred_team:
245
+ candidate_team_map[candidate] = inferred_team
246
+
247
+ blank_sides = int(not away_pitcher) + int(not home_pitcher)
248
+ assigned_from_odds = 0
249
+
250
+ if not away_pitcher:
251
+ for candidate, team_name in candidate_team_map.items():
252
+ if _normalize_team(team_name) == away_norm:
253
+ away_pitcher = candidate
254
+ assigned_from_odds += 1
255
+ break
256
+ if not home_pitcher:
257
+ for candidate, team_name in candidate_team_map.items():
258
+ if _normalize_team(team_name) == home_norm and not _names_match(candidate, away_pitcher or ""):
259
+ home_pitcher = candidate
260
+ assigned_from_odds += 1
261
+ break
262
+
263
+ unresolved_candidates = [
264
+ candidate
265
+ for candidate in unique_candidates
266
+ if not _names_match(candidate, away_pitcher or "") and not _names_match(candidate, home_pitcher or "")
267
+ ]
268
+ if len(unresolved_candidates) == 1:
269
+ if not away_pitcher and home_pitcher:
270
+ away_pitcher = unresolved_candidates[0]
271
+ assigned_from_odds += 1
272
+ elif not home_pitcher and away_pitcher:
273
+ home_pitcher = unresolved_candidates[0]
274
+ assigned_from_odds += 1
275
+
276
+ odds_source = "unresolved"
277
+ if assigned_from_odds >= 2 or (blank_sides >= 2 and away_pitcher and home_pitcher):
278
+ odds_source = "oddsapi_pitcher_strikeouts_two_candidate_match"
279
+ elif assigned_from_odds == 1:
280
+ odds_source = "oddsapi_pitcher_strikeouts_single_candidate_match"
281
+ elif len(unique_candidates) > 2 or (len(unique_candidates) >= 2 and not away_pitcher and not home_pitcher):
282
+ odds_source = "oddsapi_pitcher_strikeouts_ambiguous"
283
+
284
+ if away_source == "unresolved" and away_pitcher:
285
+ away_source = odds_source if odds_source != "unresolved" else "oddsapi_pitcher_strikeouts_single_candidate_match"
286
+ if home_source == "unresolved" and home_pitcher:
287
+ home_source = odds_source if odds_source != "unresolved" else "oddsapi_pitcher_strikeouts_single_candidate_match"
288
+
289
+ if away_source.startswith("statsapi_") or home_source.startswith("statsapi_"):
290
+ starter_cache_source = (
291
+ "statsapi_plus_oddsapi_fallback"
292
+ if (away_source.startswith("oddsapi_") or home_source.startswith("oddsapi_"))
293
+ else "statsapi_probable_pitcher"
294
+ )
295
+ elif away_source.startswith("oddsapi_") or home_source.startswith("oddsapi_"):
296
+ starter_cache_source = odds_source if odds_source != "unresolved" else "oddsapi_pitcher_strikeouts_single_candidate_match"
297
+ else:
298
+ starter_cache_source = odds_source
299
+
300
  results[(away_norm, home_norm)] = {
301
+ "away_team_raw": away_team,
302
+ "home_team_raw": home_team,
303
  "away_pitcher": away_pitcher,
304
  "home_pitcher": home_pitcher,
305
+ "away_pitcher_source": away_source if away_pitcher else "unresolved",
306
+ "home_pitcher_source": home_source if home_pitcher else "unresolved",
307
+ "starter_cache_source": starter_cache_source if (away_pitcher or home_pitcher or odds_source != "unresolved") else "unresolved",
308
+ "fallback_used": bool(
309
+ str(away_source).startswith("oddsapi_") or str(home_source).startswith("oddsapi_")
310
+ ),
311
  }
312
  return results
313
 
314
 
315
+ def merge_probable_starters_with_odds_fallback(
316
  primary_starters: dict[tuple[str, str], dict[str, Any]] | None,
317
+ odds_fallback_starters: dict[tuple[str, str], dict[str, Any]] | None,
318
  ) -> dict[tuple[str, str], dict[str, str | None]]:
319
  primary = dict(primary_starters or {})
320
+ fallback = dict(odds_fallback_starters or {})
321
  merged: dict[tuple[str, str], dict[str, str | None]] = {}
322
  for key in sorted(set(primary.keys()) | set(fallback.keys())):
323
  primary_payload = dict(primary.get(key) or {})
 
334
  or str(fallback_payload.get("home_pitcher_source") or "").strip()
335
  or "unresolved"
336
  )
337
+ fallback_used = away_source.startswith("oddsapi_") or home_source.startswith("oddsapi_")
338
  if away_source.startswith("statsapi_") or home_source.startswith("statsapi_"):
339
+ starter_cache_source = "statsapi_probable_pitcher" if not fallback_used else "statsapi_plus_oddsapi_fallback"
340
  elif fallback_used:
341
+ starter_cache_source = (
342
+ str(fallback_payload.get("starter_cache_source") or "").strip()
343
+ or "oddsapi_pitcher_strikeouts_single_candidate_match"
344
+ )
345
  else:
346
+ starter_cache_source = str(fallback_payload.get("starter_cache_source") or "").strip() or "unresolved"
347
  merged[key] = {
348
  "away_team_raw": str(primary_payload.get("away_team_raw") or fallback_payload.get("away_team_raw") or "").strip(),
349
  "home_team_raw": str(primary_payload.get("home_team_raw") or fallback_payload.get("home_team_raw") or "").strip(),
database/db.py CHANGED
@@ -631,24 +631,6 @@ def initialize_schema(conn) -> None:
631
  """
632
  ))
633
 
634
- conn.execute(text(
635
- """
636
- CREATE TABLE IF NOT EXISTS cached_fangraphs_starters (
637
- fetched_at TEXT,
638
- away_team_norm TEXT,
639
- home_team_norm TEXT,
640
- away_team_raw TEXT,
641
- home_team_raw TEXT,
642
- away_pitcher TEXT,
643
- home_pitcher TEXT,
644
- away_pitcher_source TEXT,
645
- home_pitcher_source TEXT,
646
- starter_cache_source TEXT,
647
- fallback_used BOOLEAN
648
- )
649
- """
650
- ))
651
-
652
  conn.execute(text(
653
  """
654
  CREATE TABLE IF NOT EXISTS cached_projected_lineups (
@@ -867,10 +849,6 @@ def initialize_schema(conn) -> None:
867
  "CREATE INDEX IF NOT EXISTS idx_cached_probable_starters_matchup "
868
  "ON cached_probable_starters (away_team_norm, home_team_norm)"
869
  ))
870
- conn.execute(text(
871
- "CREATE INDEX IF NOT EXISTS idx_cached_fangraphs_starters_matchup "
872
- "ON cached_fangraphs_starters (away_team_norm, home_team_norm)"
873
- ))
874
  conn.execute(text(
875
  "CREATE INDEX IF NOT EXISTS idx_cached_projected_lineups_team "
876
  "ON cached_projected_lineups (team_name_norm)"
@@ -1151,97 +1129,6 @@ def read_cached_probable_starters_meta(conn) -> pd.DataFrame:
1151
  )
1152
 
1153
 
1154
- def replace_cached_fangraphs_starters(
1155
- conn,
1156
- starters_map: Mapping[tuple[str, str], Mapping[str, Any]] | None,
1157
- ) -> None:
1158
- rows: list[dict[str, Any]] = []
1159
- fetched_at = utc_now_iso()
1160
- for key, payload in (starters_map or {}).items():
1161
- if not isinstance(key, tuple) or len(key) != 2:
1162
- continue
1163
- away_norm, home_norm = key
1164
- payload = dict(payload or {})
1165
- rows.append(
1166
- {
1167
- "fetched_at": fetched_at,
1168
- "away_team_norm": str(away_norm or "").strip(),
1169
- "home_team_norm": str(home_norm or "").strip(),
1170
- "away_team_raw": str(payload.get("away_team_raw") or "").strip(),
1171
- "home_team_raw": str(payload.get("home_team_raw") or "").strip(),
1172
- "away_pitcher": str(payload.get("away_pitcher") or "").strip() or None,
1173
- "home_pitcher": str(payload.get("home_pitcher") or "").strip() or None,
1174
- "away_pitcher_source": str(payload.get("away_pitcher_source") or "").strip() or None,
1175
- "home_pitcher_source": str(payload.get("home_pitcher_source") or "").strip() or None,
1176
- "starter_cache_source": str(payload.get("starter_cache_source") or "").strip() or None,
1177
- "fallback_used": bool(payload.get("fallback_used")),
1178
- }
1179
- )
1180
- replace_table_contents(conn, "cached_fangraphs_starters", pd.DataFrame(rows))
1181
-
1182
-
1183
- def read_cached_fangraphs_starters(
1184
- conn,
1185
- matchups_norm: Iterable[tuple[str, str]] | None = None,
1186
- ) -> dict[tuple[str, str], dict[str, str | None]]:
1187
- matchups = [
1188
- (str(away or "").strip(), str(home or "").strip())
1189
- for away, home in (matchups_norm or [])
1190
- if str(away or "").strip() and str(home or "").strip()
1191
- ]
1192
- if matchups:
1193
- clauses = []
1194
- params: dict[str, Any] = {}
1195
- for idx, (away, home) in enumerate(matchups):
1196
- away_key = f"away_{idx}"
1197
- home_key = f"home_{idx}"
1198
- clauses.append(f"(away_team_norm = :{away_key} AND home_team_norm = :{home_key})")
1199
- params[away_key] = away
1200
- params[home_key] = home
1201
- df = pd.read_sql(
1202
- text(f"SELECT * FROM cached_fangraphs_starters WHERE {' OR '.join(clauses)}"),
1203
- conn,
1204
- params=params,
1205
- )
1206
- else:
1207
- df = pd.read_sql(text("SELECT * FROM cached_fangraphs_starters"), conn)
1208
- if df.empty:
1209
- return {}
1210
- out: dict[tuple[str, str], dict[str, str | None]] = {}
1211
- for _, row in df.iterrows():
1212
- key = (
1213
- str(row.get("away_team_norm") or "").strip(),
1214
- str(row.get("home_team_norm") or "").strip(),
1215
- )
1216
- if not key[0] or not key[1]:
1217
- continue
1218
- out[key] = {
1219
- "away_team_raw": str(row.get("away_team_raw") or "").strip(),
1220
- "home_team_raw": str(row.get("home_team_raw") or "").strip(),
1221
- "away_pitcher": str(row.get("away_pitcher") or "").strip() or None,
1222
- "home_pitcher": str(row.get("home_pitcher") or "").strip() or None,
1223
- "away_pitcher_source": str(row.get("away_pitcher_source") or "").strip() or None,
1224
- "home_pitcher_source": str(row.get("home_pitcher_source") or "").strip() or None,
1225
- "starter_cache_source": str(row.get("starter_cache_source") or "").strip() or None,
1226
- "fallback_used": bool(row.get("fallback_used")),
1227
- }
1228
- return out
1229
-
1230
-
1231
- def read_cached_fangraphs_starters_meta(conn) -> pd.DataFrame:
1232
- return pd.read_sql(
1233
- text(
1234
- """
1235
- SELECT fetched_at, COUNT(*) AS matchup_count
1236
- FROM cached_fangraphs_starters
1237
- GROUP BY fetched_at
1238
- ORDER BY fetched_at DESC
1239
- """
1240
- ),
1241
- conn,
1242
- )
1243
-
1244
-
1245
  def replace_cached_projected_lineups(
1246
  conn,
1247
  projected_lineups: Mapping[str, Mapping[str, Any]] | None,
 
631
  """
632
  ))
633
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
634
  conn.execute(text(
635
  """
636
  CREATE TABLE IF NOT EXISTS cached_projected_lineups (
 
849
  "CREATE INDEX IF NOT EXISTS idx_cached_probable_starters_matchup "
850
  "ON cached_probable_starters (away_team_norm, home_team_norm)"
851
  ))
 
 
 
 
852
  conn.execute(text(
853
  "CREATE INDEX IF NOT EXISTS idx_cached_projected_lineups_team "
854
  "ON cached_projected_lineups (team_name_norm)"
 
1129
  )
1130
 
1131
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1132
  def replace_cached_projected_lineups(
1133
  conn,
1134
  projected_lineups: Mapping[str, Mapping[str, Any]] | None,
tests/test_mlb_starters.py ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ import sys
5
+ import unittest
6
+
7
+ import pandas as pd
8
+
9
+ sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))
10
+
11
+ from data.mlb_starters import (
12
+ build_oddsapi_starter_fallback_map,
13
+ merge_probable_starters_with_odds_fallback,
14
+ )
15
+
16
+
17
+ class TestMlbStarters(unittest.TestCase):
18
+ def test_oddsapi_fallback_can_fill_tba_matchup_from_pitcher_team_inference(self) -> None:
19
+ props_feed = pd.DataFrame(
20
+ [
21
+ {
22
+ "event_id": "event-1",
23
+ "away_team": "New York Yankees",
24
+ "home_team": "San Francisco Giants",
25
+ "market_family": "k",
26
+ "selection_scope": "pitcher",
27
+ "player_name_raw": "Max Fried",
28
+ },
29
+ {
30
+ "event_id": "event-1",
31
+ "away_team": "New York Yankees",
32
+ "home_team": "San Francisco Giants",
33
+ "market_family": "k",
34
+ "selection_scope": "pitcher",
35
+ "player_name_raw": "Logan Webb",
36
+ },
37
+ ]
38
+ )
39
+ pitcher_statcast_df = pd.DataFrame(
40
+ [
41
+ {
42
+ "player_name": "Max Fried",
43
+ "inning_topbot": "Bottom",
44
+ "away_team": "New York Yankees",
45
+ "home_team": "San Francisco Giants",
46
+ },
47
+ {
48
+ "player_name": "Logan Webb",
49
+ "inning_topbot": "Top",
50
+ "away_team": "New York Yankees",
51
+ "home_team": "San Francisco Giants",
52
+ },
53
+ ]
54
+ )
55
+ primary = {
56
+ ("new york yankees", "san francisco giants"): {
57
+ "away_team_raw": "New York Yankees",
58
+ "home_team_raw": "San Francisco Giants",
59
+ "away_pitcher": None,
60
+ "home_pitcher": None,
61
+ "away_pitcher_source": "unresolved",
62
+ "home_pitcher_source": "unresolved",
63
+ "starter_cache_source": "statsapi_probable_pitcher",
64
+ "fallback_used": False,
65
+ }
66
+ }
67
+
68
+ fallback = build_oddsapi_starter_fallback_map(
69
+ props_feed=props_feed,
70
+ primary_starters=primary,
71
+ pitcher_statcast_df=pitcher_statcast_df,
72
+ )
73
+ merged = merge_probable_starters_with_odds_fallback(primary, fallback)
74
+ row = merged[("new york yankees", "san francisco giants")]
75
+
76
+ self.assertEqual(row["away_pitcher"], "Max Fried")
77
+ self.assertEqual(row["home_pitcher"], "Logan Webb")
78
+ self.assertEqual(row["away_pitcher_source"], "oddsapi_pitcher_strikeouts_two_candidate_match")
79
+ self.assertEqual(row["home_pitcher_source"], "oddsapi_pitcher_strikeouts_two_candidate_match")
80
+ self.assertEqual(row["starter_cache_source"], "statsapi_plus_oddsapi_fallback")
81
+ self.assertTrue(bool(row["fallback_used"]))
82
+
83
+ def test_oddsapi_fallback_does_not_override_existing_mlb_side(self) -> None:
84
+ props_feed = pd.DataFrame(
85
+ [
86
+ {
87
+ "event_id": "event-2",
88
+ "away_team": "Cleveland Guardians",
89
+ "home_team": "Detroit Tigers",
90
+ "market_family": "k",
91
+ "selection_scope": "pitcher",
92
+ "player_name_raw": "Tanner Bibee",
93
+ },
94
+ {
95
+ "event_id": "event-2",
96
+ "away_team": "Cleveland Guardians",
97
+ "home_team": "Detroit Tigers",
98
+ "market_family": "k",
99
+ "selection_scope": "pitcher",
100
+ "player_name_raw": "Tarik Skubal",
101
+ },
102
+ ]
103
+ )
104
+ primary = {
105
+ ("cleveland guardians", "detroit tigers"): {
106
+ "away_team_raw": "Cleveland Guardians",
107
+ "home_team_raw": "Detroit Tigers",
108
+ "away_pitcher": "Tanner Bibee",
109
+ "home_pitcher": None,
110
+ "away_pitcher_source": "statsapi_probable_pitcher",
111
+ "home_pitcher_source": "unresolved",
112
+ "starter_cache_source": "statsapi_probable_pitcher",
113
+ "fallback_used": False,
114
+ }
115
+ }
116
+
117
+ fallback = build_oddsapi_starter_fallback_map(
118
+ props_feed=props_feed,
119
+ primary_starters=primary,
120
+ pitcher_statcast_df=None,
121
+ )
122
+ merged = merge_probable_starters_with_odds_fallback(primary, fallback)
123
+ row = merged[("cleveland guardians", "detroit tigers")]
124
+
125
+ self.assertEqual(row["away_pitcher"], "Tanner Bibee")
126
+ self.assertEqual(row["away_pitcher_source"], "statsapi_probable_pitcher")
127
+ self.assertEqual(row["home_pitcher"], "Tarik Skubal")
128
+ self.assertEqual(row["home_pitcher_source"], "oddsapi_pitcher_strikeouts_single_candidate_match")
129
+ self.assertEqual(row["starter_cache_source"], "statsapi_plus_oddsapi_fallback")
130
+
131
+
132
+ if __name__ == "__main__":
133
+ unittest.main()
tests/test_props_mapper.py CHANGED
@@ -209,9 +209,9 @@ class TestPropsMapper(unittest.TestCase):
209
  ("away team", "home team"): {
210
  "home_pitcher": "Home Starter",
211
  "away_pitcher": "Away Starter",
212
- "home_pitcher_source": "fangraphs_rotation_fallback",
213
  "away_pitcher_source": "statsapi_probable_pitcher",
214
- "starter_cache_source": "statsapi_plus_fangraphs_fallback",
215
  "fallback_used": True,
216
  }
217
  }
@@ -280,9 +280,9 @@ class TestPropsMapper(unittest.TestCase):
280
  self.assertEqual(result.iloc[0]["projected_home_pitcher"], "Home Starter")
281
  self.assertEqual(result.iloc[0]["projected_away_pitcher"], "Away Starter")
282
  self.assertTrue(bool(result.iloc[0]["projected_starter_available"]))
283
- self.assertEqual(result.iloc[0]["projected_home_pitcher_source"], "fangraphs_rotation_fallback")
284
  self.assertEqual(result.iloc[0]["projected_away_pitcher_source"], "statsapi_probable_pitcher")
285
- self.assertEqual(result.iloc[0]["starter_cache_source"], "statsapi_plus_fangraphs_fallback")
286
  self.assertTrue(bool(result.iloc[0]["fallback_used"]))
287
  self.assertEqual(result.iloc[0]["projected_starter_match_status"], "matched_projected_home")
288
  self.assertEqual(mocked_build.call_args.kwargs["pitcher_name"], "Home Starter")
 
209
  ("away team", "home team"): {
210
  "home_pitcher": "Home Starter",
211
  "away_pitcher": "Away Starter",
212
+ "home_pitcher_source": "oddsapi_pitcher_strikeouts_two_candidate_match",
213
  "away_pitcher_source": "statsapi_probable_pitcher",
214
+ "starter_cache_source": "statsapi_plus_oddsapi_fallback",
215
  "fallback_used": True,
216
  }
217
  }
 
280
  self.assertEqual(result.iloc[0]["projected_home_pitcher"], "Home Starter")
281
  self.assertEqual(result.iloc[0]["projected_away_pitcher"], "Away Starter")
282
  self.assertTrue(bool(result.iloc[0]["projected_starter_available"]))
283
+ self.assertEqual(result.iloc[0]["projected_home_pitcher_source"], "oddsapi_pitcher_strikeouts_two_candidate_match")
284
  self.assertEqual(result.iloc[0]["projected_away_pitcher_source"], "statsapi_probable_pitcher")
285
+ self.assertEqual(result.iloc[0]["starter_cache_source"], "statsapi_plus_oddsapi_fallback")
286
  self.assertTrue(bool(result.iloc[0]["fallback_used"]))
287
  self.assertEqual(result.iloc[0]["projected_starter_match_status"], "matched_projected_home")
288
  self.assertEqual(mocked_build.call_args.kwargs["pitcher_name"], "Home Starter")
visualization/debug_page.py CHANGED
@@ -1424,9 +1424,7 @@ def render_debug(
1424
  "starter_cache_source": starter_debug.get("starter_cache_source"),
1425
  "starter_cache_age_seconds": starter_debug.get("starter_cache_age_seconds"),
1426
  "starter_refresh_mode": starter_debug.get("starter_refresh_mode"),
1427
- "fangraphs_starter_cache_age_seconds": starter_debug.get("fangraphs_starter_cache_age_seconds"),
1428
- "fangraphs_starter_refresh_mode": starter_debug.get("fangraphs_starter_refresh_mode"),
1429
- "fallback_used_matchup_count": starter_debug.get("fallback_used_matchup_count"),
1430
  "lineup_cache_age_seconds": starter_debug.get("lineup_cache_age_seconds"),
1431
  "lineup_refresh_mode": starter_debug.get("lineup_refresh_mode"),
1432
  }
 
1424
  "starter_cache_source": starter_debug.get("starter_cache_source"),
1425
  "starter_cache_age_seconds": starter_debug.get("starter_cache_age_seconds"),
1426
  "starter_refresh_mode": starter_debug.get("starter_refresh_mode"),
1427
+ "oddsapi_fallback_used_matchup_count": starter_debug.get("oddsapi_fallback_used_matchup_count"),
 
 
1428
  "lineup_cache_age_seconds": starter_debug.get("lineup_cache_age_seconds"),
1429
  "lineup_refresh_mode": starter_debug.get("lineup_refresh_mode"),
1430
  }
visualization/props_page.py CHANGED
@@ -30,19 +30,15 @@ from database.db import (
30
  ensure_upcoming_hr_props_table,
31
  get_connection,
32
  insert_upcoming_hr_props,
33
- read_cached_fangraphs_starters,
34
- read_cached_fangraphs_starters_meta,
35
  read_cached_projected_lineups,
36
  read_cached_projected_lineups_meta,
37
- replace_cached_fangraphs_starters,
38
  replace_cached_projected_lineups,
39
  )
40
  from utils.helpers import utc_now_iso
41
  from data.mlb_starters import (
42
- _normalize_team as _normalize_starter_team,
43
- build_fangraphs_starter_fallback_map,
44
  lookup_pitchers_for_game,
45
- merge_probable_starters_with_fallback,
46
  )
47
 
48
  _PROPS_ASYNC_LOCK = threading.Lock()
@@ -589,6 +585,25 @@ def _extract_slate_matchups(filtered_raw: pd.DataFrame) -> tuple[tuple[str, str]
589
  return tuple(sorted(matchups))
590
 
591
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
592
  def _filter_probable_starters_to_slate(
593
  probable_starters: dict | None,
594
  slate_teams: tuple[str, ...],
@@ -685,6 +700,21 @@ def _load_props_market_baseline_bundle(
685
  }
686
 
687
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
688
  def _build_market_modeling_payload(
689
  *,
690
  filtered_raw: pd.DataFrame,
@@ -985,110 +1015,39 @@ def _refresh_projected_lineups_cache(teams: list[str]) -> None:
985
  _persist_projected_lineups_cache(payload)
986
 
987
 
988
def _persist_fangraphs_starter_fallback_cache(payload: dict[tuple[str, str], dict[str, Any]]) -> None:
    """Write the FanGraphs starter fallback map to the cache, best-effort.

    Args:
        payload: Fallback starter entries keyed by a matchup tuple; passed
            unchanged to ``replace_cached_fangraphs_starters``.

    Failures while opening the connection, writing, or closing never reach
    the caller. They are logged at debug level (with traceback) so a broken
    cache can be diagnosed instead of being silently ignored.
    """
    import logging  # function-scope import keeps this block self-contained

    try:
        conn = get_connection()
        try:
            replace_cached_fangraphs_starters(conn, payload)
        finally:
            # Always release the connection, even when the write fails.
            conn.close()
    except Exception:
        logging.getLogger(__name__).debug(
            "Failed to persist FanGraphs starter fallback cache",
            exc_info=True,
        )
997
-
998
-
999
def _refresh_fangraphs_starter_fallback_cache(matchups: list[tuple[str, str]]) -> None:
    """Rebuild the FanGraphs starter fallback for ``matchups`` and persist it.

    Does nothing when ``matchups`` is empty. Otherwise the non-blank team
    names across all matchups are de-duplicated and sorted, projected lineups
    are loaded for them, the fallback map is built, and the result is handed
    to ``_persist_fangraphs_starter_fallback_cache``.
    """
    if not matchups:
        return

    # Collect every distinct, non-blank team name appearing in any matchup.
    unique_teams = set()
    for matchup in matchups:
        for team in matchup:
            if str(team or "").strip():
                unique_teams.add(team)

    lineups = _load_projected_lineups_for_props(tuple(sorted(unique_teams)))
    _persist_fangraphs_starter_fallback_cache(
        build_fangraphs_starter_fallback_map(matchups, lineups)
    )
1006
-
1007
-
1008
def _load_fangraphs_starter_fallback_for_props(
    matchups: tuple[tuple[str, str], ...],
    *,
    allow_sync_fetch: bool,
) -> tuple[dict[tuple[str, str], dict[str, Any]], float | None, str]:
    """Load FanGraphs fallback starters for ``matchups``, preferring the cache.

    Returns a ``(starters, age_seconds, refresh_mode)`` triple where
    ``refresh_mode`` is one of:

    * ``"starter_fallback_unneeded"`` - no matchups were requested.
    * ``"starter_fallback_cache_fresh"`` - cached rows at most six hours old.
    * ``"starter_fallback_cache_stale_async_refresh"`` - cached rows returned
      while a background refresh is queued.
    * ``"starter_fallback_cache_missing_async_refresh"`` - nothing cached and
      ``allow_sync_fetch`` is false; a background refresh is queued.
    * ``"starter_fallback_sync_seed"`` - nothing cached; the fallback was
      built and persisted inline.
    """
    if not matchups:
        return ({}, None, "starter_fallback_unneeded")

    # Normalise and de-duplicate matchups that have both team names present.
    normalized_pairs = set()
    for away, home in matchups:
        if str(away or "").strip() and str(home or "").strip():
            normalized_pairs.add(
                (_normalize_starter_team(away), _normalize_starter_team(home))
            )
    normalized_matchups = tuple(sorted(normalized_pairs))

    def _queue_background_refresh() -> None:
        # The key is derived from the normalised slate; the refresh itself
        # receives the caller's original matchups.
        refresh_key = "|".join(f"{away}@{home}" for away, home in normalized_matchups)
        _queue_props_async_refresh(
            f"fangraphs_starters:{refresh_key}",
            lambda: _refresh_fangraphs_starter_fallback_cache(list(matchups)),
        )

    conn = get_connection()
    try:
        cached_meta = read_cached_fangraphs_starters_meta(conn)
        cached = read_cached_fangraphs_starters(conn, matchups_norm=normalized_matchups)
        if cached_meta.empty:
            latest = pd.NaT
        else:
            latest = pd.to_datetime(
                cached_meta.iloc[0]["fetched_at"], errors="coerce", utc=True
            )
        age_seconds = (
            float((pd.Timestamp.now(tz="UTC") - latest).total_seconds())
            if pd.notna(latest)
            else None
        )
        if cached:
            is_fresh = age_seconds is not None and age_seconds <= float(60 * 60 * 6)
            if is_fresh:
                return (cached, age_seconds, "starter_fallback_cache_fresh")
            _queue_background_refresh()
            return (cached, age_seconds, "starter_fallback_cache_stale_async_refresh")
    finally:
        # Closing is best-effort; a failed close must not mask the result.
        try:
            conn.close()
        except Exception:
            pass

    if not allow_sync_fetch:
        _queue_background_refresh()
        return ({}, None, "starter_fallback_cache_missing_async_refresh")

    # Nothing cached and a synchronous fetch is allowed: seed the cache now.
    unique_teams = {
        team for matchup in matchups for team in matchup if str(team or "").strip()
    }
    lineups = _load_projected_lineups_for_props(tuple(sorted(unique_teams)))
    seeded = build_fangraphs_starter_fallback_map(matchups, lineups)
    _persist_fangraphs_starter_fallback_cache(seeded)
    return (seeded, 0.0, "starter_fallback_sync_seed")
1058
-
1059
-
1060
def _load_props_starter_bundle(
    raw: pd.DataFrame,
    probable_starters: dict | None,
) -> dict[str, Any]:
    """Combine primary probable starters with the FanGraphs fallback.

    Slate matchups are extracted from ``raw``. A matchup counts as covered
    when the primary map yields a non-blank away or home pitcher; all other
    matchups are sent to the FanGraphs fallback loader. A synchronous
    fallback fetch is allowed only when no matchup is covered and at least
    one is missing.

    Returns a dict with the slate matchups, the primary, fallback and merged
    starter maps, the fallback cache age and refresh mode, and the number of
    merged entries flagged ``fallback_used``.
    """
    slate_matchups = _extract_slate_matchups(raw)
    primary_starters = dict(probable_starters or {})

    def _has_primary_pitcher(away_team, home_team) -> bool:
        pitchers = lookup_pitchers_for_game(
            away_team=away_team,
            home_team=home_team,
            starters_map=primary_starters,
        )
        return bool(
            str(pitchers.get("away_pitcher") or "").strip()
            or str(pitchers.get("home_pitcher") or "").strip()
        )

    missing_primary_games: list[tuple[str, str]] = [
        (away_team, home_team)
        for away_team, home_team in slate_matchups
        if not _has_primary_pitcher(away_team, home_team)
    ]
    usable_primary_games = len(slate_matchups) - len(missing_primary_games)

    fallback_map, fallback_age_seconds, fallback_refresh_mode = (
        _load_fangraphs_starter_fallback_for_props(
            tuple(missing_primary_games),
            allow_sync_fetch=usable_primary_games == 0 and bool(missing_primary_games),
        )
    )
    merged = merge_probable_starters_with_fallback(primary_starters, fallback_map)
    fallback_used_total = sum(
        bool(entry.get("fallback_used")) for entry in merged.values()
    )
    return {
        "matchups": slate_matchups,
        "primary_starters": primary_starters,
        "fallback_starters": fallback_map,
        "merged_starters": merged,
        "fallback_age_seconds": fallback_age_seconds,
        "fallback_refresh_mode": fallback_refresh_mode,
        "fallback_used_matchup_count": fallback_used_total,
    }
@@ -2108,16 +2067,17 @@ def render_props(
2108
  starter_bundle = _load_props_starter_bundle(
2109
  raw=raw,
2110
  probable_starters=probable_starters,
 
 
 
 
 
2111
  )
2112
  st.session_state["props_starter_debug"] = {
2113
- "starter_cache_source": "statsapi_plus_fangraphs_fallback"
2114
- if int(starter_bundle.get("fallback_used_matchup_count") or 0) > 0
2115
- else "statsapi_probable_pitcher",
2116
  "starter_cache_age_seconds": st.session_state.get("probable_starters_cache_age_seconds"),
2117
  "starter_refresh_mode": st.session_state.get("probable_starters_refresh_mode"),
2118
- "fangraphs_starter_cache_age_seconds": starter_bundle.get("fallback_age_seconds"),
2119
- "fangraphs_starter_refresh_mode": starter_bundle.get("fallback_refresh_mode"),
2120
- "fallback_used_matchup_count": int(starter_bundle.get("fallback_used_matchup_count") or 0),
2121
  "lineup_cache_age_seconds": lineup_cache_age_seconds,
2122
  "lineup_refresh_mode": lineup_refresh_mode,
2123
  }
 
30
  ensure_upcoming_hr_props_table,
31
  get_connection,
32
  insert_upcoming_hr_props,
 
 
33
  read_cached_projected_lineups,
34
  read_cached_projected_lineups_meta,
 
35
  replace_cached_projected_lineups,
36
  )
37
  from utils.helpers import utc_now_iso
38
  from data.mlb_starters import (
39
+ build_oddsapi_starter_fallback_map,
 
40
  lookup_pitchers_for_game,
41
+ merge_probable_starters_with_odds_fallback,
42
  )
43
 
44
  _PROPS_ASYNC_LOCK = threading.Lock()
 
585
  return tuple(sorted(matchups))
586
 
587
 
588
+ def _extract_oddsapi_pitcher_names(raw: pd.DataFrame | None) -> tuple[str, ...]:
589
+ if raw is None or raw.empty:
590
+ return tuple()
591
+ market_series = raw.get("market_family", raw.get("market", pd.Series(dtype="object", index=raw.index)))
592
+ scope_series = raw.get("selection_scope", pd.Series(dtype="object", index=raw.index))
593
+ k_rows = raw[
594
+ market_series.fillna("").astype(str).str.strip().str.lower().eq("k")
595
+ & scope_series.fillna("").astype(str).str.strip().str.lower().eq("pitcher")
596
+ ].copy()
597
+ if k_rows.empty:
598
+ return tuple()
599
+ names = {
600
+ str(name).strip()
601
+ for name in k_rows.get("player_name_raw", pd.Series(dtype="object")).dropna().astype(str).tolist()
602
+ if str(name).strip()
603
+ }
604
+ return tuple(sorted(names))
605
+
606
+
607
  def _filter_probable_starters_to_slate(
608
  probable_starters: dict | None,
609
  slate_teams: tuple[str, ...],
 
700
  }
701
 
702
 
703
@st.cache_data(ttl=60 * 10, show_spinner=False)
def _load_props_starter_pitcher_identity_bundle(
    pitcher_names: tuple[str, ...],
) -> pd.DataFrame:
    """Fetch the blended pitcher frame for the given pitcher names.

    Returns an empty frame when ``pitcher_names`` is empty. Otherwise the
    shared baseline bundle is requested for those pitchers only (no batters,
    one-hour max age, no runtime-refresh persistence) and its
    ``"blended_pitcher_df"`` entry is returned, defaulting to an empty frame
    when that key is absent.
    """
    if pitcher_names:
        shared_bundle = load_or_build_shared_baseline_bundle_complete_for_request(
            batter_names=tuple(),
            pitcher_names=pitcher_names,
            max_age_seconds=60 * 60,
            persist_runtime_refresh=False,
        )
        return shared_bundle.get("blended_pitcher_df", pd.DataFrame())
    return pd.DataFrame()
716
+
717
+
718
  def _build_market_modeling_payload(
719
  *,
720
  filtered_raw: pd.DataFrame,
 
1015
  _persist_projected_lineups_cache(payload)
1016
 
1017
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1018
def _load_props_starter_bundle(
    raw: pd.DataFrame,
    probable_starters: dict | None,
    pitcher_statcast_df: pd.DataFrame | None = None,
) -> dict[str, Any]:
    """Combine primary probable starters with the Odds API fallback.

    Builds the Odds API fallback map from ``raw``, merges it with the primary
    starters, and summarises the per-matchup ``starter_cache_source`` labels
    into one aggregate label using this precedence:

    1. ``"statsapi_plus_oddsapi_fallback"`` if any entry carries that label;
    2. ``"oddsapi_pitcher_strikeouts_fallback"`` if any label starts with
       ``"oddsapi_"``;
    3. ``"statsapi_probable_pitcher"`` if any entry carries that label;
    4. ``"unresolved"`` otherwise.

    Returns a dict with the slate matchups, the primary, fallback and merged
    starter maps, the aggregate source label, and the number of merged
    entries flagged ``fallback_used``.
    """
    slate_matchups = _extract_slate_matchups(raw)
    primary_starters = dict(probable_starters or {})
    fallback_map = build_oddsapi_starter_fallback_map(
        props_feed=raw,
        primary_starters=primary_starters,
        pitcher_statcast_df=pitcher_statcast_df,
    )
    merged = merge_probable_starters_with_odds_fallback(primary_starters, fallback_map)

    # Gather the distinct non-blank source labels across merged entries.
    observed_sources: set[str] = set()
    for entry in merged.values():
        label = str(entry.get("starter_cache_source") or "").strip()
        if label:
            observed_sources.add(label)

    # NOTE(review): a slate mixing pure-statsapi games with pure-oddsapi games
    # resolves to the oddsapi label, not the "plus" label - confirm intended.
    aggregate_source = "unresolved"
    if "statsapi_plus_oddsapi_fallback" in observed_sources:
        aggregate_source = "statsapi_plus_oddsapi_fallback"
    elif any(label.startswith("oddsapi_") for label in observed_sources):
        aggregate_source = "oddsapi_pitcher_strikeouts_fallback"
    elif "statsapi_probable_pitcher" in observed_sources:
        aggregate_source = "statsapi_probable_pitcher"

    fallback_used_total = sum(
        bool(entry.get("fallback_used")) for entry in merged.values()
    )
    return {
        "matchups": slate_matchups,
        "primary_starters": primary_starters,
        "oddsapi_fallback_starters": fallback_map,
        "merged_starters": merged,
        "starter_cache_source": aggregate_source,
        "oddsapi_fallback_used_matchup_count": fallback_used_total,
    }
 
2067
  starter_bundle = _load_props_starter_bundle(
2068
  raw=raw,
2069
  probable_starters=probable_starters,
2070
+ pitcher_statcast_df=(
2071
+ pitcher_statcast_df
2072
+ if pitcher_statcast_df is not None and not pitcher_statcast_df.empty
2073
+ else _load_props_starter_pitcher_identity_bundle(_extract_oddsapi_pitcher_names(raw))
2074
+ ),
2075
  )
2076
  st.session_state["props_starter_debug"] = {
2077
+ "starter_cache_source": starter_bundle.get("starter_cache_source") or "unresolved",
 
 
2078
  "starter_cache_age_seconds": st.session_state.get("probable_starters_cache_age_seconds"),
2079
  "starter_refresh_mode": st.session_state.get("probable_starters_refresh_mode"),
2080
+ "oddsapi_fallback_used_matchup_count": int(starter_bundle.get("oddsapi_fallback_used_matchup_count") or 0),
 
 
2081
  "lineup_cache_age_seconds": lineup_cache_age_seconds,
2082
  "lineup_refresh_mode": lineup_refresh_mode,
2083
  }