File size: 24,895 Bytes
37fe240
 
 
 
 
 
 
 
 
 
 
 
 
fd52637
37fe240
 
 
 
 
 
 
21151ce
 
 
37fe240
 
 
 
 
 
 
 
 
 
 
 
 
 
dd66ccf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0dfe795
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21151ce
37fe240
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21151ce
 
 
37fe240
 
 
 
 
 
51b1a14
 
 
 
37fe240
 
21151ce
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37fe240
 
 
 
 
 
 
 
 
 
 
0dfe795
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51b1a14
0dfe795
 
 
 
 
dd66ccf
0dfe795
dd66ccf
0dfe795
 
51b1a14
0dfe795
 
 
 
 
51b1a14
0dfe795
 
 
 
 
 
 
 
 
 
 
 
 
51b1a14
0dfe795
 
 
 
 
 
 
 
 
 
51b1a14
 
 
0dfe795
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c16be6b
 
0dfe795
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a54e483
 
 
 
 
 
 
0dfe795
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51b1a14
0dfe795
 
51b1a14
 
0dfe795
 
 
 
 
 
51b1a14
 
 
 
0dfe795
51b1a14
0dfe795
51b1a14
 
0dfe795
51b1a14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0dfe795
51b1a14
0dfe795
51b1a14
0dfe795
 
 
 
51b1a14
0dfe795
51b1a14
 
 
 
 
 
 
 
 
 
 
 
 
37fe240
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dd66ccf
 
 
 
 
 
 
 
37fe240
 
 
 
 
 
 
f2d8d50
 
 
 
 
 
 
fd52637
f2d8d50
 
 
 
 
 
 
fd52637
f2d8d50
 
fd52637
 
 
f2d8d50
 
a54e483
f2d8d50
 
 
 
 
 
fd52637
a54e483
f2d8d50
 
 
 
 
 
 
 
 
 
 
 
 
 
a54e483
fd52637
a54e483
f2d8d50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
"""
data/mlb_starters.py

Fetches probable starting pitchers for upcoming MLB games from the public
MLB Stats API. Used by the Props page to enrich HR props with matchup context.

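The starter maps built here are dicts keyed by (away_team, home_team) → pitcher names and sources.
Both team names in the key are passed through `_normalize_team` (lowercased, accents and
punctuation stripped, whitespace collapsed) so that lookups tolerate formatting differences.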
"""
from __future__ import annotations

import logging
import re
import time
import unicodedata
from datetime import timedelta
from typing import Any

import pandas as pd
import requests

from data.odds_name_map import normalize_pitcher_name
from database.db import log_pitcher_resolution

# Module-level logger; messages emitted in this file carry a bracketed tag
# ("[mlb_starters]" or "[mlb_roster]") identifying the code path.
_log = logging.getLogger(__name__)

# MLB Stats API schedule endpoint queried by fetch_probable_starters_for_props.
_SCHEDULE_URL = "https://statsapi.mlb.com/api/v1/schedule"


def _normalize_team(name: str) -> str:
    text = str(name or "").strip().lower()
    text = unicodedata.normalize("NFKD", text)
    text = "".join(ch for ch in text if not unicodedata.combining(ch))
    text = re.sub(r"[^a-z0-9 ]", "", text)
    text = re.sub(r"\s+", " ", text).strip()
    return text


# Maps every known team name variant (abbreviation, city, nickname or full name, written the
# way it looks after _normalize_team) to a stable canonical code. Bridges the gap between
# statcast abbreviations (e.g. "sfg") and Odds API / MLB Stats API full names
# (e.g. "san francisco giants").
# Bare city names shared by two clubs ("chicago", "los angeles", "new york") have no entry,
# presumably because they cannot identify a single team.
_MLB_TEAM_CANONICAL: dict[str, str] = {
    # Arizona Diamondbacks
    "ari": "ari", "arizona": "ari", "arizona diamondbacks": "ari", "diamondbacks": "ari", "dbacks": "ari",
    # Atlanta Braves
    "atl": "atl", "atlanta": "atl", "atlanta braves": "atl", "braves": "atl",
    # Baltimore Orioles
    "bal": "bal", "baltimore": "bal", "baltimore orioles": "bal", "orioles": "bal",
    # Boston Red Sox
    "bos": "bos", "boston": "bos", "boston red sox": "bos", "red sox": "bos",
    # Chicago Cubs
    "chc": "chc", "chicago cubs": "chc", "cubs": "chc",
    # Chicago White Sox
    "cws": "cws", "chw": "cws", "chicago white sox": "cws", "white sox": "cws",
    # Cincinnati Reds
    "cin": "cin", "cincinnati": "cin", "cincinnati reds": "cin", "reds": "cin",
    # Cleveland Guardians
    "cle": "cle", "clg": "cle", "cleveland": "cle", "cleveland guardians": "cle", "guardians": "cle",
    # Colorado Rockies
    "col": "col", "colorado": "col", "colorado rockies": "col", "rockies": "col",
    # Detroit Tigers
    "det": "det", "detroit": "det", "detroit tigers": "det", "tigers": "det",
    # Houston Astros
    "hou": "hou", "houston": "hou", "houston astros": "hou", "astros": "hou",
    # Kansas City Royals
    "kc": "kc", "kcr": "kc", "kansas city": "kc", "kansas city royals": "kc", "royals": "kc",
    # Los Angeles Angels
    "laa": "laa", "los angeles angels": "laa", "angels": "laa",
    # Los Angeles Dodgers
    "lad": "lad", "los angeles dodgers": "lad", "dodgers": "lad",
    # Miami Marlins
    "mia": "mia", "miami": "mia", "miami marlins": "mia", "marlins": "mia",
    # Milwaukee Brewers
    "mil": "mil", "milwaukee": "mil", "milwaukee brewers": "mil", "brewers": "mil",
    # Minnesota Twins
    "min": "min", "minnesota": "min", "minnesota twins": "min", "twins": "min",
    # New York Mets
    "nym": "nym", "new york mets": "nym", "mets": "nym",
    # New York Yankees
    "nyy": "nyy", "new york yankees": "nyy", "yankees": "nyy",
    # Oakland / Sacramento Athletics
    # ("as" is what "A's" becomes once _normalize_team strips the apostrophe)
    "oak": "oak", "ath": "oak", "as": "oak", "oakland": "oak", "oakland athletics": "oak",
    "sacramento athletics": "oak", "athletics": "oak",
    # Philadelphia Phillies
    "phi": "phi", "philadelphia": "phi", "philadelphia phillies": "phi", "phillies": "phi",
    # Pittsburgh Pirates
    "pit": "pit", "pittsburgh": "pit", "pittsburgh pirates": "pit", "pirates": "pit",
    # San Diego Padres
    "sd": "sd", "sdp": "sd", "san diego": "sd", "san diego padres": "sd", "padres": "sd",
    # San Francisco Giants
    "sf": "sf", "sfg": "sf", "san francisco": "sf", "san francisco giants": "sf", "giants": "sf",
    # Seattle Mariners
    "sea": "sea", "seattle": "sea", "seattle mariners": "sea", "mariners": "sea",
    # St. Louis Cardinals
    # (the period in "St." is removed by _normalize_team, hence "st louis")
    "stl": "stl", "st louis": "stl", "st louis cardinals": "stl", "cardinals": "stl",
    # Tampa Bay Rays
    "tb": "tb", "tbr": "tb", "tampa bay": "tb", "tampa bay rays": "tb", "rays": "tb",
    # Texas Rangers
    "tex": "tex", "texas": "tex", "texas rangers": "tex", "rangers": "tex",
    # Toronto Blue Jays
    "tor": "tor", "toronto": "tor", "toronto blue jays": "tor", "blue jays": "tor",
    # Washington Nationals
    "wsh": "wsh", "wsn": "wsh", "washington": "wsh", "washington nationals": "wsh",
    "nationals": "wsh", "nats": "wsh",
}


def _canonical_team(name: str) -> str:
    """Resolve a team name variant to its canonical code.

    The name is normalized with ``_normalize_team`` and looked up in
    ``_MLB_TEAM_CANONICAL``. Names without an entry come back as their
    normalized form, so unknown teams still compare consistently.
    """
    normalized = _normalize_team(name)
    return _MLB_TEAM_CANONICAL.get(normalized, normalized)


def _normalize_person(name: str) -> str:
    text = str(name or "").strip().lower()
    text = unicodedata.normalize("NFKD", text)
    text = "".join(ch for ch in text if not unicodedata.combining(ch))
    text = re.sub(r"[^a-z0-9 ]", "", text)
    text = re.sub(r"\s+", " ", text).strip()
    return text


def _names_match(left: str, right: str) -> bool:
    """Tell whether two player names are equal after ``_normalize_person``.

    An empty normalized name never matches anything, including another
    empty name.
    """
    first = _normalize_person(left)
    second = _normalize_person(right)
    if not first or not second:
        return False
    return first == second


def fetch_probable_starters_for_props(conn: Any = None) -> dict[tuple[str, str], dict[str, str | None]]:
    """
    Fetch probable starters for MLB games scheduled in the next 7 days.

    Queries the MLB Stats API schedule endpoint, hydrated with ``probablePitcher``,
    for the window that starts at today's UTC date.

    Args:
        conn: Optional handle passed through to ``log_pitcher_resolution``. When
            given, every probable pitcher name in the response is logged; a failed
            log write is reported at debug level and does not abort the fetch.

    Returns:
        {
            (away_team_normalized, home_team_normalized): {
                "home_pitcher": "Luis Castillo" | None,
                "away_pitcher": "Cole Irvin" | None,
                "away_team_raw": "Seattle Mariners",
                "home_team_raw": "Oakland Athletics",
                "away_pitcher_source": "statsapi_probable_pitcher" | "unresolved",
                "home_pitcher_source": "statsapi_probable_pitcher" | "unresolved",
                "starter_cache_source": "statsapi_probable_pitcher",
                "fallback_used": False,
            }
        }

        Keys are produced by ``_normalize_team`` for fuzzy matching against props
        row team names. When the same matchup is listed more than once in the
        window (a multi-game series or a doubleheader) the first game listed is
        the one kept, so a later game whose starters are not announced yet cannot
        erase the starters of the upcoming one.

        An empty dict is returned when the request fails or the payload is not a
        JSON object.
    """
    # Timezone-aware replacement for the deprecated pd.Timestamp.utcnow().
    today = pd.Timestamp.now(tz="UTC").date()
    end_date = today + timedelta(days=7)
    params: dict[str, Any] = {
        "sportId": 1,
        "startDate": today.isoformat(),
        "endDate": end_date.isoformat(),
        "hydrate": "probablePitcher",
        "gameType": "R,F,D,L,W",
    }

    try:
        r = requests.get(_SCHEDULE_URL, params=params, timeout=15)
        r.raise_for_status()
        data = r.json()
    except Exception as exc:
        # Best-effort enrichment: any failure degrades to "no starters known".
        _log.warning("[mlb_starters] schedule fetch failed: %s", exc)
        return {}

    if not isinstance(data, dict):
        _log.warning(
            "[mlb_starters] unexpected schedule payload type: %s",
            type(data).__name__,
        )
        return {}

    result: dict[tuple[str, str], dict[str, str | None]] = {}
    games_total = 0
    games_with_starters = 0

    # "or {}" / "or []" (instead of .get defaults) also covers explicit JSON nulls.
    for date_entry in data.get("dates") or []:
        for game in date_entry.get("games") or []:
            games_total += 1
            teams = game.get("teams") or {}
            away_side = teams.get("away") or {}
            home_side = teams.get("home") or {}

            away_raw = str((away_side.get("team") or {}).get("name", "") or "")
            home_raw = str((home_side.get("team") or {}).get("name", "") or "")

            away_pitcher_obj = away_side.get("probablePitcher") or {}
            home_pitcher_obj = home_side.get("probablePitcher") or {}

            away_pitcher = str(away_pitcher_obj.get("fullName", "") or "").strip() or None
            home_pitcher = str(home_pitcher_obj.get("fullName", "") or "").strip() or None

            if not away_raw or not home_raw:
                continue

            game_pk_str = str(game.get("gamePk", "") or "").strip()
            game_date_str = str(date_entry.get("date", "") or "").strip()

            key = (_normalize_team(away_raw), _normalize_team(home_raw))
            # Keep the first game seen for a matchup; later games with the same
            # teams must not overwrite it.
            if key not in result:
                result[key] = {
                    "home_pitcher": home_pitcher,
                    "away_pitcher": away_pitcher,
                    "away_team_raw": away_raw,
                    "home_team_raw": home_raw,
                    "away_pitcher_source": "statsapi_probable_pitcher" if away_pitcher else "unresolved",
                    "home_pitcher_source": "statsapi_probable_pitcher" if home_pitcher else "unresolved",
                    "starter_cache_source": "statsapi_probable_pitcher",
                    "fallback_used": False,
                }

            # Resolution logging is per game, so it still covers every game.
            if conn is not None:
                for raw_name in (away_pitcher, home_pitcher):
                    if not raw_name:
                        continue
                    try:
                        log_pitcher_resolution(conn, {
                            "game_pk": game_pk_str,
                            "game_date": game_date_str,
                            "source": "mlb_starters",
                            "input_name": raw_name,
                            "normalized_name": normalize_pitcher_name(raw_name),
                            "matched_canonical": None,
                            "pitcher_id": None,
                            "match_method": "api_fetch",
                            "sample_size": 0,
                            "p_throws": None,
                        })
                    except Exception as exc:
                        _log.debug("[mlb_starters] resolution log write failed: %s", exc)

            if home_pitcher or away_pitcher:
                games_with_starters += 1

    _log.warning(
        "[mlb_starters] games_total=%d games_with_starters=%d",
        games_total,
        games_with_starters,
    )
    return result


def _infer_pitcher_team_from_rows(
    pitcher_name: str,
    pitcher_statcast_df: pd.DataFrame | None,
    away_team: str,
    home_team: str,
) -> str:
    """Guess which side of a matchup a pitcher belongs to from pitch-level rows.

    Rows whose ``player_name`` normalizes to the same value as ``pitcher_name``
    are collected. Team labels are gathered from them (the home team for rows
    in the top half of an inning, the away team for the bottom half, plus any
    ``team`` / ``pitcher_team`` / ``team_name`` columns) and the most frequent
    normalized label is compared, by canonical code, with the two teams.

    Returns:
        ``away_team`` or ``home_team`` exactly as passed in, or ``""`` when the
        pitcher cannot be found or the most frequent label is neither team.
    """
    if not pitcher_name:
        return ""
    if pitcher_statcast_df is None or pitcher_statcast_df.empty:
        return ""
    if "player_name" not in pitcher_statcast_df.columns:
        return ""

    wanted = _normalize_person(pitcher_name)
    is_pitcher = pitcher_statcast_df["player_name"].astype(str).map(_normalize_person) == wanted
    pitcher_rows = pitcher_statcast_df[is_pitcher].copy()
    if pitcher_rows.empty:
        return ""

    labels: list[str] = []

    if {"inning_topbot", "home_team", "away_team"}.issubset(pitcher_rows.columns):
        half = pitcher_rows["inning_topbot"].fillna("").astype(str).str.lower()
        # The home club pitches the top half, the visiting club the bottom half.
        in_top = half.str.contains("top")
        in_bottom = half.str.contains("bot|bottom")
        if in_top.any():
            labels += pitcher_rows.loc[in_top, "home_team"].dropna().astype(str).tolist()
        if in_bottom.any():
            labels += pitcher_rows.loc[in_bottom, "away_team"].dropna().astype(str).tolist()

    for column in ("team", "pitcher_team", "team_name"):
        if column in pitcher_rows.columns:
            labels += pitcher_rows[column].dropna().astype(str).tolist()

    cleaned = [_normalize_team(label) for label in labels if str(label).strip()]
    if not cleaned:
        return ""

    modes = pd.Series(cleaned).mode()
    most_common = "" if modes.empty else str(modes.iloc[0]).strip()
    inferred_code = _canonical_team(most_common)

    if inferred_code == _canonical_team(_normalize_team(away_team)):
        return away_team
    if inferred_code == _canonical_team(_normalize_team(home_team)):
        return home_team
    return ""


def build_oddsapi_starter_fallback_map(
    props_feed: pd.DataFrame | None,
    primary_starters: dict[tuple[str, str], dict[str, Any]] | None = None,
    pitcher_statcast_df: pd.DataFrame | None = None,
) -> dict[tuple[str, str], dict[str, str | None]]:
    """
    Build a per-game starters map from pitcher strikeout prop rows.

    Only rows whose market ("market_family", or "market" when that column is
    absent) equals "k" and whose "selection_scope" equals "pitcher" are used.
    Rows are grouped by (event_id, away_team, home_team); within each event the
    distinct "player_name_raw" values are treated as starter candidates and are
    used to fill whichever side ``primary_starters`` leaves without a pitcher.

    Args:
        props_feed: Props rows. Must provide "event_id", "away_team" and
            "home_team" columns, otherwise an empty dict is returned.
        primary_starters: Optional starters map keyed by normalized
            (away, home) team names; pitchers already present there are kept.
        pitcher_statcast_df: Optional pitcher rows forwarded to
            ``_infer_pitcher_team_from_rows`` to work out a candidate's team.

    Returns:
        A dict keyed by (normalized away team, normalized home team). Each value
        carries "away_team_raw", "home_team_raw", "away_pitcher", "home_pitcher",
        "away_pitcher_source", "home_pitcher_source", "starter_cache_source" and
        the boolean "fallback_used". Empty when there is no usable input.
    """
    if props_feed is None or props_feed.empty:
        return {}

    working = props_feed.copy()
    # Missing columns fall back to an empty Series so the filter below selects nothing.
    market_series = working.get("market_family", working.get("market", pd.Series(dtype="object", index=working.index)))
    scope_series = working.get("selection_scope", pd.Series(dtype="object", index=working.index))
    # Keep only pitcher strikeout rows; the comparison ignores case and surrounding whitespace.
    working = working[
        market_series.fillna("").astype(str).str.strip().str.lower().eq("k")
        & scope_series.fillna("").astype(str).str.strip().str.lower().eq("pitcher")
    ].copy()
    if working.empty:
        return {}

    results: dict[tuple[str, str], dict[str, str | None]] = {}
    primary = dict(primary_starters or {})
    # All three grouping columns are required; bail out if any is absent.
    group_cols = [col for col in ("event_id", "away_team", "home_team") if col in working.columns]
    if len(group_cols) < 3:
        return {}

    for _, event_df in working.groupby(group_cols, dropna=False):
        away_team = str(event_df["away_team"].iloc[0] or "").strip()
        home_team = str(event_df["home_team"].iloc[0] or "").strip()
        away_norm = _normalize_team(away_team)
        home_norm = _normalize_team(home_team)
        if not away_norm or not home_norm:
            continue

        # Starters already known for this exact normalized (away, home) key, if any.
        primary_payload = dict(primary.get((away_norm, home_norm)) or {})
        away_pitcher = str(primary_payload.get("away_pitcher") or "").strip() or None
        home_pitcher = str(primary_payload.get("home_pitcher") or "").strip() or None
        away_source = str(primary_payload.get("away_pitcher_source") or "").strip() or "unresolved"
        home_source = str(primary_payload.get("home_pitcher_source") or "").strip() or "unresolved"

        candidate_names = [
            str(name).strip()
            for name in event_df.get("player_name_raw", pd.Series(dtype="object")).dropna().astype(str).tolist()
            if str(name).strip()
        ]
        # De-duplicate candidates whose normalized names are equal, keeping the first spelling seen.
        unique_candidates: list[str] = []
        for candidate in candidate_names:
            if not any(_names_match(candidate, existing) for existing in unique_candidates):
                unique_candidates.append(candidate)

        # Work out which side each candidate pitches for: a known primary starter first,
        # then the statcast rows, then the current-roster lookup.
        candidate_team_map: dict[str, str] = {}
        for candidate in unique_candidates:
            if away_pitcher and _names_match(candidate, away_pitcher):
                candidate_team_map[candidate] = away_team
                continue
            if home_pitcher and _names_match(candidate, home_pitcher):
                candidate_team_map[candidate] = home_team
                continue
            inferred_team = _infer_pitcher_team_from_rows(
                pitcher_name=candidate,
                pitcher_statcast_df=pitcher_statcast_df,
                away_team=away_team,
                home_team=home_team,
            )
            if not inferred_team:
                inferred_team = lookup_batter_current_team(candidate, away_team, home_team) or ""
            if inferred_team:
                candidate_team_map[candidate] = inferred_team

        # Number of sides the primary map left without a pitcher.
        blank_sides = int(not away_pitcher) + int(not home_pitcher)
        # Number of sides filled from the odds candidates in this event.
        assigned_from_odds = 0

        # Fill each blank side with the first candidate mapped to that team.
        if not away_pitcher:
            for candidate, team_name in candidate_team_map.items():
                if _normalize_team(team_name) == away_norm:
                    away_pitcher = candidate
                    assigned_from_odds += 1
                    break
        if not home_pitcher:
            for candidate, team_name in candidate_team_map.items():
                if _normalize_team(team_name) == home_norm and not _names_match(candidate, away_pitcher or ""):
                    home_pitcher = candidate
                    assigned_from_odds += 1
                    break

        # Candidates not yet assigned to either side.
        unresolved_candidates = [
            candidate
            for candidate in unique_candidates
            if not _names_match(candidate, away_pitcher or "") and not _names_match(candidate, home_pitcher or "")
        ]
        if len(unresolved_candidates) == 1:
            # One leftover candidate and exactly one blank side: pair them by elimination.
            if not away_pitcher and home_pitcher:
                away_pitcher = unresolved_candidates[0]
                assigned_from_odds += 1
            elif not home_pitcher and away_pitcher:
                home_pitcher = unresolved_candidates[0]
                assigned_from_odds += 1
        elif len(unresolved_candidates) == 2 and not away_pitcher and not home_pitcher:
            # Last resort: 2 candidates, both sides blank, team inference failed for both.
            # Assign alphabetically — arbitrary but deterministic.
            sorted_candidates = sorted(unresolved_candidates)
            away_pitcher = sorted_candidates[0]
            home_pitcher = sorted_candidates[1]
            assigned_from_odds += 2

        # Label describing how the odds-derived assignment (if any) was made.
        odds_source = "unresolved"
        if assigned_from_odds >= 2 or (blank_sides >= 2 and away_pitcher and home_pitcher):
            odds_source = "oddsapi_pitcher_strikeouts_two_candidate_match"
        elif assigned_from_odds == 1:
            odds_source = "oddsapi_pitcher_strikeouts_single_candidate_match"
        elif len(unique_candidates) > 2 or (len(unique_candidates) >= 2 and not away_pitcher and not home_pitcher):
            odds_source = "oddsapi_pitcher_strikeouts_ambiguous"

        # A side that was unresolved in the primary map but now has a pitcher takes the odds label.
        if away_source == "unresolved" and away_pitcher:
            away_source = odds_source if odds_source != "unresolved" else "oddsapi_pitcher_strikeouts_single_candidate_match"
        if home_source == "unresolved" and home_pitcher:
            home_source = odds_source if odds_source != "unresolved" else "oddsapi_pitcher_strikeouts_single_candidate_match"

        # Game-level label: pure Stats API, Stats API plus odds fallback, or odds only.
        if away_source.startswith("statsapi_") or home_source.startswith("statsapi_"):
            starter_cache_source = (
                "statsapi_plus_oddsapi_fallback"
                if (away_source.startswith("oddsapi_") or home_source.startswith("oddsapi_"))
                else "statsapi_probable_pitcher"
            )
        elif away_source.startswith("oddsapi_") or home_source.startswith("oddsapi_"):
            starter_cache_source = odds_source if odds_source != "unresolved" else "oddsapi_pitcher_strikeouts_single_candidate_match"
        else:
            starter_cache_source = odds_source

        results[(away_norm, home_norm)] = {
            "away_team_raw": away_team,
            "home_team_raw": home_team,
            "away_pitcher": away_pitcher,
            "home_pitcher": home_pitcher,
            # A side without a pitcher is always reported as "unresolved".
            "away_pitcher_source": away_source if away_pitcher else "unresolved",
            "home_pitcher_source": home_source if home_pitcher else "unresolved",
            "starter_cache_source": starter_cache_source if (away_pitcher or home_pitcher or odds_source != "unresolved") else "unresolved",
            "fallback_used": bool(
                str(away_source).startswith("oddsapi_") or str(home_source).startswith("oddsapi_")
            ),
        }
    return results


def merge_probable_starters_with_odds_fallback(
    primary_starters: dict[tuple[str, str], dict[str, Any]] | None,
    odds_fallback_starters: dict[tuple[str, str], dict[str, Any]] | None,
) -> dict[tuple[str, str], dict[str, str | None]]:
    """
    Merge the primary starters map with the odds-derived fallback map.

    For every game key present in either map, each side's pitcher is taken from
    the primary map when it has one and from the fallback map otherwise. The
    reported source follows the pitcher: a name supplied by the fallback map is
    labelled with the fallback's source rather than with the primary map's
    "unresolved" placeholder, so ``fallback_used`` and ``starter_cache_source``
    reflect where the name actually came from.

    Args:
        primary_starters: Starters map keyed by (away, home); ``None`` is treated
            as empty.
        odds_fallback_starters: Fallback map with the same shape; ``None`` is
            treated as empty.

    Returns:
        A dict over the union of both key sets (iterated in sorted key order).
        Each value carries "away_team_raw", "home_team_raw", "away_pitcher",
        "home_pitcher", "away_pitcher_source", "home_pitcher_source",
        "starter_cache_source" and the boolean "fallback_used".
    """
    primary = dict(primary_starters or {})
    fallback = dict(odds_fallback_starters or {})

    def _text(value: Any) -> str:
        """Coerce a possibly-missing payload value to a stripped string."""
        return str(value or "").strip()

    def _pick_side(
        primary_payload: dict[str, Any],
        fallback_payload: dict[str, Any],
        side: str,
    ) -> tuple[str | None, str]:
        """Return (pitcher, source) for one side ("away" or "home")."""
        primary_name = _text(primary_payload.get(f"{side}_pitcher"))
        fallback_name = _text(fallback_payload.get(f"{side}_pitcher"))
        primary_source = _text(primary_payload.get(f"{side}_pitcher_source"))
        fallback_source = _text(fallback_payload.get(f"{side}_pitcher_source"))

        if not primary_name and fallback_name:
            # The name comes from the fallback map, so prefer its source and skip
            # "unresolved" placeholders, which describe a side with no pitcher.
            for source in (fallback_source, primary_source):
                if source and source != "unresolved":
                    return fallback_name, source
            return fallback_name, "unresolved"

        return (primary_name or None), (primary_source or fallback_source or "unresolved")

    merged: dict[tuple[str, str], dict[str, str | None]] = {}
    for key in sorted(set(primary) | set(fallback)):
        primary_payload = dict(primary.get(key) or {})
        fallback_payload = dict(fallback.get(key) or {})

        away_pitcher, away_source = _pick_side(primary_payload, fallback_payload, "away")
        home_pitcher, home_source = _pick_side(primary_payload, fallback_payload, "home")

        fallback_used = away_source.startswith("oddsapi_") or home_source.startswith("oddsapi_")
        fallback_cache_source = _text(fallback_payload.get("starter_cache_source"))
        if away_source.startswith("statsapi_") or home_source.startswith("statsapi_"):
            starter_cache_source = "statsapi_plus_oddsapi_fallback" if fallback_used else "statsapi_probable_pitcher"
        elif fallback_used:
            starter_cache_source = fallback_cache_source or "oddsapi_pitcher_strikeouts_single_candidate_match"
        else:
            starter_cache_source = fallback_cache_source or "unresolved"

        merged[key] = {
            "away_team_raw": str(primary_payload.get("away_team_raw") or fallback_payload.get("away_team_raw") or "").strip(),
            "home_team_raw": str(primary_payload.get("home_team_raw") or fallback_payload.get("home_team_raw") or "").strip(),
            "away_pitcher": away_pitcher,
            "home_pitcher": home_pitcher,
            "away_pitcher_source": away_source,
            "home_pitcher_source": home_source,
            "starter_cache_source": starter_cache_source,
            "fallback_used": fallback_used,
        }
    return merged


def lookup_pitchers_for_game(
    away_team: str,
    home_team: str,
    starters_map: dict[tuple[str, str], dict[str, str | None]],
) -> dict[str, str | None]:
    """
    Look up probable pitchers for a specific game matchup.

    Matching is attempted in three passes, from strict to loose:

    1. exact match on the ``_normalize_team`` form of both names;
    2. match on canonical team codes (bridges "sfg" vs "san francisco giants");
    3. substring match in either direction on the normalized names.

    The substring pass never matches an empty name, and it rejects a pair of
    names that both resolve to known but different teams (for example "ari" is
    a substring of "seattle mariners" but is a different club).

    Args:
        away_team: Away team name in any supported format.
        home_team: Home team name in any supported format.
        starters_map: Starters map keyed by (away, home) team names.

    Returns:
        The matching entry from ``starters_map``, or
        {"home_pitcher": None, "away_pitcher": None} when nothing matches.
    """
    away_norm = _normalize_team(away_team)
    home_norm = _normalize_team(home_team)

    # Exact normalized match
    entry = starters_map.get((away_norm, home_norm))
    if entry:
        return entry

    # Canonical match: bridges abbreviations vs full names (e.g. "sfg" == "san francisco giants")
    away_canon = _canonical_team(away_norm)
    home_canon = _canonical_team(home_norm)
    for (k_away, k_home), v in starters_map.items():
        if _canonical_team(k_away) == away_canon and _canonical_team(k_home) == home_canon:
            return v

    def _loosely_same_team(query: str, candidate: str) -> bool:
        """Substring comparison that cannot contradict the canonical table."""
        # "" is a substring of every string, so an empty side must never match.
        if not query or not candidate:
            return False
        if query not in candidate and candidate not in query:
            return False
        query_code = _MLB_TEAM_CANONICAL.get(query)
        candidate_code = _MLB_TEAM_CANONICAL.get(_normalize_team(candidate))
        if query_code is not None and candidate_code is not None:
            # Both names are known teams: trust the table over the substring hit.
            return query_code == candidate_code
        return True

    # Partial substring fallback
    for (k_away, k_home), v in starters_map.items():
        if _loosely_same_team(away_norm, k_away) and _loosely_same_team(home_norm, k_home):
            return v

    return {"home_pitcher": None, "away_pitcher": None}


# ---------------------------------------------------------------------------
# Current-season roster lookup (batter team resolution fallback)
# ---------------------------------------------------------------------------

# Process-wide roster cache filled by fetch_mlb_current_roster_map; stays None until a
# non-empty roster has been loaded. NOTE: the cache is not keyed by season.
_ROSTER_MAP_CACHE: dict[str, str] | None = None
# Set to time.monotonic() + 300 after a failed or empty fetch; until it passes,
# fetch_mlb_current_roster_map returns {} without calling the API again.
_ROSTER_MAP_EMPTY_UNTIL: float = 0.0  # monotonic timestamp after which retry is allowed


def fetch_mlb_current_roster_map(season: int = 2026) -> dict[str, str]:
    """
    Return {normalized_player_name: canonical_team_code} from the MLB Stats API
    players endpoint.

    The first non-empty result is cached in ``_ROSTER_MAP_CACHE`` for the rest of
    the process and returned on every later call. The cache is not keyed by
    ``season``, so a later call with a different season still gets the cached map.
    After a failed or empty fetch, ``{}`` is returned and the API is not contacted
    again for 5 minutes.
    """
    global _ROSTER_MAP_CACHE, _ROSTER_MAP_EMPTY_UNTIL

    cached = _ROSTER_MAP_CACHE
    if cached is not None:
        return cached

    # Still inside the back-off window opened by an earlier failed/empty fetch.
    if time.monotonic() < _ROSTER_MAP_EMPTY_UNTIL:
        return {}

    try:
        response = requests.get(
            "https://statsapi.mlb.com/api/v1/sports/1/players",
            params={"season": season, "gameType": "R,S"},
            timeout=15,
        )
        response.raise_for_status()
        payload = response.json()
    except Exception as exc:
        _log.warning("[mlb_roster] fetch failed: %s", exc)
        _ROSTER_MAP_EMPTY_UNTIL = time.monotonic() + 300  # retry in 5 min
        return {}

    entries = payload.get("people", []) if isinstance(payload, dict) else []
    team_by_player: dict[str, str] = {}
    for entry in entries:
        display_name = str(entry.get("fullName", "") or "")
        club_name = str((entry.get("currentTeam") or {}).get("name", "") or "")
        if display_name and club_name:
            player_key = _normalize_person(display_name)
            team_code = _canonical_team(club_name)
            if player_key and team_code:
                # A later entry with the same normalized name replaces the earlier one.
                team_by_player[player_key] = team_code

    _log.warning("[mlb_roster] loaded %d players for season %d", len(team_by_player), season)

    if team_by_player:
        _ROSTER_MAP_CACHE = team_by_player
        return team_by_player

    # Nothing usable came back: do not cache, and back off before retrying.
    _ROSTER_MAP_EMPTY_UNTIL = time.monotonic() + 300  # retry in 5 min
    return {}


def lookup_batter_current_team(
    batter_name: str,
    away_team: str,
    home_team: str,
    season: int = 2026,
) -> str | None:
    """
    Resolve which of the two teams in a game a player currently belongs to.

    The player's canonical team code is read from the roster map returned by
    ``fetch_mlb_current_roster_map`` and compared with the canonical codes of
    ``away_team`` and ``home_team`` (away is checked first).

    Returns:
        ``away_team`` or ``home_team`` exactly as passed in, or ``None`` when
        the roster is unavailable, the player is not in it, or the player's
        team is neither side of this game.
    """
    team_by_player = fetch_mlb_current_roster_map(season)
    if not team_by_player:
        return None

    player_team = team_by_player.get(_normalize_person(batter_name))
    if not player_team:
        return None

    for side_name in (away_team, home_team):
        # The trailing truthiness check keeps an empty team name from being returned.
        if player_team == _canonical_team(side_name) and side_name:
            return side_name
    return None