Spaces:
Sleeping
Sleeping
File size: 2,954 Bytes
abeeae7 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 | from __future__ import annotations
from typing import Any
import pandas as pd
def _safe_text(value: Any) -> str:
    """Return ``value`` as a stripped string, or ``""`` when it is missing.

    ``None``, pandas/NumPy missing markers (``NaN``, ``pd.NA``, ``NaT``) and
    any other falsy value all collapse to the empty string, so callers can
    use a plain truthiness check to detect "no usable text".
    """
    # NaN is truthy, so ``str(value or "")`` alone would yield the literal
    # "nan"; ``pd.NA`` would raise on ``or``.  ``pd.isna`` returns an array
    # for list-likes, so only consult it for scalars.
    if value is None or (pd.api.types.is_scalar(value) and pd.isna(value)):
        return ""
    return str(value or "").strip()
def _event_is_hit(event: str) -> int:
    """Return 1 if ``event`` names a hit (single/double/triple/home_run), else 0.

    The comparison ignores surrounding whitespace and letter case; falsy
    inputs are treated as the empty string.
    """
    label = str(event if event else "").strip().lower()
    return 1 if label in ("single", "double", "triple", "home_run") else 0
def _event_is_hr(event: str) -> int:
    """Return 1 if ``event`` normalizes to ``"home_run"``, else 0.

    The comparison ignores surrounding whitespace and letter case; falsy
    inputs are treated as the empty string.
    """
    label = str(event if event else "").strip().lower()
    return 1 if label == "home_run" else 0
def _event_total_bases(event: str) -> int:
    """Return the total bases credited for ``event``.

    single -> 1, double -> 2, triple -> 3, home_run -> 4; anything else
    (including falsy input) -> 0.  Whitespace and letter case are ignored.
    """
    bases_by_event = {"single": 1, "double": 2, "triple": 3, "home_run": 4}
    label = str(event if event else "").strip().lower()
    return bases_by_event.get(label, 0)
def build_batter_realization_rows(
    batter_prop_outcomes_df: pd.DataFrame,
    statcast_df: pd.DataFrame,
    graded_at: str,
) -> pd.DataFrame:
    """
    First-pass realized batter outcome scaffold.

    For every row of ``batter_prop_outcomes_df`` with a usable
    ``batter_name``, finds the statcast rows whose stripped ``player_name``
    equals that name and records whether the batter:
    - recorded at least one hit       (``realized_hit``)
    - recorded at least one HR        (``realized_hr``)
    - recorded 2+ total bases         (``realized_tb2p``)

    Totals are accumulated over every matching row in ``statcast_df``.
    This is not yet game-perfect by game_pk, but it upgrades outcomes
    from pending to actual observed values within the current loaded dataset.

    NOTE(review): matching uses ``statcast_df["player_name"]``; confirm that
    this column identifies the batter (not the pitcher) in the loaded feed.

    Args:
        batter_prop_outcomes_df: Prop outcome rows; ``batter_name`` is read
            from each row and all original columns are carried through.
        statcast_df: Loaded statcast rows; must contain ``player_name`` and
            ``events`` columns.
        graded_at: Value written to the ``graded_at`` column of every
            returned row.

    Returns:
        One row per outcome row that has a non-missing, non-blank
        ``batter_name``, with ``graded_at``, ``realized_hit``,
        ``realized_hr``, ``realized_tb2p``, ``grade_status`` and
        ``outcome_source`` added.  Batters with no matching statcast rows
        get ``None`` realized values, ``grade_status="pending"`` and
        ``outcome_source="statcast_no_match"``.  An empty DataFrame is
        returned when either input is ``None``/empty, when the required
        statcast columns are absent, or when no outcome row has a usable
        name.
    """
    if batter_prop_outcomes_df is None or batter_prop_outcomes_df.empty:
        return pd.DataFrame()
    if statcast_df is None or statcast_df.empty:
        return pd.DataFrame()
    if "player_name" not in statcast_df.columns or "events" not in statcast_df.columns:
        return pd.DataFrame()

    # Pass 1: collect the outcome rows that carry a usable batter name.
    pending: list[tuple[str, dict[str, Any]]] = []
    for _, outcome_row in batter_prop_outcomes_df.iterrows():
        raw_name = outcome_row.get("batter_name")
        # A NaN name would stringify to "nan" and could then match statcast
        # rows whose missing player_name stringifies the same way.
        if raw_name is None or (pd.api.types.is_scalar(raw_name) and pd.isna(raw_name)):
            continue
        batter_name = _safe_text(raw_name)
        if not batter_name:
            continue
        pending.append((batter_name, outcome_row.to_dict()))
    if not pending:
        return pd.DataFrame()

    # Normalize the statcast names once (not once per outcome row) and keep
    # only rows for batters we were actually asked about.
    named_rows = statcast_df[statcast_df["player_name"].notna().to_numpy()]
    player_keys = named_rows["player_name"].astype(str).str.strip()
    event_text = named_rows["events"].fillna("").astype(str)
    wanted = player_keys.isin({name for name, _ in pending}).to_numpy()

    # Pass 2: tally [hits, home runs, total bases] per requested batter.
    # An entry is created for every matched row, even with a blank event, so
    # "matched but no hits" stays distinct from "no statcast rows at all".
    tallies: dict[str, list[int]] = {}
    for player, event in zip(player_keys[wanted], event_text[wanted]):
        tally = tallies.setdefault(player, [0, 0, 0])
        tally[0] += _event_is_hit(event)
        tally[1] += _event_is_hr(event)
        tally[2] += _event_total_bases(event)

    rows: list[dict[str, Any]] = []
    for batter_name, row_dict in pending:
        tally = tallies.get(batter_name)
        if tally is None:
            realized_hit = None
            realized_hr = None
            realized_tb2p = None
            grade_status = "pending"
            outcome_source = "statcast_no_match"
        else:
            hit_count, hr_count, total_bases = tally
            realized_hit = int(hit_count > 0)
            realized_hr = int(hr_count > 0)
            realized_tb2p = int(total_bases >= 2)
            grade_status = "graded"
            outcome_source = "statcast_loaded_window"
        row_dict["graded_at"] = graded_at
        row_dict["realized_hit"] = realized_hit
        row_dict["realized_hr"] = realized_hr
        row_dict["realized_tb2p"] = realized_tb2p
        row_dict["grade_status"] = grade_status
        row_dict["outcome_source"] = outcome_source
        rows.append(row_dict)
    return pd.DataFrame(rows)