Spaces:
Running
Running
| from __future__ import annotations | |
| from typing import Any | |
| import pandas as pd | |
def _safe_text(value: Any) -> str:
    """Return ``value`` as whitespace-stripped text, or ``""`` when it is missing.

    ``None`` and pandas/NumPy missing scalars (``NaN``, ``pd.NA``, ``pd.NaT``)
    all map to the empty string.  Without that check ``str(float("nan"))``
    yields the truthy text ``"nan"`` and ``pd.NA or ""`` raises ``TypeError``,
    so a missing cell read from a DataFrame row would look like a real value.

    Other falsy values (``0``, ``False``, ``""``) also map to ``""``; every
    remaining value is passed through ``str()`` and stripped.
    """
    # pd.isna() returns an array for list-likes, so only probe scalars.
    if value is None or (pd.api.types.is_scalar(value) and pd.isna(value)):
        return ""
    return str(value or "").strip()
def _event_is_hit(event: str) -> int:
    """Return 1 when ``event`` names a base hit, otherwise 0.

    A falsy ``event`` counts as "no event".  Anything else is converted with
    ``str()``, stripped of surrounding whitespace and lower-cased before being
    compared against single, double, triple and home_run.
    """
    if not event:
        return 0
    label = str(event).strip().lower()
    return 1 if label in ("single", "double", "triple", "home_run") else 0
def _event_is_hr(event: str) -> int:
    """Return 1 when ``event`` is a home run, otherwise 0.

    The comparison ignores surrounding whitespace and letter case; a falsy
    ``event`` is treated as the empty string and therefore yields 0.
    """
    label = str(event).strip().lower() if event else ""
    return 1 if label == "home_run" else 0
def _event_total_bases(event: str) -> int:
    """Return the total bases credited for ``event``.

    single -> 1, double -> 2, triple -> 3, home_run -> 4; every other value
    (including a falsy ``event``) is worth 0.  Matching ignores surrounding
    whitespace and letter case.
    """
    bases_by_event = {"single": 1, "double": 2, "triple": 3, "home_run": 4}
    label = str(event if event else "").strip().lower()
    return bases_by_event.get(label, 0)
def build_batter_realization_rows(
    batter_prop_outcomes_df: pd.DataFrame,
    statcast_df: pd.DataFrame,
    graded_at: str,
) -> pd.DataFrame:
    """Grade batter prop outcome rows against the currently loaded statcast rows.

    First-pass realized-outcome scaffold.  A statcast row counts toward a
    batter when its whitespace-stripped ``player_name`` equals the outcome
    row's whitespace-stripped ``batter_name`` (case-sensitive).  Matching is
    by name only, across every loaded row: it is not yet game-perfect by
    game_pk, so the grades describe the whole loaded window.

    Each returned row is the original outcome row plus these keys:

    - ``graded_at``: the ``graded_at`` argument, unchanged.
    - ``realized_hit``: 1 if any matched event is a single, double, triple or
      home_run, else 0.
    - ``realized_hr``: 1 if any matched event is a home_run, else 0.
    - ``realized_tb2p``: 1 if the matched events add up to 2+ total bases,
      else 0.
    - ``grade_status``: ``"graded"`` when at least one statcast row matched,
      otherwise ``"pending"`` (the three ``realized_*`` values are then None).
    - ``outcome_source``: ``"statcast_loaded_window"`` or
      ``"statcast_no_match"`` respectively.

    Outcome rows whose ``batter_name`` is missing (None/NaN/NA) or blank are
    skipped, and statcast rows with a missing ``player_name`` are ignored, so
    a missing name can never match another missing name.

    Args:
        batter_prop_outcomes_df: Outcome rows to grade; read via ``batter_name``.
        statcast_df: Event rows; must have ``player_name`` and ``events``.
        graded_at: Value stamped on every returned row.

    Returns:
        One row per gradable outcome row.  An empty DataFrame is returned when
        either input is None/empty, when ``statcast_df`` lacks a required
        column, or when no outcome row has a usable name.
    """
    if batter_prop_outcomes_df is None or batter_prop_outcomes_df.empty:
        return pd.DataFrame()
    if statcast_df is None or statcast_df.empty:
        return pd.DataFrame()
    if not {"player_name", "events"}.issubset(statcast_df.columns):
        return pd.DataFrame()

    # Same event -> total-bases table as the module's _event_total_bases helper.
    bases_by_event = {"single": 1, "double": 2, "triple": 3, "home_run": 4}

    # Aggregate the statcast window once, instead of re-filtering the whole
    # frame for every outcome row.
    has_name = statcast_df["player_name"].notna()
    players = statcast_df.loc[has_name, "player_name"].astype(str).str.strip()
    bases = (
        statcast_df.loc[has_name, "events"]
        .astype(str)
        .str.strip()
        .str.lower()
        .map(bases_by_event)  # anything that is not a hit (incl. missing) -> NaN
        .fillna(0)
        .astype(int)
    )
    per_player = (
        # to_numpy() sidesteps index alignment when statcast_df has duplicate labels.
        pd.DataFrame({"player": players.to_numpy(), "bases": bases.to_numpy()})
        .groupby("player", sort=False)["bases"]
        .agg(["sum", "max"])
    )
    total_bases_by_player = per_player["sum"].to_dict()
    # Only a home run is worth 4 bases, so max == 4 means "hit at least one HR".
    best_event_by_player = per_player["max"].to_dict()

    rows: list[dict[str, Any]] = []
    # iterrows() is kept so each emitted row carries exactly the values the
    # original row Series held.
    for _, outcome_row in batter_prop_outcomes_df.iterrows():
        raw_name = outcome_row.get("batter_name")
        # str(NaN) would be the truthy text "nan"; treat missing names as absent.
        if raw_name is None or (pd.api.types.is_scalar(raw_name) and pd.isna(raw_name)):
            continue
        batter_name = str(raw_name or "").strip()
        if not batter_name:
            continue

        total_bases = total_bases_by_player.get(batter_name)
        if total_bases is None:
            realized_hit = None
            realized_hr = None
            realized_tb2p = None
            grade_status = "pending"
            outcome_source = "statcast_no_match"
        else:
            # Every hit is worth at least one base, so "any hit" == "bases > 0".
            realized_hit = int(total_bases > 0)
            realized_hr = int(best_event_by_player[batter_name] == 4)
            realized_tb2p = int(total_bases >= 2)
            grade_status = "graded"
            outcome_source = "statcast_loaded_window"

        row_dict = outcome_row.to_dict()
        row_dict["graded_at"] = graded_at
        row_dict["realized_hit"] = realized_hit
        row_dict["realized_hr"] = realized_hr
        row_dict["realized_tb2p"] = realized_tb2p
        row_dict["grade_status"] = grade_status
        row_dict["outcome_source"] = outcome_source
        rows.append(row_dict)

    return pd.DataFrame(rows)