# 2026_MLB_Model / visualization / card_lab_page.py
# Last commit 8a468b1 (Syntrex): "Upgrade Game Summary to Kasper HUD poster design"
from __future__ import annotations
import re
import streamlit as st
from PIL import Image
from sqlalchemy import text
from visualization.cards.card_data import (
build_hitter_card_data,
build_pitcher_card_data,
build_game_summary_card_data,
)
from visualization.cards.poster_renderer import (
render_hitter_poster,
render_pitcher_poster,
render_game_summary_poster,
)
from visualization.cards.card_queries import (
get_pitcher_selector_names,
get_hitter_selector_names,
get_pitcher_id_by_name,
get_player_card_window_df,
get_recent_completed_games_for_card_lab,
get_game_summary_window_df,
get_game_batter_log_df,
get_game_players_for_card_lab,
get_batter_mlbam_id,
_PITCHER_SELECTOR_MIN_EVENTS,
_HITTER_SELECTOR_MIN_EVENTS,
)
from visualization.cards.player_assets import resolve_player_image
from visualization.cards.player_identity import (
_HITTER_SEASONS_PATH,
_PITCHER_SEASONS_PATH,
_IDENTITY_MAP_PATH,
)
from utils.logger import logger
def normalize_name(name: str) -> str:
    """Convert ``'Last, First'`` to ``'First Last'``; pass other formats through.

    Only the first comma is treated as the separator, so
    ``'Griffey, Ken, Jr.'`` becomes ``'Ken, Jr. Griffey'``.

    Args:
        name: Player name in either format.  Falsy values (``None``, ``""``)
            yield ``""``; non-string values are coerced with ``str()``.

    Returns:
        The re-ordered, whitespace-trimmed name.  If one side of the comma is
        empty (``'Smith,'``), only the non-empty side is returned, with no
        stray leading or trailing space.
    """
    if not name:
        return ""
    # Coerce before the comma test so non-string input cannot raise TypeError.
    cleaned = str(name).strip()
    last, comma, first = cleaned.partition(",")
    if not comma:
        return cleaned
    # Skip an empty half so "Smith," does not turn into " Smith".
    return " ".join(part for part in (first.strip(), last.strip()) if part)
# ---------------------------------------------------------------------------
# Card generation functions — button-click only, no caching
# ---------------------------------------------------------------------------
def _gen_hitter_bytes(conn, player_name, batter_id, mode, year, date, start_date, end_date, fmt, player_pil):
    """Build a hitter poster for one player and data window.

    The window arguments (``mode``, ``year``, ``date``, ``start_date``,
    ``end_date``) are forwarded unchanged to both the window query and the
    card-data builder; ``batter_id`` is forwarded to the query only.

    Returns:
        A 3-tuple ``(image, timeframe, data_quality)`` where ``image`` is the
        value returned by ``render_hitter_poster`` and the other two are read
        from the card payload (``""`` when absent).  When the window query
        returns an empty frame the result is ``(None, "", "limited")``.
    """
    window = {
        "mode": mode,
        "year": year,
        "date": date,
        "start_date": start_date,
        "end_date": end_date,
    }
    frame = get_player_card_window_df(
        conn, player_name, "Hitter", batter_id=batter_id, **window
    )
    if frame.empty:
        return None, "", "limited"

    card = build_hitter_card_data(player_name, frame, **window)
    # The DB may store "Last, First"; the poster should show "First Last".
    card["player_name"] = normalize_name(card.get("player_name", player_name))
    poster = render_hitter_poster(card, player_img=player_pil, fmt=fmt)
    return poster, card.get("timeframe", ""), card.get("data_quality", "")
def _gen_pitcher_bytes(conn, player_name, pitcher_id, mode, year, date, start_date, end_date, fmt, player_pil):
    """Build a pitcher poster for one player and data window.

    The window arguments (``mode``, ``year``, ``date``, ``start_date``,
    ``end_date``) are forwarded unchanged to both the window query and the
    card-data builder; ``pitcher_id`` is forwarded to the query only.

    Returns:
        A 3-tuple ``(image, timeframe, data_quality)`` where ``image`` is the
        value returned by ``render_pitcher_poster`` and the other two are read
        from the card payload (``""`` when absent).  When the window query
        returns an empty frame the result is ``(None, "", "limited")``.
    """
    window = {
        "mode": mode,
        "year": year,
        "date": date,
        "start_date": start_date,
        "end_date": end_date,
    }
    frame = get_player_card_window_df(
        conn, player_name, "Pitcher", pitcher_id=pitcher_id, **window
    )
    if frame.empty:
        return None, "", "limited"

    card = build_pitcher_card_data(player_name, frame, **window)
    # Show "First Last" on the poster even if the payload holds "Last, First".
    card["player_name"] = normalize_name(card.get("player_name", player_name))
    poster = render_pitcher_poster(card, player_img=player_pil, fmt=fmt)
    return poster, card.get("timeframe", ""), card.get("data_quality", "")
def _gen_game_bytes(
    conn, game_pk, away_team, home_team, away_score, home_score, game_date,
    player_name, fmt,
    selected_hitters=None,
    selected_pitchers=None,
):
    """Build a Game Summary poster for a single game.

    Loads the game's summary window and batter log, assembles a ``game_row``
    dict from the score/team/date arguments, builds the card payload and
    renders it.

    Returns:
        A 2-tuple ``(image, timeframe)``: the value returned by
        ``render_game_summary_poster`` and the payload's ``"timeframe"``
        entry (``""`` when absent).

    Raises:
        TypeError / ValueError: from ``int(game_pk)`` when ``game_pk`` is not
            convertible to an integer.
    """
    events = get_game_summary_window_df(conn, game_pk, player_name=player_name)
    batter_log = get_game_batter_log_df(conn, int(game_pk))

    game_row = dict(
        game_pk=game_pk,
        away_team=away_team,
        home_team=home_team,
        away_score=away_score,
        home_score=home_score,
        game_date=game_date,
    )
    card = build_game_summary_card_data(
        game_pk,
        events,
        game_row,
        player_name,
        selected_hitters=selected_hitters,
        selected_pitchers=selected_pitchers,
        batter_log_df=batter_log,
    )
    poster = render_game_summary_poster(card, fmt)
    return poster, card.get("timeframe", "")
# ---------------------------------------------------------------------------
# Main render function
# ---------------------------------------------------------------------------
def _lookup_hitter_batter_id(display_name: str, season: int | None) -> int | None:
    """Return the ``player_id`` stored for *display_name* in the hitter seasons Parquet.

    Rows are matched on the ``display_name`` column.  When *season* is given
    and the player has a row for that ``Season``, that row is preferred;
    otherwise the first matching row is used.  Returns ``None`` when the
    Parquet file is missing, no row matches, or the stored id is null.
    """
    if not _HITTER_SEASONS_PATH.exists():
        return None
    import pandas as pd  # lazy, function-scope import as elsewhere in this module

    seasons = pd.read_parquet(_HITTER_SEASONS_PATH)
    rows = seasons[seasons["display_name"] == display_name]
    if season is not None:
        season_rows = rows[rows["Season"] == season]
        if not season_rows.empty:
            rows = season_rows
    if rows.empty:
        return None
    player_id = rows.iloc[0]["player_id"]
    return int(player_id) if pd.notna(player_id) else None


def _card_download_name(player, card_type, timeframe, fmt) -> str:
    """Build the download file name ``kasper_<type>_<player>[_<timeframe>].<ext>``.

    Each part is lower-cased, spaces become ``_`` and anything outside
    ``[a-z0-9_-]`` is removed.  Accented letters are folded to ASCII first so
    they are not simply dropped.  Empty parts are skipped, which avoids a
    trailing underscore when there is no timeframe.
    """
    import unicodedata

    def slug(value) -> str:
        folded = unicodedata.normalize("NFKD", str(value))
        folded = folded.encode("ascii", "ignore").decode("ascii")
        return re.sub(r"[^a-z0-9_\-]", "", folded.replace(" ", "_").lower())

    parts = (
        "kasper",
        slug(card_type or "card"),
        slug(player or "game"),
        # "/" in a timeframe becomes "-" rather than being stripped.
        slug(str(timeframe or "").replace("/", "-")),
    )
    stem = "_".join(part for part in parts if part)
    return f"{stem}.{str(fmt or 'png').lower()}"


def render_card_lab(conn) -> None:
    """Render the Kasper Card Lab page.

    The page lets the user pick a card type (Hitter, Pitcher or Game Summary),
    a data window (Hitter/Pitcher only) and a player or game, shows a Debug
    expander, and generates a poster when "Generate Card" is clicked.

    A successfully generated poster is stored in ``st.session_state`` under
    ``card_bytes``, ``card_type``, ``card_fmt``, ``card_player`` and
    ``card_timeframe``; these keys are written together, and only on success,
    so the preview and its download file name always describe the same card.

    Args:
        conn: Database connection passed to the card query helpers; the Debug
            expander also calls ``conn.execute(text(...))`` on it.
    """
    st.subheader("Kasper Card Lab")
    st.caption("Generate downloadable Kasper scouting report posters.")

    # ---- Card type ----
    card_type = st.radio("Card Type", ["Hitter", "Pitcher", "Game Summary"], horizontal=True)

    # ---- Timeframe controls (Hitter / Pitcher only) ----
    date = start_date = end_date = None
    year = None
    mode_key = "season"
    pitcher_id = None  # set in pitcher selectbox block

    if card_type in ("Hitter", "Pitcher"):
        st.markdown("**Data Window**")
        tcol1, tcol2 = st.columns([1, 2])
        with tcol1:
            mode = st.selectbox(
                "Mode", ["Season", "Date Range", "Single Date"], key="cl_mode"
            )
        with tcol2:
            year = st.selectbox(
                "Year", [2026, 2025, 2024, 2023, 2022, 2021], key="cl_year"
            )
        if mode == "Single Date":
            date = str(st.date_input("Date", key="cl_date"))
        elif mode == "Date Range":
            dcol1, dcol2 = st.columns(2)
            with dcol1:
                start_date = str(st.date_input("Start Date", key="cl_start"))
            with dcol2:
                end_date = str(st.date_input("End Date", key="cl_end"))
        mode_key = {
            "Season": "season",
            "Date Range": "date_range",
            "Single Date": "single_date",
        }[mode]

    # ---- Selector scope variables — initialized here, set in branches below ----
    player_name: str | None = None
    selected_game_row: dict | None = None
    # Hitter pipeline variables
    hitter_display_names: list[str] = []
    _hitter_batter_id: int | None = None  # MLBAM batter id from Parquet
    # Pitcher pipeline variables
    pitcher_display_names: list[str] = []
    pitcher_pairs: list[tuple[str, int | None]] = []
    # Game Summary pipeline variables
    game_players: dict = {"pitchers": [], "hitters": []}
    game_selected_hitters: list[dict] = []
    game_selected_pitchers: list[dict] = []

    # The selector lists are only narrowed to a year in "season" mode.
    selector_year = year if mode_key == "season" else None

    # ---- Player / game selection ----
    if card_type == "Hitter":
        # Source: data/card_lab_hitter_seasons.parquet (pybaseball batting_stats, AB > 0 gate)
        hitter_display_names, _statcast_by_display = get_hitter_selector_names(
            conn, year=selector_year
        )
        if not hitter_display_names:
            st.info("No hitters found. Run scripts/build_card_lab_season_summaries.py first.")
            return
        selected_hitter_display = st.selectbox("Player", hitter_display_names, key="cl_player_hitter")
        # Read batter_id (MLBAM) from the Parquet — used for ec.batter = :batter_id query.
        # DB schema: ec.player_name = PITCHER, not batter. batter_id is required.
        _hitter_batter_id = _lookup_hitter_batter_id(selected_hitter_display, selector_year)
        if _hitter_batter_id is None:
            st.warning(
                f"No MLBAM batter ID found for {selected_hitter_display!r}. "
                "Card data will be empty. Re-run build_player_identity_map.py."
            )
        player_name = selected_hitter_display  # display label (injected as literal in SQL)

    elif card_type == "Pitcher":
        # Source: data/card_lab_pitcher_seasons.parquet (pybaseball pitching_stats, IP > 0 gate)
        pitcher_pairs = get_pitcher_selector_names(conn, year=selector_year)
        if not pitcher_pairs:
            st.info("No pitchers found. Run scripts/build_card_lab_season_summaries.py first.")
            return
        pitcher_display_to_id = {name: pid for name, pid in pitcher_pairs}
        pitcher_display_names = [name for name, _ in pitcher_pairs]  # already sorted
        selected_pitcher_display = st.selectbox("Player", pitcher_display_names, key="cl_player_pitcher")
        pitcher_id = pitcher_display_to_id[selected_pitcher_display]
        player_name = selected_pitcher_display  # display name for poster (literal in pitcher SQL)

    else:  # Game Summary
        gdf = get_recent_completed_games_for_card_lab(conn)
        if gdf.empty:
            st.info("No completed game data available for Game Summary cards.")
            return
        recent_games = gdf.to_dict("records")
        # Separate matchup and date so the label reads "AWY @ HOM — YYYY-MM-DD".
        game_labels = [
            f"{g.get('away_team', '?')} @ {g.get('home_team', '?')} — {str(g.get('game_date', ''))[:10]}"
            for g in recent_games
        ]
        sel_idx = st.selectbox(
            "Game",
            range(len(game_labels)),
            format_func=lambda i: game_labels[i],
            key="cl_game",
        )
        selected_game_row = recent_games[sel_idx]
        game_pk = selected_game_row.get("game_pk")
        if game_pk:
            game_players = get_game_players_for_card_lab(conn, int(game_pk))

        # One label per player, "TEAM — Name"; the maps turn a chosen label
        # back into the player's record.
        hitter_opts = [f"{p['team']} — {p['display_name']}" for p in game_players["hitters"]]
        pitcher_opts = [f"{p['team']} — {p['display_name']}" for p in game_players["pitchers"]]
        _hitter_label_map = dict(zip(hitter_opts, game_players["hitters"]))
        _pitcher_label_map = dict(zip(pitcher_opts, game_players["pitchers"]))
        sel_hitter_labels = st.multiselect("Hitters in This Game", hitter_opts, key="cl_game_hitters")
        sel_pitcher_labels = st.multiselect("Pitchers in This Game", pitcher_opts, key="cl_game_pitchers")
        game_selected_hitters = [_hitter_label_map[lbl] for lbl in sel_hitter_labels]
        game_selected_pitchers = [_pitcher_label_map[lbl] for lbl in sel_pitcher_labels]
        player_name = None  # full game windowed_df, no single-pitcher filter

    # ---- Auto-image: MLBAM ID lookup + 3-layer image resolve ----
    mlbam_id: int | None = None
    player_pil = None
    if card_type == "Hitter" and player_name:
        # batter_id from Parquet — no DB call needed (ec.player_name = pitcher, not batter)
        mlbam_id = _hitter_batter_id
        player_pil = resolve_player_image(mlbam_id) if mlbam_id else None
    elif card_type == "Pitcher" and pitcher_id:
        mlbam_id = pitcher_id
        player_pil = resolve_player_image(mlbam_id)

    # ---- Debug expander ----
    with st.expander("Debug", expanded=False):
        import pandas as _pd
        from collections import Counter as _Counter

        st.write(f"**Active card type:** {card_type}")
        st.write(f"**Identity map exists:** {_IDENTITY_MAP_PATH.exists()}")
        st.write(f"**Pitcher seasons exists:** {_PITCHER_SEASONS_PATH.exists()}")
        st.write(f"**Hitter seasons exists:** {_HITTER_SEASONS_PATH.exists()}")

        if card_type == "Pitcher":
            st.write("**Source:** data/card_lab_pitcher_seasons.parquet (pybaseball pitching_stats)")
            st.write(f"**Season:** {selector_year}")
            st.write("**Workload filter:** IP > 0")
            st.write(f"**Total names:** {len(pitcher_display_names)}")
            st.write("**First 20:**", pitcher_display_names[:20])
            if _PITCHER_SEASONS_PATH.exists():
                _pdf = _pd.read_parquet(_PITCHER_SEASONS_PATH)
                if selector_year:
                    _yr_rows = _pdf[(_pdf["IP"] > 0) & (_pdf["Season"] == selector_year)]
                else:
                    _yr_rows = _pdf[_pdf["IP"] > 0]
                st.write(f"**Parquet rows for season:** {len(_yr_rows)}")
            null_id = sum(1 for _, pid in pitcher_pairs if pid is None)
            st.write(f"**Players with null pitcher_id:** {null_id}")
            dup_p = sum(1 for c in _Counter(pitcher_display_names).values() if c > 1)
            st.write(f"**Duplicate display_names:** {dup_p} (must be 0)")
            st.markdown("**Spot checks:**")
            for check in ["Tarik Skubal", "Sonny Gray", "Aaron Nola"]:
                found = [n for n, _ in pitcher_pairs if check.lower() in n.lower()]
                suffix = f" ({len(found)} match{'es' if len(found) != 1 else ''})" if found else ""
                st.write(f" {'✓' if found else '✗'} {check}{suffix}")

        elif card_type == "Hitter":
            st.write("**Source:** data/card_lab_hitter_seasons.parquet (pybaseball batting_stats)")
            st.write(f"**Season:** {selector_year}")
            st.write("**Workload filter:** AB > 0")
            st.write(f"**Total names:** {len(hitter_display_names)}")
            st.write("**First 20:**", hitter_display_names[:20])
            if _HITTER_SEASONS_PATH.exists():
                _hdf = _pd.read_parquet(_HITTER_SEASONS_PATH)
                if selector_year:
                    _yr_rows = _hdf[(_hdf["AB"] > 0) & (_hdf["Season"] == selector_year)]
                else:
                    _yr_rows = _hdf[_hdf["AB"] > 0]
                st.write(f"**Parquet rows for season:** {len(_yr_rows)}")
            st.write(f"**statcast_name coverage:** all {len(hitter_display_names)} shown rows resolved (null rows excluded at build)")
            dup_h = sum(1 for c in _Counter(hitter_display_names).values() if c > 1)
            st.write(f"**Duplicate display_names:** {dup_h} (must be 0)")
            st.markdown("**Spot checks (hitters ✓, pitchers ✗):**")
            names_lc = {n.lower() for n in hitter_display_names}
            for check, expect_present in [
                ("Lars Nootbaar", True), ("Alec Burleson", True),
                ("Freddy Peralta", False), ("Tarik Skubal", False),
            ]:
                found = any(check.lower() in n for n in names_lc)
                ok = found == expect_present
                st.write(f" {'✓' if ok else '✗'} {check} — {'present' if found else 'absent'}")

        if card_type in ("Hitter", "Pitcher"):
            st.write(f"**MLBAM ID:** {mlbam_id}")
            if card_type == "Hitter":
                st.write(f"**Batter ID (from Parquet):** {_hitter_batter_id}")
            st.write(f"**Image fetched:** {player_pil is not None}")
        elif card_type == "Game Summary":
            if selected_game_row:
                raw_gp = selected_game_row.get("game_pk")
                st.write(f"**game_pk raw value:** {raw_gp!r} (type: {type(raw_gp).__name__})")
            st.write(f"**Hitters found:** {len(game_players.get('hitters', []))}")
            st.write(f"**Pitchers found:** {len(game_players.get('pitchers', []))}")
            st.write("**First 10 hitters:**", [h['display_name'] for h in game_players.get('hitters', [])[:10]])
            st.write("**First 10 pitchers:**", [p['display_name'] for p in game_players.get('pitchers', [])[:10]])
            st.write(f"**Selected hitters:** {len(game_selected_hitters)}")
            st.write(f"**Selected pitchers:** {len(game_selected_pitchers)}")
            if game_selected_hitters:
                st.write("Selected hitters:", [h['display_name'] for h in game_selected_hitters])
            if game_selected_pitchers:
                st.write("Selected pitchers:", [p['display_name'] for p in game_selected_pitchers])
            if selected_game_row and selected_game_row.get("game_pk"):
                # Best-effort diagnostics: a failing query is reported in the
                # panel instead of breaking the page.
                try:
                    _gp = int(selected_game_row["game_pk"])
                    _cnt = conn.execute(
                        text("SELECT COUNT(*) FROM statcast_event_core WHERE game_pk = :gp"),
                        {"gp": _gp},
                    ).scalar()
                    _p_cnt = conn.execute(
                        text("SELECT COUNT(DISTINCT pitcher) FROM statcast_event_core WHERE game_pk = :gp"),
                        {"gp": _gp},
                    ).scalar()
                    _b_cnt = conn.execute(
                        text("SELECT COUNT(DISTINCT batter) FROM statcast_event_core WHERE game_pk = :gp"),
                        {"gp": _gp},
                    ).scalar()
                    st.write(f"**statcast rows for game_pk {_gp}:** {_cnt}")
                    st.write(f"**Distinct pitchers:** {_p_cnt} | **Distinct batters:** {_b_cnt}")
                except Exception as _e:
                    st.write(f"**Statcast row count error:** {_e}")
            if game_players.get("_error"):
                st.write(f"**get_game_players error:** {game_players['_error']}")

    fmt = st.radio("Format", ["PNG", "JPG"], horizontal=True, key="cl_fmt")

    # ---- Generate button ----
    generate = st.button("Generate Card", type="primary", key="cl_gen")

    # ---- Placeholder before first generation ----
    if "card_bytes" not in st.session_state:
        st.info("Generate a card to preview.")

    if generate:
        status = st.empty()
        try:
            img_bytes = None
            tf = ""
            card_player = "unknown"
            if card_type == "Hitter":
                status.info("Querying warehouse data...")
                status.info("Building poster...")
                img_bytes, tf, _dq = _gen_hitter_bytes(
                    conn, player_name, _hitter_batter_id, mode_key, year, date, start_date, end_date, fmt, player_pil
                )
                card_player = normalize_name(player_name or "unknown")
            elif card_type == "Pitcher":
                status.info("Querying warehouse data...")
                status.info("Building poster...")
                img_bytes, tf, _dq = _gen_pitcher_bytes(
                    conn, player_name, pitcher_id, mode_key, year, date, start_date, end_date, fmt, player_pil
                )
                # Normalized like the hitter branch so file names are consistent.
                card_player = normalize_name(player_name or "unknown")
            else:
                summary_game_pk = selected_game_row.get("game_pk")
                if not summary_game_pk:
                    # Fail with a readable message instead of int(None)'s TypeError.
                    raise ValueError("Selected game has no game_pk; cannot build a summary card.")
                status.info("Querying warehouse data...")
                status.info("Building summary card...")
                img_bytes, tf = _gen_game_bytes(
                    conn,
                    game_pk=summary_game_pk,
                    away_team=selected_game_row.get("away_team", "—"),
                    home_team=selected_game_row.get("home_team", "—"),
                    away_score=selected_game_row.get("away_score"),
                    home_score=selected_game_row.get("home_score"),
                    game_date=str(selected_game_row.get("game_date", ""))[:10],
                    player_name=player_name,
                    fmt=fmt,
                    selected_hitters=game_selected_hitters,
                    selected_pitchers=game_selected_pitchers,
                )
                card_player = player_name or "game"

            if img_bytes is None:
                # Nothing was rendered: leave any earlier card and its metadata
                # in session state untouched so preview and file name still match.
                status.empty()
                st.warning(
                    "No warehouse data found for this player and selected time window. "
                    "Try a different season or date range."
                )
            else:
                status.info("Finalizing download...")
                st.session_state["card_bytes"] = img_bytes
                st.session_state["card_type"] = card_type
                st.session_state["card_fmt"] = fmt
                st.session_state["card_player"] = card_player
                st.session_state["card_timeframe"] = tf
                status.empty()
        except Exception as exc:
            # Page-level boundary: report the failure in the UI and keep the page alive.
            status.empty()
            logger.warning("[card_lab] generation failed: %s", exc)
            st.error(f"Card generation failed: {exc}")

    # ---- Preview + download ----
    card_bytes = st.session_state.get("card_bytes")
    if card_bytes:
        st.image(card_bytes, width=700)
        fmt_label = st.session_state.get("card_fmt", "PNG")
        fname = _card_download_name(
            st.session_state.get("card_player"),
            st.session_state.get("card_type", "card"),
            st.session_state.get("card_timeframe", ""),
            fmt_label,
        )
        mime = "image/jpeg" if str(fmt_label).lower() == "jpg" else "image/png"
        st.download_button(
            label=f"Download {fmt_label}",
            data=card_bytes,
            file_name=fname,
            mime=mime,
            key="cl_dl",
        )