Spaces:
Sleeping
Card Lab: fix player classification, column aliases, pitcher ID flow
Browse files
Root cause fixes:
- Remove batter_stand/pitcher_hand/xwoba aliases that broke all model lookups
(models expect stand, p_throws, estimated_woba_using_speedangle)
- Restore bb.bb_type + add ec.pitcher to pitcher SELECT for feature builders
- Hitter list: add events NOT IN filter to exclude pitchers with rare ABs
- Pitcher list: LEFT JOIN to name map (no LIMIT), fallback "Pitcher {id}",
NULL names sorted last; returns list[tuple[int,str]] keyed by pitcher ID
- Pitcher window query: accept pitcher_id directly, remove name→id lookup
UX/perf:
- Cached hitter/pitcher list wrappers (_cached_hitters/_cached_pitchers, 10m TTL)
- Game summary player filter: explicit session_state binding for rerender
- Add idx_statcast_game_pk index for game summary player dropdown
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
- database/db.py +4 -0
- visualization/card_lab_page.py +43 -15
- visualization/cards/card_queries.py +69 -69
|
@@ -158,6 +158,10 @@ def initialize_schema(conn) -> None:
|
|
| 158 |
"CREATE INDEX IF NOT EXISTS idx_statcast_pitcher_date "
|
| 159 |
"ON statcast_event_core (pitcher, source_season, game_date)"
|
| 160 |
))
|
|
|
|
|
|
|
|
|
|
|
|
|
| 161 |
|
| 162 |
|
| 163 |
# ---------------------------------------------------------------------------
|
|
|
|
| 158 |
"CREATE INDEX IF NOT EXISTS idx_statcast_pitcher_date "
|
| 159 |
"ON statcast_event_core (pitcher, source_season, game_date)"
|
| 160 |
))
|
| 161 |
+
conn.execute(text(
|
| 162 |
+
"CREATE INDEX IF NOT EXISTS idx_statcast_game_pk "
|
| 163 |
+
"ON statcast_event_core (game_pk)"
|
| 164 |
+
))
|
| 165 |
|
| 166 |
|
| 167 |
# ---------------------------------------------------------------------------
|
|
@@ -35,6 +35,25 @@ def normalize_name(name: str) -> str:
|
|
| 35 |
return str(name).strip()
|
| 36 |
|
| 37 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
def _gen_hitter_bytes(conn, player_name, mode, year, date, start_date, end_date, fmt):
|
| 39 |
windowed_df = get_player_card_window_df(
|
| 40 |
conn, player_name, "Hitter", mode=mode, year=year,
|
|
@@ -50,10 +69,11 @@ def _gen_hitter_bytes(conn, player_name, mode, year, date, start_date, end_date,
|
|
| 50 |
return img_bytes, payload.get("timeframe", ""), payload.get("data_quality", "")
|
| 51 |
|
| 52 |
|
| 53 |
-
def _gen_pitcher_bytes(conn, player_name, mode, year, date, start_date, end_date, fmt):
|
| 54 |
windowed_df = get_player_card_window_df(
|
| 55 |
conn, player_name, "Pitcher", mode=mode, year=year,
|
| 56 |
date=date, start_date=start_date, end_date=end_date,
|
|
|
|
| 57 |
)
|
| 58 |
if windowed_df.empty:
|
| 59 |
return None, "", "limited"
|
|
@@ -80,6 +100,10 @@ def _gen_game_bytes(conn, game_pk, away_team, home_team, away_score, home_score,
|
|
| 80 |
return img_bytes, payload.get("timeframe", "")
|
| 81 |
|
| 82 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 83 |
def render_card_lab(conn) -> None:
|
| 84 |
st.subheader("Kasper Card Lab")
|
| 85 |
st.caption("Generate downloadable Kasper player cards.")
|
|
@@ -89,8 +113,9 @@ def render_card_lab(conn) -> None:
|
|
| 89 |
|
| 90 |
# ---- Timeframe controls (Hitter / Pitcher only) ----
|
| 91 |
date = start_date = end_date = None
|
| 92 |
-
year
|
| 93 |
-
mode_key
|
|
|
|
| 94 |
|
| 95 |
if card_type in ("Hitter", "Pitcher"):
|
| 96 |
st.markdown("**Data Window**")
|
|
@@ -124,7 +149,7 @@ def render_card_lab(conn) -> None:
|
|
| 124 |
selected_game_row = None
|
| 125 |
|
| 126 |
if card_type == "Hitter":
|
| 127 |
-
players =
|
| 128 |
if not players:
|
| 129 |
st.info("No hitters found in warehouse for selected season.")
|
| 130 |
return
|
|
@@ -133,17 +158,18 @@ def render_card_lab(conn) -> None:
|
|
| 133 |
)
|
| 134 |
|
| 135 |
elif card_type == "Pitcher":
|
| 136 |
-
|
| 137 |
-
if not
|
| 138 |
st.info("No pitchers found in warehouse for selected season.")
|
| 139 |
return
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
"
|
| 146 |
)
|
|
|
|
| 147 |
|
| 148 |
else: # Game Summary
|
| 149 |
gdf = get_recent_completed_games_for_card_lab(conn)
|
|
@@ -168,10 +194,10 @@ def render_card_lab(conn) -> None:
|
|
| 168 |
if game_pk:
|
| 169 |
try:
|
| 170 |
prows = conn.execute(
|
| 171 |
-
text("SELECT DISTINCT player_name FROM statcast_event_core WHERE game_pk = :gpk"),
|
| 172 |
{"gpk": int(game_pk)},
|
| 173 |
).fetchall()
|
| 174 |
-
raw_game_players =
|
| 175 |
except Exception as exc:
|
| 176 |
logger.warning("[card_lab] game player list failed: %s", exc)
|
| 177 |
raw_game_players = []
|
|
@@ -184,6 +210,8 @@ def render_card_lab(conn) -> None:
|
|
| 184 |
format_func=lambda n: "Full Game" if n == "Full Game" else normalize_name(n),
|
| 185 |
key="cl_game_player",
|
| 186 |
)
|
|
|
|
|
|
|
| 187 |
player_name = None if gp_sel == "Full Game" else gp_sel
|
| 188 |
|
| 189 |
fmt = st.radio("Format", ["PNG", "JPG"], horizontal=True, key="cl_fmt")
|
|
@@ -214,7 +242,7 @@ def render_card_lab(conn) -> None:
|
|
| 214 |
status.info("Querying warehouse data...")
|
| 215 |
status.info("Building charts...")
|
| 216 |
img_bytes, tf, dq = _gen_pitcher_bytes(
|
| 217 |
-
conn, player_name, mode_key, year, date, start_date, end_date, fmt
|
| 218 |
)
|
| 219 |
st.session_state["card_player"] = player_name or "unknown"
|
| 220 |
st.session_state["card_timeframe"] = tf
|
|
|
|
| 35 |
return str(name).strip()
|
| 36 |
|
| 37 |
|
| 38 |
+
# ---------------------------------------------------------------------------
|
| 39 |
+
# Cached player list wrappers
|
| 40 |
+
# Leading underscore on _conn tells Streamlit to skip hashing the connection.
|
| 41 |
+
# ---------------------------------------------------------------------------
|
| 42 |
+
|
| 43 |
+
@st.cache_data(ttl=600, show_spinner=False)
def _cached_hitters(_conn, year):
    """Return the Card Lab hitter list for ``year`` via Streamlit's data cache.

    Delegates to ``get_card_lab_hitters``; the result is kept for 600 seconds
    (``ttl=600``). The leading underscore on ``_conn`` tells Streamlit to skip
    hashing the connection object.
    """
    hitter_names = get_card_lab_hitters(_conn, year=year)
    return hitter_names
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
@st.cache_data(ttl=600, show_spinner=False)
def _cached_pitchers(_conn, year):
    """Return the Card Lab pitcher list for ``year`` via Streamlit's data cache.

    Delegates to ``get_card_lab_pitchers``; the result is kept for 600 seconds
    (``ttl=600``). The leading underscore on ``_conn`` tells Streamlit to skip
    hashing the connection object.
    """
    pitcher_entries = get_card_lab_pitchers(_conn, year=year)
    return pitcher_entries
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
# ---------------------------------------------------------------------------
|
| 54 |
+
# Card generation functions — button-click only, no caching
|
| 55 |
+
# ---------------------------------------------------------------------------
|
| 56 |
+
|
| 57 |
def _gen_hitter_bytes(conn, player_name, mode, year, date, start_date, end_date, fmt):
|
| 58 |
windowed_df = get_player_card_window_df(
|
| 59 |
conn, player_name, "Hitter", mode=mode, year=year,
|
|
|
|
| 69 |
return img_bytes, payload.get("timeframe", ""), payload.get("data_quality", "")
|
| 70 |
|
| 71 |
|
| 72 |
+
def _gen_pitcher_bytes(conn, player_name, pitcher_id, mode, year, date, start_date, end_date, fmt):
|
| 73 |
windowed_df = get_player_card_window_df(
|
| 74 |
conn, player_name, "Pitcher", mode=mode, year=year,
|
| 75 |
date=date, start_date=start_date, end_date=end_date,
|
| 76 |
+
pitcher_id=pitcher_id,
|
| 77 |
)
|
| 78 |
if windowed_df.empty:
|
| 79 |
return None, "", "limited"
|
|
|
|
| 100 |
return img_bytes, payload.get("timeframe", "")
|
| 101 |
|
| 102 |
|
| 103 |
+
# ---------------------------------------------------------------------------
|
| 104 |
+
# Main render function
|
| 105 |
+
# ---------------------------------------------------------------------------
|
| 106 |
+
|
| 107 |
def render_card_lab(conn) -> None:
|
| 108 |
st.subheader("Kasper Card Lab")
|
| 109 |
st.caption("Generate downloadable Kasper player cards.")
|
|
|
|
| 113 |
|
| 114 |
# ---- Timeframe controls (Hitter / Pitcher only) ----
|
| 115 |
date = start_date = end_date = None
|
| 116 |
+
year = None
|
| 117 |
+
mode_key = "season"
|
| 118 |
+
pitcher_id = None # set in pitcher selectbox block
|
| 119 |
|
| 120 |
if card_type in ("Hitter", "Pitcher"):
|
| 121 |
st.markdown("**Data Window**")
|
|
|
|
| 149 |
selected_game_row = None
|
| 150 |
|
| 151 |
if card_type == "Hitter":
|
| 152 |
+
players = _cached_hitters(conn, year=year if mode_key == "season" else None)
|
| 153 |
if not players:
|
| 154 |
st.info("No hitters found in warehouse for selected season.")
|
| 155 |
return
|
|
|
|
| 158 |
)
|
| 159 |
|
| 160 |
elif card_type == "Pitcher":
|
| 161 |
+
pitchers = _cached_pitchers(conn, year=year if mode_key == "season" else None)
|
| 162 |
+
if not pitchers:
|
| 163 |
st.info("No pitchers found in warehouse for selected season.")
|
| 164 |
return
|
| 165 |
+
|
| 166 |
+
selected_pitcher = st.selectbox(
|
| 167 |
+
"Player",
|
| 168 |
+
pitchers,
|
| 169 |
+
format_func=lambda t: normalize_name(t[1]),
|
| 170 |
+
key="cl_player_pitcher",
|
| 171 |
)
|
| 172 |
+
pitcher_id, player_name = selected_pitcher
|
| 173 |
|
| 174 |
else: # Game Summary
|
| 175 |
gdf = get_recent_completed_games_for_card_lab(conn)
|
|
|
|
| 194 |
if game_pk:
|
| 195 |
try:
|
| 196 |
prows = conn.execute(
|
| 197 |
+
text("SELECT DISTINCT player_name FROM statcast_event_core WHERE game_pk = :gpk ORDER BY player_name"),
|
| 198 |
{"gpk": int(game_pk)},
|
| 199 |
).fetchall()
|
| 200 |
+
raw_game_players = [r[0] for r in prows if r[0]]
|
| 201 |
except Exception as exc:
|
| 202 |
logger.warning("[card_lab] game player list failed: %s", exc)
|
| 203 |
raw_game_players = []
|
|
|
|
| 210 |
format_func=lambda n: "Full Game" if n == "Full Game" else normalize_name(n),
|
| 211 |
key="cl_game_player",
|
| 212 |
)
|
| 213 |
+
# Explicit session state binding to ensure rerender picks up selection
|
| 214 |
+
st.session_state["cl_game_player_selected"] = gp_sel
|
| 215 |
player_name = None if gp_sel == "Full Game" else gp_sel
|
| 216 |
|
| 217 |
fmt = st.radio("Format", ["PNG", "JPG"], horizontal=True, key="cl_fmt")
|
|
|
|
| 242 |
status.info("Querying warehouse data...")
|
| 243 |
status.info("Building charts...")
|
| 244 |
img_bytes, tf, dq = _gen_pitcher_bytes(
|
| 245 |
+
conn, player_name, pitcher_id, mode_key, year, date, start_date, end_date, fmt
|
| 246 |
)
|
| 247 |
st.session_state["card_player"] = player_name or "unknown"
|
| 248 |
st.session_state["card_timeframe"] = tf
|
|
@@ -7,9 +7,8 @@ from sqlalchemy import text
|
|
| 7 |
from utils.logger import logger
|
| 8 |
|
| 9 |
# ---------------------------------------------------------------------------
|
| 10 |
-
# Shared JOIN fragment — confirmed columns only
|
| 11 |
-
#
|
| 12 |
-
# pitch_name IS confirmed
|
| 13 |
# ---------------------------------------------------------------------------
|
| 14 |
_HITTER_JOIN_SELECT = """
|
| 15 |
ec.event_key,
|
|
@@ -20,8 +19,8 @@ _HITTER_JOIN_SELECT = """
|
|
| 20 |
ec.pitch_name,
|
| 21 |
ec.events,
|
| 22 |
ec.description,
|
| 23 |
-
ec.stand
|
| 24 |
-
ec.p_throws
|
| 25 |
ec.home_team,
|
| 26 |
ec.away_team,
|
| 27 |
ec.inning,
|
|
@@ -30,7 +29,7 @@ _HITTER_JOIN_SELECT = """
|
|
| 30 |
bb.launch_speed,
|
| 31 |
bb.launch_angle,
|
| 32 |
bb.bb_type,
|
| 33 |
-
bb.estimated_woba_using_speedangle
|
| 34 |
pr.release_speed,
|
| 35 |
pr.release_spin_rate,
|
| 36 |
pr.pfx_x,
|
|
@@ -45,7 +44,6 @@ LEFT JOIN statcast_pitch_release pr ON ec.event_key = pr.event_key
|
|
| 45 |
|
| 46 |
# ---------------------------------------------------------------------------
|
| 47 |
# Date range helper — used only for date_range and single_date modes
|
| 48 |
-
# season mode bypasses this and uses source_season = :year directly
|
| 49 |
# ---------------------------------------------------------------------------
|
| 50 |
def _date_range(mode, date, start_date, end_date) -> tuple[str, str]:
|
| 51 |
"""Returns (sd, ed) for date_range / single_date modes only."""
|
|
@@ -65,23 +63,33 @@ def _date_range(mode, date, start_date, end_date) -> tuple[str, str]:
|
|
| 65 |
# ---------------------------------------------------------------------------
|
| 66 |
|
| 67 |
def get_card_lab_hitters(conn, year: int | None = None, limit: int = 500) -> list[str]:
|
| 68 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
if year:
|
| 71 |
-
sql = text("""
|
| 72 |
SELECT DISTINCT player_name
|
| 73 |
FROM statcast_event_core
|
| 74 |
WHERE source_season = :year
|
| 75 |
AND player_name IS NOT NULL
|
|
|
|
| 76 |
ORDER BY player_name
|
| 77 |
LIMIT :limit
|
| 78 |
""")
|
| 79 |
rows = conn.execute(sql, {"year": int(year), "limit": limit}).fetchall()
|
| 80 |
else:
|
| 81 |
-
sql = text("""
|
| 82 |
SELECT DISTINCT player_name
|
| 83 |
FROM statcast_event_core
|
| 84 |
WHERE player_name IS NOT NULL
|
|
|
|
| 85 |
ORDER BY player_name
|
| 86 |
LIMIT :limit
|
| 87 |
""")
|
|
@@ -95,63 +103,52 @@ def get_card_lab_hitters(conn, year: int | None = None, limit: int = 500) -> lis
|
|
| 95 |
return []
|
| 96 |
|
| 97 |
|
| 98 |
-
def get_card_lab_pitchers(conn, year: int | None = None
|
| 99 |
"""
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
Source: DISTINCT pitcher IDs from statcast_event_core, LEFT JOIN to
|
| 103 |
-
pitcher_inning_first_seed_events for name mapping. Only pitchers with
|
| 104 |
-
a valid name mapping are shown.
|
| 105 |
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
"""
|
| 110 |
try:
|
| 111 |
if year:
|
| 112 |
sql = text("""
|
| 113 |
-
SELECT DISTINCT pife.pitcher_name
|
| 114 |
FROM statcast_event_core ec
|
| 115 |
-
|
| 116 |
ON ec.pitcher = pife.pitcher_id
|
| 117 |
-
WHERE ec.
|
| 118 |
-
AND
|
| 119 |
-
ORDER BY
|
| 120 |
-
|
|
|
|
| 121 |
""")
|
| 122 |
-
rows = conn.execute(sql, {"year": int(year)
|
| 123 |
else:
|
| 124 |
sql = text("""
|
| 125 |
-
SELECT DISTINCT pife.pitcher_name
|
| 126 |
FROM statcast_event_core ec
|
| 127 |
-
|
| 128 |
ON ec.pitcher = pife.pitcher_id
|
| 129 |
-
WHERE
|
| 130 |
-
ORDER BY
|
| 131 |
-
|
|
|
|
| 132 |
""")
|
| 133 |
-
rows = conn.execute(sql
|
| 134 |
|
| 135 |
-
|
|
|
|
|
|
|
|
|
|
| 136 |
|
| 137 |
-
#
|
| 138 |
-
|
| 139 |
-
total_sql = text(
|
| 140 |
-
"SELECT COUNT(DISTINCT pitcher) FROM statcast_event_core"
|
| 141 |
-
+ (" WHERE source_season = :year" if year else "")
|
| 142 |
-
)
|
| 143 |
-
total_params = {"year": int(year)} if year else {}
|
| 144 |
-
total_row = conn.execute(total_sql, total_params).fetchone()
|
| 145 |
-
total_pitcher_ids = total_row[0] if total_row else "?"
|
| 146 |
-
logger.info(
|
| 147 |
-
"[card_lab_db_players] type=pitcher year=%s mapped=%s total_ids=%s "
|
| 148 |
-
"(unmapped pitcher_ids have no Card Lab coverage)",
|
| 149 |
-
year, len(names), total_pitcher_ids,
|
| 150 |
-
)
|
| 151 |
-
except Exception:
|
| 152 |
-
logger.info("[card_lab_db_players] type=pitcher year=%s count=%s", year, len(names))
|
| 153 |
|
| 154 |
-
|
|
|
|
| 155 |
except Exception as exc:
|
| 156 |
logger.warning("[card_lab_db_players] pitcher query failed: %s", exc)
|
| 157 |
return []
|
|
@@ -208,36 +205,29 @@ def get_player_card_window_df(
|
|
| 208 |
date: str | None = None,
|
| 209 |
start_date: str | None = None,
|
| 210 |
end_date: str | None = None,
|
|
|
|
| 211 |
) -> pd.DataFrame:
|
| 212 |
"""
|
| 213 |
Fetch the FULL matching pitch/event dataset for one player over a time window.
|
| 214 |
|
| 215 |
-
NO row cap is applied —
|
| 216 |
-
|
| 217 |
-
complete data. Chart-level downsampling is handled inside card_charts.py
|
| 218 |
-
(_sample_df) and is render-only; it does not affect metric computation.
|
| 219 |
-
|
| 220 |
-
This DataFrame is the ONLY data source passed to card builders.
|
| 221 |
-
No fallback to statcast HTTP loaders.
|
| 222 |
|
| 223 |
Season mode uses source_season INT filter (fastest).
|
| 224 |
Date range / single date use game_date TEXT range comparison.
|
|
|
|
|
|
|
|
|
|
| 225 |
"""
|
| 226 |
try:
|
| 227 |
if card_type == "Pitcher":
|
| 228 |
-
|
| 229 |
-
pid_row = conn.execute(
|
| 230 |
-
text("SELECT pitcher_id FROM pitcher_inning_first_seed_events WHERE pitcher_name = :n LIMIT 1"),
|
| 231 |
-
{"n": player_name},
|
| 232 |
-
).fetchone()
|
| 233 |
-
if not pid_row:
|
| 234 |
logger.warning(
|
| 235 |
-
"[card_lab_db_window]
|
| 236 |
-
"
|
| 237 |
)
|
| 238 |
return pd.DataFrame()
|
| 239 |
|
| 240 |
-
pitcher_id = pid_row[0]
|
| 241 |
_PITCHER_SELECT = """
|
| 242 |
SELECT
|
| 243 |
ec.event_key,
|
|
@@ -248,8 +238,9 @@ def get_player_card_window_df(
|
|
| 248 |
ec.pitch_name,
|
| 249 |
ec.events,
|
| 250 |
ec.description,
|
| 251 |
-
ec.stand
|
| 252 |
-
ec.p_throws
|
|
|
|
| 253 |
ec.home_team,
|
| 254 |
ec.away_team,
|
| 255 |
ec.inning,
|
|
@@ -260,7 +251,8 @@ def get_player_card_window_df(
|
|
| 260 |
pr.pfx_x,
|
| 261 |
pr.pfx_z,
|
| 262 |
bb.launch_speed,
|
| 263 |
-
bb.launch_angle
|
|
|
|
| 264 |
FROM statcast_event_core ec
|
| 265 |
LEFT JOIN statcast_pitch_release pr ON ec.event_key = pr.event_key
|
| 266 |
LEFT JOIN statcast_batted_ball bb ON ec.event_key = bb.event_key
|
|
@@ -285,6 +277,14 @@ def get_player_card_window_df(
|
|
| 285 |
params = {"player_name": player_name, "sd": sd, "ed": ed}
|
| 286 |
|
| 287 |
df = pd.read_sql(sql, conn, params=params)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 288 |
logger.info(
|
| 289 |
"[card_lab_db_window] player=%s type=%s mode=%s year=%s rows=%s",
|
| 290 |
player_name, card_type, mode, year, len(df),
|
|
|
|
| 7 |
from utils.logger import logger
|
| 8 |
|
| 9 |
# ---------------------------------------------------------------------------
|
| 10 |
+
# Shared JOIN fragment — confirmed columns only, NO breaking aliases
|
| 11 |
+
# Models expect: stand, p_throws, estimated_woba_using_speedangle (not aliased)
|
|
|
|
| 12 |
# ---------------------------------------------------------------------------
|
| 13 |
_HITTER_JOIN_SELECT = """
|
| 14 |
ec.event_key,
|
|
|
|
| 19 |
ec.pitch_name,
|
| 20 |
ec.events,
|
| 21 |
ec.description,
|
| 22 |
+
ec.stand,
|
| 23 |
+
ec.p_throws,
|
| 24 |
ec.home_team,
|
| 25 |
ec.away_team,
|
| 26 |
ec.inning,
|
|
|
|
| 29 |
bb.launch_speed,
|
| 30 |
bb.launch_angle,
|
| 31 |
bb.bb_type,
|
| 32 |
+
bb.estimated_woba_using_speedangle,
|
| 33 |
pr.release_speed,
|
| 34 |
pr.release_spin_rate,
|
| 35 |
pr.pfx_x,
|
|
|
|
| 44 |
|
| 45 |
# ---------------------------------------------------------------------------
|
| 46 |
# Date range helper — used only for date_range and single_date modes
|
|
|
|
| 47 |
# ---------------------------------------------------------------------------
|
| 48 |
def _date_range(mode, date, start_date, end_date) -> tuple[str, str]:
|
| 49 |
"""Returns (sd, ed) for date_range / single_date modes only."""
|
|
|
|
| 63 |
# ---------------------------------------------------------------------------
|
| 64 |
|
| 65 |
def get_card_lab_hitters(conn, year: int | None = None, limit: int = 500) -> list[str]:
|
| 66 |
+
"""
|
| 67 |
+
Distinct batter names for true batting events only.
|
| 68 |
+
Excludes walk, HBP, sac_bunt, sac_fly to prevent pitchers with rare ABs
|
| 69 |
+
from polluting the hitter list.
|
| 70 |
+
"""
|
| 71 |
try:
|
| 72 |
+
_EVENTS_FILTER = """
|
| 73 |
+
AND events IS NOT NULL
|
| 74 |
+
AND events NOT IN ('walk', 'hit_by_pitch', 'sac_bunt', 'sac_fly')
|
| 75 |
+
"""
|
| 76 |
if year:
|
| 77 |
+
sql = text(f"""
|
| 78 |
SELECT DISTINCT player_name
|
| 79 |
FROM statcast_event_core
|
| 80 |
WHERE source_season = :year
|
| 81 |
AND player_name IS NOT NULL
|
| 82 |
+
{_EVENTS_FILTER}
|
| 83 |
ORDER BY player_name
|
| 84 |
LIMIT :limit
|
| 85 |
""")
|
| 86 |
rows = conn.execute(sql, {"year": int(year), "limit": limit}).fetchall()
|
| 87 |
else:
|
| 88 |
+
sql = text(f"""
|
| 89 |
SELECT DISTINCT player_name
|
| 90 |
FROM statcast_event_core
|
| 91 |
WHERE player_name IS NOT NULL
|
| 92 |
+
{_EVENTS_FILTER}
|
| 93 |
ORDER BY player_name
|
| 94 |
LIMIT :limit
|
| 95 |
""")
|
|
|
|
| 103 |
return []
|
| 104 |
|
| 105 |
|
| 106 |
+
def get_card_lab_pitchers(conn, year: int | None = None) -> list[tuple[int, str]]:
    """
    List every distinct pitcher ID in statcast_event_core with a display name.

    Pitcher IDs are LEFT JOINed to pitcher_inning_first_seed_events for the
    name mapping. Pitchers without a mapped name get the fallback label
    "Pitcher {id}".

    Args:
        conn: Connection exposing ``execute(sql, params).fetchall()``.
        year: When truthy, restrict to rows with ``source_season = year``;
            otherwise all seasons are scanned.

    Returns:
        List of (pitcher_id, display_name) tuples, one entry per pitcher ID.
        Named pitchers come first, sorted alphabetically by name; unnamed
        pitchers follow, sorted by pitcher ID. No LIMIT is applied.
        Returns [] if the query (or result handling) raises; the failure is
        logged as a warning.
    """
    try:
        # Only fixed SQL fragments are concatenated; the season value itself
        # always travels as a bound parameter.
        where_clause = "WHERE ec.pitcher IS NOT NULL"
        params: dict[str, int] = {}
        if year:
            where_clause += "\n                  AND ec.source_season = :year"
            params["year"] = int(year)

        # No ORDER BY here: ordering is done in Python below, and ordering a
        # SELECT DISTINCT by an expression outside the select list is not
        # portable across SQL backends.
        sql = text(f"""
                SELECT DISTINCT ec.pitcher, pife.pitcher_name
                FROM statcast_event_core ec
                LEFT JOIN pitcher_inning_first_seed_events pife
                    ON ec.pitcher = pife.pitcher_id
                {where_clause}
        """)
        rows = conn.execute(sql, params).fetchall()

        # Collapse to one name per pitcher ID. NOTE(review): this assumes the
        # name table may carry several rows/spellings (or a NULL plus a name)
        # for one pitcher — confirm against the schema. A real name always
        # wins over a missing one; between names the alphabetically first is
        # kept so the result is deterministic.
        name_by_id: dict[int, str | None] = {}
        for row in rows:
            pid, name = row[0], (row[1] or None)
            if pid not in name_by_id:
                name_by_id[pid] = name
                continue
            current = name_by_id[pid]
            if name is not None and (current is None or name < current):
                name_by_id[pid] = name

        # Classify on whether a name was actually mapped, not on the label
        # text, and order unnamed pitchers numerically by ID (sorting the
        # "Pitcher {id}" labels as strings would put 100000 before 99999).
        named = sorted(
            ((pid, name) for pid, name in name_by_id.items() if name is not None),
            key=lambda entry: entry[1],
        )
        unnamed_ids = sorted(pid for pid, name in name_by_id.items() if name is None)
        pitchers: list[tuple[int, str]] = named + [
            (pid, f"Pitcher {pid}") for pid in unnamed_ids
        ]

        logger.info("[card_lab_db_players] type=pitcher year=%s count=%s", year, len(pitchers))
        return pitchers
    except Exception as exc:
        # Best-effort by design: the UI treats an empty list as "no pitchers".
        logger.warning("[card_lab_db_players] pitcher query failed: %s", exc)
        return []
|
|
|
|
| 205 |
date: str | None = None,
|
| 206 |
start_date: str | None = None,
|
| 207 |
end_date: str | None = None,
|
| 208 |
+
pitcher_id: int | None = None,
|
| 209 |
) -> pd.DataFrame:
|
| 210 |
"""
|
| 211 |
Fetch the FULL matching pitch/event dataset for one player over a time window.
|
| 212 |
|
| 213 |
+
NO row cap is applied — full matching dataset returned for metric computation.
|
| 214 |
+
Chart-level downsampling handled inside card_charts.py (_sample_df).
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 215 |
|
| 216 |
Season mode uses source_season INT filter (fastest).
|
| 217 |
Date range / single date use game_date TEXT range comparison.
|
| 218 |
+
|
| 219 |
+
For Pitcher card_type, pitcher_id is REQUIRED (the numeric ID from statcast_event_core).
|
| 220 |
+
The player_name string is injected as a display label for the feature builders.
|
| 221 |
"""
|
| 222 |
try:
|
| 223 |
if card_type == "Pitcher":
|
| 224 |
+
if pitcher_id is None:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 225 |
logger.warning(
|
| 226 |
+
"[card_lab_db_window] pitcher_id required for Pitcher card_type "
|
| 227 |
+
"(player_name='%s') — returning empty", player_name,
|
| 228 |
)
|
| 229 |
return pd.DataFrame()
|
| 230 |
|
|
|
|
| 231 |
_PITCHER_SELECT = """
|
| 232 |
SELECT
|
| 233 |
ec.event_key,
|
|
|
|
| 238 |
ec.pitch_name,
|
| 239 |
ec.events,
|
| 240 |
ec.description,
|
| 241 |
+
ec.stand,
|
| 242 |
+
ec.p_throws,
|
| 243 |
+
ec.pitcher,
|
| 244 |
ec.home_team,
|
| 245 |
ec.away_team,
|
| 246 |
ec.inning,
|
|
|
|
| 251 |
pr.pfx_x,
|
| 252 |
pr.pfx_z,
|
| 253 |
bb.launch_speed,
|
| 254 |
+
bb.launch_angle,
|
| 255 |
+
bb.bb_type
|
| 256 |
FROM statcast_event_core ec
|
| 257 |
LEFT JOIN statcast_pitch_release pr ON ec.event_key = pr.event_key
|
| 258 |
LEFT JOIN statcast_batted_ball bb ON ec.event_key = bb.event_key
|
|
|
|
| 277 |
params = {"player_name": player_name, "sd": sd, "ed": ed}
|
| 278 |
|
| 279 |
df = pd.read_sql(sql, conn, params=params)
|
| 280 |
+
|
| 281 |
+
if df.empty:
|
| 282 |
+
logger.warning(
|
| 283 |
+
"[card_lab_db_window] empty result player=%s pitcher_id=%s mode=%s year=%s",
|
| 284 |
+
player_name, pitcher_id, mode, year,
|
| 285 |
+
)
|
| 286 |
+
return pd.DataFrame()
|
| 287 |
+
|
| 288 |
logger.info(
|
| 289 |
"[card_lab_db_window] player=%s type=%s mode=%s year=%s rows=%s",
|
| 290 |
player_name, card_type, mode, year, len(df),
|