Spaces:
Sleeping
Sleeping
Debug tab: add live_pitch_mix_2026 + live_pa_events_2026 to coverage diagnostics
Browse files
Coverage Diagnostics table now includes the two 2026 live tables so row
counts and latest_game_date reflect current-season data. Overlap query
counts games covered by statcast_event_core OR either 2026 table, so
2026 game_pks no longer show as missing coverage.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
- visualization/debug_page.py +28 -10
visualization/debug_page.py
CHANGED
|
@@ -160,6 +160,8 @@ def _build_coverage_diagnostics(conn) -> list[dict]:
|
|
| 160 |
KEY_TABLES = [
|
| 161 |
"game_outcomes",
|
| 162 |
"statcast_event_core",
|
|
|
|
|
|
|
| 163 |
"batter_zone_events",
|
| 164 |
"pitcher_inning_first_seed_events",
|
| 165 |
]
|
|
@@ -222,15 +224,21 @@ def _build_coverage_diagnostics(conn) -> list[dict]:
|
|
| 222 |
|
| 223 |
def _build_overlap_diagnostics(conn) -> dict:
|
| 224 |
"""
|
| 225 |
-
Compare game_outcomes.game_pk (TEXT) against statcast_event_core
|
| 226 |
-
Returns counts for: total final games, covered, missing.
|
| 227 |
-
Returns empty dict if
|
| 228 |
"""
|
| 229 |
from sqlalchemy import text as _t
|
| 230 |
|
| 231 |
go_cols = _get_table_columns(conn, "game_outcomes")
|
| 232 |
sc_cols = _get_table_columns(conn, "statcast_event_core")
|
| 233 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 234 |
return {}
|
| 235 |
|
| 236 |
try:
|
|
@@ -244,17 +252,27 @@ def _build_overlap_diagnostics(conn) -> dict:
|
|
| 244 |
FROM game_outcomes g
|
| 245 |
WHERE g.game_pk IS NOT NULL
|
| 246 |
AND g.game_pk != ''
|
| 247 |
-
AND
|
| 248 |
-
|
| 249 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 250 |
)
|
| 251 |
""")
|
| 252 |
).scalar()
|
| 253 |
|
| 254 |
return {
|
| 255 |
"total_game_outcomes_game_pks": total,
|
| 256 |
-
"
|
| 257 |
-
"
|
| 258 |
}
|
| 259 |
except Exception as exc:
|
| 260 |
return {"error": str(exc)}
|
|
@@ -646,7 +664,7 @@ def render_debug(
|
|
| 646 |
|
| 647 |
overlap = _build_overlap_diagnostics(conn)
|
| 648 |
if overlap and "error" not in overlap:
|
| 649 |
-
st.write("**game_outcomes ↔
|
| 650 |
st.dataframe(pd.DataFrame([overlap]), use_container_width=True, hide_index=True)
|
| 651 |
elif overlap and "error" in overlap:
|
| 652 |
st.warning(f"Overlap query error: {overlap['error']}")
|
|
|
|
| 160 |
KEY_TABLES = [
|
| 161 |
"game_outcomes",
|
| 162 |
"statcast_event_core",
|
| 163 |
+
"live_pitch_mix_2026",
|
| 164 |
+
"live_pa_events_2026",
|
| 165 |
"batter_zone_events",
|
| 166 |
"pitcher_inning_first_seed_events",
|
| 167 |
]
|
|
|
|
| 224 |
|
| 225 |
def _build_overlap_diagnostics(conn) -> dict:
|
| 226 |
"""
|
| 227 |
+
Compare game_outcomes.game_pk (TEXT) against statcast_event_core, live_pitch_mix_2026,
|
| 228 |
+
and live_pa_events_2026. Returns counts for: total final games, covered, missing.
|
| 229 |
+
Returns empty dict if required columns are missing.
|
| 230 |
"""
|
| 231 |
from sqlalchemy import text as _t
|
| 232 |
|
| 233 |
go_cols = _get_table_columns(conn, "game_outcomes")
|
| 234 |
sc_cols = _get_table_columns(conn, "statcast_event_core")
|
| 235 |
+
lpm_cols = _get_table_columns(conn, "live_pitch_mix_2026")
|
| 236 |
+
lpa_cols = _get_table_columns(conn, "live_pa_events_2026")
|
| 237 |
+
if "game_pk" not in go_cols or (
|
| 238 |
+
"game_pk" not in sc_cols
|
| 239 |
+
and "game_pk" not in lpm_cols
|
| 240 |
+
and "game_pk" not in lpa_cols
|
| 241 |
+
):
|
| 242 |
return {}
|
| 243 |
|
| 244 |
try:
|
|
|
|
| 252 |
FROM game_outcomes g
|
| 253 |
WHERE g.game_pk IS NOT NULL
|
| 254 |
AND g.game_pk != ''
|
| 255 |
+
AND (
|
| 256 |
+
EXISTS (
|
| 257 |
+
SELECT 1 FROM statcast_event_core s
|
| 258 |
+
WHERE s.game_pk = g.game_pk::BIGINT
|
| 259 |
+
)
|
| 260 |
+
OR EXISTS (
|
| 261 |
+
SELECT 1 FROM live_pitch_mix_2026 lpm
|
| 262 |
+
WHERE lpm.game_pk = g.game_pk::BIGINT
|
| 263 |
+
)
|
| 264 |
+
OR EXISTS (
|
| 265 |
+
SELECT 1 FROM live_pa_events_2026 lpa
|
| 266 |
+
WHERE lpa.game_pk = g.game_pk::BIGINT
|
| 267 |
+
)
|
| 268 |
)
|
| 269 |
""")
|
| 270 |
).scalar()
|
| 271 |
|
| 272 |
return {
|
| 273 |
"total_game_outcomes_game_pks": total,
|
| 274 |
+
"with_statcast_or_2026_coverage": covered,
|
| 275 |
+
"missing_coverage": (total or 0) - (covered or 0),
|
| 276 |
}
|
| 277 |
except Exception as exc:
|
| 278 |
return {"error": str(exc)}
|
|
|
|
| 664 |
|
| 665 |
overlap = _build_overlap_diagnostics(conn)
|
| 666 |
if overlap and "error" not in overlap:
|
| 667 |
+
st.write("**game_outcomes ↔ statcast / live_pitch_mix_2026 / live_pa_events_2026 overlap**")
|
| 668 |
st.dataframe(pd.DataFrame([overlap]), use_container_width=True, hide_index=True)
|
| 669 |
elif overlap and "error" in overlap:
|
| 670 |
st.warning(f"Overlap query error: {overlap['error']}")
|