Spaces:

Syntrex
/

2026_MLB_Model

Sleeping

Syntrex Claude Sonnet 4.6 committed on Mar 21

Commit

7dcfed5

1 Parent(s): 8f09f14

Fix db inventory row_count NULL: join table_statistics + tighten early-return guard

- Replace estimated_row_count (NULL in this CRDB instance) with a join
on crdb_internal.table_statistics, pulling the latest collected row
count per table via ROW_NUMBER() OVER (PARTITION BY table_id ORDER BY created DESC)
- Tighten both early-return checks from `not df.empty` to
`not df.empty and df["row_count"].notna().any()` so an all-NULL
result falls through to the pg_class fallback instead of returning early

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

Files changed (1) hide show

visualization/debug_page.py +19 -11

visualization/debug_page.py CHANGED Viewed

@@ -102,30 +102,38 @@ _LADDER_TB2P_FIELDS = [
 def _query_db_inventory(conn) -> pd.DataFrame:
     """
     List all tables in the public schema with estimated row counts.
-    Uses CockroachDB-native pre-computed stats (fast) with pg_class fallback.
     Returns DataFrame with columns [table_name, row_count], sorted by table_name.
     """
     from sqlalchemy import text as _t
-    # Try CockroachDB-native estimated counts first (fast — pre-computed stats)
     try:
         df = pd.read_sql(
             _t("""
-               SELECT name AS table_name,
-                      estimated_row_count AS row_count
-               FROM crdb_internal.tables
-               WHERE schema_name = 'public'
-                 AND table_type = 'table'
-               ORDER BY name
             """),
             conn,
         )
-        if not df.empty:
             return df
     except Exception:
         pass
-    # Fallback: pg_class reltuples (also approximate, also fast)
     try:
         df = pd.read_sql(
             _t("""
@@ -140,7 +148,7 @@ def _query_db_inventory(conn) -> pd.DataFrame:
             """),
             conn,
         )
-        if not df.empty:
             return df
     except Exception:
         pass

 def _query_db_inventory(conn) -> pd.DataFrame:
     """
     List all tables in the public schema with estimated row counts.
+    Uses CockroachDB auto-stats (table_statistics join) with pg_class fallback.
     Returns DataFrame with columns [table_name, row_count], sorted by table_name.
     """
     from sqlalchemy import text as _t
+    # Primary: join crdb_internal.table_statistics for latest collected row counts
     try:
         df = pd.read_sql(
             _t("""
+               SELECT t.name AS table_name,
+                      s.row_count
+               FROM crdb_internal.tables t
+               LEFT JOIN (
+                   SELECT table_id,
+                          row_count,
+                          ROW_NUMBER() OVER (
+                              PARTITION BY table_id ORDER BY created DESC
+                          ) AS rn
+                   FROM crdb_internal.table_statistics
+               ) s ON t.table_id = s.table_id AND s.rn = 1
+               WHERE t.schema_name = 'public'
+                 AND t.table_type = 'table'
+               ORDER BY t.name
             """),
             conn,
         )
+        if not df.empty and df["row_count"].notna().any():
             return df
     except Exception:
         pass
+    # Fallback: pg_class reltuples (approximate, also fast)
     try:
         df = pd.read_sql(
             _t("""
             """),
             conn,
         )
+        if not df.empty and df["row_count"].notna().any():
             return df
     except Exception:
         pass