Spaces:
Sleeping
Sleeping
Fix db inventory row_count NULL: join table_statistics + tighten early-return guard
Browse files
- Replace estimated_row_count (NULL in this CRDB instance) with a join
on crdb_internal.table_statistics, pulling the latest collected row
count per table via ROW_NUMBER() OVER (PARTITION BY table_id ORDER BY created DESC)
- Tighten both early-return checks from `not df.empty` to
`not df.empty and df["row_count"].notna().any()` so an all-NULL
result no longer returns early: the primary query falls through to the
pg_class fallback, and the pg_class fallback falls through to whatever follows it
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
- visualization/debug_page.py +19 -11
visualization/debug_page.py
CHANGED
|
@@ -102,30 +102,38 @@ _LADDER_TB2P_FIELDS = [
|
|
| 102 |
def _query_db_inventory(conn) -> pd.DataFrame:
|
| 103 |
"""
|
| 104 |
List all tables in the public schema with estimated row counts.
|
| 105 |
-
Uses CockroachDB
|
| 106 |
Returns DataFrame with columns [table_name, row_count], sorted by table_name.
|
| 107 |
"""
|
| 108 |
from sqlalchemy import text as _t
|
| 109 |
|
| 110 |
-
#
|
| 111 |
try:
|
| 112 |
df = pd.read_sql(
|
| 113 |
_t("""
|
| 114 |
-
SELECT name AS table_name,
|
| 115 |
-
|
| 116 |
-
FROM crdb_internal.tables
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 120 |
"""),
|
| 121 |
conn,
|
| 122 |
)
|
| 123 |
-
if not df.empty:
|
| 124 |
return df
|
| 125 |
except Exception:
|
| 126 |
pass
|
| 127 |
|
| 128 |
-
# Fallback: pg_class reltuples (
|
| 129 |
try:
|
| 130 |
df = pd.read_sql(
|
| 131 |
_t("""
|
|
@@ -140,7 +148,7 @@ def _query_db_inventory(conn) -> pd.DataFrame:
|
|
| 140 |
"""),
|
| 141 |
conn,
|
| 142 |
)
|
| 143 |
-
if not df.empty:
|
| 144 |
return df
|
| 145 |
except Exception:
|
| 146 |
pass
|
|
|
|
| 102 |
def _query_db_inventory(conn) -> pd.DataFrame:
|
| 103 |
"""
|
| 104 |
List all tables in the public schema with estimated row counts.
|
| 105 |
+
Uses CockroachDB auto-stats (table_statistics join) with pg_class fallback.
|
| 106 |
Returns DataFrame with columns [table_name, row_count], sorted by table_name.
|
| 107 |
"""
|
| 108 |
from sqlalchemy import text as _t
|
| 109 |
|
| 110 |
+
# Primary: join crdb_internal.table_statistics for latest collected row counts
|
| 111 |
try:
|
| 112 |
df = pd.read_sql(
|
| 113 |
_t("""
|
| 114 |
+
SELECT t.name AS table_name,
|
| 115 |
+
s.row_count
|
| 116 |
+
FROM crdb_internal.tables t
|
| 117 |
+
LEFT JOIN (
|
| 118 |
+
SELECT table_id,
|
| 119 |
+
row_count,
|
| 120 |
+
ROW_NUMBER() OVER (
|
| 121 |
+
PARTITION BY table_id ORDER BY created DESC
|
| 122 |
+
) AS rn
|
| 123 |
+
FROM crdb_internal.table_statistics
|
| 124 |
+
) s ON t.table_id = s.table_id AND s.rn = 1
|
| 125 |
+
WHERE t.schema_name = 'public'
|
| 126 |
+
AND t.table_type = 'table'
|
| 127 |
+
ORDER BY t.name
|
| 128 |
"""),
|
| 129 |
conn,
|
| 130 |
)
|
| 131 |
+
if not df.empty and df["row_count"].notna().any():
|
| 132 |
return df
|
| 133 |
except Exception:
|
| 134 |
pass
|
| 135 |
|
| 136 |
+
# Fallback: pg_class reltuples (approximate, also fast)
|
| 137 |
try:
|
| 138 |
df = pd.read_sql(
|
| 139 |
_t("""
|
|
|
|
| 148 |
"""),
|
| 149 |
conn,
|
| 150 |
)
|
| 151 |
+
if not df.empty and df["row_count"].notna().any():
|
| 152 |
return df
|
| 153 |
except Exception:
|
| 154 |
pass
|