Syntrex Claude Sonnet 4.6 committed on
Commit
9017da6
·
1 Parent(s): 449cb1d

Fix db inventory: capture guaranteed floor before complex queries

Browse files

Run the estimated_row_count query first and save its result in the
`guaranteed` variable before attempting the table_statistics JOIN or the
pg_class query. This way, if an early query failure poisons the SQLAlchemy
connection, the fallback result has already been captured and is still returned.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

Files changed (1) hide show
  1. visualization/debug_page.py +31 -23
visualization/debug_page.py CHANGED
@@ -102,13 +102,37 @@ _LADDER_TB2P_FIELDS = [
102
  def _query_db_inventory(conn) -> pd.DataFrame:
103
  """
104
  List all tables in the public schema with estimated row counts.
105
- Uses CockroachDB auto-stats (table_statistics join) with pg_class fallback,
106
- and a final-resort name-only fallback so the table always renders.
107
  Returns DataFrame with columns [table_name, row_count], sorted by table_name.
108
  """
109
  from sqlalchemy import text as _t
110
 
111
- # Primary: join crdb_internal.table_statistics for latest collected row counts
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
  try:
113
  df = pd.read_sql(
114
  _t("""
@@ -134,7 +158,7 @@ def _query_db_inventory(conn) -> pd.DataFrame:
134
  except Exception:
135
  pass
136
 
137
- # Fallback: pg_class reltuples (approximate, also fast)
138
  try:
139
  df = pd.read_sql(
140
  _t("""
@@ -154,25 +178,9 @@ def _query_db_inventory(conn) -> pd.DataFrame:
154
  except Exception:
155
  pass
156
 
157
- # Final resort: table names only, row_count will be NULL — better than empty
158
- try:
159
- df = pd.read_sql(
160
- _t("""
161
- SELECT name AS table_name,
162
- estimated_row_count AS row_count
163
- FROM crdb_internal.tables
164
- WHERE schema_name = 'public'
165
- AND table_type = 'table'
166
- ORDER BY name
167
- """),
168
- conn,
169
- )
170
- if not df.empty:
171
- return df
172
- except Exception:
173
- pass
174
-
175
- return pd.DataFrame(columns=["table_name", "row_count"])
176
 
177
 
178
  def _get_table_columns(conn, table_name: str) -> set:
 
102
  def _query_db_inventory(conn) -> pd.DataFrame:
103
  """
104
  List all tables in the public schema with estimated row counts.
105
+ Runs the safe estimated_row_count query first to guarantee a renderable
106
+ floor result, then attempts richer sources (table_statistics, pg_class).
107
  Returns DataFrame with columns [table_name, row_count], sorted by table_name.
108
  """
109
  from sqlalchemy import text as _t
110
 
111
+ # Step 1 — always capture a guaranteed floor first.
112
+ # estimated_row_count is the proven-safe query (worked in 8f09f14).
113
+ # Runs before any complex query that might corrupt the connection object.
114
+ guaranteed = pd.DataFrame(columns=["table_name", "row_count"])
115
+ try:
116
+ df = pd.read_sql(
117
+ _t("""
118
+ SELECT name AS table_name,
119
+ estimated_row_count AS row_count
120
+ FROM crdb_internal.tables
121
+ WHERE schema_name = 'public'
122
+ AND table_type = 'table'
123
+ ORDER BY name
124
+ """),
125
+ conn,
126
+ )
127
+ if not df.empty:
128
+ guaranteed = df
129
+ if df["row_count"].notna().any():
130
+ return df # actual counts available — done
131
+ except Exception:
132
+ pass
133
+
134
+ # Step 2 — try table_statistics JOIN for richer counts.
135
+ # If this corrupts conn, guaranteed is already saved above.
136
  try:
137
  df = pd.read_sql(
138
  _t("""
 
158
  except Exception:
159
  pass
160
 
161
+ # Step 3 — pg_class reltuples.
162
  try:
163
  df = pd.read_sql(
164
  _t("""
 
178
  except Exception:
179
  pass
180
 
181
+ # Always return at least table names (guaranteed may be empty only if DB is
182
+ # fully unreachable, in which case the "check DB connection" warning is correct).
183
+ return guaranteed
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
184
 
185
 
186
  def _get_table_columns(conn, table_name: str) -> set: