add data viewer
Browse files
- app/supabase_db.py +17 -23
app/supabase_db.py
CHANGED
|
@@ -197,46 +197,40 @@ class SupabaseClient:
|
|
| 197 |
total_count = count_response.count if hasattr(count_response, 'count') else 'unknown'
|
| 198 |
logger.info(f"[SUPABASE] Total records in table: {total_count}")
|
| 199 |
|
| 200 |
-
# Thử lấy tất cả với limit lớn
|
| 201 |
-
logger.info("[SUPABASE] Trying to fetch all records with large limit")
|
| 202 |
-
response = self.client.table('document_chunks').select('*').limit(10000).execute()
|
| 203 |
-
|
| 204 |
-
if response.data:
|
| 205 |
-
actual_count = len(response.data)
|
| 206 |
-
logger.info(f"[SUPABASE] Large limit fetched {actual_count} document chunks (expected: {total_count})")
|
| 207 |
-
|
| 208 |
-
if isinstance(total_count, int) and actual_count >= total_count:
|
| 209 |
-
logger.info("[SUPABASE] Successfully fetched all records")
|
| 210 |
-
return response.data
|
| 211 |
-
else:
|
| 212 |
-
logger.warning(f"[SUPABASE] Only got {actual_count}/{total_count} records, trying pagination")
|
| 213 |
-
|
| 214 |
-
# Fallback to pagination with very small page size
|
| 215 |
all_chunks = []
|
| 216 |
-
page_size =
|
| 217 |
-
|
| 218 |
page_count = 0
|
| 219 |
|
| 220 |
while True:
|
| 221 |
page_count += 1
|
| 222 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 223 |
|
| 224 |
actual_count = len(response.data) if response.data else 0
|
| 225 |
-
logger.info(f"[SUPABASE] Page {page_count}:
|
| 226 |
|
| 227 |
if not response.data:
|
| 228 |
-
logger.info(f"[SUPABASE] No more data
|
| 229 |
break
|
| 230 |
|
| 231 |
all_chunks.extend(response.data)
|
| 232 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 233 |
if actual_count < page_size:
|
| 234 |
logger.info(f"[SUPABASE] Last page with {actual_count} records")
|
| 235 |
break
|
| 236 |
-
|
| 237 |
-
start += page_size
|
| 238 |
|
| 239 |
-
logger.info(f"[SUPABASE]
|
| 240 |
logger.info(f"[SUPABASE] Fetched {page_count} pages with page_size={page_size}")
|
| 241 |
return all_chunks
|
| 242 |
|
|
|
|
| 197 |
total_count = count_response.count if hasattr(count_response, 'count') else 'unknown'
|
| 198 |
logger.info(f"[SUPABASE] Total records in table: {total_count}")
|
| 199 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 200 |
all_chunks = []
|
| 201 |
+
page_size = 1000
|
| 202 |
+
last_id = 0
|
| 203 |
page_count = 0
|
| 204 |
|
| 205 |
while True:
|
| 206 |
page_count += 1
|
| 207 |
+
|
| 208 |
+
# Sử dụng cursor-based pagination với id
|
| 209 |
+
if last_id == 0:
|
| 210 |
+
# Lần đầu: lấy từ đầu
|
| 211 |
+
response = self.client.table('document_chunks').select('*').order('id').limit(page_size).execute()
|
| 212 |
+
else:
|
| 213 |
+
# Các lần sau: lấy từ id > last_id
|
| 214 |
+
response = self.client.table('document_chunks').select('*').order('id').gt('id', last_id).limit(page_size).execute()
|
| 215 |
|
| 216 |
actual_count = len(response.data) if response.data else 0
|
| 217 |
+
logger.info(f"[SUPABASE] Page {page_count}: last_id={last_id}, requested={page_size}, actual={actual_count}")
|
| 218 |
|
| 219 |
if not response.data:
|
| 220 |
+
logger.info(f"[SUPABASE] No more data after id {last_id}")
|
| 221 |
break
|
| 222 |
|
| 223 |
all_chunks.extend(response.data)
|
| 224 |
|
| 225 |
+
# Cập nhật last_id cho page tiếp theo
|
| 226 |
+
if response.data:
|
| 227 |
+
last_id = max(chunk.get('id', 0) for chunk in response.data)
|
| 228 |
+
|
| 229 |
if actual_count < page_size:
|
| 230 |
logger.info(f"[SUPABASE] Last page with {actual_count} records")
|
| 231 |
break
|
|
|
|
|
|
|
| 232 |
|
| 233 |
+
logger.info(f"[SUPABASE] Cursor-based pagination fetched {len(all_chunks)} document chunks (expected: {total_count})")
|
| 234 |
logger.info(f"[SUPABASE] Fetched {page_count} pages with page_size={page_size}")
|
| 235 |
return all_chunks
|
| 236 |
|