VietCat committed on
Commit
0f13c77
·
1 Parent(s): 2c5219c

add data viewer

Browse files
Files changed (1) hide show
  1. app/supabase_db.py +17 -23
app/supabase_db.py CHANGED
@@ -197,46 +197,40 @@ class SupabaseClient:
197
  total_count = count_response.count if hasattr(count_response, 'count') else 'unknown'
198
  logger.info(f"[SUPABASE] Total records in table: {total_count}")
199
 
200
- # Thử lấy tất cả với limit lớn
201
- logger.info("[SUPABASE] Trying to fetch all records with large limit")
202
- response = self.client.table('document_chunks').select('*').limit(10000).execute()
203
-
204
- if response.data:
205
- actual_count = len(response.data)
206
- logger.info(f"[SUPABASE] Large limit fetched {actual_count} document chunks (expected: {total_count})")
207
-
208
- if isinstance(total_count, int) and actual_count >= total_count:
209
- logger.info("[SUPABASE] Successfully fetched all records")
210
- return response.data
211
- else:
212
- logger.warning(f"[SUPABASE] Only got {actual_count}/{total_count} records, trying pagination")
213
-
214
- # Fallback to pagination with very small page size
215
  all_chunks = []
216
- page_size = 300 # Rất nhỏ để test
217
- start = 0
218
  page_count = 0
219
 
220
  while True:
221
  page_count += 1
222
- response = self.client.table('document_chunks').select('*').range(start, start + page_size - 1).execute()
 
 
 
 
 
 
 
223
 
224
  actual_count = len(response.data) if response.data else 0
225
- logger.info(f"[SUPABASE] Page {page_count}: start={start}, end={start + page_size - 1}, requested={page_size}, actual={actual_count}")
226
 
227
  if not response.data:
228
- logger.info(f"[SUPABASE] No more data at start {start}")
229
  break
230
 
231
  all_chunks.extend(response.data)
232
 
 
 
 
 
233
  if actual_count < page_size:
234
  logger.info(f"[SUPABASE] Last page with {actual_count} records")
235
  break
236
-
237
- start += page_size
238
 
239
- logger.info(f"[SUPABASE] Pagination fetched {len(all_chunks)} document chunks (expected: {total_count})")
240
  logger.info(f"[SUPABASE] Fetched {page_count} pages with page_size={page_size}")
241
  return all_chunks
242
 
 
197
  total_count = count_response.count if hasattr(count_response, 'count') else 'unknown'
198
  logger.info(f"[SUPABASE] Total records in table: {total_count}")
199
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
200
  all_chunks = []
201
+ page_size = 1000
202
+ last_id = 0
203
  page_count = 0
204
 
205
  while True:
206
  page_count += 1
207
+
208
+ # Sử dụng cursor-based pagination với id
209
+ if last_id == 0:
210
+ # Lần đầu: lấy từ đầu
211
+ response = self.client.table('document_chunks').select('*').order('id').limit(page_size).execute()
212
+ else:
213
+ # Các lần sau: lấy từ id > last_id
214
+ response = self.client.table('document_chunks').select('*').order('id').gt('id', last_id).limit(page_size).execute()
215
 
216
  actual_count = len(response.data) if response.data else 0
217
+ logger.info(f"[SUPABASE] Page {page_count}: last_id={last_id}, requested={page_size}, actual={actual_count}")
218
 
219
  if not response.data:
220
+ logger.info(f"[SUPABASE] No more data after id {last_id}")
221
  break
222
 
223
  all_chunks.extend(response.data)
224
 
225
+ # Cập nhật last_id cho page tiếp theo
226
+ if response.data:
227
+ last_id = max(chunk.get('id', 0) for chunk in response.data)
228
+
229
  if actual_count < page_size:
230
  logger.info(f"[SUPABASE] Last page with {actual_count} records")
231
  break
 
 
232
 
233
+ logger.info(f"[SUPABASE] Cursor-based pagination fetched {len(all_chunks)} document chunks (expected: {total_count})")
234
  logger.info(f"[SUPABASE] Fetched {page_count} pages with page_size={page_size}")
235
  return all_chunks
236