VietCat committed on
Commit
55d95bd
·
1 Parent(s): 880062b

add data viewer

Browse files
Files changed (2) hide show
  1. app/main.py +59 -0
  2. app/supabase_db.py +23 -1
app/main.py CHANGED
@@ -694,6 +694,65 @@ async def update_all_documents():
694
  logger.error(f"[API] Error in update_all_documents: {e}")
695
  raise HTTPException(status_code=500, detail=f"Lỗi: {str(e)}")
696
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
697
  @app.get("/api/document-chunks/status")
698
  @timing_decorator_async
699
  async def get_document_chunks_status():
 
694
  logger.error(f"[API] Error in update_all_documents: {e}")
695
  raise HTTPException(status_code=500, detail=f"Lỗi: {str(e)}")
696
 
@app.get("/api/document-chunks/view")
@timing_decorator_async
async def view_all_document_chunks():
    """
    Return every row of the document_chunks table, grouped by document.

    Returns:
        dict with the keys:
            status: "success" whenever no exception occurred.
            message: human-readable summary of how many chunks were read.
            summary: total_chunks, unique_documents and document_stats
                (one vanbanid / chunk_count / document_title entry per group).
            data: one entry per vanbanid with the same statistics plus the
                raw chunk rows under "chunks".

    Raises:
        HTTPException: status 500 when loading or grouping the chunks fails;
            the original exception is attached as the cause.
    """
    try:
        logger.info("[API] Starting view all document chunks")

        # Fetch the data from Supabase.
        # NOTE(review): this call looks synchronous, so it would block the
        # event loop while the table is downloaded - confirm and consider
        # running it in a worker thread.
        chunks_data = supabase_client.get_all_document_chunks()

        # Group rows by vanbanid in a single pass. Rows without a vanbanid
        # end up together under the None key.
        chunks_by_document = {}
        for chunk in chunks_data:
            chunks_by_document.setdefault(chunk.get('vanbanid'), []).append(chunk)

        # Basic statistics. Only truthy ids count as documents, so the group
        # of rows lacking a vanbanid is listed below but not counted here.
        total_chunks = len(chunks_data)
        unique_documents = sum(1 for vanbanid in chunks_by_document if vanbanid)

        # Per-document statistics and the grouped payload.
        document_stats = []
        grouped_data = []
        for vanbanid, chunks in chunks_by_document.items():
            # Every group holds at least one chunk, so chunks[0] is safe.
            chunk_count = len(chunks)
            document_title = chunks[0].get('document_title', 'Unknown')

            document_stats.append({
                "vanbanid": vanbanid,
                "chunk_count": chunk_count,
                "document_title": document_title,
            })
            grouped_data.append({
                "vanbanid": vanbanid,
                "document_title": document_title,
                "chunk_count": chunk_count,
                "chunks": chunks,
            })

        return {
            "status": "success",
            "message": f"Đã lấy {total_chunks} chunks từ {unique_documents} văn bản",
            "summary": {
                "total_chunks": total_chunks,
                "unique_documents": unique_documents,
                "document_stats": document_stats,
            },
            "data": grouped_data,
        }

    except Exception as e:
        # logger.exception keeps the same message but also records the
        # traceback, which logger.error would drop.
        logger.exception(f"[API] Error in view_all_document_chunks: {e}")
        raise HTTPException(status_code=500, detail=f"Lỗi: {str(e)}") from e
755
+
756
  @app.get("/api/document-chunks/status")
757
  @timing_decorator_async
758
  async def get_document_chunks_status():
app/supabase_db.py CHANGED
@@ -176,4 +176,26 @@ class SupabaseClient:
176
  return True
177
  except Exception as e:
178
  logger.error(f"Error deleting chunks for vanbanid {vanbanid}: {e}")
179
- return False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
176
  return True
177
  except Exception as e:
178
  logger.error(f"Error deleting chunks for vanbanid {vanbanid}: {e}")
179
+ return False
180
+
@timing_decorator_sync
def get_all_document_chunks(self, page_size: int = 1000) -> List[Dict[str, Any]]:
    """
    Fetch every row of the document_chunks table.

    A single unbounded select is capped by the PostgREST max-rows setting
    (1000 rows by default on Supabase), so the table is read page by page
    until the server returns an empty page.

    Args:
        page_size: number of rows requested per round trip; must be >= 1.

    Returns:
        All chunk rows as a list of dicts. An empty list is returned both
        when the table is empty and when the fetch fails (the failure is
        logged); callers cannot tell these two cases apart.

    Raises:
        ValueError: if page_size is smaller than 1.
    """
    if page_size < 1:
        raise ValueError("page_size must be at least 1")

    try:
        logger.info("[SUPABASE] Fetching all document chunks")

        # NOTE(review): no ORDER BY is applied because the table's key column
        # is not visible here; add a stable .order(...) if rows can be
        # written while this runs, otherwise pages may overlap or skip rows.
        rows: List[Dict[str, Any]] = []
        while True:
            offset = len(rows)
            response = (
                self.client.table('document_chunks')
                .select('*')
                .range(offset, offset + page_size - 1)  # both bounds inclusive
                .execute()
            )
            page = response.data or []
            # Stop on an empty page rather than a short one: the server may
            # cap a page below page_size, and a short page would then end
            # the loop too early.
            if not page:
                break
            rows.extend(page)

        if rows:
            logger.info(f"[SUPABASE] Successfully fetched {len(rows)} document chunks")
        else:
            logger.warning("[SUPABASE] No document chunks found")
        return rows

    except Exception as e:
        # Best-effort contract kept from the original: log and return an
        # empty list instead of propagating. The traceback is now recorded.
        logger.exception(f"[SUPABASE] Error fetching document chunks: {e}")
        return []