add data viewer
Browse files- app/main.py +59 -0
- app/supabase_db.py +23 -1
app/main.py
CHANGED
|
@@ -694,6 +694,65 @@ async def update_all_documents():
|
|
| 694 |
logger.error(f"[API] Error in update_all_documents: {e}")
|
| 695 |
raise HTTPException(status_code=500, detail=f"Lỗi: {str(e)}")
|
| 696 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 697 |
@app.get("/api/document-chunks/status")
|
| 698 |
@timing_decorator_async
|
| 699 |
async def get_document_chunks_status():
|
|
|
|
| 694 |
logger.error(f"[API] Error in update_all_documents: {e}")
|
| 695 |
raise HTTPException(status_code=500, detail=f"Lỗi: {str(e)}")
|
| 696 |
|
| 697 |
+
@app.get("/api/document-chunks/view")
@timing_decorator_async
async def view_all_document_chunks():
    """
    Return every row of the document_chunks table, grouped by document.

    Rows are grouped by their 'vanbanid' value; rows without one end up
    together in a group whose key is None.

    Returns:
        A dict with:
          - "status": always "success" on this path.
          - "message": human-readable summary of the counts.
          - "summary": total chunk count, number of distinct documents
            (groups with a non-None 'vanbanid') and per-document stats.
          - "data": one entry per group, including the raw chunk rows.

    Raises:
        HTTPException: status 500 if fetching or grouping fails.
    """
    # Imported locally so this handler does not depend on the module's
    # top-level import list.
    from fastapi.concurrency import run_in_threadpool

    try:
        logger.info("[API] Starting view all document chunks")

        # The Supabase accessor is synchronous; run it in the threadpool so a
        # full-table read does not block the event loop for other requests.
        chunks_data = await run_in_threadpool(supabase_client.get_all_document_chunks)

        # Group rows by document id in one pass (insertion order preserved).
        chunks_by_document = {}
        for chunk in chunks_data:
            chunks_by_document.setdefault(chunk.get('vanbanid'), []).append(chunk)

        total_chunks = len(chunks_data)
        # Count real document ids only. Compare against None rather than using
        # truthiness so a valid id such as 0 is not silently dropped and the
        # figure stays consistent with the groups built above.
        unique_documents = sum(
            1 for vanbanid in chunks_by_document if vanbanid is not None
        )

        document_stats = []
        grouped_data = []
        for vanbanid, chunks in chunks_by_document.items():
            # Every group holds at least one row, so chunks[0] is always valid.
            document_title = chunks[0].get('document_title', 'Unknown')
            chunk_count = len(chunks)

            document_stats.append({
                "vanbanid": vanbanid,
                "chunk_count": chunk_count,
                "document_title": document_title,
            })
            grouped_data.append({
                "vanbanid": vanbanid,
                "document_title": document_title,
                "chunk_count": chunk_count,
                "chunks": chunks,
            })

        return {
            "status": "success",
            "message": f"Đã lấy {total_chunks} chunks từ {unique_documents} văn bản",
            "summary": {
                "total_chunks": total_chunks,
                "unique_documents": unique_documents,
                "document_stats": document_stats,
            },
            "data": grouped_data,
        }

    except Exception as e:
        # logger.exception keeps the traceback alongside the original message.
        logger.exception(f"[API] Error in view_all_document_chunks: {e}")
        raise HTTPException(status_code=500, detail=f"Lỗi: {str(e)}")
|
| 755 |
+
|
| 756 |
@app.get("/api/document-chunks/status")
|
| 757 |
@timing_decorator_async
|
| 758 |
async def get_document_chunks_status():
|
app/supabase_db.py
CHANGED
|
@@ -176,4 +176,26 @@ class SupabaseClient:
|
|
| 176 |
return True
|
| 177 |
except Exception as e:
|
| 178 |
logger.error(f"Error deleting chunks for vanbanid {vanbanid}: {e}")
|
| 179 |
-
return False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 176 |
return True
|
| 177 |
except Exception as e:
|
| 178 |
logger.error(f"Error deleting chunks for vanbanid {vanbanid}: {e}")
|
| 179 |
+
return False
|
| 180 |
+
|
| 181 |
+
@timing_decorator_sync
def get_all_document_chunks(self) -> List[Dict[str, Any]]:
    """
    Fetch every row of the document_chunks table.

    The table is read page by page because a single select is capped by the
    API's maximum-rows setting, which would silently truncate larger tables.

    Returns:
        List[Dict]: all chunk rows, or an empty list when the table is empty
        or when any request fails (the error is logged, not raised).
    """
    page_size = 1000
    all_chunks: List[Dict[str, Any]] = []

    try:
        logger.info("[SUPABASE] Fetching all document chunks")

        # NOTE(review): no explicit ordering is applied, so pages could overlap
        # or skip rows if the table is modified while reading — consider adding
        # .order(<primary key>) once the key column is confirmed.
        offset = 0
        while True:
            response = (
                self.client.table('document_chunks')
                .select('*')
                .range(offset, offset + page_size - 1)
                .execute()
            )
            page = response.data or []
            if not page:
                break
            all_chunks.extend(page)
            # Advance by what was actually returned: the server may cap a page
            # below page_size, so a short page does not prove the end was hit.
            offset += len(page)

    except Exception as e:
        # Keep the original contract: log and return an empty list on failure
        # (never a partial result). logger.exception preserves the traceback.
        logger.exception(f"[SUPABASE] Error fetching document chunks: {e}")
        return []

    if all_chunks:
        logger.info(f"[SUPABASE] Successfully fetched {len(all_chunks)} document chunks")
    else:
        logger.warning("[SUPABASE] No document chunks found")
    return all_chunks
|