Spaces:
Build error
Build error
| """API endpoints for database statistics""" | |
| from typing import List | |
| from fastapi import APIRouter, Depends | |
| from pydantic import BaseModel | |
| from sqlalchemy import text | |
| from sqlalchemy.ext.asyncio import AsyncSession | |
| from backend.database import get_db | |
| router = APIRouter(prefix="/api/stats", tags=["stats"]) | |
| class DocumentStats(BaseModel): | |
| id: int | |
| title: str | |
| pages: int | |
| bytes: int | |
| chunks_count: int | |
| created_at: str | |
| class DatabaseStats(BaseModel): | |
| total_datasources: int | |
| total_documents: int | |
| total_chunks: int | |
| total_embeddings: int | |
| total_bytes: int | |
| documents: List[DocumentStats] | |
| async def get_database_stats(db: AsyncSession = Depends(get_db)): | |
| """ | |
| Get database statistics including counts and document details | |
| Returns: | |
| DatabaseStats with all statistics | |
| """ | |
| # Get total counts | |
| datasources_count = await db.scalar(text("SELECT COUNT(*) FROM datasources")) | |
| documents_count = await db.scalar(text("SELECT COUNT(*) FROM documents")) | |
| chunks_count = await db.scalar(text("SELECT COUNT(*) FROM chunks")) | |
| embeddings_count = await db.scalar(text("SELECT COUNT(*) FROM embeddings")) | |
| total_bytes = await db.scalar(text("SELECT COALESCE(SUM(bytes), 0) FROM documents")) | |
| # Get document details with chunk counts | |
| query = text(""" | |
| SELECT | |
| d.id, | |
| d.title, | |
| d.pages, | |
| d.bytes, | |
| d.created_at, | |
| COUNT(c.id) as chunks_count | |
| FROM documents d | |
| LEFT JOIN chunks c ON d.id = c.doc_id | |
| GROUP BY d.id, d.title, d.pages, d.bytes, d.created_at | |
| ORDER BY d.created_at DESC | |
| """) | |
| result = await db.execute(query) | |
| rows = result.fetchall() | |
| documents = [ | |
| DocumentStats( | |
| id=row.id, | |
| title=row.title, | |
| pages=row.pages or 0, | |
| bytes=row.bytes or 0, | |
| chunks_count=row.chunks_count or 0, | |
| created_at=str(row.created_at), | |
| ) | |
| for row in rows | |
| ] | |
| return DatabaseStats( | |
| total_datasources=datasources_count or 0, | |
| total_documents=documents_count or 0, | |
| total_chunks=chunks_count or 0, | |
| total_embeddings=embeddings_count or 0, | |
| total_bytes=total_bytes or 0, | |
| documents=documents, | |
| ) | |
| class ChunkDetail(BaseModel): | |
| id: int | |
| ordinal: int | |
| text: str | |
| text_length: int | |
| document_title: str | |
| async def get_document_chunks(doc_id: int, db: AsyncSession = Depends(get_db)): | |
| """ | |
| Get all chunks for a specific document | |
| Args: | |
| doc_id: Document ID | |
| Returns: | |
| List of chunks with details | |
| """ | |
| query = text(""" | |
| SELECT | |
| c.id, | |
| c.ordinal, | |
| c.text, | |
| LENGTH(c.text) as text_length, | |
| d.title as document_title | |
| FROM chunks c | |
| JOIN documents d ON c.doc_id = d.id | |
| WHERE c.doc_id = :doc_id | |
| ORDER BY c.ordinal | |
| """) | |
| result = await db.execute(query, {"doc_id": doc_id}) | |
| rows = result.fetchall() | |
| return [ | |
| ChunkDetail( | |
| id=row.id, | |
| ordinal=row.ordinal, | |
| text=row.text, | |
| text_length=row.text_length, | |
| document_title=row.document_title, | |
| ) | |
| for row in rows | |
| ] | |
| async def delete_document(doc_id: int, db: AsyncSession = Depends(get_db)): | |
| """ | |
| Delete a document and all its related chunks and embeddings | |
| This will cascade delete: | |
| - All chunks associated with the document | |
| - All embeddings associated with those chunks | |
| - The document itself | |
| Args: | |
| doc_id: Document ID to delete | |
| Returns: | |
| Success message with deletion counts | |
| """ | |
| # Get chunk count before deletion | |
| chunks_result = await db.execute( | |
| text("SELECT COUNT(*) FROM chunks WHERE doc_id = :doc_id"), {"doc_id": doc_id} | |
| ) | |
| chunks_count = chunks_result.scalar() or 0 | |
| # Get embeddings count before deletion | |
| embeddings_result = await db.execute( | |
| text(""" | |
| SELECT COUNT(*) FROM embeddings | |
| WHERE chunk_id IN (SELECT id FROM chunks WHERE doc_id = :doc_id) | |
| """), | |
| {"doc_id": doc_id}, | |
| ) | |
| embeddings_count = embeddings_result.scalar() or 0 | |
| # Delete embeddings first (FK constraint) | |
| await db.execute( | |
| text(""" | |
| DELETE FROM embeddings | |
| WHERE chunk_id IN (SELECT id FROM chunks WHERE doc_id = :doc_id) | |
| """), | |
| {"doc_id": doc_id}, | |
| ) | |
| # Delete chunks | |
| await db.execute(text("DELETE FROM chunks WHERE doc_id = :doc_id"), {"doc_id": doc_id}) | |
| # Delete document | |
| await db.execute(text("DELETE FROM documents WHERE id = :doc_id"), {"doc_id": doc_id}) | |
| await db.commit() | |
| return { | |
| "success": True, | |
| "message": f"成功刪除文檔 ID {doc_id}", | |
| "deleted": {"document": 1, "chunks": chunks_count, "embeddings": embeddings_count}, | |
| } | |