Spaces:

youngtsai
/

career_app

Build error

Youngger9765

fix: Python 3.9 compatibility for HuggingFace Spaces

b830e05 5 months ago

5.11 kB

	"""API endpoints for database statistics"""

	from typing import List

	from fastapi import APIRouter, Depends
	from pydantic import BaseModel
	from sqlalchemy import text
	from sqlalchemy.ext.asyncio import AsyncSession

	from backend.database import get_db

	router = APIRouter(prefix="/api/stats", tags=["stats"])


	class DocumentStats(BaseModel):
	id: int
	title: str
	pages: int
	bytes: int
	chunks_count: int
	created_at: str


	class DatabaseStats(BaseModel):
	total_datasources: int
	total_documents: int
	total_chunks: int
	total_embeddings: int
	total_bytes: int
	documents: List[DocumentStats]


	@router.get("/", response_model=DatabaseStats)
	async def get_database_stats(db: AsyncSession = Depends(get_db)):
	"""
	Get database statistics including counts and document details

	Returns:
	DatabaseStats with all statistics
	"""

	# Get total counts
	datasources_count = await db.scalar(text("SELECT COUNT(*) FROM datasources"))
	documents_count = await db.scalar(text("SELECT COUNT(*) FROM documents"))
	chunks_count = await db.scalar(text("SELECT COUNT(*) FROM chunks"))
	embeddings_count = await db.scalar(text("SELECT COUNT(*) FROM embeddings"))
	total_bytes = await db.scalar(text("SELECT COALESCE(SUM(bytes), 0) FROM documents"))

	# Get document details with chunk counts
	query = text("""
	SELECT
	d.id,
	d.title,
	d.pages,
	d.bytes,
	d.created_at,
	COUNT(c.id) as chunks_count
	FROM documents d
	LEFT JOIN chunks c ON d.id = c.doc_id
	GROUP BY d.id, d.title, d.pages, d.bytes, d.created_at
	ORDER BY d.created_at DESC
	""")

	result = await db.execute(query)
	rows = result.fetchall()

	documents = [
	DocumentStats(
	id=row.id,
	title=row.title,
	pages=row.pages or 0,
	bytes=row.bytes or 0,
	chunks_count=row.chunks_count or 0,
	created_at=str(row.created_at),
	)
	for row in rows
	]

	return DatabaseStats(
	total_datasources=datasources_count or 0,
	total_documents=documents_count or 0,
	total_chunks=chunks_count or 0,
	total_embeddings=embeddings_count or 0,
	total_bytes=total_bytes or 0,
	documents=documents,
	)


	class ChunkDetail(BaseModel):
	id: int
	ordinal: int
	text: str
	text_length: int
	document_title: str


	@router.get("/chunks/{doc_id}", response_model=List[ChunkDetail])
	async def get_document_chunks(doc_id: int, db: AsyncSession = Depends(get_db)):
	"""
	Get all chunks for a specific document

	Args:
	doc_id: Document ID

	Returns:
	List of chunks with details
	"""

	query = text("""
	SELECT
	c.id,
	c.ordinal,
	c.text,
	LENGTH(c.text) as text_length,
	d.title as document_title
	FROM chunks c
	JOIN documents d ON c.doc_id = d.id
	WHERE c.doc_id = :doc_id
	ORDER BY c.ordinal
	""")

	result = await db.execute(query, {"doc_id": doc_id})
	rows = result.fetchall()

	return [
	ChunkDetail(
	id=row.id,
	ordinal=row.ordinal,
	text=row.text,
	text_length=row.text_length,
	document_title=row.document_title,
	)
	for row in rows
	]


	@router.delete("/documents/{doc_id}")
	async def delete_document(doc_id: int, db: AsyncSession = Depends(get_db)):
	"""
	Delete a document and all its related chunks and embeddings

	This will cascade delete:
	- All chunks associated with the document
	- All embeddings associated with those chunks
	- The document itself

	Args:
	doc_id: Document ID to delete

	Returns:
	Success message with deletion counts
	"""

	# Get chunk count before deletion
	chunks_result = await db.execute(
	text("SELECT COUNT(*) FROM chunks WHERE doc_id = :doc_id"), {"doc_id": doc_id}
	)
	chunks_count = chunks_result.scalar() or 0

	# Get embeddings count before deletion
	embeddings_result = await db.execute(
	text("""
	SELECT COUNT(*) FROM embeddings
	WHERE chunk_id IN (SELECT id FROM chunks WHERE doc_id = :doc_id)
	"""),
	{"doc_id": doc_id},
	)
	embeddings_count = embeddings_result.scalar() or 0

	# Delete embeddings first (FK constraint)
	await db.execute(
	text("""
	DELETE FROM embeddings
	WHERE chunk_id IN (SELECT id FROM chunks WHERE doc_id = :doc_id)
	"""),
	{"doc_id": doc_id},
	)

	# Delete chunks
	await db.execute(text("DELETE FROM chunks WHERE doc_id = :doc_id"), {"doc_id": doc_id})

	# Delete document
	await db.execute(text("DELETE FROM documents WHERE id = :doc_id"), {"doc_id": doc_id})

	await db.commit()

	return {
	"success": True,
	"message": f"成功刪除文檔 ID {doc_id}",
	"deleted": {"document": 1, "chunks": chunks_count, "embeddings": embeddings_count},
	}