Ragora-Server / app /api /documents.py
Peterase's picture
feat: nested folder hierarchy with parent_id, root_only filter, recursive delete
5ebe979
"""Documents API endpoints β€” async upload with background processing."""
from fastapi import APIRouter, Depends, UploadFile, File, Form, HTTPException, status, BackgroundTasks
from pydantic import BaseModel
from sqlalchemy.orm import Session
from app.database import get_db
from app.auth import get_current_user
from app.models import User, Document
from app.application.document_service import DocumentService
from app.dependencies import get_document_service
from app.config import get_settings
from typing import Optional
import logging
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Router that collects every endpoint in this file under the /documents prefix.
router = APIRouter(prefix="/documents", tags=["documents"])
# Settings are resolved once, at import time; upload_document reads
# settings.MAX_FILE_SIZE from this object.
settings = get_settings()
class MoveDocumentRequest(BaseModel):
    """Request body for PATCH /documents/{document_id}/move."""

    # Id of the destination folder; defaults to None when omitted, which
    # move_document writes as "no folder" (root level).
    folder_id: Optional[str] = None
def _doc_response(doc: Document) -> dict:
    """Serialize a document record into the dict returned by the API.

    Copies the plain attributes as-is and renders ``created_at`` as an
    ISO-8601 string via ``isoformat()``.
    """
    # Attributes that are passed through unchanged, in response key order.
    passthrough = (
        "id",
        "name",
        "size",
        "chunks",
        "status",
        "error_message",
        "folder_id",
    )
    payload = {field: getattr(doc, field) for field in passthrough}
    payload["created_at"] = doc.created_at.isoformat()
    return payload
@router.post("/upload", status_code=status.HTTP_202_ACCEPTED)
async def upload_document(
    background_tasks: BackgroundTasks,
    file: UploadFile = File(...),
    folder_id: Optional[str] = Form(None),
    document_service: DocumentService = Depends(get_document_service),
    current_user: User = Depends(get_current_user),
):
    """
    Accept a file upload and return immediately (202 Accepted).

    Processing (text extraction, embedding, vector storage) runs in the background.
    Poll GET /documents/{id}/status to track progress.

    Returns:
        The serialized document record created for the upload.

    Raises:
        HTTPException: 400 when the file exceeds settings.MAX_FILE_SIZE or the
            document service rejects the upload with a ValueError; 500 for any
            other unexpected failure (details are logged, not returned).
    """
    try:
        max_size = settings.MAX_FILE_SIZE
        # Read at most one byte past the limit: that is enough to detect an
        # oversized upload without pulling the whole payload into memory.
        content = await file.read(max_size + 1)
        file_size = len(content)
        if file_size > max_size:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail=f"File too large. Max: {max_size // 1024 // 1024}MB",
            )

        # Persist document record immediately (status = "processing")
        document = await document_service.accept_upload(
            file_data=content,
            filename=file.filename,
            file_size=file_size,
            user=current_user,
            folder_id=folder_id,
        )

        # Schedule heavy work as a background task — does NOT block the response
        background_tasks.add_task(
            document_service.process_document_background,
            document.id,
        )
        return _doc_response(document)
    except HTTPException:
        # Already a well-formed HTTP error (e.g. the size check above).
        raise
    except ValueError as e:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST, detail=str(e)
        ) from e
    except Exception as e:
        # Log the full traceback server-side, but do not echo internal
        # exception text back to the client.
        logger.exception("Upload error: %s", e)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Internal server error while processing upload",
        ) from e
@router.get("/{document_id}/status")
async def get_document_status(
    document_id: str,
    document_service: DocumentService = Depends(get_document_service),
    current_user: User = Depends(get_current_user),
):
    """
    Report the current processing state of a single document.

    Returns: { status: 'processing' | 'done' | 'error', chunks, error_message }
    (plus the other serialized document fields).
    A ValueError raised while handling the request is answered with 404.
    """
    try:
        return _doc_response(
            await document_service.get_document_status(document_id, current_user)
        )
    except ValueError as lookup_error:
        raise HTTPException(
            detail=str(lookup_error),
            status_code=status.HTTP_404_NOT_FOUND,
        )
@router.get("/")
async def list_documents(
    folder_id: Optional[str] = None,
    root_only: bool = False,
    document_service: DocumentService = Depends(get_document_service),
    current_user: User = Depends(get_current_user),
):
    """
    List documents.

    - GET /documents/ → all documents
    - GET /documents/?folder_id=X → documents in folder X
    - GET /documents/?root_only=true → only documents with no folder (root level)
    """
    found = await document_service.list_documents(current_user, folder_id, root_only)
    # Serialize every record with the shared response shape.
    return list(map(_doc_response, found))
@router.patch("/{document_id}/move")
async def move_document(
    document_id: str,
    request: MoveDocumentRequest,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """
    Move a document to a folder or root.

    A null, missing, or empty ``folder_id`` moves the document to root level.
    Both the document and the target folder are looked up within the caller's
    organisation (``org_id``).

    Raises:
        HTTPException: 404 when the document, or the requested target folder,
            is not found for the caller's organisation.
    """
    document = (
        db.query(Document)
        .filter(
            Document.id == document_id,
            Document.org_id == current_user.org_id,
        )
        .first()
    )
    if not document:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND, detail="Document not found"
        )

    # Normalise "" to None: an empty string used to skip the folder check
    # below yet was still written to document.folder_id unvalidated.
    target_folder_id = request.folder_id or None

    if target_folder_id is not None:
        from app.models import Folder

        folder = (
            db.query(Folder)
            .filter(
                Folder.id == target_folder_id,
                Folder.org_id == current_user.org_id,
            )
            .first()
        )
        if not folder:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND, detail="Folder not found"
            )

    document.folder_id = target_folder_id
    db.commit()
    return _doc_response(document)
@router.delete("/{document_id}")
async def delete_document(
    document_id: str,
    document_service: DocumentService = Depends(get_document_service),
    current_user: User = Depends(get_current_user),
):
    """
    Delete a document on behalf of the current user.

    Returns a confirmation message on success; a ValueError from the
    document service is answered with 404.
    """
    try:
        await document_service.delete_document(document_id, current_user)
    except ValueError as lookup_error:
        raise HTTPException(
            detail=str(lookup_error),
            status_code=status.HTTP_404_NOT_FOUND,
        )
    else:
        return {"message": "Document deleted successfully"}
@router.get("/{document_id}/download")
async def get_download_url(
    document_id: str,
    document_service: DocumentService = Depends(get_document_service),
    current_user: User = Depends(get_current_user),
):
    """
    Return a download URL for a document.

    The URL is obtained from the document service and wrapped as
    ``{"download_url": ...}``; a ValueError from the service is answered
    with 404.
    """
    try:
        link = await document_service.get_download_url(document_id, current_user)
    except ValueError as lookup_error:
        raise HTTPException(
            detail=str(lookup_error),
            status_code=status.HTTP_404_NOT_FOUND,
        )
    else:
        return {"download_url": link}