"""Documents API endpoints — async upload with background processing.""" from fastapi import APIRouter, Depends, UploadFile, File, Form, HTTPException, status, BackgroundTasks from pydantic import BaseModel from sqlalchemy.orm import Session from app.database import get_db from app.auth import get_current_user from app.models import User, Document from app.application.document_service import DocumentService from app.dependencies import get_document_service from app.config import get_settings from typing import Optional import logging logger = logging.getLogger(__name__) router = APIRouter(prefix="/documents", tags=["documents"]) settings = get_settings() class MoveDocumentRequest(BaseModel): folder_id: Optional[str] = None def _doc_response(doc: Document) -> dict: return { "id": doc.id, "name": doc.name, "size": doc.size, "chunks": doc.chunks, "status": doc.status, "error_message": doc.error_message, "folder_id": doc.folder_id, "created_at": doc.created_at.isoformat(), } @router.post("/upload", status_code=status.HTTP_202_ACCEPTED) async def upload_document( background_tasks: BackgroundTasks, file: UploadFile = File(...), folder_id: Optional[str] = Form(None), document_service: DocumentService = Depends(get_document_service), current_user: User = Depends(get_current_user), ): """ Accept a file upload and return immediately (202 Accepted). Processing (text extraction, embedding, vector storage) runs in the background. Poll GET /documents/{id}/status to track progress. """ try: content = await file.read() file_size = len(content) if file_size > settings.MAX_FILE_SIZE: raise HTTPException( status_code=status.HTTP_400_BAD_REQUEST, detail=f"File too large. Max: {settings.MAX_FILE_SIZE // 1024 // 1024}MB", ) # Persist document record immediately (status = "processing") document = await document_service.accept_upload( file_data=content, filename=file.filename, file_size=file_size, user=current_user, folder_id=folder_id, ) # Schedule heavy work as a background task — does NOT block the response background_tasks.add_task( document_service.process_document_background, document.id, ) return _doc_response(document) except ValueError as e: raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(e)) except HTTPException: raise except Exception as e: logger.error(f"Upload error: {e}", exc_info=True) raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)) @router.get("/{document_id}/status") async def get_document_status( document_id: str, document_service: DocumentService = Depends(get_document_service), current_user: User = Depends(get_current_user), ): """ Poll processing status for a document. Returns: { status: 'processing' | 'done' | 'error', chunks, error_message } """ try: doc = await document_service.get_document_status(document_id, current_user) return _doc_response(doc) except ValueError as e: raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(e)) @router.get("/") async def list_documents( folder_id: Optional[str] = None, root_only: bool = False, document_service: DocumentService = Depends(get_document_service), current_user: User = Depends(get_current_user), ): """ List documents. - GET /documents/ → all documents - GET /documents/?folder_id=X → documents in folder X - GET /documents/?root_only=true → only documents with no folder (root level) """ documents = await document_service.list_documents(current_user, folder_id, root_only) return [_doc_response(doc) for doc in documents] @router.patch("/{document_id}/move") async def move_document( document_id: str, request: MoveDocumentRequest, db: Session = Depends(get_db), current_user: User = Depends(get_current_user), ): """Move a document to a folder or root.""" document = db.query(Document).filter( Document.id == document_id, Document.org_id == current_user.org_id, ).first() if not document: raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Document not found") if request.folder_id: from app.models import Folder folder = db.query(Folder).filter( Folder.id == request.folder_id, Folder.org_id == current_user.org_id, ).first() if not folder: raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Folder not found") document.folder_id = request.folder_id db.commit() return _doc_response(document) @router.delete("/{document_id}") async def delete_document( document_id: str, document_service: DocumentService = Depends(get_document_service), current_user: User = Depends(get_current_user), ): """Delete a document.""" try: await document_service.delete_document(document_id, current_user) return {"message": "Document deleted successfully"} except ValueError as e: raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(e)) @router.get("/{document_id}/download") async def get_download_url( document_id: str, document_service: DocumentService = Depends(get_document_service), current_user: User = Depends(get_current_user), ): """Get download URL.""" try: url = await document_service.get_download_url(document_id, current_user) return {"download_url": url} except ValueError as e: raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(e))