Spaces:
Sleeping
Sleeping
File size: 5,975 Bytes
8b37702 c880083 f02c5b9 c880083 f02c5b9 c880083 f02c5b9 c880083 8b37702 c880083 8b37702 f02c5b9 8b37702 f02c5b9 8b37702 f02c5b9 8b37702 f02c5b9 8b37702 f02c5b9 8b37702 f02c5b9 8b37702 f02c5b9 8b37702 f02c5b9 8b37702 f02c5b9 8b37702 f02c5b9 8b37702 f02c5b9 5ebe979 f02c5b9 8b37702 f02c5b9 5ebe979 8b37702 f02c5b9 c880083 8b37702 c880083 8b37702 c880083 8b37702 c880083 8b37702 c880083 8b37702 c880083 f02c5b9 8b37702 f02c5b9 8b37702 f02c5b9 8b37702 f02c5b9 8b37702 f02c5b9 8b37702 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 | """Documents API endpoints — async upload with background processing."""
from fastapi import APIRouter, Depends, UploadFile, File, Form, HTTPException, status, BackgroundTasks
from pydantic import BaseModel
from sqlalchemy.orm import Session
from app.database import get_db
from app.auth import get_current_user
from app.models import User, Document
from app.application.document_service import DocumentService
from app.dependencies import get_document_service
from app.config import get_settings
from typing import Optional
import logging
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/documents", tags=["documents"])
settings = get_settings()
# Request body for PATCH /documents/{document_id}/move.
class MoveDocumentRequest(BaseModel):
    # Target folder id. When omitted or None, move_document skips the folder
    # lookup and stores None as the document's folder_id.
    folder_id: Optional[str] = None
def _doc_response(doc: Document) -> dict:
    """Serialize a document record into the dict returned by the endpoints.

    The plain attributes are copied as-is; ``created_at`` is rendered through
    its ``isoformat()`` method. Key order is: id, name, size, chunks, status,
    error_message, folder_id, created_at.
    """
    copied_fields = (
        "id",
        "name",
        "size",
        "chunks",
        "status",
        "error_message",
        "folder_id",
    )
    payload = {field: getattr(doc, field) for field in copied_fields}
    # The timestamp is the only value that is transformed before being returned.
    payload["created_at"] = doc.created_at.isoformat()
    return payload
@router.post("/upload", status_code=status.HTTP_202_ACCEPTED)
async def upload_document(
    background_tasks: BackgroundTasks,
    file: UploadFile = File(...),
    folder_id: Optional[str] = Form(None),
    document_service: DocumentService = Depends(get_document_service),
    current_user: User = Depends(get_current_user),
):
    """
    Accept a file upload and return immediately (202 Accepted).

    Processing (text extraction, embedding, vector storage) runs in the background.
    Poll GET /documents/{id}/status to track progress.

    Args:
        background_tasks: FastAPI task queue used to schedule the processing job.
        file: The uploaded file.
        folder_id: Optional folder to place the document in.
        document_service: Service that persists and processes the document.
        current_user: The authenticated user performing the upload.

    Returns:
        The serialized document record produced by ``_doc_response``.

    Raises:
        HTTPException: 400 if the file exceeds ``settings.MAX_FILE_SIZE`` or the
            service raises ``ValueError``; 500 for any other failure. The 500
            body is a generic message; details are only written to the log.
    """
    max_bytes = settings.MAX_FILE_SIZE
    too_large_detail = f"File too large. Max: {max_bytes // 1024 // 1024}MB"

    try:
        # When the upload object already reports its size, reject oversized
        # files before buffering the whole payload in memory.
        declared_size = getattr(file, "size", None)
        if isinstance(declared_size, int) and declared_size > max_bytes:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail=too_large_detail,
            )

        content = await file.read()
        file_size = len(content)
        if file_size > max_bytes:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail=too_large_detail,
            )

        # Persist document record immediately (status = "processing")
        document = await document_service.accept_upload(
            file_data=content,
            filename=file.filename,
            file_size=file_size,
            user=current_user,
            folder_id=folder_id,
        )

        # Schedule heavy work as a background task - does NOT block the response
        background_tasks.add_task(
            document_service.process_document_background,
            document.id,
        )
        return _doc_response(document)
    except HTTPException:
        # Already a well-formed HTTP error (e.g. the size check above).
        raise
    except ValueError as e:
        # ValueError from the service is reported to the client as a bad request.
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST, detail=str(e)
        ) from e
    except Exception as e:
        # Log the full traceback server-side, but do not echo internal
        # exception text back to the client.
        logger.exception("Upload error: %s", e)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Internal server error while uploading document",
        ) from e
@router.get("/{document_id}/status")
async def get_document_status(
    document_id: str,
    document_service: DocumentService = Depends(get_document_service),
    current_user: User = Depends(get_current_user),
):
    """
    Report the processing status of a single document.

    Intended to be polled after an upload. The response is the serialized
    document record, which includes ``status``, ``chunks`` and
    ``error_message``.

    Raises:
        HTTPException: 404 when the lookup or serialization raises ``ValueError``.
    """
    try:
        record = await document_service.get_document_status(document_id, current_user)
        payload = _doc_response(record)
    except ValueError as exc:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(exc))
    return payload
@router.get("/")
async def list_documents(
    folder_id: Optional[str] = None,
    root_only: bool = False,
    document_service: DocumentService = Depends(get_document_service),
    current_user: User = Depends(get_current_user),
):
    """
    List documents.

    - GET /documents/ -> all documents
    - GET /documents/?folder_id=X -> documents in folder X
    - GET /documents/?root_only=true -> only documents with no folder (root level)

    Returns a list of serialized document records.
    """
    found = await document_service.list_documents(current_user, folder_id, root_only)
    serialized = []
    for item in found:
        serialized.append(_doc_response(item))
    return serialized
@router.patch("/{document_id}/move")
async def move_document(
    document_id: str,
    request: MoveDocumentRequest,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """
    Move a document to a folder or root.

    The document, and the target folder when one is given, are both looked up
    within the current user's organisation. The new ``folder_id`` is then
    committed and the serialized document is returned.

    Raises:
        HTTPException: 404 if the document or the requested folder is not found.
    """
    document_query = db.query(Document).filter(
        Document.id == document_id,
        Document.org_id == current_user.org_id,
    )
    document = document_query.first()
    if not document:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Document not found")

    target_folder_id = request.folder_id
    if target_folder_id:
        # Imported here, as in the original, rather than at module level.
        from app.models import Folder

        folder_query = db.query(Folder).filter(
            Folder.id == target_folder_id,
            Folder.org_id == current_user.org_id,
        )
        if not folder_query.first():
            raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Folder not found")

    # A falsy folder_id skips the folder check and is stored as given.
    document.folder_id = target_folder_id
    db.commit()
    return _doc_response(document)
@router.delete("/{document_id}")
async def delete_document(
    document_id: str,
    document_service: DocumentService = Depends(get_document_service),
    current_user: User = Depends(get_current_user),
):
    """
    Delete a document on behalf of the current user.

    Returns a confirmation message on success.

    Raises:
        HTTPException: 404 when the service raises ``ValueError``.
    """
    try:
        await document_service.delete_document(document_id, current_user)
    except ValueError as exc:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(exc))
    return {"message": "Document deleted successfully"}
@router.get("/{document_id}/download")
async def get_download_url(
    document_id: str,
    document_service: DocumentService = Depends(get_document_service),
    current_user: User = Depends(get_current_user),
):
    """
    Return a download URL for a document.

    The URL is obtained from the document service and wrapped as
    ``{"download_url": ...}``.

    Raises:
        HTTPException: 404 when the service raises ``ValueError``.
    """
    try:
        download_url = await document_service.get_download_url(document_id, current_user)
    except ValueError as exc:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(exc))
    else:
        return {"download_url": download_url}
|