Spaces:
Running
Running
Saurabh Kumar Bajpai committed on
Commit ·
6f5def0
1
Parent(s): 457eacc
perf: expose async document processing status
Browse files
- README.md +2 -1
- backend/app/routes/documents.py +30 -4
- backend/app/schemas.py +11 -0
README.md
CHANGED
|
@@ -437,8 +437,9 @@ docker compose up --build
|
|
| 437 |
| `POST` | `/api/v1/auth/register` | ❌ | Create a new user account |
|
| 438 |
| `POST` | `/api/v1/auth/login` | ❌ | Login and receive JWT token |
|
| 439 |
| `GET` | `/api/v1/auth/me` | ✅ | Get current user profile |
|
| 440 |
-
| `POST` | `/api/v1/documents/upload` | ✅ | Upload PDF/DOCX and
|
| 441 |
| `GET` | `/api/v1/documents` | ✅ | List all documents for current user |
|
|
|
|
| 442 |
| `DELETE` | `/api/v1/documents/{id}` | ✅ | Delete a document and its vector data |
|
| 443 |
| `POST` | `/api/v1/chat/ask/stream` | ✅ | Ask a question (SSE streaming response) |
|
| 444 |
| `GET` | `/api/v1/chat/history/{doc_id}` | ✅ | Get chat history for a document |
|
|
|
|
| 437 |
| `POST` | `/api/v1/auth/register` | ❌ | Create a new user account |
|
| 438 |
| `POST` | `/api/v1/auth/login` | ❌ | Login and receive JWT token |
|
| 439 |
| `GET` | `/api/v1/auth/me` | ✅ | Get current user profile |
|
| 440 |
+
| `POST` | `/api/v1/documents/upload` | ✅ | Upload PDF/DOCX and enqueue background indexing (`202 Accepted`) |
|
| 441 |
| `GET` | `/api/v1/documents` | ✅ | List all documents for current user |
|
| 442 |
+
| `GET` | `/api/v1/documents/{id}/status` | ✅ | Poll background document processing status |
|
| 443 |
| `DELETE` | `/api/v1/documents/{id}` | ✅ | Delete a document and its vector data |
|
| 444 |
| `POST` | `/api/v1/chat/ask/stream` | ✅ | Ask a question (SSE streaming response) |
|
| 445 |
| `GET` | `/api/v1/chat/history/{doc_id}` | ✅ | Get chat history for a document |
|
backend/app/routes/documents.py
CHANGED
|
@@ -16,7 +16,7 @@ from sqlalchemy.orm import Session
|
|
| 16 |
|
| 17 |
from app.database import get_db
|
| 18 |
from app.models import User, Document
|
| 19 |
-
from app.schemas import DocumentResponse, DocumentListResponse
|
| 20 |
from app.auth import get_current_user
|
| 21 |
from app.config import get_settings
|
| 22 |
from app.rag.chunker import chunk_document, get_page_count
|
|
@@ -191,7 +191,7 @@ def _ingest_document(document_id: str, filepath: str, original_name: str, user_i
|
|
| 191 |
db.close()
|
| 192 |
|
| 193 |
|
| 194 |
-
@router.post("/upload", response_model=DocumentResponse, status_code=status.
|
| 195 |
async def upload_document(
|
| 196 |
background_tasks: BackgroundTasks,
|
| 197 |
file: UploadFile = File(...),
|
|
@@ -199,11 +199,13 @@ async def upload_document(
|
|
| 199 |
db: Session = Depends(get_db),
|
| 200 |
):
|
| 201 |
"""
|
| 202 |
-
Upload a document
|
| 203 |
|
| 204 |
Validates the uploaded file (extension, size, MIME type, integrity),
|
| 205 |
saves it to the user's directory, creates a database record with status
|
| 206 |
-
'pending',
|
|
|
|
|
|
|
| 207 |
|
| 208 |
Args:
|
| 209 |
background_tasks: FastAPI BackgroundTasks instance to run the ingestion process asynchronously.
|
|
@@ -272,6 +274,30 @@ async def upload_document(
|
|
| 272 |
return DocumentResponse.model_validate(document)
|
| 273 |
|
| 274 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 275 |
@router.get("/", response_model=DocumentListResponse)
|
| 276 |
def list_documents(
|
| 277 |
page: int = Query(1, ge=1),
|
|
|
|
| 16 |
|
| 17 |
from app.database import get_db
|
| 18 |
from app.models import User, Document
|
| 19 |
+
from app.schemas import DocumentResponse, DocumentListResponse, DocumentStatusResponse
|
| 20 |
from app.auth import get_current_user
|
| 21 |
from app.config import get_settings
|
| 22 |
from app.rag.chunker import chunk_document, get_page_count
|
|
|
|
| 191 |
db.close()
|
| 192 |
|
| 193 |
|
| 194 |
+
@router.post("/upload", response_model=DocumentResponse, status_code=status.HTTP_202_ACCEPTED)
|
| 195 |
async def upload_document(
|
| 196 |
background_tasks: BackgroundTasks,
|
| 197 |
file: UploadFile = File(...),
|
|
|
|
| 199 |
db: Session = Depends(get_db),
|
| 200 |
):
|
| 201 |
"""
|
| 202 |
+
Upload a document and enqueue RAG processing.
|
| 203 |
|
| 204 |
Validates the uploaded file (extension, size, MIME type, integrity),
|
| 205 |
saves it to the user's directory, creates a database record with status
|
| 206 |
+
'pending', schedules a background task for chunking and embedding, and
|
| 207 |
+
returns 202 Accepted immediately so large documents do not block the API
|
| 208 |
+
request while embeddings are generated.
|
| 209 |
|
| 210 |
Args:
|
| 211 |
background_tasks: FastAPI BackgroundTasks instance to run the ingestion process asynchronously.
|
|
|
|
| 274 |
return DocumentResponse.model_validate(document)
|
| 275 |
|
| 276 |
|
| 277 |
+
@router.get("/{document_id}/status", response_model=DocumentStatusResponse)
|
| 278 |
+
def get_document_status(
|
| 279 |
+
document_id: str,
|
| 280 |
+
user: User = Depends(get_current_user),
|
| 281 |
+
db: Session = Depends(get_db),
|
| 282 |
+
):
|
| 283 |
+
"""
|
| 284 |
+
Poll processing status for a single uploaded document.
|
| 285 |
+
|
| 286 |
+
This endpoint lets clients refresh the upload lifecycle without fetching
|
| 287 |
+
the entire document list. The returned status is one of the existing
|
| 288 |
+
document states: pending, processing, ready, or failed.
|
| 289 |
+
"""
|
| 290 |
+
doc = db.query(Document).filter(
|
| 291 |
+
Document.id == document_id,
|
| 292 |
+
Document.user_id == user.id,
|
| 293 |
+
).first()
|
| 294 |
+
|
| 295 |
+
if not doc:
|
| 296 |
+
raise HTTPException(status_code=404, detail="Document not found")
|
| 297 |
+
|
| 298 |
+
return DocumentStatusResponse.model_validate(doc)
|
| 299 |
+
|
| 300 |
+
|
| 301 |
@router.get("/", response_model=DocumentListResponse)
|
| 302 |
def list_documents(
|
| 303 |
page: int = Query(1, ge=1),
|
backend/app/schemas.py
CHANGED
|
@@ -75,6 +75,17 @@ class DocumentResponse(BaseModel):
|
|
| 75 |
from_attributes = True
|
| 76 |
|
| 77 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 78 |
class DocumentListResponse(BaseModel):
|
| 79 |
items: List[DocumentResponse]
|
| 80 |
total: int
|
|
|
|
| 75 |
from_attributes = True
|
| 76 |
|
| 77 |
|
| 78 |
+
class DocumentStatusResponse(BaseModel):
|
| 79 |
+
id: str
|
| 80 |
+
status: str
|
| 81 |
+
page_count: int
|
| 82 |
+
chunk_count: int
|
| 83 |
+
error_message: Optional[str] = None
|
| 84 |
+
|
| 85 |
+
class Config:
|
| 86 |
+
from_attributes = True
|
| 87 |
+
|
| 88 |
+
|
| 89 |
class DocumentListResponse(BaseModel):
|
| 90 |
items: List[DocumentResponse]
|
| 91 |
total: int
|