"""
OCR API Route — Accepts document images and returns extracted structured data.
Uses the multi-provider OCR router (Groq → Gemini → OpenAI → HuggingFace).
"""

import asyncio
import io
import logging

from fastapi import APIRouter, UploadFile, File, Form, HTTPException
from PIL import Image

from app.schemas import OCRResponse, DocumentType
from app.services.ocr.ocr_router import ocr_router

router = APIRouter()
logger = logging.getLogger(__name__)

MAX_FILE_SIZE_MB = 10
MAX_FILE_SIZE_BYTES = MAX_FILE_SIZE_MB * 1024 * 1024


def _decode_image(contents: bytes) -> Image.Image:
    """Open *contents* with Pillow and force the pixel data to be decoded.

    ``Image.open`` is lazy: it only parses the header. Calling ``load()``
    here makes truncated or corrupt files fail immediately, and keeps the
    CPU-heavy decode inside the worker thread this helper is run in.
    """
    image = Image.open(io.BytesIO(contents))
    image.load()
    return image


@router.post("/ocr", response_model=OCRResponse)
async def process_ocr(
    file: UploadFile = File(..., description="Document image (PNG, JPG, WEBP)"),
    document_type: DocumentType = Form(..., description="Type of the document being scanned"),
):
    """
    Process an Arabic document image and extract structured data as JSON.

    - **file**: The document image to scan (max 10 MB).
    - **document_type**: One of the supported document types (id_card, income_proof, etc.).

    The router automatically tries providers in order
    (Groq → Gemini → OpenAI → HuggingFace) and falls back
    if any provider hits a rate limit or error.

    Error responses:

    - **400**: the upload is not an image, is empty, exceeds the size limit,
      or cannot be decoded as an image.
    - **503**: every OCR provider failed.
    - **500**: any other unexpected failure during processing.
    """
    # ── Validate file type ──
    if not file.content_type or not file.content_type.startswith("image/"):
        raise HTTPException(status_code=400, detail="File provided is not an image.")

    # ── Validate file size (safe: only read limit + 1 byte) ──
    contents = await file.read(MAX_FILE_SIZE_BYTES + 1)
    if len(contents) > MAX_FILE_SIZE_BYTES:
        raise HTTPException(
            status_code=400,
            detail=f"File size exceeds the {MAX_FILE_SIZE_MB} MB limit.",
        )
    if not contents:
        raise HTTPException(status_code=400, detail="Uploaded file is empty.")

    try:
        # ── Decode the image ──
        # A declared image/* content type proves nothing about the bytes, so
        # an undecodable payload is a client error (400), not a server fault.
        try:
            image = await asyncio.to_thread(_decode_image, contents)
        except (OSError, ValueError, Image.DecompressionBombError) as e:
            # OSError covers PIL.UnidentifiedImageError and truncated files.
            logger.warning("Rejected undecodable image upload: %s", e)
            raise HTTPException(
                status_code=400,
                detail="File provided is not a valid image.",
            ) from e

        result_data = await ocr_router.run_ocr(image, document_type=document_type)

        # Extract '_provider' from result (injected by the router)
        provider_name = result_data.pop("_provider", "unknown")

        return OCRResponse(data=result_data, provider=provider_name)

    except HTTPException:
        # Deliberate HTTP errors raised above must not be rewritten as 500s.
        raise
    except RuntimeError as e:
        # All providers exhausted
        logger.error("All OCR providers failed: %s", e)
        raise HTTPException(
            status_code=503,
            detail="All OCR providers are currently unavailable. Please try again later.",
        ) from e
    except Exception as e:
        logger.exception("Error processing OCR: %s", e)
        raise HTTPException(
            status_code=500,
            detail="Internal server error during OCR processing.",
        ) from e