Aoun-Ai / app /api /ocr.py
MuhammadMahmoud's picture
enhance rag
468ea61
from fastapi import APIRouter, UploadFile, File, Form, HTTPException
from app.schemas import OCRResponse, OCRAnalysisResponse, DocumentType
from app.services.ocr.ocr_router import ocr_router
from app.services.document.document_analyzer import document_analyzer
from PIL import Image
import asyncio
import io
import logging
import time
from app.core.audit_logger import audit_logger
# Router that the OCR endpoints in this module register themselves on.
router = APIRouter()
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
# Upload size cap; the byte value is what _read_and_validate compares against.
MAX_FILE_SIZE_MB = 10
MAX_FILE_SIZE_BYTES = MAX_FILE_SIZE_MB * 1024 * 1024
@router.post("/ocr", response_model=OCRResponse)
async def process_ocr(
    file: UploadFile = File(..., description="Document image (PNG, JPG, WEBP)"),
    document_type: DocumentType = Form(..., description="Type of the document being scanned"),
):
    """
    Run OCR on an uploaded document image and return the extracted data.

    The upload is validated by ``_read_and_validate``, opened with Pillow and
    handed to ``ocr_router.run_ocr`` together with ``document_type``. The
    ``"_provider"`` key is removed from the router's result and reported
    separately as the name of the engine that produced it ("unknown" when the
    key is absent).

    Args:
        file: The uploaded image.
        document_type: Kind of document being scanned; forwarded to the OCR router.

    Returns:
        An ``OCRResponse`` carrying the extracted data and the provider name.

    Raises:
        HTTPException: 400 when the upload fails validation or cannot be opened
            as an image; 503 when the OCR step raises ``RuntimeError`` (treated
            here as "all providers failed"); 500 for any other failure.
    """
    contents = await _read_and_validate(file)

    # Open the image in its own try-block, outside the provider handling below:
    # an unreadable upload is the client's fault and must surface as 400, not
    # be swallowed by the generic handler and reported as a 500.
    # Image.open only identifies the file (pixel data is decoded lazily), so
    # damage past the header can still surface later inside the OCR step.
    try:
        image = await asyncio.to_thread(Image.open, io.BytesIO(contents))
    except (OSError, Image.DecompressionBombError) as e:
        # UnidentifiedImageError is an OSError subclass, so it is covered here.
        logger.warning("Rejected undecodable upload: %s", e)
        raise HTTPException(
            status_code=400,
            detail="Uploaded file is not a valid or supported image.",
        ) from e

    try:
        result_data = await ocr_router.run_ocr(image, document_type=document_type)
        provider_name = result_data.pop("_provider", "unknown")
        return OCRResponse(data=result_data, provider=provider_name)
    except RuntimeError as e:
        logger.error("All OCR providers failed: %s", e)
        raise HTTPException(
            status_code=503,
            detail="All OCR providers are currently unavailable. Please try again later.",
        ) from e
    except Exception as e:
        logger.error("Error processing OCR: %s", e, exc_info=True)
        raise HTTPException(
            status_code=500,
            detail="Internal server error during OCR processing.",
        ) from e
@router.post("/ocr/analyze", response_model=OCRAnalysisResponse)
async def process_ocr_with_analysis(
    file: UploadFile = File(..., description="Document image (PNG, JPG, WEBP)"),
    document_type: DocumentType = Form(..., description="Type of the document being scanned"),
):
    """
    Run OCR on an uploaded document image, then analyse the extracted fields.

    The upload is validated by ``_read_and_validate``, opened with Pillow and
    passed to ``ocr_router.run_ocr``. The resulting fields (with the
    ``"_provider"`` key removed) are then given to ``document_analyzer.analyze``
    along with the document type's value. The elapsed time of the whole
    pipeline is recorded through ``audit_logger.log_event`` before responding.

    Args:
        file: The uploaded image.
        document_type: Kind of document being scanned; forwarded to both stages.

    Returns:
        An ``OCRAnalysisResponse`` carrying the OCR data, the provider name and
        the analysis result.

    Raises:
        HTTPException: 400 when the upload fails validation or cannot be opened
            as an image; 503 when a ``RuntimeError`` is raised; 500 for any
            other failure.
    """
    contents = await _read_and_validate(file)
    start_time = time.perf_counter()

    # Open the image in its own try-block, outside the pipeline handling below:
    # an unreadable upload is the client's fault and must surface as 400, not
    # be swallowed by the generic handler and reported as a 500.
    # Image.open only identifies the file (pixel data is decoded lazily), so
    # damage past the header can still surface later inside the OCR step.
    try:
        image = await asyncio.to_thread(Image.open, io.BytesIO(contents))
    except (OSError, Image.DecompressionBombError) as e:
        # UnidentifiedImageError is an OSError subclass, so it is covered here.
        logger.warning("Rejected undecodable upload: %s", e)
        raise HTTPException(
            status_code=400,
            detail="Uploaded file is not a valid or supported image.",
        ) from e

    try:
        # Step 1: OCR
        result_data = await ocr_router.run_ocr(image, document_type=document_type)
        provider_name = result_data.pop("_provider", "unknown")

        # Step 2: analysis. It consumes the OCR fields, so it has to run after
        # step 1 rather than concurrently with it.
        analysis_dict = await document_analyzer.analyze(
            ocr_fields=result_data,
            document_type=document_type.value,
        )

        duration_ms = (time.perf_counter() - start_time) * 1000
        audit_logger.log_event(
            "ocr_analysis",
            "/ocr/analyze",
            {"filename": file.filename},
            analysis_dict,
            provider_name,
            duration_ms,
        )
        return OCRAnalysisResponse(
            data=result_data,
            provider=provider_name,
            analysis=analysis_dict,
        )
    except RuntimeError as e:
        # NOTE(review): a RuntimeError raised by the analysis or audit step is
        # also reported as an OCR provider outage here; confirm that is intended.
        logger.error("All OCR providers failed: %s", e)
        raise HTTPException(
            status_code=503,
            detail="All OCR providers are currently unavailable. Please try again later.",
        ) from e
    except Exception as e:
        logger.error("Error processing OCR+Analysis: %s", e, exc_info=True)
        raise HTTPException(
            status_code=500,
            detail="Internal server error during OCR analysis.",
        ) from e
# ─── Shared Helper ────────────────────────────────────────────────────────────
async def _read_and_validate(file: UploadFile) -> bytes:
    """Validate an upload's declared type and size, then return its raw bytes.

    Args:
        file: The multipart upload received by an endpoint.

    Returns:
        The upload's content: non-empty and at most ``MAX_FILE_SIZE_BYTES`` long.

    Raises:
        HTTPException: 400 when the declared content type is missing or is not
            an ``image/*`` type, when the upload is empty, or when it is larger
            than ``MAX_FILE_SIZE_MB`` megabytes.
    """
    declared_type = file.content_type
    # Media types are case-insensitive, so normalise before comparing.
    if not declared_type or not declared_type.lower().startswith("image/"):
        raise HTTPException(status_code=400, detail="File provided is not an image.")

    # Ask for one byte more than the limit: that is enough to detect an
    # oversized upload without reading an arbitrarily large body into memory.
    contents = await file.read(MAX_FILE_SIZE_BYTES + 1)

    # An empty body can never be a valid image; reject it here as a client
    # error instead of letting the decoder fail on it further down the line.
    if not contents:
        raise HTTPException(status_code=400, detail="Uploaded file is empty.")

    if len(contents) > MAX_FILE_SIZE_BYTES:
        raise HTTPException(
            status_code=400,
            detail=f"File size exceeds the {MAX_FILE_SIZE_MB} MB limit.",
        )
    return contents