# chefcode/backend/routes/ocr.py
# Uploaded by Mariem-Daha ("Upload 31 files", commit 9aaec2c, verified)
from fastapi import APIRouter, File, UploadFile, HTTPException, Depends
from typing import Dict, Any
import os
import tempfile
import asyncio
from pathlib import Path
from auth import verify_api_key
# Router on which this module's OCR endpoints are registered.
router = APIRouter()
# Process-wide cache for the OCR processor. "instance" holds the constructed
# InvoiceOCR object; "error" holds the message of a failed initialization so
# later calls return it immediately instead of retrying.
_ocr_processor_cache = {"instance": None, "error": None}


def _ensure_utf8_stream(stream_name):
    """Best-effort switch of ``sys.<stream_name>`` to UTF-8 with ``errors='replace'``.

    Console encoding is a convenience (so emoji output does not raise
    UnicodeEncodeError); it must never prevent OCR initialization, so every
    failure here is logged at debug level and otherwise ignored.
    """
    import io
    import logging
    import sys

    stream = getattr(sys, stream_name, None)
    if stream is None:
        return

    # Normalize spellings such as "UTF-8", "utf_8" and "utf8" before comparing.
    encoding = getattr(stream, "encoding", None) or ""
    if encoding.lower().replace("-", "").replace("_", "") == "utf8":
        return

    try:
        reconfigure = getattr(stream, "reconfigure", None)
        if reconfigure is not None:
            # Changes the encoding in place; no second wrapper is created
            # around the same underlying buffer.
            reconfigure(encoding="utf-8", errors="replace")
            return
        buffer = getattr(stream, "buffer", None)
        if buffer is not None:
            wrapped = io.TextIOWrapper(buffer, encoding="utf-8", errors="replace")
            setattr(sys, stream_name, wrapped)
        # Streams with neither reconfigure() nor .buffer (e.g. StringIO-like
        # capture objects) are left untouched.
    except Exception as exc:  # deliberate: cosmetic step, must not abort init
        logging.debug("Could not switch sys.%s to UTF-8: %s", stream_name, exc)


# Lazy import of OCR to avoid import errors if dependencies are missing
def get_ocr_processor():
    """Lazily build the OCR processor, caching the outcome.

    Returns:
        A ``(processor, error)`` tuple. On success ``processor`` is the
        ``InvoiceOCR`` instance and ``error`` is ``None``; on failure
        ``processor`` is ``None`` and ``error`` is a human-readable message.

    Both outcomes are stored in ``_ocr_processor_cache``: a successful
    instance is reused, and a failure message is returned on every later call
    without retrying initialization.
    """
    import logging

    cache = _ocr_processor_cache

    # Return the cached outcome if a previous call already decided it.
    if cache["instance"] is not None:
        return cache["instance"], None
    if cache["error"] is not None:
        return None, cache["error"]

    try:
        # Make stdout/stderr emoji-safe before importing the OCR tool.
        # Each stream is handled independently and failures are tolerated.
        for stream_name in ("stdout", "stderr"):
            _ensure_utf8_stream(stream_name)

        from tools.ocr_invoice import InvoiceOCR

        # Get configuration from environment
        project_id = os.getenv("PROJECT_ID")
        location = os.getenv("LOCATION")
        processor_id = os.getenv("PROCESSOR_ID")
        gemini_api_key = os.getenv("GEMINI_API_KEY")

        if not all([project_id, location, processor_id, gemini_api_key]):
            error = "Missing OCR configuration. Set PROJECT_ID, LOCATION, PROCESSOR_ID, and GEMINI_API_KEY in .env"
            cache["error"] = error
            return None, error

        ocr = InvoiceOCR(
            project_id=project_id,
            location=location,
            processor_id=processor_id,
            gemini_api_key=gemini_api_key,
        )
        cache["instance"] = ocr
        return ocr, None
    except ImportError as e:
        error = f"OCR dependencies not installed: {e}"
        cache["error"] = error
        return None, error
    except Exception as e:
        logging.error(f"OCR initialization error: {e}", exc_info=True)
        error = f"Failed to initialize OCR: {e}"
        cache["error"] = error
        return None, error
@router.post("/ocr-invoice")
async def process_invoice(
    file: UploadFile = File(...),
    api_key: str = Depends(verify_api_key)
) -> Dict[str, Any]:
    """
    Process an invoice image/PDF and extract structured data using OCR.

    Supports: PDF, JPG, JPEG, PNG, TIFF

    Args:
        file: The uploaded invoice. Its content type must be one of
            application/pdf, image/jpeg, image/png or image/tiff.
        api_key: Value supplied by the ``verify_api_key`` dependency; it is
            not read in the body.

    Returns:
        A dict with ``status``, ``items`` (line items mapped to the field
        names the frontend expects), ``filename`` and ``metadata``.

    Raises:
        HTTPException: 503 if the OCR processor is unavailable, 400 for an
            unsupported content type, 500 if anything fails while storing or
            processing the upload.
    """
    import logging

    # Get OCR processor
    ocr, error = get_ocr_processor()
    if error:
        raise HTTPException(
            status_code=503,
            detail=f"OCR service not available: {error}"
        )

    # Validate file type
    allowed_types = {'application/pdf', 'image/jpeg', 'image/png', 'image/tiff'}
    if file.content_type not in allowed_types:
        raise HTTPException(
            status_code=400,
            detail=f"Unsupported file type: {file.content_type}. Supported: PDF, JPEG, PNG, TIFF"
        )

    temp_file = None
    try:
        # The filename may be absent; fall back to a generic suffix.
        extension = Path(file.filename or "").suffix or '.tmp'

        # Read the upload before creating the temp file, and record the path
        # before writing, so a failed read or write cannot leave an
        # untracked file on disk.
        content = await file.read()
        with tempfile.NamedTemporaryFile(delete=False, suffix=extension) as temp:
            temp_file = temp.name
            temp.write(content)

        # Process the invoice asynchronously (run synchronous OCR in thread pool)
        invoice_data = await asyncio.to_thread(ocr.process_invoice, temp_file, save_json=False)

        # Extract line items and convert to frontend format.
        # "or []" also covers a "line_items" key that is present but None.
        items = [
            {
                "name": item.get("description", "Unknown"),
                "quantity": item.get("quantity", 0),
                "unit": item.get("unit", "pz"),
                "price": item.get("unit_price", 0),
                "category": item.get("type", "Food"),  # Use OCR extracted type as category
                "lot_number": item.get("item_code", ""),  # Use item_code as lot_number
                "expiry_date": item.get("expiry_date", ""),  # Extract expiry date from OCR
            }
            for item in (invoice_data.get("line_items") or [])
        ]

        # Return in format frontend expects
        return {
            "status": "success",
            "items": items,
            "filename": file.filename,
            "metadata": invoice_data.get("_processing_metadata", {})
        }
    except Exception as e:
        # Log the detailed error (with traceback) internally; the client
        # only receives a generic message.
        logging.error("OCR processing error: %s", e, exc_info=True)
        raise HTTPException(
            status_code=500,
            detail="Error processing invoice. Please check the file format and try again."
        ) from e
    finally:
        # Clean up temporary file (best effort: never mask the real outcome).
        if temp_file is not None:
            try:
                os.unlink(temp_file)
            except FileNotFoundError:
                pass  # already gone, nothing to clean up
            except OSError as cleanup_error:
                logging.warning(
                    "Could not remove temporary file %s: %s", temp_file, cleanup_error
                )
@router.get("/ocr-status")
async def ocr_status():
    """Report whether the OCR processor can be obtained.

    Returns a dict whose ``available`` flag reflects the result of
    ``get_ocr_processor()``; on failure the error message is included.
    """
    _processor, failure = get_ocr_processor()

    # Success path first: no error message means the processor is usable.
    if not failure:
        return {
            "available": True,
            "message": "OCR service is ready",
            "supported_formats": ["PDF", "JPEG", "PNG", "TIFF"]
        }

    return {
        "available": False,
        "error": failure,
        "message": "OCR service is not configured or dependencies are missing"
    }