Spaces:

Mariem-Daha
/

chefcode

Runtime error

App Files Files Community

chefcode / backend /routes /ocr.py

Mariem-Daha

Upload 31 files

9aaec2c verified 6 months ago

raw

history blame contribute delete

5.77 kB

	from fastapi import APIRouter, File, UploadFile, HTTPException, Depends
	from typing import Dict, Any
	import os
	import tempfile
	import asyncio
	from pathlib import Path
	from auth import verify_api_key

	# Router instance that this module's endpoint decorators register their handlers on.
	router = APIRouter()

	# Process-wide cache: holds either the ready OCR processor or the error message
	# from the first failed initialization, so setup is attempted at most once.
	_ocr_processor_cache = {"instance": None, "error": None}


	def _ensure_utf8_stdio():
	    """Best-effort switch of stdout/stderr to UTF-8 with ``errors='replace'``.

	    The original intent is to let emoji be printed without encoding errors.
	    Streams that are missing, already UTF-8, or that cannot be reconfigured
	    (e.g. a replaced/captured stream without ``reconfigure``) are left alone,
	    so this step can never make OCR initialization fail.
	    """
	    import sys

	    for stream_name in ("stdout", "stderr"):
	        stream = getattr(sys, stream_name, None)
	        encoding = str(getattr(stream, "encoding", None) or "")
	        # Accept every common spelling: "utf-8", "UTF-8", "utf8", "utf_8".
	        if encoding.lower().replace("-", "").replace("_", "") == "utf8":
	            continue
	        reconfigure = getattr(stream, "reconfigure", None)
	        if reconfigure is None:
	            continue
	        try:
	            reconfigure(encoding="utf-8", errors="replace")
	        except (ValueError, OSError):
	            # Closed or otherwise unsupported stream: keep it as it is.
	            pass


	# Lazy import of OCR to avoid import errors if dependencies are missing
	def get_ocr_processor():
	    """Lazily build the shared OCR processor and cache the outcome.

	    Returns:
	        A ``(processor, error)`` pair. On success ``processor`` is the
	        ``InvoiceOCR`` instance and ``error`` is ``None``; on failure
	        ``processor`` is ``None`` and ``error`` is a human-readable message.

	    Both outcomes are cached in ``_ocr_processor_cache``: once an error has
	    been recorded it is returned on every later call without retrying, so
	    fixing the configuration or installing dependencies requires a restart.
	    """
	    # Serve the cached outcome if there is one.
	    cached_instance = _ocr_processor_cache["instance"]
	    if cached_instance is not None:
	        return cached_instance, None
	    cached_error = _ocr_processor_cache["error"]
	    if cached_error is not None:
	        return None, cached_error

	    try:
	        # Never raises; a stream without ``.buffer`` used to abort init here.
	        _ensure_utf8_stdio()

	        from tools.ocr_invoice import InvoiceOCR

	        # Get configuration from environment
	        project_id = os.getenv("PROJECT_ID")
	        location = os.getenv("LOCATION")
	        processor_id = os.getenv("PROCESSOR_ID")
	        gemini_api_key = os.getenv("GEMINI_API_KEY")

	        if not all([project_id, location, processor_id, gemini_api_key]):
	            error = "Missing OCR configuration. Set PROJECT_ID, LOCATION, PROCESSOR_ID, and GEMINI_API_KEY in .env"
	            _ocr_processor_cache["error"] = error
	            return None, error

	        ocr = InvoiceOCR(
	            project_id=project_id,
	            location=location,
	            processor_id=processor_id,
	            gemini_api_key=gemini_api_key,
	        )
	        _ocr_processor_cache["instance"] = ocr
	        return ocr, None

	    except ImportError as e:
	        error = f"OCR dependencies not installed: {e}"
	        _ocr_processor_cache["error"] = error
	        return None, error
	    except Exception as e:
	        import logging
	        # logging.exception == logging.error(..., exc_info=True)
	        logging.exception("OCR initialization error: %s", e)
	        error = f"Failed to initialize OCR: {e}"
	        _ocr_processor_cache["error"] = error
	        return None, error


	def _line_items_to_frontend(invoice_data):
	    """Map the OCR result's ``line_items`` onto the item dicts the frontend expects."""
	    # ``or []`` also covers a present-but-null "line_items" value.
	    return [
	        {
	            "name": item.get("description", "Unknown"),
	            "quantity": item.get("quantity", 0),
	            "unit": item.get("unit", "pz"),
	            "price": item.get("unit_price", 0),
	            "category": item.get("type", "Food"),  # Use OCR extracted type as category
	            "lot_number": item.get("item_code", ""),  # Use item_code as lot_number
	            "expiry_date": item.get("expiry_date", ""),  # Extract expiry date from OCR
	        }
	        for item in invoice_data.get("line_items") or []
	    ]


	@router.post("/ocr-invoice")
	async def process_invoice(
	    file: UploadFile = File(...),
	    api_key: str = Depends(verify_api_key)
	) -> Dict[str, Any]:
	    """
	    Process an invoice image/PDF and extract structured data using OCR

	    Supports: PDF, JPG, JPEG, PNG, TIFF

	    The upload is written to a temporary file, handed to the OCR processor in
	    a worker thread, and the temporary file is removed afterwards.

	    Returns:
	        ``{"status": "success", "items": [...], "filename": ..., "metadata": {...}}``

	    Raises:
	        HTTPException: 503 if the OCR processor is unavailable, 400 if the
	            content type is unsupported, 500 if processing fails.
	    """
	    # Get OCR processor
	    ocr, error = get_ocr_processor()
	    if error:
	        raise HTTPException(
	            status_code=503,
	            detail=f"OCR service not available: {error}"
	        )

	    # Validate file type
	    allowed_types = {'application/pdf', 'image/jpeg', 'image/png', 'image/tiff'}
	    if file.content_type not in allowed_types:
	        raise HTTPException(
	            status_code=400,
	            detail=f"Unsupported file type: {file.content_type}. Supported: PDF, JPEG, PNG, TIFF"
	        )

	    temp_file = None
	    try:
	        # The client may omit the filename; fall back to a generic suffix.
	        extension = Path(file.filename or "").suffix or '.tmp'

	        content = await file.read()

	        with tempfile.NamedTemporaryFile(delete=False, suffix=extension) as temp:
	            # Record the path before writing so a failed write is still cleaned up.
	            temp_file = temp.name
	            temp.write(content)

	        # Process the invoice asynchronously (run synchronous OCR in thread pool)
	        invoice_data = await asyncio.to_thread(ocr.process_invoice, temp_file, save_json=False)

	        # Return in format frontend expects
	        return {
	            "status": "success",
	            "items": _line_items_to_frontend(invoice_data),
	            "filename": file.filename,
	            "metadata": invoice_data.get("_processing_metadata", {})
	        }

	    except Exception as e:
	        # Log the detailed error (with traceback) internally; the client only
	        # sees a generic message.
	        import logging
	        logging.error(f"OCR processing error: {str(e)}", exc_info=True)

	        raise HTTPException(
	            status_code=500,
	            detail="Error processing invoice. Please check the file format and try again."
	        ) from e

	    finally:
	        # Best-effort removal of the temporary file; a missing file or a
	        # failed delete must not mask the real response or error.
	        if temp_file:
	            try:
	                os.unlink(temp_file)
	            except OSError:
	                pass


	@router.get("/ocr-status")
	async def ocr_status():
	    """Report whether the OCR processor can be obtained.

	    Returns a dict with ``available`` set to ``True`` plus the supported
	    formats when the processor loads, or ``available`` set to ``False``
	    together with the error text when it does not.
	    """
	    _, failure = get_ocr_processor()

	    if not failure:
	        return {
	            "available": True,
	            "message": "OCR service is ready",
	            "supported_formats": ["PDF", "JPEG", "PNG", "TIFF"]
	        }

	    return {
	        "available": False,
	        "error": failure,
	        "message": "OCR service is not configured or dependencies are missing"
	    }