Spaces:
Sleeping
Sleeping
| """ | |
| FastAPI Application for OCR Service | |
| Production-ready API for advanced OCR on scanned images | |
| """ | |
| import os | |
| import tempfile | |
| import logging | |
| from typing import Optional | |
| from pathlib import Path | |
| from contextlib import asynccontextmanager | |
| from fastapi import FastAPI, File, UploadFile, HTTPException, Query | |
| from fastapi.middleware.cors import CORSMiddleware | |
| from fastapi.responses import JSONResponse | |
| import uvicorn | |
| from ocr_api.ocr_service import OCRService | |
# Setup logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Global OCR service instance; assigned by the lifespan handler at startup.
ocr_service = None

# Check for GPU availability from environment
use_gpu = os.getenv("USE_GPU", "false").lower() == "true"


def parse_cors_origins(raw: str) -> list:
    """Split a comma-separated origin list into clean entries.

    Whitespace around each entry is removed and empty entries (from
    trailing or doubled commas) are dropped, so a value such as
    ``"https://a.com, https://b.com"`` yields origins that match exactly.

    Args:
        raw: Comma-separated origins, e.g. the CORS_ORIGINS variable.

    Returns:
        List of non-empty, stripped origin strings (possibly empty).
    """
    return [origin.strip() for origin in raw.split(",") if origin.strip()]


# CORS allowed origins - configure for production
allowed_origins = parse_cors_origins(os.getenv("CORS_ORIGINS", "*"))

# A wildcard anywhere in the list opens the API to every origin, so warn
# even when it is mixed with explicit entries.
if "*" in allowed_origins:
    logger.warning("CORS is configured to allow all origins. This is insecure for production.")
    logger.warning("Set CORS_ORIGINS environment variable with comma-separated allowed origins.")
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Lifespan context manager for startup and shutdown events.

    On startup, tries to build the real ``OCRService``; if that raises,
    falls back to ``MockOCRService``. The chosen instance is stored in the
    module-level ``ocr_service``. Code after ``yield`` runs at shutdown.

    Args:
        app: The FastAPI application this lifespan is attached to (unused).

    Raises:
        Exception: Re-raises the mock service's initialization error when
            both the real and the mock service fail to initialize.
    """
    global ocr_service

    # Startup
    logger.info("Initializing OCR Service...")
    try:
        # NOTE(review): OCRService is also imported at module top level, so
        # an import failure would surface there before this fallback runs.
        from ocr_api.ocr_service import OCRService
        ocr_service = OCRService(use_gpu=use_gpu, lang='en')
        logger.info(f"OCR Service initialized successfully (GPU: {use_gpu})")
    except Exception as e:
        logger.warning(f"Failed to initialize PaddleOCR: {e}")
        logger.info("Falling back to Mock OCR Service for testing...")
        try:
            from ocr_api.mock_ocr_service import MockOCRService
            ocr_service = MockOCRService(use_gpu=use_gpu, lang='en')
            logger.info("Mock OCR Service initialized successfully")
        except Exception as mock_error:
            logger.error(f"Failed to initialize Mock OCR Service: {mock_error}")
            raise

    yield

    # Shutdown
    logger.info("Shutting down OCR Service...")
# Initialize FastAPI app with lifespan
app = FastAPI(
    title="Advanced OCR API",
    description="Production-ready API for OCR on scanned images using PaddleOCR",
    version="1.0.0",
    docs_url="/docs",
    redoc_url="/redoc",
    lifespan=lifespan
)

# Configure CORS
app.add_middleware(
    CORSMiddleware,
    allow_origins=allowed_origins,  # Configure via CORS_ORIGINS env var
    # Credentialed requests must not be combined with a wildcard origin:
    # that would let any site send cookies/authorization to this API.
    # Credentials are therefore only enabled for an explicit origin list.
    allow_credentials="*" not in allowed_origins,
    allow_methods=["*"],
    allow_headers=["*"],
)
@app.get("/")
async def root():
    """Root endpoint with API information.

    Returns:
        A dict with the API name, its version, and the paths of the OCR,
        health, and documentation endpoints.
    """
    return {
        "message": "Advanced OCR API",
        "version": "1.0.0",
        "endpoints": {
            "ocr": "/api/ocr",
            "health": "/health",
            "docs": "/docs"
        }
    }
@app.get("/health")
async def health_check():
    """Health check endpoint.

    Returns:
        A dict whose ``status`` is always ``"healthy"``, whose
        ``ocr_service`` reports whether the module-level service object is
        set, and whose ``gpu_enabled`` mirrors the ``USE_GPU`` setting.
    """
    return {
        "status": "healthy",
        "ocr_service": "initialized" if ocr_service else "not_initialized",
        "gpu_enabled": use_gpu
    }
@app.post("/api/ocr")
async def perform_ocr(
    file: UploadFile = File(..., description="Image file (jpg, png, tiff, pdf)")
):
    """
    Perform OCR on uploaded image

    The upload is written to a temporary file, handed to
    ``ocr_service.process_image``, and the temporary file is removed
    afterwards regardless of the outcome.

    Args:
        file: Uploaded image file

    Returns:
        Structured JSON response with OCR results

    Raises:
        HTTPException: 503 when the OCR service is not initialized, 400 for
            an unsupported extension or a ``ValueError`` from processing,
            500 for any other processing failure.
    """
    if not ocr_service:
        raise HTTPException(status_code=503, detail="OCR service not initialized")

    # Validate file type
    allowed_extensions = {'.jpg', '.jpeg', '.png', '.tiff', '.tif', '.pdf'}
    file_ext = Path(file.filename).suffix.lower() if file.filename else ''
    if file_ext not in allowed_extensions:
        # Sorted so the message is stable; set iteration order is not.
        raise HTTPException(
            status_code=400,
            detail=f"Unsupported file type. Allowed: {', '.join(sorted(allowed_extensions))}"
        )

    # Create temporary file to store upload
    temp_file = None
    try:
        # Save uploaded file to temporary location
        with tempfile.NamedTemporaryFile(delete=False, suffix=file_ext) as temp:
            # Record the path before reading/writing so the finally clause
            # still removes the file if either of those steps fails.
            temp_file = temp.name
            content = await file.read()
            temp.write(content)

        logger.info(f"Processing uploaded file: {file.filename} ({len(content)} bytes)")

        # Process image with OCR (the file is closed, so all data is flushed).
        # NOTE(review): this call is synchronous inside an async handler and
        # will block the event loop while it runs; confirm whether the
        # service is thread-safe before offloading it to a worker thread.
        result = ocr_service.process_image(temp_file)
        logger.info(f"OCR processing completed for {file.filename}")
        return JSONResponse(content=result)
    except ValueError as e:
        logger.error(f"Invalid image: {e}")
        raise HTTPException(status_code=400, detail=str(e)) from e
    except Exception as e:
        logger.error(f"OCR processing failed: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"OCR processing failed: {str(e)}") from e
    finally:
        # Clean up temporary file; best effort, never masks the real outcome.
        if temp_file:
            try:
                os.unlink(temp_file)
            except FileNotFoundError:
                pass
            except OSError as e:
                logger.warning(f"Failed to delete temporary file: {e}")
def main():
    """Launch the API under uvicorn.

    The bind address comes from the ``HOST`` and ``PORT`` environment
    variables, defaulting to ``0.0.0.0`` and ``8000``.
    """
    listen_port = int(os.environ.get("PORT", 8000))
    listen_host = os.environ.get("HOST", "0.0.0.0")

    logger.info(f"Starting OCR API server on {listen_host}:{listen_port}")

    server_options = {
        "host": listen_host,
        "port": listen_port,
        "reload": False,
        "log_level": "info",
    }
    uvicorn.run("ocr_api.main:app", **server_options)


if __name__ == "__main__":
    main()