Spaces:

xce009
/

ocr-api

Running

App Files Files Community

Soumik Bose committed on Jan 25

Commit

690d90f

1 Parent(s): 6536e0d

go

Browse files

Files changed (3) hide show

Dockerfile +20 -5
main.py +183 -95
requirements.txt +3 -0

Dockerfile CHANGED Viewed

@@ -2,10 +2,24 @@ FROM python:3.11-slim
 WORKDIR /app
-# Install system dependencies for RapidOCR and PDF processing
 RUN apt-get update && apt-get install -y \
     curl \
     poppler-utils \
     libgl1 \
     libglib2.0-0 \
     libgomp1 \
@@ -13,18 +27,19 @@ RUN apt-get update && apt-get install -y \
     g++ \
     && rm -rf /var/lib/apt/lists/*
-# Fix: Ensure logs appear immediately in the console
 ENV PYTHONUNBUFFERED=1
 ENV PYTHONIOENCODING=UTF-8
 ENV HF_HOME=/tmp/cache
 ENV PORT=7860
-# Copy requirements and install dependencies
 COPY requirements.txt .
 RUN pip install --upgrade pip setuptools wheel \
     && pip install --default-timeout=100 --retries=10 --no-cache-dir -r requirements.txt
-# Copy application files
 COPY . .
 # Create non-root user
@@ -36,7 +51,7 @@ RUN mkdir -p ${HF_HOME} && chmod 777 ${HF_HOME}
 EXPOSE $PORT
-# Start the application
 CMD bash -c "\
 (while true; do curl -s https://xce009-ocr-api.hf.space >/dev/null; sleep 300; done) & \
 uvicorn main:app --host 0.0.0.0 --port ${PORT} --workers 4"

 WORKDIR /app
+# Install system dependencies for BOTH Tesseract and RapidOCR
 RUN apt-get update && apt-get install -y \
     curl \
+    # Tesseract with language packs
+    tesseract-ocr \
+    tesseract-ocr-eng \
+    tesseract-ocr-deu \
+    tesseract-ocr-fra \
+    tesseract-ocr-spa \
+    tesseract-ocr-por \
+    tesseract-ocr-ita \
+    tesseract-ocr-rus \
+    tesseract-ocr-chi-sim \
+    tesseract-ocr-jpn \
+    tesseract-ocr-kor \
+    # PDF processing
     poppler-utils \
+    # RapidOCR dependencies
     libgl1 \
     libglib2.0-0 \
     libgomp1 \
     g++ \
     && rm -rf /var/lib/apt/lists/*
+# Environment variables
 ENV PYTHONUNBUFFERED=1
 ENV PYTHONIOENCODING=UTF-8
 ENV HF_HOME=/tmp/cache
 ENV PORT=7860
+ENV DEFAULT_OCR_ENGINE=tesseract
+# Install Python dependencies
 COPY requirements.txt .
 RUN pip install --upgrade pip setuptools wheel \
     && pip install --default-timeout=100 --retries=10 --no-cache-dir -r requirements.txt
+# Copy application
 COPY . .
 # Create non-root user
 EXPOSE $PORT
+# Start application
 CMD bash -c "\
 (while true; do curl -s https://xce009-ocr-api.hf.space >/dev/null; sleep 300; done) & \
 uvicorn main:app --host 0.0.0.0 --port ${PORT} --workers 4"

main.py CHANGED Viewed

@@ -13,10 +13,11 @@ from contextvars import ContextVar
 import uvicorn
 import cv2
 import numpy as np
 from rapidocr_onnxruntime import RapidOCR
 from fastapi import (
     FastAPI, File, UploadFile, Depends,
-    HTTPException, Request, status
 )
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
@@ -32,15 +33,15 @@ from pdf2image import convert_from_path
 # ==========================================
 load_dotenv()
-# ContextVar for thread-safe Request ID tracking
 request_id_ctx: ContextVar[str] = ContextVar("request_id", default="system")
 class Config:
-    APP_NAME = os.getenv("APP_NAME", "OCR API")
     API_TOKEN = os.getenv("API_BEARER_TOKEN")
-    MAX_SIZE = int(os.getenv("MAX_FILE_SIZE", 52428800)) # 50MB
     ALLOWED_ORIGINS = [o.strip() for o in os.getenv("ALLOWED_ORIGINS", "").split(",") if o.strip()]
     ALLOWED_TYPES = ["image/jpeg", "image/png", "image/bmp", "image/webp", "application/pdf"]
 class RequestIdFilter(logging.Filter):
     def filter(self, record):
@@ -63,6 +64,11 @@ class StatusEnum(str, Enum):
     SUCCESS = "success"
     ERROR = "error"
 class BaseResponse(BaseModel):
     request_id: str
     process_time_ms: float
@@ -75,6 +81,7 @@ class PageResult(BaseModel):
     text: str
     confidence: Optional[float] = None
     lines_detected: Optional[int] = None
 class OCRResult(BaseModel):
     filename: str
@@ -83,6 +90,7 @@ class OCRResult(BaseModel):
     total_pages: int
     pages_content: List[PageResult]
     average_confidence: Optional[float] = None
 class APIResponse(BaseResponse):
     data: Optional[OCRResult] = None
@@ -120,62 +128,78 @@ class FileValidator:
             raise HTTPException(413, "File too large")
         return tmp_path
-class OCRProcessor:
-    """RapidOCR-based OCR processor with enhanced accuracy"""
     def __init__(self):
-        """Initialize RapidOCR engine"""
         self.engine = RapidOCR()
-        logger.info("RapidOCR engine initialized successfully")
-    def _extract_text_from_image(self, image_path: str) -> dict:
-        """
-        Extract text from a single image using RapidOCR
-        Args:
-            image_path: Path to image file
-        Returns:
-            dict: Contains text, confidence, and line count
-        """
         try:
-            # Perform OCR - RapidOCR returns (result_object, elapse_list)
             ocr_result, elapse = self.engine(image_path)
-            # Handle result object
             if hasattr(ocr_result, '__iter__') and not isinstance(ocr_result, str):
                 result = list(ocr_result)
             else:
                 result = ocr_result
             if result is None or len(result) == 0:
-                logger.warning(f"No text detected in image: {image_path}")
                 return {
                     'text': '',
                     'confidence': 0.0,
-                    'lines_detected': 0
                 }
-            # Parse results
             texts = []
             confidences = []
-            for idx, line in enumerate(result):
                 try:
                     if isinstance(line, (list, tuple)):
                         if len(line) == 2:
-                            # Format: [box, text] or [text, confidence]
                             if isinstance(line[0], (list, tuple)):
                                 box, text = line
                                 confidence = 1.0
                             else:
                                 text, confidence = line
-                                box = []
                         elif len(line) == 3:
-                            # Format: [box, text, confidence]
                             box, text, confidence = line
                         elif len(line) >= 4:
-                            # Format: [box, text, confidence, something_else]
                             box, text, confidence = line[0], line[1], line[2]
                         else:
                             continue
@@ -184,50 +208,88 @@ class OCRProcessor:
                     texts.append(str(text))
                     confidences.append(float(confidence) if confidence is not None else 1.0)
-                except Exception as e:
-                    logger.debug(f"Skipping malformed line {idx}: {e}")
                     continue
-            if not texts:
-                return {
-                    'text': '',
-                    'confidence': 0.0,
-                    'lines_detected': 0
-                }
             combined_text = '\n'.join(texts)
             avg_confidence = sum(confidences) / len(confidences) if confidences else 0.0
-            logger.debug(f"Extracted {len(texts)} lines with avg confidence: {avg_confidence:.2%}")
             return {
                 'text': combined_text,
                 'confidence': avg_confidence,
-                'lines_detected': len(texts)
             }
         except Exception as e:
-            logger.error(f"Image OCR extraction failed: {str(e)}")
-            raise ValueError(f"OCR extraction error: {str(e)}")
-    def process_file(self, file_path: str, content_type: str) -> dict:
         """
-        Process PDF or image file and extract text
         Args:
-            file_path: Path to the file
-            content_type: MIME type of the file
-        Returns:
-            dict: Processing results with pages content
         """
         start = time.perf_counter()
         pages_content = []
         all_confidences = []
         try:
-            logger.info(f"Processing File: {file_path}")
             if content_type == "application/pdf":
                 logger.info("Converting PDF to Images...")
@@ -238,43 +300,41 @@ class OCRProcessor:
                     page_num = idx + 1
                     logger.info(f"Scanning Page {page_num}/{total}")
-                    # Save PIL Image to temp file for RapidOCR
                     with tempfile.NamedTemporaryFile(delete=False, suffix='.png') as tmp_img:
                         img.save(tmp_img.name, 'PNG')
                         temp_img_path = tmp_img.name
                     try:
-                        # Extract text from temp image
-                        ocr_result = self._extract_text_from_image(temp_img_path)
                         pages_content.append({
                             "index": idx,
                             "page_number": page_num,
                             "text": ocr_result["text"],
                             "confidence": ocr_result["confidence"],
-                            "lines_detected": ocr_result["lines_detected"]
                         })
                         if ocr_result["confidence"] > 0:
                             all_confidences.append(ocr_result["confidence"])
                     finally:
-                        # Clean up temp image
                         try:
                             os.remove(temp_img_path)
                         except:
                             pass
             else:
                 logger.info("Scanning Single Image...")
-                # Extract text from image
-                ocr_result = self._extract_text_from_image(file_path)
                 pages_content.append({
                     "index": 0,
                     "page_number": 1,
                     "text": ocr_result["text"],
                     "confidence": ocr_result["confidence"],
-                    "lines_detected": ocr_result["lines_detected"]
                 })
                 if ocr_result["confidence"] > 0:
@@ -288,7 +348,8 @@ class OCRProcessor:
             return {
                 "total_pages": len(pages_content),
                 "pages_content": pages_content,
-                "average_confidence": avg_confidence
             }
         except Exception as e:
@@ -309,9 +370,7 @@ app.add_middleware(
 @app.middleware("http")
 async def request_context_middleware(request: Request, call_next):
-    # 1. Generate ID
     req_id = str(uuid.uuid4())
-    # 2. Set Context (Crucial for thread logging)
     token = request_id_ctx.set(req_id)
     request.state.request_id = req_id
@@ -335,7 +394,6 @@ async def request_context_middleware(request: Request, call_next):
             }
         )
     finally:
-        # 3. Clean up Context
         request_id_ctx.reset(token)
 # ==========================================
@@ -348,53 +406,84 @@ async def root(request: Request):
         "request_id": request.state.request_id,
         "process_time_ms": 0,
         "status": StatusEnum.SUCCESS,
-        "message": "RapidOCR API Active",
-        "engine": "RapidOCR",
-        "version": "1.0.0"
     }
 @app.get("/health")
 async def health_check(request: Request):
     """Health check endpoint"""
-    try:
-        return {
-            "request_id": request.state.request_id,
-            "status": StatusEnum.SUCCESS,
-            "message": "Service healthy",
-            "ocr_engine": "RapidOCR"
         }
-    except Exception as e:
-        return JSONResponse(
-            status_code=503,
-            content={
-                "request_id": request.state.request_id,
-                "status": StatusEnum.ERROR,
-                "message": "Service unhealthy",
-                "error": str(e)
-            }
-        )
 @app.post("/api/v1/get_data", response_model=APIResponse)
 async def extract_data(
     request: Request,
     file: UploadFile = File(...),
     token: str = Depends(SecurityService.validate_token)
 ):
     start_ts = time.perf_counter()
     tmp_path = None
     req_id = request.state.request_id
     try:
         FileValidator.validate(file)
         tmp_path = FileValidator.check_size_and_save(file)
-        # Initialize OCR processor and run in thread pool
-        # ContextVars are automatically copied to the thread
         processor = OCRProcessor()
         result = await run_in_threadpool(
             processor.process_file,
             tmp_path,
-            file.content_type
         )
         return {
@@ -408,7 +497,8 @@ async def extract_data(
                 "saved_file_path": tmp_path,
                 "total_pages": result["total_pages"],
                 "pages_content": result["pages_content"],
-                "average_confidence": result.get("average_confidence", 0.0)
             }
         }
@@ -426,7 +516,6 @@ async def extract_data(
         )
     finally:
         if tmp_path:
-            logger.info(f"File preserved at: {tmp_path}")
             try:
                 os.remove(tmp_path)
                 logger.info(f"Temporary file deleted: {tmp_path}")
@@ -439,14 +528,13 @@ async def extract_data(
 @app.on_event("startup")
 async def startup_event():
-    """Initialize OCR engine on startup"""
-    logger.info("Starting RapidOCR API...")
     try:
-        # Test initialize the engine
-        test_processor = OCRProcessor()
-        logger.info("RapidOCR engine ready for processing")
     except Exception as e:
-        logger.error(f"Failed to initialize OCR engine: {str(e)}")
         raise
 if __name__ == "__main__":

 import uvicorn
 import cv2
 import numpy as np
+import pytesseract
 from rapidocr_onnxruntime import RapidOCR
 from fastapi import (
     FastAPI, File, UploadFile, Depends,
+    HTTPException, Request, Query, Form
 )
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
 # ==========================================
 load_dotenv()
 request_id_ctx: ContextVar[str] = ContextVar("request_id", default="system")
 class Config:
+    APP_NAME = os.getenv("APP_NAME", "Hybrid OCR API")
     API_TOKEN = os.getenv("API_BEARER_TOKEN")
+    MAX_SIZE = int(os.getenv("MAX_FILE_SIZE", 52428800))
     ALLOWED_ORIGINS = [o.strip() for o in os.getenv("ALLOWED_ORIGINS", "").split(",") if o.strip()]
     ALLOWED_TYPES = ["image/jpeg", "image/png", "image/bmp", "image/webp", "application/pdf"]
+    DEFAULT_ENGINE = os.getenv("DEFAULT_OCR_ENGINE", "tesseract")  # or "rapidocr" or "hybrid"
 class RequestIdFilter(logging.Filter):
     def filter(self, record):
     SUCCESS = "success"
     ERROR = "error"
+class OCREngine(str, Enum):
+    """Names of the OCR backends a caller may select.
+
+    The ``str`` mixin makes each member compare equal to its plain string
+    value, so ``"hybrid" == OCREngine.HYBRID`` is true.
+    """
+    TESSERACT = "tesseract"
+    RAPIDOCR = "rapidocr"
+    HYBRID = "hybrid"  # Use both and pick best result
 class BaseResponse(BaseModel):
     request_id: str
     process_time_ms: float
     text: str
     confidence: Optional[float] = None
     lines_detected: Optional[int] = None
+    engine_used: Optional[str] = None
 class OCRResult(BaseModel):
     filename: str
     total_pages: int
     pages_content: List[PageResult]
     average_confidence: Optional[float] = None
+    engine: str
 class APIResponse(BaseResponse):
     data: Optional[OCRResult] = None
             raise HTTPException(413, "File too large")
         return tmp_path
+class TesseractEngine:
+    """Tesseract OCR Engine - Best for English/European languages.
+
+    Holds no state; the single entry point is a static method.
+    """
+    @staticmethod
+    def extract_text(image_path: str, lang: Optional[str] = None) -> dict:
+        """Extract text from one image using Tesseract.
+
+        Args:
+            image_path: Path to an image file that PIL can open.
+            lang: Tesseract language code(s), e.g. ``"eng"`` or ``"eng+deu"``.
+                ``None`` (the default) leaves pytesseract's own default in
+                place, which is what this method did before the parameter
+                existed.
+
+        Returns:
+            dict with keys ``text`` (kept words joined by single spaces),
+            ``confidence`` (mean word confidence scaled to 0..1, or 0.0 when
+            nothing was kept), ``lines_detected`` (number of distinct text
+            lines that contributed at least one kept word) and ``engine``
+            (always ``'tesseract'``).
+
+        Raises:
+            ValueError: If opening the image or running Tesseract fails; the
+                original exception is chained as ``__cause__``.
+        """
+        try:
+            # The context manager releases the file handle PIL keeps open;
+            # OCR has to run inside it because PIL loads pixel data lazily.
+            with Image.open(image_path) as img:
+                data = pytesseract.image_to_data(
+                    img, lang=lang, output_type=pytesseract.Output.DICT
+                )
+            texts = []
+            confidences = []
+            line_keys = set()
+            for i, text in enumerate(data['text']):
+                if not text.strip():
+                    continue
+                # int(float(...)) accepts int, float and numeric-string values.
+                conf = int(float(data['conf'][i]))
+                if conf <= 0:
+                    continue
+                texts.append(text)
+                confidences.append(conf / 100.0)
+                # Words sharing page/block/paragraph/line numbers belong to
+                # the same text line, so count lines rather than words.
+                line_keys.add((
+                    data['page_num'][i],
+                    data['block_num'][i],
+                    data['par_num'][i],
+                    data['line_num'][i],
+                ))
+            combined_text = ' '.join(texts)
+            avg_confidence = sum(confidences) / len(confidences) if confidences else 0.0
+            return {
+                'text': combined_text,
+                'confidence': avg_confidence,
+                'lines_detected': len(line_keys),
+                'engine': 'tesseract'
+            }
+        except Exception as e:
+            logger.error(f"Tesseract extraction failed: {str(e)}")
+            raise ValueError(f"Tesseract error: {str(e)}") from e
+class RapidOCREngine:
+    """RapidOCR Engine - Fast and lightweight"""
     def __init__(self):
         self.engine = RapidOCR()
+    def extract_text(self, image_path: str) -> dict:
+        """Extract text using RapidOCR"""
         try:
             ocr_result, elapse = self.engine(image_path)
             if hasattr(ocr_result, '__iter__') and not isinstance(ocr_result, str):
                 result = list(ocr_result)
             else:
                 result = ocr_result
             if result is None or len(result) == 0:
                 return {
                     'text': '',
                     'confidence': 0.0,
+                    'lines_detected': 0,
+                    'engine': 'rapidocr'
                 }
             texts = []
             confidences = []
+            for line in result:
                 try:
                     if isinstance(line, (list, tuple)):
                         if len(line) == 2:
                             if isinstance(line[0], (list, tuple)):
                                 box, text = line
                                 confidence = 1.0
                             else:
                                 text, confidence = line
                         elif len(line) == 3:
                             box, text, confidence = line
                         elif len(line) >= 4:
                             box, text, confidence = line[0], line[1], line[2]
                         else:
                             continue
                     texts.append(str(text))
                     confidences.append(float(confidence) if confidence is not None else 1.0)
+                except:
                     continue
             combined_text = '\n'.join(texts)
             avg_confidence = sum(confidences) / len(confidences) if confidences else 0.0
             return {
                 'text': combined_text,
                 'confidence': avg_confidence,
+                'lines_detected': len(texts),
+                'engine': 'rapidocr'
             }
         except Exception as e:
+            logger.error(f"RapidOCR extraction failed: {str(e)}")
+            raise ValueError(f"RapidOCR error: {str(e)}")
+class HybridOCRProcessor:
+    """Route an OCR request to Tesseract, RapidOCR, or both.
+
+    In hybrid mode both engines run on the same image and the result with
+    the higher confidence is returned (Tesseract wins ties).
+    """
+    def __init__(self):
+        self.rapidocr = RapidOCREngine()
+        self.tesseract = TesseractEngine()
+    def _run_engine(self, runner, label: str, image_path: str) -> Optional[dict]:
+        """Run one engine for hybrid mode; return None (after logging) if it raises."""
+        try:
+            return runner.extract_text(image_path)
+        except Exception as e:
+            logger.warning(f"{label} failed in hybrid mode: {e}")
+            return None
+    def extract_text(self, image_path: str, engine: str = "tesseract") -> dict:
+        """
+        Extract text using specified engine or both
+        Args:
+            image_path: Path to image
+            engine: 'tesseract', 'rapidocr', or 'hybrid'
+        Returns:
+            The chosen engine's result dict. In hybrid mode its 'engine'
+            entry is overwritten with 'tesseract (hybrid)' or
+            'rapidocr (hybrid)'.
+        Raises:
+            ValueError: If ``engine`` is not a known name, or if hybrid mode
+                was requested and both engines failed. Errors from a single
+                selected engine propagate unchanged.
+        """
+        if engine == OCREngine.TESSERACT:
+            return self.tesseract.extract_text(image_path)
+        if engine == OCREngine.RAPIDOCR:
+            return self.rapidocr.extract_text(image_path)
+        if engine != OCREngine.HYBRID:
+            raise ValueError(f"Unknown engine: {engine}")
+        # Run both engines; a failure in one must not prevent using the other.
+        logger.info("Running hybrid OCR (Tesseract + RapidOCR)")
+        tess_result = self._run_engine(self.tesseract, "Tesseract", image_path)
+        rapid_result = self._run_engine(self.rapidocr, "RapidOCR", image_path)
+        if tess_result is None and rapid_result is None:
+            # Previously this returned an empty result, hiding a total failure.
+            raise ValueError("Hybrid OCR failed: both Tesseract and RapidOCR raised errors")
+        tess_conf = tess_result['confidence'] if tess_result is not None else 0.0
+        rapid_conf = rapid_result['confidence'] if rapid_result is not None else 0.0
+        # Pick the one with higher confidence, but never an engine that failed.
+        use_tesseract = rapid_result is None or (
+            tess_result is not None and tess_conf >= rapid_conf
+        )
+        if use_tesseract:
+            logger.info(f"Using Tesseract (conf: {tess_conf:.2%} vs {rapid_conf:.2%})")
+            tess_result['engine'] = 'tesseract (hybrid)'
+            return tess_result
+        logger.info(f"Using RapidOCR (conf: {rapid_conf:.2%} vs {tess_conf:.2%})")
+        rapid_result['engine'] = 'rapidocr (hybrid)'
+        return rapid_result
+class OCRProcessor:
+    """Main OCR processor supporting multiple engines"""
+    def __init__(self, engine: str = None):
+        self.engine_type = engine or Config.DEFAULT_ENGINE
+        self.processor = HybridOCRProcessor()
+    def process_file(self, file_path: str, content_type: str, engine: str = None) -> dict:
+        """Process PDF or image file"""
         start = time.perf_counter()
         pages_content = []
         all_confidences = []
+        engine_to_use = engine or self.engine_type
         try:
+            logger.info(f"Processing File: {file_path} with engine: {engine_to_use}")
             if content_type == "application/pdf":
                 logger.info("Converting PDF to Images...")
                     page_num = idx + 1
                     logger.info(f"Scanning Page {page_num}/{total}")
+                    # Save PIL Image to temp file
                     with tempfile.NamedTemporaryFile(delete=False, suffix='.png') as tmp_img:
                         img.save(tmp_img.name, 'PNG')
                         temp_img_path = tmp_img.name
                     try:
+                        ocr_result = self.processor.extract_text(temp_img_path, engine_to_use)
                         pages_content.append({
                             "index": idx,
                             "page_number": page_num,
                             "text": ocr_result["text"],
                             "confidence": ocr_result["confidence"],
+                            "lines_detected": ocr_result["lines_detected"],
+                            "engine_used": ocr_result.get("engine", engine_to_use)
                         })
                         if ocr_result["confidence"] > 0:
                             all_confidences.append(ocr_result["confidence"])
                     finally:
                         try:
                             os.remove(temp_img_path)
                         except:
                             pass
             else:
                 logger.info("Scanning Single Image...")
+                ocr_result = self.processor.extract_text(file_path, engine_to_use)
                 pages_content.append({
                     "index": 0,
                     "page_number": 1,
                     "text": ocr_result["text"],
                     "confidence": ocr_result["confidence"],
+                    "lines_detected": ocr_result["lines_detected"],
+                    "engine_used": ocr_result.get("engine", engine_to_use)
                 })
                 if ocr_result["confidence"] > 0:
             return {
                 "total_pages": len(pages_content),
                 "pages_content": pages_content,
+                "average_confidence": avg_confidence,
+                "engine": engine_to_use
             }
         except Exception as e:
 @app.middleware("http")
 async def request_context_middleware(request: Request, call_next):
     req_id = str(uuid.uuid4())
     token = request_id_ctx.set(req_id)
     request.state.request_id = req_id
             }
         )
     finally:
         request_id_ctx.reset(token)
 # ==========================================
         "request_id": request.state.request_id,
         "process_time_ms": 0,
         "status": StatusEnum.SUCCESS,
+        "message": "Hybrid OCR API Active",
+        "engines": ["tesseract", "rapidocr", "hybrid"],
+        "default_engine": Config.DEFAULT_ENGINE,
+        "version": "2.0.0"
     }
 @app.get("/health")
 async def health_check(request: Request):
     """Health check endpoint"""
+    return {
+        "request_id": request.state.request_id,
+        "status": StatusEnum.SUCCESS,
+        "message": "Service healthy",
+        "engines": {
+            "tesseract": "ready",
+            "rapidocr": "ready"
         }
+    }
 @app.post("/api/v1/get_data", response_model=APIResponse)
 async def extract_data(
     request: Request,
     file: UploadFile = File(...),
+    engine: Optional[str] = Form(default=None, description="OCR engine: tesseract, rapidocr, or hybrid"),
     token: str = Depends(SecurityService.validate_token)
 ):
+    """
+    Extract text from image or PDF
+    - **file**: Image or PDF file to process
+    - **engine**: Choose OCR engine (optional, sent as a multipart form field)
+      - `tesseract`: Best for English/European languages, highest accuracy (DEFAULT)
+      - `rapidocr`: Faster, good for Asian languages
+      - `hybrid`: Use both and pick best result (slower but most accurate)
+    Example curl:
+    ```bash
+    # Selecting Tesseract explicitly (form field)
+    curl -X POST "http://localhost:7860/api/v1/get_data" \
+      -H "Authorization: Bearer your-token" \
+      -F "file=@document.pdf" \
+      -F "engine=tesseract"
+    # Selecting hybrid mode (form field)
+    curl -X POST "http://localhost:7860/api/v1/get_data" \
+      -H "Authorization: Bearer your-token" \
+      -F "file=@document.pdf" \
+      -F "engine=hybrid"
+    ```
+    """
     start_ts = time.perf_counter()
     tmp_path = None
     req_id = request.state.request_id
+    # Validate engine parameter
+    engine_to_use = engine
+    if engine_to_use and engine_to_use not in [e.value for e in OCREngine]:
+        return JSONResponse(
+            status_code=400,
+            content={
+                "request_id": req_id,
+                "status": StatusEnum.ERROR,
+                "error_message": f"Invalid engine '{engine_to_use}'. Must be one of: tesseract, rapidocr, hybrid"
+            }
+        )
     try:
         FileValidator.validate(file)
         tmp_path = FileValidator.check_size_and_save(file)
+        logger.info(f"Processing with engine: {engine_to_use or Config.DEFAULT_ENGINE}")
+        # Initialize processor with selected engine
         processor = OCRProcessor()
         result = await run_in_threadpool(
             processor.process_file,
             tmp_path,
+            file.content_type,
+            engine_to_use
         )
         return {
                 "saved_file_path": tmp_path,
                 "total_pages": result["total_pages"],
                 "pages_content": result["pages_content"],
+                "average_confidence": result.get("average_confidence", 0.0),
+                "engine": result["engine"]
             }
         }
         )
     finally:
         if tmp_path:
             try:
                 os.remove(tmp_path)
                 logger.info(f"Temporary file deleted: {tmp_path}")
 @app.on_event("startup")
 async def startup_event():
+    """Initialize OCR engines on startup"""
+    logger.info("Starting Hybrid OCR API...")
     try:
+        test_processor = HybridOCRProcessor()
+        logger.info("All OCR engines ready for processing")
     except Exception as e:
+        logger.error(f"Failed to initialize OCR engines: {str(e)}")
         raise
 if __name__ == "__main__":

requirements.txt CHANGED Viewed

@@ -9,5 +9,8 @@ opencv-python-headless==4.12.0.88
 numpy<2.3.0
 pdf2image==1.17.0
 Pillow==11.2.1
 rapidocr-onnxruntime>=1.3.0
 onnxruntime>=1.16.0

 numpy<2.3.0
 pdf2image==1.17.0
 Pillow==11.2.1
+# Tesseract OCR
+pytesseract==0.3.13
+# RapidOCR
 rapidocr-onnxruntime>=1.3.0
 onnxruntime>=1.16.0