Spaces:
Sleeping
Sleeping
| """ | |
| Mock OCR Service for Testing | |
| This is a simplified version for testing when PaddleOCR models cannot be downloaded | |
| """ | |
| import logging | |
| from typing import Dict, List, Any | |
| import numpy as np | |
| import cv2 | |
| from PIL import Image | |
# Configure the root logger at INFO level as a side effect of importing this
# module, so the mock-service notices logged below are emitted by default.
logging.basicConfig(level=logging.INFO)
# Module-scoped logger, named after this module.
logger = logging.getLogger(__name__)
class MockOCRService:
    """
    Stand-in OCR service for tests.

    Produces a fixed, fabricated OCR result whose geometry is scaled to the
    size of the input image, so callers can exercise the result structure
    without running a real OCR engine.
    """

    def __init__(self, use_gpu: bool = False, lang: str = 'en'):
        """
        Record the configuration and log that this is a mock.

        Args:
            use_gpu: Stored on the instance; not otherwise used by this class.
            lang: Stored on the instance; not otherwise used by this class.
        """
        self.use_gpu = use_gpu
        self.lang = lang
        logger.info(f"Initializing Mock OCR Service (GPU: {use_gpu}, Language: {lang})")
        logger.warning("Using MOCK OCR SERVICE - not real OCR! For testing structure only.")

    def process_image(self, image_path: str) -> Dict[str, Any]:
        """
        Build the canned OCR result for the image at ``image_path``.

        The image is read only to learn its width and height. Every bounding
        box is a fixed fraction of that size, expressed as four ``[x, y]``
        integer corners in the order ``[x0, y0], [x1, y0], [x1, y1], [x0, y1]``.

        Args:
            image_path: Path to the image file

        Returns:
            Dictionary with ``image_width``, ``image_height`` and ``blocks``:
            a header block holding one line (three words, the first with
            per-character boxes) and a paragraph block holding five lines.

        Raises:
            ValueError: If ``cv2.imread`` returns ``None`` for the path.
        """
        pixels = cv2.imread(image_path)
        if pixels is None:
            raise ValueError(f"Cannot read image from {image_path}")

        height, width = pixels.shape[:2]
        logger.info(f"Processing image: {width}x{height}")

        def quad(left, top, right, bottom):
            """Return the corner list for a rectangle given as image fractions."""
            x0, x1 = int(width * left), int(width * right)
            y0, y1 = int(height * top), int(height * bottom)
            return [[x0, y0], [x1, y0], [x1, y1], [x0, y1]]

        # Vertical extent shared by everything in the header band.
        header_top, header_bottom = 0.05, 0.15

        # Per-character boxes for "Sample": six equal slices of the word box.
        sample_chars = []
        for idx, glyph in enumerate("Sample"):
            sample_chars.append({
                "char": glyph,
                "bounding_box": quad(
                    0.1 + idx * 0.025,
                    header_top,
                    0.1 + (idx + 1) * 0.025,
                    header_bottom,
                ),
                "confidence": 0.95,
            })

        # (text, left fraction, right fraction, confidence, characters)
        word_specs = (
            ("Sample", 0.1, 0.25, 0.95, sample_chars),
            ("Document", 0.27, 0.50, 0.93, []),
            ("Title", 0.52, 0.68, 0.96, []),
        )
        title_words = [
            {
                "word": text,
                "bounding_box": quad(left, header_top, right, header_bottom),
                "confidence": score,
                "characters": chars,
            }
            for text, left, right, score, chars in word_specs
        ]

        title_line = {
            "line_id": "line_0",
            "text": "Sample Document Title (Mock OCR)",
            "bounding_box": quad(0.1, header_top, 0.9, header_bottom),
            "font_size_estimate": int((height * 0.1) * 0.75),
            "words": title_words,
        }

        # Five stacked paragraph lines, each 8% of the image height tall.
        paragraph_lines = []
        for row in range(5):
            number = row + 1
            paragraph_lines.append({
                "line_id": f"line_{number}",
                "text": f"This is line {number} of the mock paragraph content.",
                "bounding_box": quad(0.1, 0.2 + row * 0.08, 0.9, 0.2 + number * 0.08),
                "font_size_estimate": 12,
                "words": [],
            })

        header_block = {
            "block_id": "block_0",
            "block_type": "header",
            "bounding_box": quad(0.1, header_top, 0.9, header_bottom),
            "lines": [title_line],
        }
        paragraph_block = {
            "block_id": "block_1",
            "block_type": "paragraph",
            "bounding_box": quad(0.1, 0.2, 0.9, 0.6),
            "lines": paragraph_lines,
        }

        return {
            "image_width": width,
            "image_height": height,
            "blocks": [header_block, paragraph_block],
        }