fastapi-ocr / ocr_api /mock_ocr_service.py
nagpalsumit247's picture
Upload 4 files
9a34207 verified
"""
Mock OCR Service for Testing.

This is a simplified stand-in used for testing when the PaddleOCR models cannot be downloaded.
"""
import logging
from typing import Dict, List, Any
import numpy as np
import cv2
from PIL import Image
# Import-time side effect: request INFO-level logging configuration via
# logging.basicConfig as soon as this module is imported.
logging.basicConfig(level=logging.INFO)
# Module-level logger, named after this module, used by MockOCRService below.
logger = logging.getLogger(__name__)
class MockOCRService:
    """
    Stand-in OCR service for tests.

    No text recognition is performed. ``process_image`` returns a fixed,
    simulated result tree (blocks -> lines -> words -> characters) whose
    coordinates are scaled to the dimensions of the supplied image.
    """

    def __init__(self, use_gpu: bool = False, lang: str = 'en'):
        """
        Store the configuration flags and log that the mock is in use.

        Args:
            use_gpu: Stored on the instance and echoed in the log message.
            lang: Stored on the instance and echoed in the log message.
        """
        self.use_gpu = use_gpu
        self.lang = lang
        logger.info(f"Initializing Mock OCR Service (GPU: {use_gpu}, Language: {lang})")
        logger.warning("Using MOCK OCR SERVICE - not real OCR! For testing structure only.")

    def process_image(self, image_path: str) -> Dict[str, Any]:
        """
        Build the mock structured OCR result for an image.

        The image is read only to obtain its width and height; every
        bounding box in the result is a fixed fraction of those dimensions.

        Args:
            image_path: Path to the image file.

        Returns:
            Dictionary with ``image_width``, ``image_height`` and a
            ``blocks`` list holding one header block and one paragraph block.

        Raises:
            ValueError: If ``cv2.imread`` returns ``None`` for ``image_path``.
        """
        image = cv2.imread(image_path)
        if image is None:
            raise ValueError(f"Cannot read image from {image_path}")

        height, width = image.shape[:2]
        logger.info(f"Processing image: {width}x{height}")

        def quad(left: float, top: float, right: float, bottom: float) -> List[List[int]]:
            # Four [x, y] corners in the order top-left, top-right,
            # bottom-right, bottom-left; inputs are fractions of the image
            # size and each coordinate is truncated with int().
            return [
                [int(width * left), int(height * top)],
                [int(width * right), int(height * top)],
                [int(width * right), int(height * bottom)],
                [int(width * left), int(height * bottom)],
            ]

        # Vertical band (as fractions of the height) shared by the header
        # block, its single line, and every word/character inside it.
        header_top = 0.05
        header_bottom = 0.15

        # Per-character boxes are generated for the first word only; each
        # character takes a 0.025-wide slice starting at x = 0.1.
        sample_characters = []
        for idx, letter in enumerate("Sample"):
            sample_characters.append({
                "char": letter,
                "bounding_box": quad(
                    0.1 + idx * 0.025,
                    header_top,
                    0.1 + (idx + 1) * 0.025,
                    header_bottom,
                ),
                "confidence": 0.95,
            })

        # (text, left fraction, right fraction, confidence, characters)
        header_word_specs = [
            ("Sample", 0.1, 0.25, 0.95, sample_characters),
            ("Document", 0.27, 0.50, 0.93, []),
            ("Title", 0.52, 0.68, 0.96, []),
        ]
        header_words = []
        for text, x_left, x_right, score, characters in header_word_specs:
            header_words.append({
                "word": text,
                "bounding_box": quad(x_left, header_top, x_right, header_bottom),
                "confidence": score,
                "characters": characters,
            })

        header_line = {
            "line_id": "line_0",
            "text": "Sample Document Title (Mock OCR)",
            "bounding_box": quad(0.1, header_top, 0.9, header_bottom),
            "font_size_estimate": int((height * 0.1) * 0.75),
            "words": header_words,
        }
        header_block = {
            "block_id": "block_0",
            "block_type": "header",
            "bounding_box": quad(0.1, header_top, 0.9, header_bottom),
            "lines": [header_line],
        }

        # Five stacked paragraph lines, each 0.08 of the height tall,
        # starting at y = 0.2; they carry no word-level detail.
        paragraph_lines = []
        for row in range(5):
            paragraph_lines.append({
                "line_id": f"line_{row + 1}",
                "text": f"This is line {row + 1} of the mock paragraph content.",
                "bounding_box": quad(
                    0.1,
                    0.2 + row * 0.08,
                    0.9,
                    0.2 + (row + 1) * 0.08,
                ),
                "font_size_estimate": 12,
                "words": [],
            })

        paragraph_block = {
            "block_id": "block_1",
            "block_type": "paragraph",
            "bounding_box": quad(0.1, 0.2, 0.9, 0.6),
            "lines": paragraph_lines,
        }

        return {
            "image_width": width,
            "image_height": height,
            "blocks": [header_block, paragraph_block],
        }