Spaces:
Sleeping
Sleeping
| """ | |
| OCR Handler - OCR-only Processing | |
| This module provides OCR-only functionality that bypasses the full detection pipeline. | |
| Useful for cases where you only need text extraction without RF-DETR/CLIP analysis. | |
| """ | |
| import torch | |
| import cv2 | |
| import numpy as np | |
| from PIL import Image | |
| from typing import Union, List, Dict, Tuple | |
| from pathlib import Path | |
| import easyocr | |
| from detection.image_utils import load_image | |
# Cache of initialized EasyOCR readers, keyed by GPU flag.  Constructing a
# Reader loads the detection/recognition models from disk, which is far too
# expensive to repeat on every call.
_OCR_READERS: Dict[bool, "easyocr.Reader"] = {}


def _get_ocr_reader(gpu: bool) -> "easyocr.Reader":
    """Return a cached EasyOCR en/fr reader for the given GPU setting."""
    if gpu not in _OCR_READERS:
        _OCR_READERS[gpu] = easyocr.Reader(['en', 'fr'], gpu=gpu)
    return _OCR_READERS[gpu]


def process_ocr_only(
    image: Union[str, Path, np.ndarray, Image.Image],
    gpu: Union[bool, None] = None
) -> List[Dict]:
    """
    Run OCR across the full image and return detections.

    This bypasses RF-DETR/CLIP and runs EasyOCR directly on the image.
    The underlying EasyOCR reader is created once per GPU setting and
    cached at module level, so repeated calls do not reload the models.

    Args:
        image: Input image (path, PIL Image, or numpy array)
        gpu: Whether to use GPU. If None, auto-detects CUDA availability.

    Returns:
        List of detections with keys:
            - box: Dict with x1, y1, x2, y2 coordinates
            - confidence: OCR confidence score (float)
            - class_id: None (no classification)
            - class_name: "" (no classification)
            - text: Extracted text string
            - description: "" (no description)
    """
    img_array = load_image(image)

    if gpu is None:
        gpu = torch.cuda.is_available()
    reader = _get_ocr_reader(gpu)

    # detail=1 returns entries of the form [quad (4 points), text, confidence]
    ocr_results = reader.readtext(img_array, detail=1)

    detections = []
    for entry in ocr_results:
        # Defensive: skip malformed entries EasyOCR might emit.
        if not isinstance(entry, (list, tuple)) or len(entry) < 3:
            continue
        quad, text, conf = entry[0], entry[1], entry[2]
        # Drop empty / whitespace-only recognitions.
        if not isinstance(text, str) or not text.strip():
            continue

        # Collapse the quadrilateral into an axis-aligned bounding box.
        # int() truncation is kept to preserve the original pixel semantics.
        xs = [p[0] for p in quad]
        ys = [p[1] for p in quad]
        box = {
            "x1": float(int(min(xs))),
            "y1": float(int(min(ys))),
            "x2": float(int(max(xs))),
            "y2": float(int(max(ys)))
        }
        detections.append({
            "box": box,
            "confidence": float(conf) if conf is not None else 1.0,
            "class_id": None,
            "class_name": "",
            "text": text.strip(),
            "description": ""
        })
    return detections
def annotate_ocr_detections(
    image: Union[str, Path, np.ndarray, Image.Image],
    detections: List[Dict],
    thickness: int = 2,
    return_format: str = "pil"
) -> Union[Image.Image, np.ndarray]:
    """
    Annotate image with OCR detection boxes and text labels.

    Args:
        image: Input image (path, PIL Image, or numpy array)
        detections: List of detections from process_ocr_only()
        thickness: Line thickness for bounding boxes
        return_format: "pil" for PIL Image or "numpy" for numpy array

    Returns:
        Annotated image as PIL Image or numpy array
    """
    rgb = load_image(image)
    # OpenCV drawing primitives expect BGR channel order.
    canvas = cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)

    for det in detections:
        coords = det["box"]
        left, top = int(coords["x1"]), int(coords["y1"])
        right, bottom = int(coords["x2"]), int(coords["y2"])

        # Green bounding box around the detected text region.
        cv2.rectangle(canvas, (left, top), (right, bottom), (0, 255, 0), thickness)

        label = det.get("text", "")
        if not label:
            continue

        (label_w, label_h), baseline = cv2.getTextSize(
            label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1
        )
        # Anchor the label above the box, clamped so it stays inside the image.
        anchor_y = max(top - 10, label_h + 10)

        # Filled backdrop so white text stays readable on any background.
        cv2.rectangle(
            canvas,
            (left, anchor_y - label_h - baseline - 4),
            (left + label_w + 6, anchor_y + baseline - 4),
            (0, 180, 0),  # Darker green
            -1
        )
        cv2.putText(
            canvas,
            label,
            (left + 3, anchor_y - baseline - 2),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.5,
            (255, 255, 255),
            1
        )

    annotated = cv2.cvtColor(canvas, cv2.COLOR_BGR2RGB)
    return Image.fromarray(annotated) if return_format.lower() == "pil" else annotated