"""
OCR Handler - OCR-only Processing
This module provides OCR-only functionality that bypasses the full detection pipeline.
Useful for cases where you only need text extraction without RF-DETR/CLIP analysis.
"""
import torch
import cv2
import numpy as np
from PIL import Image
from typing import Union, List, Dict, Optional
from pathlib import Path
import easyocr
from detection.image_utils import load_image


def process_ocr_only(
image: Union[str, Path, np.ndarray, Image.Image],
    gpu: Optional[bool] = None
) -> List[Dict]:
"""
Run OCR across the full image and return detections
This bypasses RF-DETR/CLIP and runs EasyOCR directly on the image.
Args:
image: Input image (path, PIL Image, or numpy array)
gpu: Whether to use GPU. If None, auto-detects CUDA availability.
Returns:
List of detections with keys:
- box: Dict with x1, y1, x2, y2 coordinates
- confidence: OCR confidence score (float)
- class_id: None (no classification)
- class_name: "" (no classification)
- text: Extracted text string
- description: "" (no description)
"""
# Load image
img_array = load_image(image)
    # Initialize OCR reader. Note that the reader is recreated on every call;
    # cache it externally if calling repeatedly. Languages are fixed to
    # English and French.
    if gpu is None:
        gpu = torch.cuda.is_available()
    reader = easyocr.Reader(['en', 'fr'], gpu=gpu)
    # Run OCR - detail=1 yields (quad, text, confidence) entries, where quad
    # is the four (x, y) corner points of the detected text region
    ocr_results = reader.readtext(img_array, detail=1)
# Convert to standard detection format
detections = []
for entry in ocr_results:
if not isinstance(entry, (list, tuple)) or len(entry) < 3:
continue
quad, text, conf = entry[0], entry[1], entry[2]
if not isinstance(text, str) or not text.strip():
continue
        # Convert quadrilateral to an axis-aligned bounding box. Keep the raw
        # float coordinates; truncating the max values to int would shrink
        # the right/bottom edge of the box.
        xs = [p[0] for p in quad]
        ys = [p[1] for p in quad]
        box = {
            "x1": float(min(xs)),
            "y1": float(min(ys)),
            "x2": float(max(xs)),
            "y2": float(max(ys))
        }
detections.append({
"box": box,
"confidence": float(conf) if conf is not None else 1.0,
"class_id": None,
"class_name": "",
"text": text.strip(),
"description": ""
})
return detections
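

# Example usage (sketch): extracting text from a screenshot. The file name
# below is hypothetical; process_ocr_only() accepts a path, PIL Image, or
# numpy array.
#
#     detections = process_ocr_only("screenshot.png")
#     for det in detections:
#         print(f"{det['confidence']:.2f}  {det['text']}")

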
def annotate_ocr_detections(
image: Union[str, Path, np.ndarray, Image.Image],
detections: List[Dict],
thickness: int = 2,
return_format: str = "pil"
) -> Union[Image.Image, np.ndarray]:
"""
Annotate image with OCR detection boxes and text labels
Args:
image: Input image (path, PIL Image, or numpy array)
detections: List of detections from process_ocr_only()
thickness: Line thickness for bounding boxes
return_format: "pil" for PIL Image or "numpy" for numpy array
Returns:
Annotated image as PIL Image or numpy array
"""
# Load image
img_array = load_image(image)
# Convert to BGR for OpenCV
img_bgr = cv2.cvtColor(img_array, cv2.COLOR_RGB2BGR)
# Draw each detection
for det in detections:
x1 = int(det["box"]["x1"])
y1 = int(det["box"]["y1"])
x2 = int(det["box"]["x2"])
y2 = int(det["box"]["y2"])
# Draw bounding box
cv2.rectangle(img_bgr, (x1, y1), (x2, y2), (0, 255, 0), thickness)
# Draw text label
text = det.get("text", "")
if text:
            (tw, th), bl = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
            # Place the label above the box, clamped so it stays visible
            # when the box touches the top edge of the image
            ty = max(y1 - 10, th + 10)
# Draw text background
cv2.rectangle(
img_bgr,
(x1, ty - th - bl - 4),
(x1 + tw + 6, ty + bl - 4),
(0, 180, 0), # Darker green
-1
)
# Draw text
cv2.putText(
img_bgr,
text,
(x1 + 3, ty - bl - 2),
cv2.FONT_HERSHEY_SIMPLEX,
0.5,
(255, 255, 255),
1
)
# Convert back to RGB
img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
# Return in requested format
if return_format.lower() == "pil":
return Image.fromarray(img_rgb)
else:
return img_rgb
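

# Minimal end-to-end sketch, assuming this module is run from the project
# root and that "sample.png" (a hypothetical test image) exists. It runs
# OCR-only detection, prints the extracted text, and saves an annotated copy
# of the image.
if __name__ == "__main__":
    detections = process_ocr_only("sample.png")
    for det in detections:
        print(f"[{det['confidence']:.2f}] {det['text']}")
    annotated = annotate_ocr_detections("sample.png", detections)
    annotated.save("sample_annotated.png")  # return_format="pil" is the default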