Spaces:

ghadgemadhuri92
/

mathstutor

Running

App Files Files Community

mathstutor / app /core /ocr.py

ghadgemadhuri92

double-firing of requests from UI blocked

ed6c1cd 3 months ago

raw

history blame contribute delete

4.67 kB


	import base64
	import requests
	import io
	import logging
	import numpy as np
	from typing import Optional, List
	from PIL import Image, ImageEnhance, UnidentifiedImageError
	import binascii
	# PaddleOCR is an optional dependency: when the import fails the name is
	# bound to None, so this module still imports and OCRProcessor can test
	# for the engine's availability at runtime.
	try:
	from paddleocr import PaddleOCR
	except ImportError:
	PaddleOCR = None

	# Module-level logger, named after this module.
	logger = logging.getLogger(__name__)

	class OCRProcessor:
	    """
	    Handles OCR text extraction using PaddleOCR and image preprocessing.

	    The PaddleOCR engine is created lazily on first use and shared by all
	    instances through ``_os_instance``.
	    """

	    # Process-wide PaddleOCR engine shared by every instance, so the engine
	    # is constructed at most once (not guarded by a lock).
	    _os_instance = None

	    # OCR lines are kept only when their confidence is strictly above this.
	    MIN_CONFIDENCE = 0.5

	    # Longest side, in pixels, that optimize_base64() lets an image keep.
	    MAX_DIMENSION = 1024

	    # Modes Pillow's JPEG writer is expected to accept as-is; any other
	    # mode is converted to RGB before saving.
	    _JPEG_MODES = frozenset({"1", "L", "RGB", "RGBX", "CMYK", "YCbCr"})

	    def __init__(self, max_size_bytes: int = 5 * 1024 * 1024):  # 5MB limit
	        """
	        Args:
	            max_size_bytes: Largest download, in bytes, accepted by
	                download_image_as_base64().
	        """
	        self.max_size = max_size_bytes
	        self.ocr_engine = None

	    @property
	    def engine(self):
	        """
	        Return the shared PaddleOCR engine, creating it on first access.

	        Returns:
	            The engine, or None when PaddleOCR is not installed.
	        """
	        if self.ocr_engine is None:
	            if OCRProcessor._os_instance is None:
	                if not PaddleOCR:
	                    logger.error("PaddleOCR not installed.")
	                    return None
	                logger.info("Initializing PaddleOCR engine...")
	                OCRProcessor._os_instance = PaddleOCR(use_angle_cls=True, lang='en')
	            self.ocr_engine = OCRProcessor._os_instance
	        return self.ocr_engine

	    @staticmethod
	    def _strip_data_uri_prefix(b64_string: str) -> str:
	        """Return the part after the first ';base64,' marker, or the input unchanged."""
	        _, marker, payload = b64_string.partition(";base64,")
	        return payload if marker else b64_string

	    def extract_text(
	        self,
	        headers_b64: Optional[str] = None,
	        image_data: Optional[str] = None,
	    ) -> str:
	        """
	        Extract text from a base64-encoded image.

	        Args:
	            headers_b64: Legacy first positional parameter; used as the image
	                only when 'image_data' is empty.
	            image_data: Base64 string of the image, optionally prefixed with
	                a 'data:...;base64,' header.

	        Returns:
	            The recognised lines joined with newlines. An empty string when
	            no image was supplied. On failure a human-readable message is
	            returned instead of raising: "OCR Engine Unavailable",
	            "Error: Invalid image data (...)" or "Error reading image: ...".
	            NOTE(review): callers cannot tell these messages apart from
	            recognised text without inspecting the string.
	        """
	        # Handle positional args if someone calls extract_text(b64)
	        target_b64 = image_data or headers_b64
	        if not target_b64:
	            return ""

	        engine = self.engine
	        if not engine:
	            return "OCR Engine Unavailable"

	        try:
	            # 1. Decode base64 into an RGB pixel array.
	            payload = self._strip_data_uri_prefix(target_b64)
	            try:
	                img_bytes = base64.b64decode(payload)
	                with Image.open(io.BytesIO(img_bytes)) as img:
	                    img_arr = np.array(img.convert("RGB"))
	            except (binascii.Error, UnidentifiedImageError, ValueError) as e:
	                logger.error("Image decoding failed: %s", e)
	                return f"Error: Invalid image data ({e})"

	            # 2. Run OCR.
	            result = engine.ocr(img_arr, cls=True)

	            # 3. Keep confident lines. Each entry is indexed as
	            #    entry[1] == (text, confidence); result[0] may be empty/None.
	            detections = result[0] if result else None
	            extracted_lines = [
	                entry[1][0]
	                for entry in detections or []
	                if entry[1][1] > self.MIN_CONFIDENCE
	            ]

	            full_text = "\n".join(extracted_lines)
	            logger.info("OCR extracted %d chars.", len(full_text))
	            return full_text

	        except Exception as e:
	            # Best-effort boundary: any other failure is reported as text.
	            logger.error("OCR Failed: %s", e)
	            return f"Error reading image: {e}"

	    def optimize_base64(self, b64_string: str) -> str:
	        """
	        Optimize a base64 image: shrink it to fit MAX_DIMENSION and re-encode
	        it as JPEG (quality 85).

	        Args:
	            b64_string: Base64 image data, optionally prefixed with a
	                'data:...;base64,' header.

	        Returns:
	            The optimized image as a bare base64 string. If anything fails,
	            the input is returned unchanged (including any data-URI prefix).
	        """
	        try:
	            data = self._strip_data_uri_prefix(b64_string)
	            with Image.open(io.BytesIO(base64.b64decode(data))) as img:
	                max_dim = self.MAX_DIMENSION
	                if max(img.size) > max_dim:
	                    img.thumbnail((max_dim, max_dim), Image.Resampling.LANCZOS)

	                # Convert every mode the JPEG writer cannot store (RGBA, P,
	                # LA, I, F, ...) so the save below does not fail.
	                if img.mode not in self._JPEG_MODES:
	                    img = img.convert('RGB')

	                buffer = io.BytesIO()
	                img.save(buffer, format="JPEG", quality=85)
	            return base64.b64encode(buffer.getvalue()).decode('utf-8')
	        except Exception as e:
	            logger.warning("Image optimization failed: %s", e)
	            return b64_string

	    def download_image_as_base64(self, url: str) -> Optional[str]:
	        """
	        Download an image from a URL and return it as an optimized base64 string.

	        The body is streamed and the download is abandoned as soon as it
	        exceeds 'self.max_size', so an oversized response is never fully
	        read into memory.

	        Args:
	            url: Location of the image.
	                NOTE(review): the URL is fetched as given; if it can come
	                from untrusted input, validate it before calling (SSRF).

	        Returns:
	            The result of optimize_base64() on the downloaded bytes, or None
	            when the request fails or the body is larger than 'self.max_size'.
	        """
	        try:
	            with requests.get(url, timeout=10, stream=True) as response:
	                response.raise_for_status()

	                # Cheap early rejection when the server declares the size.
	                declared = response.headers.get("Content-Length")
	                if declared and declared.isdigit() and int(declared) > self.max_size:
	                    logger.warning("Image download rejected: exceeds %d bytes.", self.max_size)
	                    return None

	                # The header may be absent or wrong, so enforce the cap
	                # while reading as well.
	                body = bytearray()
	                for chunk in response.iter_content(chunk_size=64 * 1024):
	                    body.extend(chunk)
	                    if len(body) > self.max_size:
	                        logger.warning("Image download rejected: exceeds %d bytes.", self.max_size)
	                        return None

	            b64 = base64.b64encode(bytes(body)).decode('utf-8')
	            return self.optimize_base64(b64)
	        except Exception as e:
	            logger.error("Image download failed: %s", e)
	            return None