Helpful_AI / backend /text_editor /ocr_engine.py
Kaushal05's picture
Upload 90 files
6c07fdf verified
Raw
History Blame Contribute Delete
1.99 kB
import numpy as np
try:
import easyocr
EASYOCR_AVAILABLE = True
except ImportError:
EASYOCR_AVAILABLE = False
# Module-level cache for the shared EasyOCR reader. It starts out empty and is
# assigned by get_ocr_reader() the first time a reader is built successfully.
_reader = None
def get_ocr_reader():
    """Return the shared EasyOCR reader, creating it on first use.

    The reader is stored in the module-level ``_reader`` so that it is built
    at most once per successful initialization.

    Returns:
        The cached ``easyocr.Reader`` instance, or ``None`` when the
        ``easyocr`` package could not be imported or when constructing the
        reader raised an exception (the error is printed, and the next call
        will try again because nothing is cached on failure).
    """
    global _reader

    # Without the optional dependency there is nothing to build.
    if not EASYOCR_AVAILABLE:
        return None

    # Fast path: a reader was already created by an earlier call.
    if _reader is not None:
        return _reader

    try:
        # Hindi + English models, with the GPU disabled so the reader runs
        # on the CPU for maximum compatibility.
        _reader = easyocr.Reader(['hi', 'en'], gpu=False)
    except Exception as exc:
        print(f"Error initializing EasyOCR: {exc}")
        return None

    return _reader
def detect_text_in_image(img_bgr: np.ndarray) -> list:
    """
    Run OCR over an image and describe every piece of text that was found.

    Args:
        img_bgr: Image array handed unchanged to the reader's ``readtext``.

    Returns:
        A list with one dictionary per detection:
        [
            {
                "text": str,             # recognised text, whitespace-stripped
                "bbox": (x, y, w, h),    # integer box enclosing the 4 corners
                "confidence": float
            }
        ]
        An empty list is returned when no OCR reader is available or when
        anything goes wrong during detection (the error is printed).
    """
    ocr = get_ocr_reader()
    if ocr is None:
        print("OCR Engine not available. Skipping automatic text detection.")
        return []

    def _to_record(detection):
        # Each detection is (four corner points, text, confidence).
        quad, label, score = detection
        columns = [corner[0] for corner in quad]
        rows = [corner[1] for corner in quad]
        # Collapse the four-point outline into a top-left origin plus size.
        left = int(min(columns))
        top = int(min(rows))
        width = int(max(columns) - left)
        height = int(max(rows) - top)
        return {
            "text": label.strip(),
            "bbox": (left, top, width, height),
            "confidence": float(score),
        }

    try:
        # readtext returns:
        # [([[x0,y0], [x1,y1], [x2,y2], [x3,y3]], text, confidence), ...]
        detections = ocr.readtext(img_bgr)
        # A failure on any single detection discards the whole batch below.
        return [_to_record(item) for item in detections]
    except Exception as exc:
        print(f"OCR detection encountered an error: {exc}")
        return []