DIVYANSHI SINGH
Root project layout configured for deployment
b0bec61
"""
ocr.py — Optimized EasyOCR wrapper for Bill/Invoice Scanner.
Enabled for GPU acceleration on NVIDIA GTX 1650.
Part of the production-grade bill_scanner package.
"""
import logging
import easyocr
import os
# Optional CPU threading optimization (currently disabled):
# os.environ["OMP_NUM_THREADS"] = "1"
# Raise the "easyocr" logger threshold to ERROR so its info/warning chatter is
# hidden. Only the logger named "easyocr" is touched by this line.
logging.getLogger("easyocr").setLevel(logging.ERROR)
# Module-wide EasyOCR reader, built lazily on the first _get_reader() call.
_reader_instance = None


def _get_reader():
    """
    Return the shared EasyOCR reader, creating it on first use.

    The reader is requested with ``gpu=True``. If construction raises, a
    second reader is built with ``gpu=False``. The instance is cached in the
    module-level ``_reader_instance`` so later calls reuse it.

    Returns:
        The cached ``easyocr.Reader`` configured for English (``['en']``).

    Raises:
        Exception: whatever ``easyocr.Reader`` raises if the CPU fallback
            construction also fails.
    """
    global _reader_instance
    if _reader_instance is not None:
        return _reader_instance

    try:
        reader = easyocr.Reader(['en'], gpu=True)
    except Exception as e:
        print(f"WARNING: GPU initialization failed, falling back to CPU. Error: {e}")
        reader = easyocr.Reader(['en'], gpu=False)
    else:
        # EasyOCR does not raise when no GPU backend is present: it quietly
        # selects the CPU and only emits a warning-level log record, which
        # this module's logger configuration hides. Inspect the device the
        # reader actually chose so the message below is truthful.
        device = str(getattr(reader, "device", "unknown"))
        if device == "cpu":
            print("WARNING: No GPU backend available, EasyOCR is running on CPU.")
        else:
            print("INFO: EasyOCR initialized with GPU acceleration.")

    _reader_instance = reader
    return _reader_instance
class OCRScanner:
    """Extracts plain text from an image using the module's shared EasyOCR reader."""

    def extract_text(self, image_path):
        """
        Run OCR on ``image_path`` and return the recognised text.

        Each detected fragment goes on its own line, in the order the reader
        reports them. An empty string is returned when nothing is detected,
        or when any exception occurs (the error is printed, not re-raised).
        """
        try:
            # Each detection is a (bbox, text, confidence) tuple; only the
            # text component at index 1 is kept.
            detections = _get_reader().readtext(image_path)
            if detections:
                return "\n".join(entry[1] for entry in detections)
            return ""
        except Exception as e:
            print(f"EasyOCR Error during extraction: {e}")
            return ""