Spaces:
Sleeping
Sleeping
| """ | |
| ocr.py — Optimized EasyOCR wrapper for Bill/Invoice Scanner. | |
| Enabled for GPU acceleration on NVIDIA GTX 1650. | |
| Part of the production-grade bill_scanner package. | |
| """ | |
| import logging | |
| import easyocr | |
| import os | |
# Optional CPU threading optimization, left disabled:
# os.environ["OMP_NUM_THREADS"] = "1"
# Quiet the "easyocr" logger so only ERROR and above are emitted. This call
# touches that one logger only; other loggers are left at their own levels.
logging.getLogger("easyocr").setLevel(logging.ERROR)
# Process-wide cache for the EasyOCR reader; filled on first use by _get_reader().
_reader_instance = None


def _get_reader():
    """Return the shared EasyOCR reader, building it on the first call.

    The reader is constructed once and cached in the module-level
    ``_reader_instance``; later calls return the cached object. Construction
    is attempted with ``gpu=True`` first and retried with ``gpu=False`` if
    that attempt raises.

    Returns:
        The cached ``easyocr.Reader`` created for ``['en']``.

    Raises:
        Exception: Whatever ``easyocr.Reader`` raises when the CPU retry also
            fails. Nothing is cached in that case, so the next call retries.
    """
    global _reader_instance
    if _reader_instance is not None:
        return _reader_instance

    try:
        reader = easyocr.Reader(['en'], gpu=True)
    except Exception as e:
        print(f"WARNING: GPU initialization failed, falling back to CPU. Error: {e}")
        reader = easyocr.Reader(['en'], gpu=False)
    else:
        # EasyOCR picks the CPU on its own (without raising) when gpu=True is
        # requested but no CUDA/MPS device is found, so a successful
        # construction alone does not prove GPU use. Check the device the
        # reader actually selected before announcing acceleration.
        if str(getattr(reader, "device", "")) == "cpu":
            print("WARNING: GPU requested but not available; EasyOCR is running on CPU.")
        else:
            print("INFO: EasyOCR initialized with GPU acceleration.")

    _reader_instance = reader
    return _reader_instance
class OCRScanner:
    """Image-to-text helper backed by the module's shared EasyOCR reader."""

    def extract_text(self, image_path):
        """Run OCR on ``image_path`` and return the recognised text.

        The text of each detection is placed on its own line, in the order
        the reader returned the detections.

        Args:
            image_path: Handed unchanged to the reader's ``readtext``.

        Returns:
            The newline-joined detection texts, or ``""`` when there are no
            detections or when any exception is raised (the error is printed
            rather than propagated).
        """
        try:
            detections = _get_reader().readtext(image_path)
            # Each detection is a (bbox, text, confidence) tuple; keep the text.
            return "\n".join(entry[1] for entry in detections) if detections else ""
        except Exception as e:
            print(f"EasyOCR Error during extraction: {e}")
            return ""