File size: 1,629 Bytes
b0bec61
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
"""
ocr.py — Optimized EasyOCR wrapper for Bill/Invoice Scanner.
Enabled for GPU acceleration on NVIDIA GTX 1650.
Part of the production-grade bill_scanner package.
"""

import logging
import easyocr
import os

# os.environ["OMP_NUM_THREADS"] = "1" # Optional CPU threading optimization
# Only messages at ERROR level or above from the "easyocr" logger are kept;
# no other logger (e.g. torch's) is configured here.
logging.getLogger("easyocr").setLevel(logging.ERROR)

# Module-level cache for the shared OCR reader; stays None until a reader is created.
_reader_instance = None

def _get_reader():
    """Return the shared EasyOCR reader, creating it on first use.

    The reader is built once and cached in the module-level
    ``_reader_instance``. GPU mode is requested first; if constructing the
    reader raises, a CPU-only reader is built instead.

    Returns:
        The cached ``easyocr.Reader`` configured for English.

    Raises:
        Exception: Whatever ``easyocr.Reader`` raises if the CPU fallback
            also fails. Nothing is cached in that case, so the next call
            retries initialization.
    """
    global _reader_instance
    if _reader_instance is not None:
        return _reader_instance

    try:
        reader = easyocr.Reader(['en'], gpu=True)
    except Exception as e:
        print(f"WARNING: GPU initialization failed, falling back to CPU. Error: {e}")
        reader = easyocr.Reader(['en'], gpu=False)
    else:
        # gpu=True is only a request: EasyOCR selects the CPU without raising
        # when no accelerator is available, so report the device it actually
        # chose instead of assuming the GPU is in use.
        device = str(getattr(reader, "device", "cuda"))
        if device == "cpu":
            print("WARNING: No GPU available, EasyOCR initialized on CPU.")
        else:
            print("INFO: EasyOCR initialized with GPU acceleration.")

    _reader_instance = reader
    return _reader_instance

class OCRScanner:
    """Text extraction front end backed by the shared EasyOCR reader."""

    def extract_text(self, image_path):
        """
        Run EasyOCR on ``image_path`` and return the recognized text.

        The text of each detection is placed on its own line, in the order
        the reader reports the detections. An empty string is returned when
        nothing is detected or when any error occurs; the error is printed
        rather than raised.
        """
        extracted = ""
        try:
            # Each detection is a (bbox, text, confidence) tuple.
            detections = _get_reader().readtext(image_path)
            if detections:
                # Keep only the text field, one detection per line.
                extracted = "\n".join(entry[1] for entry in detections)
        except Exception as e:
            print(f"EasyOCR Error during extraction: {e}")
        return extracted