import logging
import re
from typing import Optional

import easyocr
import numpy as np
from PIL import Image

logger = logging.getLogger(__name__)

# Languages loaded when the caller does not pass any.
_DEFAULT_LANGS = ("en", "id")

# Detections whose confidence is not strictly above this value are dropped.
_DEFAULT_MIN_CONF = 0.3

# PIL modes whose pixel array is already a plain grayscale / RGB(A) raster.
# Other modes (palette "P", bilevel "1", "CMYK", "LA", ...) produce palette
# indices, booleans or unexpected channel counts when passed to np.array().
_ARRAY_READY_MODES = frozenset({"L", "RGB", "RGBA"})

_EXCESS_NEWLINES = re.compile(r"\n{3,}")
_EXCESS_BLANKS = re.compile(r"[ \t]{2,}")
_BANNER = "=" * 40


class EasyOCRModel:
    """OCR wrapper around EasyOCR.

    Intended for screenshots of apps such as Gojek / Grab / Shopee.
    """

    def __init__(self, lang: Optional[list] = None, *, gpu: bool = False):
        """Load the EasyOCR reader.

        Args:
            lang: Language codes handed to ``easyocr.Reader``. Defaults to
                ``["en", "id"]`` when omitted.
            gpu: Forwarded to ``easyocr.Reader``. Defaults to ``False`` so the
                model also loads on machines without a GPU.
        """
        # A fresh list per instance avoids the shared-mutable-default pitfall.
        languages = list(_DEFAULT_LANGS) if lang is None else lang
        logger.info("[EasyOCR] Loading model | lang=%s", languages)
        self.reader = easyocr.Reader(languages, gpu=gpu)
        logger.info("[EasyOCR] Model loaded ✅")

    @staticmethod
    def _to_array(image) -> np.ndarray:
        """Convert *image* to a numpy array, normalising unusual PIL modes to RGB."""
        # Non-PIL inputs (e.g. an existing ndarray) pass through np.array()
        # unchanged, exactly as before.
        if isinstance(image, Image.Image) and image.mode not in _ARRAY_READY_MODES:
            image = image.convert("RGB")
        return np.array(image)

    def _read(self, image, min_conf: float) -> list:
        """Run OCR and return ``(text, confidence)`` pairs with confidence > *min_conf*."""
        detections = self.reader.readtext(self._to_array(image))
        # Each detection is unpacked as (bbox, text, confidence); the bounding
        # box is not needed here.
        return [(text, conf) for (_, text, conf) in detections if conf > min_conf]

    def extract(self, image: Image.Image, min_conf: float = _DEFAULT_MIN_CONF) -> str:
        """Extract raw text from an image.

        Args:
            image: Image to run OCR on.
            min_conf: Detections must score strictly above this confidence to
                be kept. Defaults to 0.3.

        Returns:
            The kept text fragments joined with newlines, in the order the
            reader returned them.
        """
        raw = "\n".join(text for text, _ in self._read(image, min_conf))
        # Full text dumps go to DEBUG: they are verbose and may contain
        # personal data from the scanned image.
        logger.debug("[EasyOCR] RAW TEXT:\n%s\n%s\n%s", _BANNER, raw, _BANNER)
        return raw

    def extract_with_confidence(
        self, image: Image.Image, min_conf: float = _DEFAULT_MIN_CONF
    ) -> list:
        """Extract text fragments together with their confidence scores.

        Args:
            image: Image to run OCR on.
            min_conf: Detections must score strictly above this confidence to
                be kept. Defaults to 0.3.

        Returns:
            A list of ``{"text": ..., "confidence": ...}`` dicts, with the
            confidence rounded to two decimals.
        """
        filtered = [
            {"text": text, "confidence": round(conf, 2)}
            for text, conf in self._read(image, min_conf)
        ]
        logger.info("[EasyOCR] Extracted %d text blocks", len(filtered))
        return filtered

    def extract_clean(self, image: Image.Image, receipt_type: str = "unknown") -> str:
        """Extract text and clean it in one step.

        Args:
            image: Image to run OCR on.
            receipt_type: Currently unused; kept so existing callers that pass
                it keep working.

        Returns:
            The result of :meth:`clean` applied to :meth:`extract`.
        """
        cleaned = self.clean(self.extract(image))
        logger.debug("[EasyOCR] CLEANED:\n%s\n%s\n%s", _BANNER, cleaned, _BANNER)
        return cleaned

    def clean(self, text: str) -> str:
        """Apply basic whitespace cleanup to OCR output.

        Collapses runs of three or more newlines into a single blank line,
        collapses runs of two or more spaces/tabs into one space, and strips
        leading/trailing whitespace.
        """
        text = _EXCESS_NEWLINES.sub("\n\n", text)
        text = _EXCESS_BLANKS.sub(" ", text)
        return text.strip()