predict / app /utils /ocr.py
Maulidaaa's picture
Upload 21 files
030432c verified
raw
history blame contribute delete
670 Bytes
import numpy as np
from PIL import Image
from io import BytesIO
import re
import logging
import easyocr
# Shared EasyOCR reader, constructed once when this module is imported and
# reused by extract_text_from_image. Configured for English ('en') with GPU
# disabled.
ocr_reader = easyocr.Reader(['en'], gpu=False)
def extract_text_from_image(image_file):
    """Run OCR on an uploaded image and return the cleaned-up text.

    Args:
        image_file: An object exposing ``read()`` that yields the encoded
            image bytes.

    Returns:
        The OCR fragments joined with single spaces, with the section-header
        words (Ingredients, Komposisi, Composition, Bahan, Daftar Bahan)
        removed case-insensitively and every character other than ASCII
        letters, commas, periods, whitespace and hyphens stripped. Leading
        and trailing whitespace is trimmed. If anything fails, the error is
        logged and an empty string is returned.
    """
    try:
        stream = BytesIO(image_file.read())
        rgb_pixels = np.array(Image.open(stream).convert("RGB"))

        # Each OCR result is indexed at position 1 to pick out its text
        # (EasyOCR's default result layout is (bbox, text, confidence)).
        fragments = []
        for detection in ocr_reader.readtext(rgb_pixels):
            fragments.append(detection[1])
        joined = " ".join(fragments)

        # Drop label words that introduce an ingredient list.
        without_headers = re.sub(
            r'\b(Ingredients|Komposisi|Composition|Bahan|Daftar Bahan)\b',
            '',
            joined,
            flags=re.IGNORECASE,
        )

        # Keep only letters, commas, periods, whitespace and hyphens.
        cleaned = re.sub(r'[^A-Za-z,.\s-]', '', without_headers)
        return cleaned.strip()
    except Exception as e:
        # Best-effort: callers receive an empty string instead of an error.
        logging.error(f"OCR error: {e}")
        return ""