Invoice-Fraud-Detection / image_ocr.py
Abhisesh7's picture
Update image_ocr.py
78732d8 verified
raw
history blame
1.33 kB
import functools
import logging

import easyocr
import numpy as np
from PIL import Image
# Raise the threshold of the "easyocr" logger to ERROR so that anything it
# emits below that level (info, warnings) is dropped.
_easyocr_logger = logging.getLogger("easyocr")
_easyocr_logger.setLevel(logging.ERROR)
_logger = logging.getLogger(__name__)


@functools.lru_cache(maxsize=1)
def _get_reader():
    """Return the shared EasyOCR reader (English, CPU), creating it on first use.

    The reader is built once and reused across calls instead of being
    re-created for every image; the EasyOCR documentation notes that reader
    construction loads the model into memory and only needs to run once.
    If construction raises, nothing is cached and the next call retries.
    """
    return easyocr.Reader(['en'], gpu=False)


def extract_text_from_image(image_input):
    """Extract text from an image using EasyOCR.

    Args:
        image_input: The image to read. Supported forms:
            * ``str`` -- passed to EasyOCR unchanged (treated as a file path
              by the original code's comment),
            * ``bytes`` or ``numpy.ndarray`` -- passed to EasyOCR unchanged,
            * ``PIL.Image.Image`` -- converted to an RGB array,
            * anything else -- handed to ``PIL.Image.open`` (e.g. a file-like
              object or path object) and converted to an RGB array.

    Returns:
        The recognised paragraphs joined with newlines. On any failure the
        function does not raise; it returns a string of the form
        ``"Error extracting text from image: <message>"`` instead, so callers
        that need to distinguish failure must check for that prefix.
    """
    try:
        reader = _get_reader()

        if isinstance(image_input, (str, bytes, np.ndarray)):
            # EasyOCR accepts these directly; no conversion needed.
            ocr_input = image_input
        elif isinstance(image_input, Image.Image):
            # Already-decoded PIL image: Image.open() would reject it, so
            # convert it straight to an array.
            ocr_input = np.array(image_input.convert('RGB'))
        else:
            # Let PIL decode whatever this is; the context manager releases
            # the underlying file handle once the pixels have been copied.
            with Image.open(image_input) as image:
                ocr_input = np.array(image.convert('RGB'))

        # detail=0 yields plain strings; paragraph=True merges nearby lines.
        results = reader.readtext(ocr_input, detail=0, paragraph=True)

        # Combine the extracted text into a single string.
        text = "\n".join(results)
        _logger.debug("Extracted text from image:\n%s", text)
        return text
    except Exception as e:
        # Boundary handler: keep the original "return an error string"
        # contract, but record the traceback so failures are diagnosable.
        _logger.exception("Error extracting text from image")
        return f"Error extracting text from image: {str(e)}"