Spaces:
Sleeping
Sleeping
File size: 897 Bytes
6034171 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 |
# File: ocr_processor.py
import numpy as np
from paddleocr import PaddleOCR
from PIL import Image
import io
# Shared OCR engine: constructed once when this module is imported and reused
# by extract_text_from_image (language 'en', angle classification switched on).
ocr = PaddleOCR(use_angle_cls=True, lang='en')
def extract_text_from_image(image_bytes: bytes) -> str:
    """Run OCR on an encoded image and return the recognized text.

    Args:
        image_bytes: Raw bytes of an encoded image file that PIL can open.

    Returns:
        The recognized text fragments joined by single spaces, the message
        "No text detected in the image." when nothing was recognized, or
        "An error occurred during OCR: <details>" when any step raised.
        Failures are reported through the return value; nothing is raised.
    """
    try:
        # 1. Decode the bytes into an RGB pixel array. The context manager
        #    closes the PIL image once its pixels have been copied out.
        # NOTE(review): the array is handed to PaddleOCR in RGB channel
        # order -- confirm that is the order the engine expects.
        with Image.open(io.BytesIO(image_bytes)) as img:
            img_array = np.array(img.convert("RGB"))

        # 2. Run OCR on the single image.
        result = ocr.ocr(img_array)

        # 3. Pull the recognized strings out of the first (only) page.
        page = result[0] if result else None
        if isinstance(page, dict):
            # Newer PaddleOCR releases (3.x) return one dict-like result per
            # image with the recognized strings under "rec_texts"; iterating
            # it like the legacy list would walk the dict keys instead.
            text_lines = list(page.get("rec_texts") or [])
        elif page:
            # Legacy shape: each entry is [box, (text, confidence)].
            text_lines = [line[1][0] for line in page]
        else:
            text_lines = []

        if text_lines:
            return " ".join(text_lines)
        return "No text detected in the image."
    except Exception as e:
        # Deliberate catch-all: callers receive a message string, never an
        # exception, so the return type stays `str` on every path.
        return f"An error occurred during OCR: {str(e)}"