Spaces:

ayushsoni155
/

Invoice_IMG_To_JSON

Sleeping

Invoice_IMG_To_JSON / ocr_processor.py

Ayush soni

Add application file

6034171 3 months ago

897 Bytes

	# File: ocr_processor.py
	import numpy as np
	from paddleocr import PaddleOCR
	from PIL import Image
	import io

	# Shared PaddleOCR engine: constructed once when this module is imported and
	# reused by every call to extract_text_from_image below.
	ocr = PaddleOCR(
	    use_angle_cls=True,
	    lang="en",
	)

	def extract_text_from_image(image_bytes: bytes) -> str:
	    """Run OCR on an encoded image and return the recognized text.

	    The bytes are decoded with Pillow, converted to RGB, turned into a NumPy
	    array and handed to the module-level ``ocr`` engine.

	    Args:
	        image_bytes: Raw contents of an image file that Pillow can open.

	    Returns:
	        One of three strings:

	        * the recognized text fragments joined by single spaces;
	        * ``"No text detected in the image."`` when the engine reports
	          nothing for the image;
	        * a message starting with ``"An error occurred during OCR: "`` when
	          any step raised an ``Exception`` (the error is reported in the
	          return value rather than propagated).
	    """
	    try:
	        # 1. Convert bytes to an RGB pixel array. The context manager closes
	        #    the Pillow image as soon as the pixels have been copied out.
	        with Image.open(io.BytesIO(image_bytes)) as img:
	            img_array = np.array(img.convert("RGB"))

	        # 2. Run OCR
	        result = ocr.ocr(img_array)

	        # 3. Extract and combine the recognized text (first page only)
	        if not result or not result[0]:
	            return "No text detected in the image."

	        page = result[0]
	        if isinstance(page, dict):
	            # PaddleOCR 3.x: each page is a dict-like result whose
	            # recognized strings live under "rec_texts". Iterating it like
	            # the legacy list would walk the dict's keys instead.
	            text_lines = list(page.get("rec_texts") or [])
	        else:
	            # PaddleOCR 2.x: each entry is [box, (text, score)].
	            text_lines = [line[1][0] for line in page]

	        if not text_lines:
	            return "No text detected in the image."
	        return " ".join(text_lines)

	    except Exception as e:
	        # Deliberate catch-all: callers receive a string in every case.
	        return f"An error occurred during OCR: {str(e)}"