Spaces:

GiantAnalytics
/

ArabicOCRExtractor

Sleeping

App Files Files Community

ArabicOCRExtractor / apppaddle.py

GiantAnalytics

Rename app.py to apppaddle.py

f5cb039 verified about 1 year ago

raw

history blame contribute delete

2.5 kB

	import gradio as gr
	import paddle
	from paddleocr import PaddleOCR, draw_ocr
	import cv2
	from PIL import Image
	import numpy as np

	# Shared PaddleOCR engine for the Arabic language, created once at import
	# time and reused by every request.
	ocr = PaddleOCR(
	    lang='ar',
	    use_angle_cls=True,
	)

	def ocr_extract_text(image):
	    """Run Arabic OCR on an image and return the text with an annotated image.

	    Args:
	        image: The uploaded picture as a PIL image or a NumPy array, or
	            ``None`` when nothing was uploaded. Arrays with 3 or 4 channels
	            are assumed to be RGB / RGBA respectively.

	    Returns:
	        A ``(text, image)`` tuple. ``text`` holds the recognised lines joined
	        with newlines, or a short status message when there is no input or
	        no text was found. ``image`` is an RGB array for display: the
	        annotated picture when drawing succeeds, otherwise the plain input
	        (``None`` when no input was given).
	    """
	    if image is None:
	        return "No image provided", None

	    # Convert PIL Image to numpy array if needed
	    if isinstance(image, Image.Image):
	        image = np.array(image)

	    # Keep two views of the picture: BGR for PaddleOCR, and RGB for
	    # everything that is drawn on or handed back to the UI. Returning the
	    # BGR array would show the picture with red and blue swapped.
	    rgb_image = image
	    bgr_image = image
	    if len(image.shape) == 3 and image.shape[2] == 4:  # RGBA
	        rgb_image = cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)
	        bgr_image = cv2.cvtColor(image, cv2.COLOR_RGBA2BGR)
	    elif len(image.shape) == 3 and image.shape[2] == 3:  # RGB
	        bgr_image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

	    # Perform OCR on the uploaded image
	    result = ocr.ocr(bgr_image, cls=True)

	    # "Nothing found" can surface as None, an empty list, [None] or [[]];
	    # treat all of them the same instead of indexing blindly.
	    if not result or not result[0]:
	        return "No text detected in the image", rgb_image

	    # result[0] holds one entry per text region: (box, (text, score)).
	    regions = result[0]
	    boxes = [region[0] for region in regions]
	    texts = [region[1][0] for region in regions]
	    scores = [region[1][1] for region in regions]

	    # Annotation is best-effort: if drawing fails, fall back to the plain
	    # picture so the extracted text is still returned.
	    try:
	        annotated_image = draw_ocr(rgb_image, boxes, texts, scores)
	    except Exception as e:
	        print(f"Error annotating image: {e}")
	        annotated_image = rgb_image

	    return "\n".join(texts), annotated_image

	# Stylesheet for elements tagged "output-text": right-to-left direction and
	# right alignment, applied to the Arabic text box below.
	css = """
	.output-text { direction: rtl; text-align: right; }
	"""

	# Gradio UI: one uploaded image in; extracted text and an image out.
	iface = gr.Interface(
	    fn=ocr_extract_text,
	    inputs=gr.Image(label="Upload Image", type="numpy"),
	    outputs=[
	        gr.Textbox(elem_classes=["output-text"], label="Extracted Arabic Text"),
	        gr.Image(label="Annotated Image"),
	    ],
	    css=css,
	    title="Arabic OCR Extractor",
	    description="Upload an Arabic document or image to extract the text using OCR.",
	    cache_examples=True,
	    examples=[],  # No example images are configured yet
	)

	# Launch the app only when run as a script (Colab / Hugging Face Spaces).
	# share=True enables the Colab public URL.
	if __name__ == "__main__":
	    iface.launch(share=True, debug=True)