Spaces:

Ramzan0553
/

Diagrams_Text_Detection

Runtime error

App Files Files Community

Diagrams_Text_Detection / app.py

Ramzan0553

Update app.py

646700a verified 9 months ago

raw

history blame contribute delete

3.32 kB

	import gradio as gr
	import cv2
	import pytesseract
	from pytesseract import Output
	import numpy as np

	def text_detection(img, config="--psm 11 --oem 3", min_conf=20):
	    """Run Tesseract on *img* and split the recognised words by box shape.

	    Args:
	        img: Image handed unchanged to ``pytesseract.image_to_data``.
	        config: Tesseract command-line options for that call.
	        min_conf: A word is kept only when its confidence is strictly
	            greater than this value.

	    Returns:
	        A ``(horizontal_text, vertical_text, data)`` tuple.
	        ``horizontal_text`` holds the kept words whose box is wider than
	        tall, ``vertical_text`` holds the remaining kept words, and
	        ``data`` is the unfiltered dict returned by pytesseract.
	    """
	    data = pytesseract.image_to_data(img, config=config, output_type=Output.DICT)
	    horizontal_text = []
	    vertical_text = []
	    rows = zip(data['text'], data['conf'], data['width'], data['height'])
	    for text, conf, w, h in rows:
	        # int(float(...)) accepts ints, floats and decimal strings such as
	        # "96.5"; a bare int() raises ValueError on the latter.
	        if int(float(conf)) <= min_conf:
	            continue
	        # Rows with blank text carry no word; keep them out of the lists.
	        if not text.strip():
	            continue
	        if w > h:
	            horizontal_text.append(text)
	        else:
	            vertical_text.append(text)
	    return horizontal_text, vertical_text, data

	def draw_boxes(img, data, min_conf=20):
	    """Draw a green box and text label on *img* for each confident word.

	    Args:
	        img: Image array; it is drawn on in place.
	        data: Dict of parallel lists with the keys ``text``, ``conf``,
	            ``left``, ``top``, ``width`` and ``height``.
	        min_conf: A word is drawn only when its confidence is strictly
	            greater than this value.

	    Returns:
	        The same ``img`` object that was passed in.
	    """
	    green = (0, 255, 0)
	    rows = zip(data['text'], data['conf'], data['left'], data['top'],
	               data['width'], data['height'])
	    for text, conf, x, y, w, h in rows:
	        # int(float(...)) accepts ints, floats and decimal strings such as
	        # "96.5"; a bare int() raises ValueError on the latter.
	        if int(float(conf)) <= min_conf:
	            continue
	        # Rows with blank text have nothing to label; skip the empty box.
	        if not text.strip():
	            continue
	        cv2.rectangle(img, (x, y), (x + w, y + h), green, 2)
	        # The label normally sits 10 px above the box. For boxes close to
	        # the top edge that would fall outside the image, so place it
	        # just below the box instead.
	        label_y = y - 10 if y - 10 >= 10 else y + h + 15
	        cv2.putText(img, text, (x, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, green, 1)
	    return img

	def word_level_accuracy(data, ground_truth):
	    """Return the percentage of distinct ground-truth words found by OCR.

	    Both sides are reduced to sets of whitespace-separated words, so
	    repeated words count once and matching is case-sensitive.

	    Args:
	        data: Dict whose ``text`` entry is a list of recognised strings.
	        ground_truth: Reference text.

	    Returns:
	        A value between 0 and 100, or ``0`` when the reference text
	        contains no words.
	    """
	    # Splitting each entry drops blank entries and breaks multi-word ones.
	    recognised = {word for entry in data['text'] for word in entry.split()}
	    expected = set(ground_truth.split())
	    if not expected:
	        return 0
	    hits = expected & recognised
	    return (len(hits) / len(expected)) * 100

	def character_level_accuracy(data, ground_truth):
	    """Return the percentage of distinct ground-truth characters found by OCR.

	    Both sides are reduced to sets of characters, so repeated characters
	    count once and their order is ignored. All whitespace in the
	    reference text (spaces, tabs, newlines) is discarded before
	    comparing.

	    Args:
	        data: Dict whose ``text`` entry is a list of recognised strings.
	        ground_truth: Reference text; may span several lines.

	    Returns:
	        A value between 0 and 100, or ``0`` when the reference text has
	        no non-whitespace characters.
	    """
	    # split() with no argument breaks on every kind of whitespace, so line
	    # breaks in a multi-line reference are not counted as characters the
	    # OCR was expected to produce.
	    gt_chars = set("".join(ground_truth.split()))
	    if not gt_chars:
	        return 0
	    # Whitespace left in the OCR set is harmless: gt_chars contains none,
	    # so it can never be part of the intersection.
	    ocr_chars = set("".join(data['text']))
	    return len(gt_chars & ocr_chars) / len(gt_chars) * 100

	def process(image, ground_truth):
	    """Run OCR on an uploaded image and report accuracy against a reference.

	    Args:
	        image: Uploaded image as a NumPy array or any object accepted by
	            ``np.array`` (e.g. a PIL image), or ``None`` when nothing was
	            uploaded. Grayscale, RGB and RGBA layouts are accepted.
	        ground_truth: Reference text; ``None`` is treated as empty.

	    Returns:
	        ``(annotated_image, report)``. ``annotated_image`` is an RGB array
	        with boxes drawn on it, or ``None`` when no image was supplied, in
	        which case ``report`` is a prompt to upload one.
	    """
	    if image is None:
	        return None, "Please upload an image."
	    # Convert to NumPy array if it's a PIL Image
	    if not isinstance(image, np.ndarray):
	        image = np.array(image)
	    # The accuracy helpers call str methods on the reference text.
	    ground_truth = ground_truth or ""
	    # Choose the conversion that matches the channel layout; RGB2BGR alone
	    # rejects single-channel and four-channel arrays.
	    if image.ndim == 2:
	        img_bgr = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
	    elif image.shape[2] == 4:
	        img_bgr = cv2.cvtColor(image, cv2.COLOR_RGBA2BGR)
	    else:
	        img_bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
	    # Run OCR and accuracy checks
	    h_text, v_text, data = text_detection(img_bgr)
	    word_acc = word_level_accuracy(data, ground_truth)
	    char_acc = character_level_accuracy(data, ground_truth)
	    # Draw on a copy, then convert back to RGB for display
	    result_img = draw_boxes(img_bgr.copy(), data)
	    result_img_rgb = cv2.cvtColor(result_img, cv2.COLOR_BGR2RGB)
	    report_lines = [
	        f"Horizontal Text: {' '.join(h_text) if h_text else 'None'}",
	        f"Vertical Text: {' '.join(v_text) if v_text else 'None'}",
	        f"Word-Level Accuracy: {word_acc:.2f}%",
	        f"Character-Level Accuracy: {char_acc:.2f}%",
	    ]
	    # The report is shown in a Markdown component, where a single newline
	    # may be folded into a space; a blank line keeps each entry separate.
	    return result_img_rgb, "\n\n".join(report_lines)

	# Gradio front end: an image and the reference text go in; the annotated
	# image and a text report come out of `process`.
	demo = gr.Interface(
	    title="OCR Accuracy Evaluator with Bounding Boxes",
	    description=(
	        "Upload an image and ground truth text to evaluate Tesseract OCR "
	        "accuracy by word and character. Bounding boxes are drawn around "
	        "detected text."
	    ),
	    fn=process,
	    inputs=[
	        gr.Image(type="numpy", label="Upload Image"),
	        gr.Textbox(
	            lines=4,
	            placeholder="Enter ground truth text here",
	            label="Ground Truth",
	        ),
	    ],
	    outputs=[
	        gr.Image(type="numpy", label="Detected Text with Bounding Boxes"),
	        gr.Markdown(),
	    ],
	)

	# Launch the app only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()