Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| import cv2 | |
| import pytesseract | |
| from pytesseract import Output | |
| import numpy as np | |
def text_detection(img, config="--psm 11 --oem 3"):
    """Run Tesseract on *img* and sort confident words by box orientation.

    Args:
        img: Image to recognise; passed unchanged to
            ``pytesseract.image_to_data``.
        config: Tesseract command-line options forwarded to pytesseract.

    Returns:
        A ``(horizontal_text, vertical_text, data)`` tuple. The first two
        are lists of recognised words whose confidence is above 20; a word
        is "horizontal" when its box is wider than it is tall and
        "vertical" otherwise. ``data`` is the unfiltered dictionary
        returned by ``pytesseract.image_to_data``.
    """
    data = pytesseract.image_to_data(img, config=config, output_type=Output.DICT)
    horizontal_text = []
    vertical_text = []
    rows = zip(data['conf'], data['text'], data['width'], data['height'])
    for conf, text, w, h in rows:
        # Confidence may arrive as an int, a float, or a string such as
        # "96.43"; int() raises ValueError on the fractional string form.
        if float(conf) <= 20:
            continue
        # Whitespace-only tokens carry no text and would only add stray
        # separators to the joined output.
        if not text.strip():
            continue
        if w > h:
            horizontal_text.append(text)
        else:
            vertical_text.append(text)
    return horizontal_text, vertical_text, data
def draw_boxes(img, data):
    """Draw a green box and text label on *img* for each confident word.

    Args:
        img: Image array drawn on in place by OpenCV.
        data: Dictionary with parallel ``'text'``, ``'conf'``, ``'left'``,
            ``'top'``, ``'width'`` and ``'height'`` lists.

    Returns:
        The same ``img`` object, after drawing.
    """
    rows = zip(
        data['text'], data['conf'],
        data['left'], data['top'], data['width'], data['height'],
    )
    for text, conf, x, y, w, h in rows:
        # Confidence may be an int, a float, or a string such as "96.43";
        # int() raises ValueError on the fractional string form.
        if float(conf) <= 20:
            continue
        cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)
        # The label sits 10 px above the top-left corner of the box.
        cv2.putText(img, text, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
    return img
def word_level_accuracy(data, ground_truth):
    """Return the percentage of distinct ground-truth words found by OCR.

    Both sides are reduced to sets of whitespace-separated words, so
    repeated words and word order do not affect the score. Returns ``0``
    when the ground truth contains no words.
    """
    expected = set(ground_truth.split())
    if not expected:
        return 0
    # Splitting each OCR entry on whitespace yields the same words as
    # joining the non-blank entries with spaces and splitting the result.
    recognised = {word for entry in data['text'] for word in entry.split()}
    matched = expected & recognised
    return (len(matched) / len(expected)) * 100
def character_level_accuracy(data, ground_truth):
    """Return the percentage of distinct ground-truth characters found by OCR.

    Both sides are reduced to sets of non-whitespace characters, so the
    score reflects which characters occur at all, not their order or
    frequency. Returns ``0`` when the ground truth has no non-whitespace
    characters.

    Args:
        data: Dictionary whose ``'text'`` entry is a list of OCR strings.
        ground_truth: Reference text; may span several lines.
    """
    # Drop every kind of whitespace, not just spaces: a multi-line ground
    # truth would otherwise expect "\n", which OCR tokens never contain.
    gt_chars = {ch for ch in ground_truth if not ch.isspace()}
    if not gt_chars:
        return 0
    ocr_chars = {ch for text in data['text'] for ch in text if not ch.isspace()}
    correct = gt_chars & ocr_chars
    return (len(correct) / len(gt_chars)) * 100
def process(image, ground_truth):
    """Run OCR on an uploaded image and report accuracy against a reference.

    Args:
        image: Uploaded image as an RGB NumPy array, or any object
            ``np.array`` can convert (e.g. a PIL image); ``None`` when
            nothing was uploaded.
        ground_truth: Reference text to score the OCR output against.

    Returns:
        An ``(annotated_image, markdown_report)`` tuple, or
        ``(None, "Please upload an image.")`` when *image* is ``None``.
    """
    if image is None:
        return None, "Please upload an image."

    # Convert to NumPy array if it's a PIL Image
    if not isinstance(image, np.ndarray):
        image = np.array(image)

    # An empty textbox may arrive as None; score it as empty text.
    ground_truth = ground_truth or ""

    # pytesseract interprets NumPy arrays as RGB, so OCR runs on the RGB
    # array directly; a BGR copy would swap the channels Tesseract sees.
    h_text, v_text, data = text_detection(image)
    word_acc = word_level_accuracy(data, ground_truth)
    char_acc = character_level_accuracy(data, ground_truth)

    # Draw on a copy so the caller's array is untouched. The box colour
    # (0, 255, 0) is green in both RGB and BGR, so no conversion is needed.
    result_img_rgb = draw_boxes(image.copy(), data)

    results = f"**Horizontal Text**: {' '.join(h_text) if h_text else 'None'}\n\n"
    results += f"**Vertical Text**: {' '.join(v_text) if v_text else 'None'}\n\n"
    results += f"**Word-Level Accuracy**: {word_acc:.2f}%\n"
    results += f"**Character-Level Accuracy**: {char_acc:.2f}%"
    return result_img_rgb, results
# Gradio UI: an image upload plus a ground-truth textbox feed `process`,
# which returns the annotated image and a Markdown accuracy report.
demo = gr.Interface(
    title="OCR Accuracy Evaluator with Bounding Boxes",
    description=(
        "Upload an image and ground truth text to evaluate Tesseract OCR "
        "accuracy by word and character. Bounding boxes are drawn around "
        "detected text."
    ),
    fn=process,
    inputs=[
        gr.Image(type="numpy", label="Upload Image"),
        gr.Textbox(
            lines=4,
            placeholder="Enter ground truth text here",
            label="Ground Truth",
        ),
    ],
    outputs=[
        gr.Image(type="numpy", label="Detected Text with Bounding Boxes"),
        gr.Markdown(),
    ],
)

# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()