Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import pytesseract | |
| from PIL import Image, ImageDraw | |
def calculate_iou(box1, box2):
    """Return the intersection-over-union of two axis-aligned boxes.

    Args:
        box1: Sequence ``(x, y, w, h)`` -- left, top, width, height.
        box2: Sequence in the same ``(x, y, w, h)`` layout.

    Returns:
        The overlap ratio as a float: intersection area divided by union
        area. Returns ``0.0`` when the union area is not positive (for
        example when both boxes have zero width or height) instead of
        dividing by zero.
    """
    # Corners of the intersection rectangle.
    x_left = max(box1[0], box2[0])
    y_top = max(box1[1], box2[1])
    x_right = min(box1[0] + box1[2], box2[0] + box2[2])
    y_bottom = min(box1[1] + box1[3], box2[1] + box2[3])

    # Clamp each side at zero so disjoint boxes yield an empty intersection
    # rather than a spurious positive product of two negative lengths.
    intersection_area = max(0, x_right - x_left) * max(0, y_bottom - y_top)

    box1_area = box1[2] * box1[3]
    box2_area = box2[2] * box2[3]
    union_area = box1_area + box2_area - intersection_area

    if union_area <= 0:
        # Degenerate boxes: there is no area to overlap.
        return 0.0

    return intersection_area / float(union_area)
def extract_text_and_boxes(image):
    """Run Tesseract OCR on ``image`` and outline the detected words.

    Args:
        image: Image accepted by ``pytesseract.image_to_data`` that also
            offers ``copy()`` (the interface in this file supplies a PIL
            image). ``None`` is tolerated.

    Returns:
        A tuple ``(annotated_image, words)``. ``annotated_image`` is a copy
        of ``image`` with a red rectangle around every kept word; the input
        image itself is left unmodified. ``words`` is a list of dicts of the
        form ``{'box': (x, y, w, h), 'word': text}``. When ``image`` is
        ``None`` the result is ``(None, [])``.
    """
    if image is None:
        # Nothing to analyse (presumably what the UI sends when it is
        # submitted without an image); return empty outputs, don't crash.
        return None, []

    data = pytesseract.image_to_data(image, output_type=pytesseract.Output.DICT)

    # Keep only entries whose text is non-blank; the word itself is stored
    # exactly as reported (not stripped).
    boxes_and_words = [
        {'box': (left, top, width, height), 'word': text}
        for text, left, top, width, height in zip(
            data['text'], data['left'], data['top'], data['width'], data['height']
        )
        if text.strip() != ''
    ]

    # Remove overlapping boxes based on IoU: for every pair overlapping by
    # more than the threshold, drop the one with the smaller area. On equal
    # areas the earlier box of the pair is dropped.
    iou_threshold = 0.5
    to_remove = set()
    for i, first in enumerate(boxes_and_words):
        box1 = first['box']
        area1 = box1[2] * box1[3]
        for j in range(i + 1, len(boxes_and_words)):
            box2 = boxes_and_words[j]['box']
            if calculate_iou(box1, box2) > iou_threshold:
                area2 = box2[2] * box2[3]
                to_remove.add(j if area1 > area2 else i)

    filtered_boxes_and_words = [
        bw for index, bw in enumerate(boxes_and_words) if index not in to_remove
    ]

    # Draw the filtered boxes on a copy so the caller's image is untouched.
    annotated = image.copy()
    draw = ImageDraw.Draw(annotated)
    for bw in filtered_boxes_and_words:
        x, y, w, h = bw['box']
        draw.rectangle([x, y, x + w, y + h], outline='red', width=2)

    return annotated, filtered_boxes_and_words
# Gradio UI: one uploaded image in; annotated image plus word/box JSON out.
iface = gr.Interface(
    fn=extract_text_and_boxes,
    inputs=gr.Image(type='pil'),
    outputs=[
        gr.Image(type='pil', label="Image with Bounding Boxes"),
        gr.JSON(label="Extracted Words and Boxes"),
    ],
    title="Test Tesseract",
    description="Test PyTesseract.",
    # Each example row holds a single value, matching the single input.
    examples=[
        [example_path]
        for example_path in (
            "examples/0OO7hzT.jpg",
            "examples/AMzONbY.jpg",
            "examples/eHKLg23.jpg",
            "examples/GXEadxE.jpg",
            "examples/HiuFcCq.jpg",
            "examples/aDsFny6.jpg",
            "examples/iF0qg3H.jpg",
        )
    ],
)

# Start serving as soon as this module is executed.
iface.launch()