Spaces:
Sleeping
Sleeping
| import gradio as gr, pytesseract, cv2, numpy as np, os | |
| from PIL import Image, ImageDraw | |
def process(image_path: str, lang: str) -> "list[Image.Image | str | None]":
    """Run Tesseract OCR on an image and outline the recognised words.

    The image is loaded, converted to grayscale, thresholded and passed to
    ``pytesseract.image_to_data``. Words with a confidence above 50 are kept,
    heavily overlapping boxes are reduced to the larger one, and the remaining
    words are arranged into lines ordered top-to-bottom, left-to-right.

    Side effect: the file at ``image_path`` is deleted once it has been read.

    Args:
        image_path: Path of the image file to read. A falsy value (no upload)
            short-circuits without touching the filesystem.
        lang: Tesseract language code forwarded as ``lang=``.

    Returns:
        A two-element list ``[annotated_image, text]``: the RGB image with a
        yellow rectangle around every kept word, and the recognised text with
        one line per detected text line. ``[None, '']`` when no image is given.
    """
    if not image_path:
        # Return one value per output (image, text); an empty list would not
        # satisfy a two-output interface.
        return [None, '']

    # Close the source file handle before deleting the file; the converted
    # copy lives purely in memory and is what gets annotated and returned.
    with Image.open(image_path) as source_image:
        image_data = source_image.convert('RGB')
    os.remove(image_path)

    # PIL yields channels in RGB order, so the RGB->gray conversion code is the
    # correct one (the BGR code would swap the red and blue weights).
    gray = cv2.cvtColor(np.array(image_data), cv2.COLOR_RGB2GRAY)
    _, threshold = cv2.threshold(gray, 127, 255, cv2.THRESH_TOZERO)
    data = pytesseract.image_to_data(
        threshold, output_type=pytesseract.Output.DICT, lang=lang
    )

    kept_words = _drop_overlapping_words(_confident_words(data))

    # Reading order: top-to-bottom first, then left-to-right.
    kept_words.sort(key=lambda bw: (bw['box'][1], bw['box'][0]))

    text_lines = []
    for line in _group_words_into_lines(kept_words):
        line.sort(key=lambda bw: bw['box'][0])
        text_lines.append(" ".join(bw['word'] for bw in line))
    sorted_text = "\n".join(text_lines)

    draw = ImageDraw.Draw(image_data)
    for bw in kept_words:
        x, y, w, h = bw['box']
        draw.rectangle([x, y, x + w, y + h], outline='yellow', width=2)

    return [image_data, sorted_text.strip()]


def _confident_words(data, min_conf=50):
    """Collect non-blank words whose confidence exceeds ``min_conf``."""
    words = []
    columns = zip(
        data['text'], data['conf'], data['left'],
        data['top'], data['width'], data['height'],
    )
    for text, conf, left, top, width, height in columns:
        # float() tolerates a confidence reported as a numeric string
        # (assumption: some pytesseract versions do this - TODO confirm).
        if text.strip() and float(conf) > min_conf:
            words.append({'box': (left, top, width, height), 'word': text})
    return words


def _drop_overlapping_words(words, iou_threshold=0.5):
    """Drop the smaller box of every pair whose IoU exceeds ``iou_threshold``."""
    discard = set()
    for i, first in enumerate(words):
        for j in range(i + 1, len(words)):
            box1, box2 = first['box'], words[j]['box']
            if calculate_iou(box1, box2) > iou_threshold:
                area1 = box1[2] * box1[3]
                area2 = box2[2] * box2[3]
                # On equal areas the earlier box is the one discarded.
                discard.add(j if area1 > area2 else i)
    return [bw for k, bw in enumerate(words) if k not in discard]


def _group_words_into_lines(words):
    """Split words (pre-sorted by top, then left) into visual text lines."""
    lines = []
    current_line = []
    current_top = None
    for bw in words:
        _, top, _, height = bw['box']
        # A word starts a new line when it begins more than its own height
        # below the top of the line currently being built.
        if current_top is None or top > current_top + height:
            if current_line:
                lines.append(current_line)
            current_line = [bw]
            current_top = top
        else:
            current_line.append(bw)
    if current_line:
        lines.append(current_line)
    return lines
def calculate_iou(box1, box2):
    """Return the intersection-over-union of two ``(x, y, w, h)`` boxes.

    Args:
        box1: First box as ``(left, top, width, height)``.
        box2: Second box in the same layout.

    Returns:
        The overlap ratio as a float: 0.0 for disjoint boxes, 1.0 for
        identical non-empty boxes. Also 0.0 when the union has no area
        (e.g. both boxes are degenerate), instead of dividing by zero.
    """
    left = max(box1[0], box2[0])
    top = max(box1[1], box2[1])
    right = min(box1[0] + box1[2], box2[0] + box2[2])
    bottom = min(box1[1] + box1[3], box2[1] + box2[3])

    # Clamp at zero so disjoint boxes contribute no (negative) overlap.
    intersection_area = max(0, right - left) * max(0, bottom - top)

    box1_area = box1[2] * box1[3]
    box2_area = box2[2] * box2[3]
    union_area = box1_area + box2_area - intersection_area
    if union_area <= 0:
        # Zero-area boxes cannot meaningfully overlap anything.
        return 0.0
    return intersection_area / float(union_area)
# Language choices offered in the dropdown, as reported by pytesseract.
languages = pytesseract.get_languages()

# Web UI: an image upload plus a language selector go in; the annotated image
# and the recognised text come out.
iface = gr.Interface(
    fn=process,
    inputs=[
        gr.Image(label='Upload Image', type='filepath'),
        gr.Dropdown(label="Select Language", choices=languages, type="value"),
    ],
    outputs=[
        gr.Image(type='filepath', label="Image with Bounding Boxes"),
        gr.Textbox(label='Output Text'),
    ],
    css="footer {visibility: hidden}",
    title="OCR | PyTesseract with bbox",
    article="""<p style='text-align: center;'>Hello, thanks for coming, visit: <a href="https://www.genelify.com" target="_blank">Genelify</a>, <a href="https://www.tubtic.com" target="_blank">Tubtic</a></p>""",
)

# Start the app as soon as the module is executed.
iface.launch(show_api=False, inline=False)