import os

import cv2
import gradio as gr
import numpy as np
import pytesseract
from PIL import Image, ImageDraw

# Minimum tesseract confidence for a word to be kept.
CONF_THRESHOLD = 50
# Box pairs overlapping more than this IoU are deduplicated.
IOU_THRESHOLD = 0.5


def calculate_iou(box1, box2):
    """Return the intersection-over-union of two ``(x, y, w, h)`` boxes.

    Returns 0.0 when the union area is zero (both boxes degenerate),
    instead of raising ZeroDivisionError.
    """
    xA = max(box1[0], box2[0])
    yA = max(box1[1], box2[1])
    xB = min(box1[0] + box1[2], box2[0] + box2[2])
    yB = min(box1[1] + box1[3], box2[1] + box2[3])
    intersection_area = max(0, xB - xA) * max(0, yB - yA)
    union_area = box1[2] * box1[3] + box2[2] * box2[3] - intersection_area
    if union_area <= 0:
        return 0.0
    return intersection_area / float(union_area)


def _extract_words(threshold_img, lang):
    """Run tesseract on *threshold_img* and return confident words.

    Returns a list of ``{'box': (x, y, w, h), 'word': str}`` dicts for
    every non-empty word whose confidence exceeds CONF_THRESHOLD.
    """
    data = pytesseract.image_to_data(
        threshold_img, output_type=pytesseract.Output.DICT, lang=lang
    )
    words = []
    for i, text in enumerate(data['text']):
        # 'conf' entries are strings in some pytesseract versions; coerce.
        if text.strip() and float(data['conf'][i]) > CONF_THRESHOLD:
            box = (data['left'][i], data['top'][i],
                   data['width'][i], data['height'][i])
            words.append({'box': box, 'word': text})
    return words


def _dedupe_boxes(boxes_and_words):
    """Remove the smaller box of any pair whose IoU exceeds IOU_THRESHOLD."""
    to_remove = set()
    for i in range(len(boxes_and_words)):
        if i in to_remove:
            continue  # already discarded; don't let it knock out others
        for j in range(i + 1, len(boxes_and_words)):
            if j in to_remove:
                continue
            box1 = boxes_and_words[i]['box']
            box2 = boxes_and_words[j]['box']
            if calculate_iou(box1, box2) > IOU_THRESHOLD:
                area1 = box1[2] * box1[3]
                area2 = box2[2] * box2[3]
                to_remove.add(j if area1 > area2 else i)
    return [bw for i, bw in enumerate(boxes_and_words) if i not in to_remove]


def _group_into_lines(boxes_and_words):
    """Sort words top-left first, group them into visual lines, join as text."""
    boxes_and_words.sort(key=lambda bw: (bw['box'][1], bw['box'][0]))
    lines = []
    current_line = []
    current_top = -1
    for bw in boxes_and_words:
        _, y, _, h = bw['box']
        # A word starting below the current word's vertical extent opens
        # a new line.
        if current_top == -1 or y > current_top + h:
            if current_line:
                lines.append(current_line)
            current_line = [bw]
            current_top = y
        else:
            current_line.append(bw)
    if current_line:
        lines.append(current_line)

    text_lines = []
    for line in lines:
        line.sort(key=lambda bw: bw['box'][0])
        text_lines.append(" ".join(bw['word'] for bw in line))
    return "\n".join(text_lines)


def process(image_path: str, lang: str) -> tuple[Image.Image | None, str]:
    """OCR the image at *image_path* and return ``(annotated_image, text)``.

    Parameters:
        image_path: filepath of the uploaded image (Gradio temp file);
            it is deleted once its pixels have been loaded.
        lang: tesseract language code selected in the UI.

    Returns:
        A PIL image with yellow boxes drawn around detected words, and the
        recognized text ordered into lines. ``(None, "")`` when no image
        was supplied.
    """
    if not image_path:
        # The Interface wires two outputs; returning [] broke both of them.
        return None, ""

    # Load pixels first, then close the file handle; keep the converted
    # copy open so Gradio can still render it after we return.
    with Image.open(image_path) as img:
        image_data = img.convert('RGB')
    os.remove(image_path)  # the temp upload is no longer needed

    # PIL delivers RGB, so convert with COLOR_RGB2GRAY (BGR2GRAY would
    # swap the red/blue luma weights).
    gray = cv2.cvtColor(np.array(image_data), cv2.COLOR_RGB2GRAY)
    _, threshold = cv2.threshold(gray, 127, 255, cv2.THRESH_TOZERO)

    words = _extract_words(threshold, lang)
    filtered = _dedupe_boxes(words)
    sorted_text = _group_into_lines(filtered)

    draw = ImageDraw.Draw(image_data)
    for bw in filtered:
        x, y, w, h = bw['box']
        draw.rectangle([x, y, x + w, y + h], outline='yellow', width=2)

    return image_data, sorted_text.strip()


languages = pytesseract.get_languages()

iface = gr.Interface(
    fn=process,
    inputs=[
        gr.Image(label='Upload Image', type='filepath'),
        gr.Dropdown(label="Select Language", choices=languages, type="value"),
    ],
    # process returns a PIL image, so declare the output as type='pil'
    # (the original said 'filepath', mismatching the return value).
    outputs=[
        gr.Image(type='pil', label="Image with Bounding Boxes"),
        gr.Textbox(label='Output Text'),
    ],
    css="footer {visibility: hidden}",
    title="OCR | PyTesseract with bbox",
    article="""

Hello, thanks for coming, visit: Genelify, Tubtic

""",
)

iface.launch(show_api=False, inline=False)