Spaces:

genelify
/

tesseract-ocr-bbox

Running

File size: 3,961 Bytes

import gradio as gr, pytesseract, cv2, numpy as np, os
from PIL import Image, ImageDraw

def _collect_confident_words(data: dict, min_conf: float = 50) -> list:
    """Return ``{'box': (x, y, w, h), 'word': str}`` entries for usable OCR hits.

    A hit is kept when its text is non-blank and its confidence is strictly
    greater than *min_conf*.
    """
    words = []
    columns = zip(data['text'], data['conf'], data['left'],
                  data['top'], data['width'], data['height'])
    for text, conf, x, y, w, h in columns:
        # The confidence column can arrive as an int, a float or a numeric
        # string depending on the pytesseract version, so normalise it before
        # comparing; anything unparsable is treated as "no confidence".
        try:
            score = float(conf)
        except (TypeError, ValueError):
            score = -1.0
        if text.strip() != '' and score > min_conf:
            words.append({'box': (x, y, w, h), 'word': text})
    return words


def _drop_overlapping_words(words: list, max_iou: float = 0.5) -> list:
    """Return *words* without the smaller box of every pair whose IoU exceeds *max_iou*."""
    to_remove = set()
    for i, first in enumerate(words):
        for j in range(i + 1, len(words)):
            box1 = first['box']
            box2 = words[j]['box']
            if calculate_iou(box1, box2) > max_iou:
                # Keep the larger box; on equal areas the earlier one is dropped
                area1 = box1[2] * box1[3]
                area2 = box2[2] * box2[3]
                to_remove.add(j if area1 > area2 else i)
    return [bw for i, bw in enumerate(words) if i not in to_remove]


def _words_to_text(words: list) -> str:
    """Group words (already sorted by top, then left) into lines and join them.

    Words inside a line are ordered left to right and separated by a space;
    lines are separated by a newline.
    """
    lines = []
    current_line = []
    current_top = -1

    for bw in words:
        _, y, _, h = bw['box']
        # NOTE(review): the tolerance is the candidate word's own height, not
        # the height of the line it is compared against; kept as-is.
        if current_line and y <= current_top + h:
            current_line.append(bw)
        else:
            if current_line:
                lines.append(current_line)
            current_line = [bw]
            current_top = y

    if current_line:
        lines.append(current_line)

    text_lines = []
    for line in lines:
        line.sort(key=lambda bw: bw['box'][0])
        text_lines.append(" ".join(bw['word'] for bw in line))
    return "\n".join(text_lines).strip()


def process(image_path: str, lang: str) -> list:
    """Run Tesseract OCR on an image and return the annotated image and text.

    The image is loaded, the file at *image_path* is deleted, and the pixels
    are converted to grayscale and thresholded before being passed to
    ``pytesseract.image_to_data``. Words with a confidence above 50 are kept,
    heavily overlapping boxes are de-duplicated, and the remaining words are
    drawn on the image and assembled into reading-order text.

    Args:
        image_path: Path of the image file to read. The file is removed once
            it has been loaded.
        lang: Tesseract language code forwarded to pytesseract.

    Returns:
        A two-element list ``[image, text]``: the RGB PIL image with a yellow
        rectangle around every kept word, and the recognised text. When
        *image_path* is empty the list is ``[None, '']``.
    """
    if not image_path:
        # One placeholder per declared output; an empty list does not match
        # the two outputs the interface expects.
        return [None, '']

    # Decode fully and release the file handle before deleting the upload, so
    # the removal does not race with an open descriptor.
    with Image.open(image_path) as source_image:
        image_data = source_image.convert('RGB')
    os.remove(image_path)

    # PIL yields channels in RGB order, so the RGB conversion code is required
    # for correct luminance weights.
    gray = cv2.cvtColor(np.array(image_data), cv2.COLOR_RGB2GRAY)
    # THRESH_TOZERO zeroes pixels at or below 127 and leaves the rest untouched
    _, threshold = cv2.threshold(gray, 127, 255, cv2.THRESH_TOZERO)
    data = pytesseract.image_to_data(threshold, output_type=pytesseract.Output.DICT, lang=lang)

    filtered_boxes_and_words = _drop_overlapping_words(_collect_confident_words(data))

    # Reading order: top to bottom, then left to right
    filtered_boxes_and_words.sort(key=lambda bw: (bw['box'][1], bw['box'][0]))
    sorted_text = _words_to_text(filtered_boxes_and_words)

    draw = ImageDraw.Draw(image_data)
    for bw in filtered_boxes_and_words:
        x, y, w, h = bw['box']
        draw.rectangle([x, y, x + w, y + h], outline='yellow', width=2)

    return [image_data, sorted_text]
    
def calculate_iou(box1, box2):
    """Return the intersection-over-union of two axis-aligned boxes.

    Args:
        box1: Sequence ``(x, y, w, h)`` - left, top, width, height.
        box2: Sequence in the same ``(x, y, w, h)`` layout.

    Returns:
        The overlap area divided by the area of the union, as a float in
        ``[0.0, 1.0]`` for boxes with non-negative sizes. ``0.0`` is returned
        when the union has no area (e.g. both boxes are degenerate), instead
        of dividing by zero.
    """
    # Corners of the intersection rectangle
    left = max(box1[0], box2[0])
    top = max(box1[1], box2[1])
    right = min(box1[0] + box1[2], box2[0] + box2[2])
    bottom = min(box1[1] + box1[3], box2[1] + box2[3])

    # Clamp to zero so disjoint boxes yield an empty intersection
    intersection_area = max(0, right - left) * max(0, bottom - top)
    box1_area = box1[2] * box1[3]
    box2_area = box2[2] * box2[3]
    union_area = box1_area + box2_area - intersection_area

    if union_area <= 0:
        # Zero-area boxes cannot overlap in any meaningful way
        return 0.0
    return intersection_area / float(union_area)

# Language choices for the dropdown, as reported by pytesseract
languages = pytesseract.get_languages()

# Input widgets: the uploaded image (handed to `process` as a file path) and
# the OCR language selector.
upload_input = gr.Image(label='Upload Image', type='filepath')
language_input = gr.Dropdown(label="Select Language", choices=languages, type="value")

# Output widgets: the annotated image and the recognised text
annotated_output = gr.Image(type='filepath', label="Image with Bounding Boxes")
text_output = gr.Textbox(label='Output Text')

# HTML shown below the interface
article_html = """<p style='text-align: center;'>Hello, thanks for coming, visit: <a href="https://www.genelify.com" target="_blank">Genelify</a>, <a href="https://www.tubtic.com" target="_blank">Tubtic</a></p>"""

iface = gr.Interface(
    fn=process,
    inputs=[upload_input, language_input],
    outputs=[annotated_output, text_output],
    css="footer {visibility: hidden}",
    title="OCR | PyTesseract with bbox",
    article=article_html,
)
iface.launch(show_api=False, inline=False)