genelify's picture
Update app.py
193d94a verified
import gradio as gr, pytesseract, cv2, numpy as np, os
from PIL import Image, ImageDraw
def _confident_words(data: dict, min_conf: float = 50) -> list[dict]:
    """Return the non-blank OCR words whose confidence exceeds *min_conf*.

    Each returned item is ``{'box': (x, y, w, h), 'word': text}``.
    """
    rows = zip(
        data.get('text', []),
        data.get('conf', []),
        data.get('left', []),
        data.get('top', []),
        data.get('width', []),
        data.get('height', []),
    )
    words = []
    for text, conf, left, top, width, height in rows:
        if not text.strip():
            continue
        # NOTE(review): confidence values may arrive as strings with some
        # pytesseract versions; coerce so the comparison cannot raise.
        try:
            score = float(conf)
        except (TypeError, ValueError):
            continue
        if score > min_conf:
            words.append({'box': (left, top, width, height), 'word': text})
    return words


def _drop_overlapping(words: list[dict], max_iou: float = 0.5) -> list[dict]:
    """Drop the smaller box of every pair whose IoU exceeds *max_iou*."""
    discarded = set()
    for i, first in enumerate(words):
        for j in range(i + 1, len(words)):
            box_a = first['box']
            box_b = words[j]['box']
            if calculate_iou(box_a, box_b) > max_iou:
                area_a = box_a[2] * box_a[3]
                area_b = box_b[2] * box_b[3]
                # On a tie the earlier box is the one discarded.
                discarded.add(j if area_a > area_b else i)
    return [word for i, word in enumerate(words) if i not in discarded]


def _lines_of_text(words: list[dict]) -> str:
    """Join words (pre-sorted by top, then left) into newline-separated lines."""
    lines = []
    current_line = []
    current_top = 0
    for word in words:
        _, top, _, height = word['box']
        # A word starts a new line when it sits more than its own height
        # below the top of the line being built.
        if not current_line or top > current_top + height:
            if current_line:
                lines.append(current_line)
            current_line = [word]
            current_top = top
        else:
            current_line.append(word)
    if current_line:
        lines.append(current_line)
    return "\n".join(
        " ".join(w['word'] for w in sorted(line, key=lambda w: w['box'][0]))
        for line in lines
    ).strip()


def process(image_path: str, lang: str) -> list:
    """Run OCR on an image and outline every recognised word.

    Args:
        image_path: Path of the image to read. The file is deleted once it
            has been loaded into memory.
        lang: Language code handed to Tesseract.

    Returns:
        A two-item list ``[image, text]``: the RGB PIL image with a yellow
        rectangle around each kept word, and the recognised text in reading
        order. When *image_path* is empty the result is ``[None, '']`` so
        that both declared outputs still receive a value.
    """
    if not image_path:
        return [None, '']

    # convert() returns an independent in-memory copy, so the file handle can
    # be closed before the file itself is removed.
    with Image.open(image_path) as source:
        image_data = source.convert('RGB')
    os.remove(image_path)

    # PIL delivers RGB channel order, hence RGB2GRAY rather than BGR2GRAY.
    gray = cv2.cvtColor(np.array(image_data), cv2.COLOR_RGB2GRAY)
    _, threshold = cv2.threshold(gray, 127, 255, cv2.THRESH_TOZERO)
    data = pytesseract.image_to_data(
        threshold, output_type=pytesseract.Output.DICT, lang=lang
    )

    words = _drop_overlapping(_confident_words(data))
    # Reading order: top to bottom, then left to right.
    words.sort(key=lambda w: (w['box'][1], w['box'][0]))
    text = _lines_of_text(words)

    draw = ImageDraw.Draw(image_data)
    for word in words:
        x, y, w, h = word['box']
        draw.rectangle([x, y, x + w, y + h], outline='yellow', width=2)
    return [image_data, text]
def calculate_iou(box1, box2):
    """Return the intersection-over-union of two ``(x, y, w, h)`` boxes.

    Args:
        box1: First box as ``(left, top, width, height)``.
        box2: Second box in the same layout.

    Returns:
        The ratio of the overlapping area to the combined area, or ``0.0``
        when the combined area is zero (e.g. both boxes are degenerate).
    """
    left = max(box1[0], box2[0])
    top = max(box1[1], box2[1])
    right = min(box1[0] + box1[2], box2[0] + box2[2])
    bottom = min(box1[1] + box1[3], box2[1] + box2[3])

    # Clamp to zero so disjoint boxes contribute no overlap.
    intersection_area = max(0, right - left) * max(0, bottom - top)
    union_area = box1[2] * box1[3] + box2[2] * box2[3] - intersection_area
    if union_area <= 0:
        # Zero-area boxes would otherwise trigger a division by zero.
        return 0.0
    return intersection_area / float(union_area)
# Language codes reported by pytesseract; offered as the dropdown choices.
languages = pytesseract.get_languages()

# Inputs: the image (passed to `process` as a file path) and the OCR language.
input_components = [
    gr.Image(label='Upload Image', type='filepath'),
    gr.Dropdown(label="Select Language", choices=languages, type="value"),
]

# Outputs: the annotated image and the recognised text.
output_components = [
    gr.Image(type='filepath', label="Image with Bounding Boxes"),
    gr.Textbox(label='Output Text'),
]

iface = gr.Interface(
    fn=process,
    inputs=input_components,
    outputs=output_components,
    css="footer {visibility: hidden}",
    title="OCR | PyTesseract with bbox",
    article="""<p style='text-align: center;'>Hello, thanks for coming, visit: <a href="https://www.genelify.com" target="_blank">Genelify</a>, <a href="https://www.tubtic.com" target="_blank">Tubtic</a></p>""",
)

# Start the web UI without exposing the API page.
iface.launch(show_api=False, inline=False)