File size: 3,675 Bytes
6b29586
679d3c5
 
6a97dd9
a17b25f
6b29586
a17b25f
30260da
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a17b25f
1de3ad4
30260da
a17b25f
f371f4c
312e5c6
a17b25f
 
 
 
 
 
 
 
 
ca117ec
6b29586
 
a17b25f
6b29586
a17b25f
 
 
 
 
 
 
 
 
679d3c5
a17b25f
 
 
 
 
 
 
 
 
 
 
 
 
712b357
b802821
a17b25f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
679d3c5
a17b25f
679d3c5
 
 
a17b25f
 
 
 
 
679d3c5
911e5bb
 
679d3c5
6b29586
 
cdd9e2c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
from ultralytics import YOLO
import gradio as gr
from huggingface_hub import snapshot_download
from PIL import Image, ImageDraw, ImageFont
import cv2
import numpy as np
# Mapping from the YOLO model's class labels (Latin transliterations of
# Egyptian plate characters) to the Arabic glyphs shown to the user.
# Digits map to Arabic-Indic numerals; "car" and "License Plate" are
# detector classes that carry no text, so they map to "".
hoppa = {
    "0": "٠",
    "1": "١",
    "2": "٢",
    "3": "٣",
    "4": "٤",
    "5": "٥",
    "6": "٦",
    "7": "٧",
    "8":"٨",
    "9":"٩", 
    "Beh": "ب", 
    "Daad": "ض",
    "Een": "ع", 
    "F": "ف",
    # NOTE(review): "هه" is two heh glyphs — likely a typo for "ه"; confirm
    # against the plate font before changing (runtime output depends on it).
    "Heeh": "هه",
    "Kaaf": "ك",
    "Laam": "ل",
    "License Plate": "",
    "Meem": "م",
    "Noon": "ن",
    "Q": "ق",
    "R": "ر", 
    "Saad": "ص", 
    "Seen": "س",
    "Taa": "ط", 
    "Wow": "و",
    "Yeeh": "ي",
    "Zah": "ظ",
    "Zeen": "ز", 
    "alef": "أ",
    "car": "",
    "daal": "د",
    "geem": "ج",
    "ghayn": "غ",
    "khaa": "خ",
    "sheen": "ش",
    "teh": "ت",
    "theh": "ث",
    "zaal": "ذ",
    "7aah" : "ح"
}

# Model path: download the checkpoint repo from the Hugging Face Hub once
# (cached locally afterwards) and point at the YOLOv8-small weights file.
model_dir = snapshot_download("omarelsayeed/licence_plates") + "/license_yolo8s_1024.pt"
# YOLO model initialization (loads the weights into memory at import time).
model = YOLO(model_dir)

def predict_and_draw(img, conf_threshold, iou_threshold):
    """
    Detect plate characters in an image, annotate them, and build the
    Arabic plate string.

    Letters are joined with '-' in right-to-left reading order; the digit
    group (Arabic-Indic numerals) is appended last.

    Parameters
    ----------
    img : PIL.Image.Image
        Input image from the Gradio widget.
    conf_threshold : float
        Minimum detection confidence passed to YOLO.
    iou_threshold : float
        NMS IoU threshold passed to YOLO.

    Returns
    -------
    tuple[PIL.Image.Image, str]
        The annotated image and the concatenated plate text, or the
        unmodified input and a message when nothing is detected.
    """
    # Convert Gradio's PIL input to the ndarray YOLO expects.
    image = np.array(img)

    # imgsz=1024 matches the checkpoint name (license_yolo8s_1024).
    results = model.predict(image, conf=conf_threshold, iou=iou_threshold,
                            verbose=False, imgsz=1024)

    # BUG FIX: `predict` returns a non-empty list even with zero detections,
    # so the old `if not results:` guard never fired; check the boxes instead.
    if not results or len(results[0].boxes) == 0:
        return img, "No objects detected."

    # Collect (x_max, y_min, class_name, box) tuples so we can sort by x_max.
    boxes_and_classes = []
    for box, cls in zip(results[0].boxes.xyxy, results[0].boxes.cls):
        x_min, y_min, x_max, y_max = map(int, box[:4])
        class_name = model.names[int(cls)]
        boxes_and_classes.append((x_max, y_min, class_name, (x_min, y_min, x_max, y_max)))

    # Arabic plates read right-to-left: sort detections by x_max descending.
    boxes_and_classes.sort(key=lambda b: b[0], reverse=True)

    # Split detections into Arabic letters and Arabic-Indic digits.
    # BUG FIX: the old code only translated labels where cls.isalpha() was
    # True, so "7aah" (ح) was passed through untranslated and then filtered
    # out of the output entirely; mapping through `hoppa` first fixes that.
    letters = []
    digits = []
    for _, _, cls, _ in boxes_and_classes:
        mapped = hoppa.get(cls, cls)
        if cls.isdigit():
            digits.append(mapped)   # already the Arabic-Indic numeral
        elif mapped:                # skip "" placeholders (car / License Plate)
            letters.append(mapped)

    # Digits were gathered right-to-left; reverse so they display in the
    # conventional left-to-right numeric order.
    digit_str = ''.join(digits)[::-1]
    concatenated_classes = '-'.join(letters)
    if digit_str:
        concatenated_classes = f'{concatenated_classes}-{digit_str}' if concatenated_classes else digit_str

    # Annotate: green box per detection, red dot on the top-right corner
    # (the reading origin for right-to-left text).
    annotated_image = Image.fromarray(image)
    draw = ImageDraw.Draw(annotated_image)
    for _, _, _, (x_min, y_min, x_max, y_max) in boxes_and_classes:
        draw.rectangle([x_min, y_min, x_max, y_max], outline="green", width=2)
        draw.ellipse([x_max - 5, y_min - 5, x_max + 5, y_min + 5], fill="red")

    return annotated_image, concatenated_classes

# Gradio interface: one image input plus two threshold sliders, wired to
# predict_and_draw; outputs the annotated image and the plate text.
iface = gr.Interface(
    fn=predict_and_draw,
    inputs=[
        gr.Image(type="pil", label="Upload Image"),
        # Slider defaults mirror the common YOLO defaults (conf=0.25, iou=0.45).
        gr.Slider(minimum=0, maximum=1, value=0.25, label="Confidence threshold"),
        gr.Slider(minimum=0, maximum=1, value=0.45, label="IoU threshold")
    ],
    outputs=[
        gr.Image(type="pil", label="Annotated Image"),
        gr.Textbox(label="Concatenated Classes")
    ],
    title="License Plate Recognition.",
    description="Upload an image to detect license plates."
)

# Launch the interface (blocks, serving the app on the default local port).
iface.launch()