Spaces:

MohammedHamdy32
/

Information_extraction_from_IDs

Sleeping

File size: 2,553 Bytes

77f8d5f

from ultralytics import YOLO
from glob import glob
import matplotlib.pyplot as plt
import cv2
import os
from PIL import Image
from ultralytics.engine.results import Results
import numpy as np


class detection:

    def __init__(self,model_path='detection.pt'):
        current_dir = os.path.dirname(os.path.abspath(__file__))
        model_path = os.path.join(current_dir , model_path )
        self.model = YOLO(model_path)

    def get_distance(self,res):
        boxes = res[0].boxes.xywh.numpy()  # Convert to numpy array
        # Sort primarily by Y (vertical), then X (horizontal) using lexsort
        sorted_indices = np.lexsort((boxes[:, 0], boxes[:, 1]))
        sorted_boxes = boxes[sorted_indices]
        return sorted_boxes[:, 1], sorted_indices  # Return sorted Y values and indices

    def handle_the_boxes(self,res, img, y_threshold=30):
        distance_sorted, sorted_indices = self.get_distance(res)
        PB = res[0].boxes.xyxy.numpy()[sorted_indices]  # Get boxes in sorted order
        same_object = []
        current_line = [PB[0]]

        # Group boxes into lines using Y threshold
        for i in range(1, len(PB)):
            prev_y = current_line[-1][1]  # Use ymin from XYXY format
            current_y = PB[i][1]
            if abs(current_y - prev_y) > y_threshold:
                # Sort line left-to-right before adding
                current_line = sorted(current_line, key=lambda x: x[0] , reverse=True)
                same_object.append(current_line)
                current_line = [PB[i]]
            else:
                current_line.append(PB[i])

        # Add the last line and sort it
        if current_line:
            current_line = sorted(current_line, key=lambda x: x[0])
            same_object.append(current_line)

        # Extract word images in final order
        return [
            [self.words_pixels(img, box) for box in line]
            for line in same_object
        ]

    # Keep words_pixels as original
    def words_pixels(self,img, xyxy):
        xmin, ymin, xmax, ymax = xyxy.tolist()
        return img[int(ymin):int(ymax)+1, int(xmin):int(xmax)+1]

    def full_pipeline(self,image,show=False):

        if isinstance(image, str):  # If the input is a file path
            img  = cv2.imread(image)
        elif isinstance(image, np.ndarray):  # If the input is a NumPy array
            image = image
            img = image
    
        res = self.model(image)

        if show:
            res[0].show()


        return  self.handle_the_boxes(res , img)