# IDs_demo / detection / detection.py
# MohammedHamdy32's picture
# test
# 7b7f574
from ultralytics import YOLO
from glob import glob
import matplotlib.pyplot as plt
import cv2
import os
from PIL import Image
from ultralytics.engine.results import Results
import numpy as np
class detection:
    """Run a YOLO model on an image and return the detected boxes as crops.

    The detected boxes are ordered top-to-bottom, grouped into horizontal
    lines by their top edge, and each box is cut out of the image.
    """

    def __init__(self, model_path='../detection.pt'):
        """Load the YOLO weights.

        Args:
            model_path: Path to the weights file. It is joined onto the
                directory that contains this module, so a relative path is
                resolved against that directory rather than the working
                directory (an absolute path is used unchanged).
        """
        current_dir = os.path.dirname(os.path.abspath(__file__))
        self.model = YOLO(os.path.join(current_dir, model_path))

    def get_distance(self, res):
        """Order the detected boxes by centre Y, then by centre X.

        Args:
            res: Sequence of model results; only ``res[0].boxes`` is read.

        Returns:
            A tuple ``(sorted_y, sorted_indices)`` where ``sorted_y`` holds the
            centre-Y value of every box in sorted order and ``sorted_indices``
            are the positions of those boxes in the original box array.
        """
        # .cpu() makes the conversion work when inference ran on a GPU;
        # calling .numpy() on a CUDA tensor raises.
        boxes = res[0].boxes.xywh.cpu().numpy()
        # np.lexsort treats the LAST key as the primary one: Y first, then X.
        sorted_indices = np.lexsort((boxes[:, 0], boxes[:, 1]))
        return boxes[sorted_indices][:, 1], sorted_indices

    def handle_the_boxes(self, res, img, y_threshold=30):
        """Group the detected boxes into lines and crop each one from ``img``.

        Args:
            res: Sequence of model results; only ``res[0].boxes`` is read.
            img: Image array indexed as ``img[row, column]``.
            y_threshold: A box starts a new line when its top edge differs
                from the previous box's top edge by more than this many
                pixels.

        Returns:
            A list of lines (top to bottom), each a list of image crops.
            Returns an empty list when no boxes were detected.
        """
        _, sorted_indices = self.get_distance(res)
        ordered_boxes = res[0].boxes.xyxy.cpu().numpy()[sorted_indices]

        # Nothing detected: avoid indexing into an empty array below.
        if len(ordered_boxes) == 0:
            return []

        lines = []
        current_line = [ordered_boxes[0]]
        for box in ordered_boxes[1:]:
            previous_top = current_line[-1][1]  # ymin of the last box added
            if abs(box[1] - previous_top) > y_threshold:
                # Completed (non-final) lines are ordered right-to-left,
                # i.e. by descending xmin.
                lines.append(
                    sorted(current_line, key=lambda b: b[0], reverse=True)
                )
                current_line = [box]
            else:
                current_line.append(box)

        # The final line is ordered left-to-right (ascending xmin).
        # NOTE(review): this differs from the right-to-left order used for
        # the earlier lines; presumably intentional for the layout being
        # read - confirm before unifying the two.
        lines.append(sorted(current_line, key=lambda b: b[0]))

        return [[self.words_pixels(img, box) for box in line] for line in lines]

    def words_pixels(self, img, xyxy):
        """Crop one box out of ``img``.

        Args:
            img: Image array indexed as ``img[row, column]``.
            xyxy: Array-like ``(xmin, ymin, xmax, ymax)`` exposing
                ``.tolist()``. Coordinates are truncated to integers and the
                max edge is included in the crop.

        Returns:
            The sub-array ``img[ymin:ymax + 1, xmin:xmax + 1]``.
        """
        xmin, ymin, xmax, ymax = (int(value) for value in xyxy.tolist())
        # Clamp at 0 so a negative coordinate cannot wrap around and select
        # pixels from the opposite side of the image.
        return img[
            max(ymin, 0):max(ymax + 1, 0),
            max(xmin, 0):max(xmax + 1, 0),
        ]

    def full_pipeline(self, image, show=False):
        """Detect boxes in ``image`` and return the grouped crops.

        Args:
            image: Either a file path (joined onto this module's directory,
                so relative paths resolve against it) or an image already
                loaded as a NumPy array.
            show: When true, display the annotated result via
                ``res[0].show()``.

        Returns:
            The nested list of crops produced by ``handle_the_boxes``.

        Raises:
            FileNotFoundError: If the path could not be read as an image.
            TypeError: If ``image`` is neither a ``str`` nor a NumPy array.
        """
        if isinstance(image, str):
            current_dir = os.path.dirname(os.path.abspath(__file__))
            image = os.path.join(current_dir, image)
            img = cv2.imread(image)
            # cv2.imread signals failure by returning None instead of raising.
            if img is None:
                raise FileNotFoundError(
                    f"Image is missing or unreadable: {image}"
                )
        elif isinstance(image, np.ndarray):
            img = image
        else:
            raise TypeError(
                "image must be a file path or a numpy.ndarray, "
                f"got {type(image).__name__}"
            )

        res = self.model(image)
        if show:
            res[0].show()
        return self.handle_the_boxes(res, img)
# Demo run: build a detector with the default weights path and process the
# sample image 'id_1.png'.
# NOTE(review): these statements execute whenever the module is imported,
# not only when it is run as a script.
det = detection()
det.full_pipeline(image='id_1.png')