Spaces:
Sleeping
Sleeping
| from ultralytics import YOLO | |
| from glob import glob | |
| import matplotlib.pyplot as plt | |
| import cv2 | |
| import os | |
| from PIL import Image | |
| from ultralytics.engine.results import Results | |
| import numpy as np | |
class detection:
    """Detect word boxes with a YOLO model and crop them in reading order.

    The detected boxes are grouped into text lines by vertical position and
    each line is ordered horizontally, so the result is a list of lines, each
    a list of cropped word images.
    """

    def __init__(self, model_path='../detection.pt'):
        """Load the YOLO weights.

        Args:
            model_path: Path to the weights file, joined onto the directory
                that contains this source file (``os.path.join`` leaves an
                absolute path unchanged).
        """
        current_dir = os.path.dirname(os.path.abspath(__file__))
        model_path = os.path.join(current_dir, model_path)
        self.model = YOLO(model_path)

    @staticmethod
    def _to_numpy(values):
        """Return *values* as a NumPy array.

        Box coordinates may arrive as a tensor living on an accelerator;
        calling ``.numpy()`` on such a tensor directly fails, so it is moved
        to the CPU first. Objects that are already arrays pass through.
        """
        if hasattr(values, "cpu"):
            values = values.cpu()
        if hasattr(values, "numpy"):
            values = values.numpy()
        return np.asarray(values)

    def get_distance(self, res):
        """Order the detections of ``res[0]`` top-to-bottom, then by x.

        Args:
            res: Sequence of model results; only ``res[0].boxes.xywh`` is read.

        Returns:
            A tuple ``(sorted_y, sorted_indices)``: column 1 of the ``xywh``
            boxes in sorted order, and the permutation that produces that
            order. Both are empty when there are no detections.
        """
        # reshape keeps the column indexing below valid even with zero boxes.
        boxes = self._to_numpy(res[0].boxes.xywh).reshape(-1, 4)
        # lexsort treats the LAST key as primary: column 1 (y) first, then
        # column 0 (x) to break ties.
        sorted_indices = np.lexsort((boxes[:, 0], boxes[:, 1]))
        return boxes[sorted_indices][:, 1], sorted_indices

    def handle_the_boxes(self, res, img, y_threshold=30, right_to_left=True):
        """Group detections into text lines and crop every word from *img*.

        Args:
            res: Sequence of model results; ``res[0].boxes`` is read.
            img: Image array indexed as ``img[row, column]``.
            y_threshold: A box starts a new line when its top edge differs
                from the previous box's top edge by more than this many
                pixels.
            right_to_left: When true (default) the words of each line are
                ordered by descending left edge; when false, ascending.

        Returns:
            A list of lines, each a list of cropped word images. Empty when
            nothing was detected.
        """
        _, sorted_indices = self.get_distance(res)
        boxes = self._to_numpy(res[0].boxes.xyxy).reshape(-1, 4)[sorted_indices]
        if len(boxes) == 0:
            # Nothing detected: there is no first box to seed a line with.
            return []

        lines = []
        current_line = [boxes[0]]
        for box in boxes[1:]:
            # Compare against the most recently added box (top edge, xyxy).
            if abs(box[1] - current_line[-1][1]) > y_threshold:
                lines.append(current_line)
                current_line = [box]
            else:
                current_line.append(box)
        lines.append(current_line)

        # Every line, including the last one, is ordered with the same
        # direction so the reading order is consistent across the result.
        return [
            [
                self.words_pixels(img, box)
                for box in sorted(line, key=lambda b: b[0], reverse=right_to_left)
            ]
            for line in lines
        ]

    def words_pixels(self, img, xyxy):
        """Crop the region described by an ``(xmin, ymin, xmax, ymax)`` box.

        The max edges are inclusive. Negative coordinates are clamped to 0 so
        they cannot wrap around to the far side of the image through Python's
        negative-index slicing.
        """
        xmin, ymin, xmax, ymax = np.asarray(xyxy).tolist()
        left, top = max(int(xmin), 0), max(int(ymin), 0)
        right, bottom = max(int(xmax) + 1, 0), max(int(ymax) + 1, 0)
        return img[top:bottom, left:right]

    def full_pipeline(self, image, show=False):
        """Run detection on *image* and return the cropped words by line.

        Args:
            image: Either a file path (joined onto the directory containing
                this source file) or an image already loaded as a NumPy array.
            show: When true, display the first result via its ``show()``.

        Returns:
            The list of lines produced by ``handle_the_boxes``.

        Raises:
            FileNotFoundError: The path could not be read as an image.
            TypeError: *image* is neither a ``str`` nor a ``numpy.ndarray``.
        """
        if isinstance(image, str):
            current_dir = os.path.dirname(os.path.abspath(__file__))
            image = os.path.join(current_dir, image)
            img = cv2.imread(image)
            if img is None:
                # imread signals failure by returning None instead of raising.
                raise FileNotFoundError(
                    f"Image is missing or unreadable: {image}"
                )
        elif isinstance(image, np.ndarray):
            img = image
        else:
            # Fail before inference rather than with an unbound local after it.
            raise TypeError(
                "image must be a file path (str) or a numpy.ndarray, "
                f"got {type(image).__name__}"
            )

        res = self.model(image)
        if show:
            res[0].show()
        return self.handle_the_boxes(res, img)
# Module-level detector instance; kept at import time so code that imports
# this module can still reference ``det``.
det = detection()

if __name__ == "__main__":
    # Smoke test on a sample image. Guarded so that importing this module
    # does not run inference (or fail when the sample file is absent).
    det.full_pipeline('id_1.png')