# File size: 2,553 Bytes
# Revision: 77f8d5f
from ultralytics import YOLO
from glob import glob
import matplotlib.pyplot as plt
import cv2
import os
from PIL import Image
from ultralytics.engine.results import Results
import numpy as np
class detection:
    """Detect words with a YOLO model and return their crops grouped by line.

    The model is run on an image, the predicted boxes are ordered top to
    bottom, grouped into text lines with a vertical threshold, and each box is
    cut out of the image.
    """

    def __init__(self, model_path='detection.pt'):
        """Load the YOLO weights.

        Args:
            model_path: Weights file name, joined onto the directory that
                contains this module.
        """
        current_dir = os.path.dirname(os.path.abspath(__file__))
        self.model = YOLO(os.path.join(current_dir, model_path))

    @staticmethod
    def _to_numpy(tensor):
        """Return box coordinates as a NumPy array, moving them to CPU first."""
        if isinstance(tensor, np.ndarray):
            return tensor
        # ``.numpy()`` alone fails for tensors that live on a GPU.
        return tensor.cpu().numpy()

    def get_distance(self, res):
        """Order the boxes of the first result by column 1, then column 0, of ``xywh``.

        Args:
            res: Model output; only ``res[0].boxes.xywh`` is read. Per the
                Ultralytics docs, columns 0 and 1 are the box centre x and y.

        Returns:
            A tuple ``(sorted_y, sorted_indices)``: the column-1 values in
            sorted order and the index array that produces that order.
        """
        boxes = self._to_numpy(res[0].boxes.xywh)
        # lexsort treats the LAST key as the primary one: y first, then x.
        sorted_indices = np.lexsort((boxes[:, 0], boxes[:, 1]))
        return boxes[sorted_indices][:, 1], sorted_indices

    def handle_the_boxes(self, res, img, y_threshold=30, right_to_left=True):
        """Group detected boxes into lines and crop each one out of ``img``.

        Args:
            res: Model output; ``res[0].boxes`` supplies ``xywh`` and ``xyxy``.
            img: Image array indexed as ``img[row, column]``.
            y_threshold: A box starts a new line when its ``ymin`` differs
                from the previously visited box's ``ymin`` by more than this.
            right_to_left: Order of the words inside every line. ``True``
                sorts by descending ``xmin``, ``False`` by ascending ``xmin``.
                NOTE(review): earlier code sorted every line except the last
                one in descending order and the last one in ascending order;
                the direction is now the same for all lines. Confirm that the
                default matches the script being read.

        Returns:
            A list of lines, each a list of image crops. Empty when the model
            produced no boxes.
        """
        _, sorted_indices = self.get_distance(res)
        ordered = self._to_numpy(res[0].boxes.xyxy)[sorted_indices]
        if len(ordered) == 0:
            return []

        lines = []
        current_line = [ordered[0]]
        for box in ordered[1:]:
            # Compare ymin with the box visited just before this one.
            if abs(box[1] - current_line[-1][1]) > y_threshold:
                lines.append(current_line)
                current_line = [box]
            else:
                current_line.append(box)
        lines.append(current_line)

        return [
            [
                self.words_pixels(img, box)
                for box in sorted(line, key=lambda b: b[0], reverse=right_to_left)
            ]
            for line in lines
        ]

    def words_pixels(self, img, xyxy):
        """Return the region of ``img`` covered by one ``xyxy`` box.

        Args:
            img: Image array indexed as ``img[row, column]``.
            xyxy: Four values ``xmin, ymin, xmax, ymax``.

        Returns:
            The slice of ``img`` for the box, with the max edges included.
        """
        # Clamp at 0 so a negative coordinate cannot wrap around as a slice index.
        xmin, ymin, xmax, ymax = (max(int(value), 0) for value in xyxy)
        return img[ymin:ymax + 1, xmin:xmax + 1]

    def full_pipeline(self, image, show=False):
        """Run detection on ``image`` and return the word crops per line.

        Args:
            image: A file path (``str`` or ``os.PathLike``), a NumPy image
                array, or a PIL image.
            show: When true, call ``show()`` on the first result.

        Returns:
            The value of ``handle_the_boxes`` for this image.

        Raises:
            ValueError: If a path was given and OpenCV could not read it.
            TypeError: If ``image`` is of an unsupported type.
        """
        if isinstance(image, (str, os.PathLike)):
            img = cv2.imread(os.fspath(image))
            if img is None:
                # cv2.imread signals failure by returning None, not by raising.
                raise ValueError(f"Could not read image from path: {image!r}")
        elif isinstance(image, np.ndarray):
            img = image
        elif isinstance(image, Image.Image):
            # Reverse the channel axis so crops use the same channel order as
            # the cv2.imread branch (BGR per the OpenCV docs).
            rgb = np.asarray(image.convert("RGB"))
            img = np.ascontiguousarray(rgb[:, :, ::-1])
        else:
            raise TypeError(
                f"Unsupported image type: {type(image).__name__}"
            )

        res = self.model(image)
        if show:
            res[0].show()
        return self.handle_the_boxes(res, img)
|