File size: 2,553 Bytes
77f8d5f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
from ultralytics import YOLO
from glob import glob
import matplotlib.pyplot as plt
import cv2
import os
from PIL import Image
from ultralytics.engine.results import Results
import numpy as np


class detection:
    """Detect boxes with a YOLO model and return their crops in reading order.

    The model weights are loaded once in ``__init__``.  ``full_pipeline`` is
    the main entry point: it runs the model on an image, groups the detected
    boxes into lines by vertical position, orders the boxes inside every line
    horizontally and returns the cropped pixels of each box.
    """

    def __init__(self, model_path='detection.pt'):
        """Load the YOLO weights.

        Args:
            model_path: Weights file, joined onto the directory that contains
                this source file (``os.path.join`` leaves an absolute path
                unchanged).
        """
        current_dir = os.path.dirname(os.path.abspath(__file__))
        model_path = os.path.join(current_dir, model_path)
        self.model = YOLO(model_path)

    @staticmethod
    def _to_numpy(tensor):
        """Return *tensor* as a NumPy array.

        Accepts objects exposing ``.cpu()`` / ``.numpy()`` as well as plain
        arrays.  Moving to the CPU first avoids the error raised when
        ``.numpy()`` is called on a tensor that lives on an accelerator.
        """
        if hasattr(tensor, "cpu"):
            tensor = tensor.cpu()
        if hasattr(tensor, "numpy"):
            tensor = tensor.numpy()
        return np.asarray(tensor)

    def get_distance(self, res):
        """Compute the top-to-bottom ordering of the boxes in ``res[0]``.

        Boxes are ordered by column 1 of ``boxes.xywh`` (the Y value), with
        column 0 (the X value) breaking ties.

        Args:
            res: Model output; only ``res[0].boxes.xywh`` is read.

        Returns:
            A tuple ``(sorted_y, sorted_indices)``: the Y values in sorted
            order and the index permutation that produces that order.
        """
        boxes = self._to_numpy(res[0].boxes.xywh)
        # np.lexsort uses the LAST key as the primary one: Y first, then X.
        sorted_indices = np.lexsort((boxes[:, 0], boxes[:, 1]))
        return boxes[sorted_indices][:, 1], sorted_indices

    def handle_the_boxes(self, res, img, y_threshold=30, right_to_left=True):
        """Group detected boxes into lines and crop them from *img*.

        Args:
            res: Model output; ``res[0].boxes.xyxy`` and ``.xywh`` are read.
            img: Image array the boxes refer to, indexed as ``img[y, x]``.
            y_threshold: A box starts a new line when its ``ymin`` differs
                from the previous box's ``ymin`` by more than this value.
            right_to_left: Horizontal order inside each line.  ``True`` puts
                the box with the largest ``xmin`` first; ``False`` puts the
                smallest first.  The same order is applied to every line,
                including the last one.

        Returns:
            A list of lines (top to bottom); each line is a list of cropped
            image arrays.  Empty when there are no detections.
        """
        _, sorted_indices = self.get_distance(res)
        boxes = self._to_numpy(res[0].boxes.xyxy)[sorted_indices]

        # No detections: nothing to group (indexing boxes[0] would fail).
        if len(boxes) == 0:
            return []

        # Walk the boxes top to bottom and split whenever the vertical jump
        # from the previously visited box exceeds the threshold.
        lines = []
        current_line = [boxes[0]]
        for box in boxes[1:]:
            if abs(box[1] - current_line[-1][1]) > y_threshold:
                lines.append(current_line)
                current_line = [box]
            else:
                current_line.append(box)
        lines.append(current_line)

        # One ordering rule for every line so that the output is consistent.
        return [
            [
                self.words_pixels(img, box)
                for box in sorted(line, key=lambda b: b[0],
                                  reverse=right_to_left)
            ]
            for line in lines
        ]

    def words_pixels(self, img, xyxy):
        """Crop the region described by *xyxy* out of *img*.

        Args:
            img: Image array indexed as ``img[y, x]``.
            xyxy: Four values ``(xmin, ymin, xmax, ymax)``.

        Returns:
            ``img`` sliced to the box; the ``xmax``/``ymax`` row and column
            are included in the crop.
        """
        xmin, ymin, xmax, ymax = (int(v) for v in xyxy)
        # Clamp the lower bounds: a negative start index would be interpreted
        # by the slice as "count from the end" and return the wrong region.
        xmin, ymin = max(xmin, 0), max(ymin, 0)
        return img[ymin:ymax + 1, xmin:xmax + 1]

    def full_pipeline(self, image, show=False, y_threshold=30,
                      right_to_left=True):
        """Run detection on *image* and return the ordered crops.

        Args:
            image: Path to an image file (``str`` or ``os.PathLike``) or an
                already loaded ``numpy.ndarray``.
            show: When true, call ``show()`` on the first result.
            y_threshold: Forwarded to ``handle_the_boxes``.
            right_to_left: Forwarded to ``handle_the_boxes``.

        Returns:
            The list of lines produced by ``handle_the_boxes``.

        Raises:
            TypeError: If *image* is neither a path nor an ndarray.
            FileNotFoundError: If the path does not point to a file.
            ValueError: If the file exists but could not be read as an image.
        """
        if isinstance(image, (str, os.PathLike)):
            image = os.fspath(image)
            img = cv2.imread(image)
            # cv2.imread signals failure by returning None, not by raising.
            if img is None:
                if not os.path.isfile(image):
                    raise FileNotFoundError(
                        f"Image file not found: {image!r}")
                raise ValueError(f"Could not read image file: {image!r}")
        elif isinstance(image, np.ndarray):
            img = image
        else:
            # Fail before inference instead of hitting an unbound `img` later.
            raise TypeError(
                "image must be a file path or a numpy.ndarray, "
                f"got {type(image).__name__}"
            )

        res = self.model(image)

        if show:
            res[0].show()

        return self.handle_the_boxes(
            res, img, y_threshold=y_threshold, right_to_left=right_to_left
        )