File size: 7,324 Bytes
76da613
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
import cv2
import numpy as np
from sklearn import svm
import os
import random
from concurrent.futures import ThreadPoolExecutor
from joblib import dump, load
import pytesseract
from tqdm import tqdm

# Point pytesseract at the tesseract executable. The path is hard-coded, so it
# has to be edited on machines where tesseract is installed elsewhere.
# NOTE(review): no other use of pytesseract is visible in this file - confirm
# the dependency is still needed.
pytesseract.pytesseract.tesseract_cmd = '/opt/homebrew/bin/tesseract'
# Class-index -> directory-path lookup. Starts empty and is rebound (via
# `global dir_names`) when the load_data() generator first runs.
dir_names = []

def _class_color(name):
    """Return a BGR colour tuple derived from ``hash(name)``.

    The colour is consistent for a given name within one interpreter run,
    which is what keeps bounding boxes and legend swatches in agreement.
    """
    color_hash = hash(name) & 0xffffff
    return (color_hash >> 16) & 0xff, (color_hash >> 8) & 0xff, color_hash & 0xff


def load_data(batch_size=100):
    """Yield ``(X, y)`` training batches read from ``organized_spectrograms/``.

    Every sub-directory is one class; its position in the sorted directory
    listing is the class label. As a side effect the module-level
    ``dir_names`` list is rebound to the class directories so that labels can
    be mapped back to names later.

    Args:
        batch_size: Minimum number of samples to accumulate before a batch is
            yielded. Batches are cut on directory boundaries, so a batch may
            be larger than this.

    Yields:
        Tuples ``(X, y)`` where each row of ``X`` is a flattened grayscale
        image and ``y`` holds the matching integer class labels.
    """
    global dir_names
    img_shape = (300, 700)  # handed to cv2.resize as dsize, i.e. (width, height)
    root = 'organized_spectrograms'
    # os.listdir() order is arbitrary; sorting keeps the label -> directory
    # mapping identical from run to run. Non-directories are skipped because
    # they would make the per-class os.listdir() below fail.
    dir_names = [
        root + '/' + d
        for d in sorted(os.listdir(root))
        if d != ".DS_Store" and '.py' not in d and os.path.isdir(os.path.join(root, d))
    ]

    def load_and_resize_image(path):
        # cv2.imread signals an unreadable file by returning None, not by raising.
        img = cv2.imread(path, 0)
        if img is None:
            return None
        return cv2.resize(img, img_shape)

    X, y = [], []
    for i, dir_name in tqdm(enumerate(dir_names), desc="Loading Directories", total=len(dir_names)):
        paths = [
            os.path.join(dir_name, f)
            for f in os.listdir(dir_name)
            if f.endswith(('.jpg', '.png'))
        ]
        with ThreadPoolExecutor(max_workers=20) as executor:
            loaded = list(executor.map(load_and_resize_image, paths))

        images = [img for img in loaded if img is not None]
        skipped = len(loaded) - len(images)
        if skipped:
            print(f'Warning: skipped {skipped} unreadable image(s) in {dir_name}')

        if not images:
            print(f'Error: No images found in {dir_name}')
            continue

        X.extend(img.flatten() for img in images)
        y.extend([i] * len(images))

        if len(X) >= batch_size:
            yield np.array(X), np.array(y)
            X, y = [], []

    if X:
        yield np.array(X), np.array(y)


def train_classifier_in_batches(batch_generator, batch_size=100):
    """Train an SVM on all batches and save it to ``contour.joblib``.

    ``SVC.fit`` always starts from scratch, so fitting once per batch would
    keep only what was learned from the final batch (and a batch holding a
    single class cannot be fitted at all). The batches are therefore gathered
    and fitted in one call. This holds the whole training set in memory.

    Args:
        batch_generator: Iterable of ``(X, y)`` array pairs.
        batch_size: Unused; retained so existing callers keep working.

    Returns:
        The fitted classifier (also written to ``contour.joblib``).

    Raises:
        ValueError: If the generator produced no non-empty batch.
    """
    X_parts, y_parts = [], []
    for X_batch, y_batch in tqdm(batch_generator, desc="Training SVM in Batches"):
        if X_batch.size > 0 and y_batch.size > 0:
            X_parts.append(X_batch)
            y_parts.append(y_batch)

    if not X_parts:
        raise ValueError('No training data was produced; cannot train the classifier')

    clf = svm.SVC()
    clf.fit(np.concatenate(X_parts), np.concatenate(y_parts))

    dump(clf, 'contour.joblib')
    return clf


def process_image(clf, dir_names, min_size, max_size, image_path, num_classes_to_detect):
    """Classify contour regions of an image and draw a box around each one.

    The image is thresholded, contours are extracted, and every contour whose
    bounding-box area lies in ``[min_size, max_size]`` is resized to the
    training shape and scored with ``clf.decision_function``. Each accepted
    crop is also written to ``<best class dir>_negative/`` under a random
    file name.

    Args:
        clf: Fitted classifier exposing ``decision_function``.
        dir_names: Class directory names, indexed by class label.
        min_size: Smallest accepted bounding-box area (width * height).
        max_size: Largest accepted bounding-box area (width * height).
        image_path: Path of the image to analyse.
        num_classes_to_detect: How many top-scoring classes per region
            contribute to the running scores.

    Returns:
        ``(image, avg_scores, details)``: the annotated colour image, the
        per-class score sums divided by the number of accepted regions, and
        one dict per accepted region with keys ``x``, ``y``, ``w``, ``h`` and
        ``class``.

    Raises:
        FileNotFoundError: If ``image_path`` cannot be read as an image.
    """
    img_shape = (300, 700)  # This is the shape used during training

    # Load the image from the file
    screenshot = cv2.imread(image_path)
    if screenshot is None:
        raise FileNotFoundError(f'Could not read image: {image_path}')
    screenshot_gray = cv2.cvtColor(screenshot, cv2.COLOR_BGR2GRAY)

    _, thresh = cv2.threshold(screenshot_gray, 127, 255, 0)
    contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

    avg_scores = np.zeros(len(dir_names))
    num_rois, details = 0, []

    def score_contour(contour):
        """Return ``(box, roi, scores)`` for an accepted contour, else None."""
        x, y, w, h = cv2.boundingRect(contour)
        if not min_size <= w * h <= max_size:
            return None
        roi = screenshot_gray[y:y + h, x:x + w]

        # Resize the ROI to match the training shape
        features = cv2.resize(roi, img_shape).flatten().reshape(1, -1)
        scores = np.asarray(clf.decision_function(features)[0])
        if scores.ndim == 0:
            # Two-class models return one signed value per sample (positive
            # means the second class); expand it to one score per class.
            scores = np.array([-scores, scores])
        return (x, y, w, h), roi, scores

    # Workers only compute. All shared state (counters, the image, the output
    # directories) is touched from this thread, and iterating the map results
    # re-raises any exception a worker hit instead of dropping it.
    with ThreadPoolExecutor(max_workers=5) as executor:
        for result in executor.map(score_contour, contours):
            if result is None:
                continue
            (x, y, w, h), roi, scores = result

            # Accumulate only the top-N classes of this region
            top_indices = np.argsort(scores)[-num_classes_to_detect:]
            avg_scores[top_indices] += scores[top_indices]
            num_rois += 1

            best_class = dir_names[int(np.argmax(scores))]
            cv2.rectangle(screenshot, (x, y), (x + w, y + h), _class_color(best_class), 2)
            details.append({'x': x, 'y': y, 'w': w, 'h': h, 'class': best_class})

            negative_dir_name = f"{best_class}_negative"
            os.makedirs(negative_dir_name, exist_ok=True)
            cv2.imwrite(os.path.join(negative_dir_name, str(random.randint(1, 999999999)) + '.png'), roi)

    if num_rois > 0:
        avg_scores /= num_rois
    else:
        print('Warning: No ROIs were processed')

    return screenshot, avg_scores, details


def draw_menu(screenshot, detected_classes, max_menu_width=200, item_height=30, font_scale=0.5):
    """Return a copy of ``screenshot`` with a class legend attached on the right.

    Args:
        screenshot: 3-channel image array.
        detected_classes: Class names to list, one legend row each.
        max_menu_width: Upper bound on the legend width in pixels; the legend
            is also limited to a quarter of the screenshot width.
        item_height: Height of one legend row in pixels.
        font_scale: Font scale passed to ``cv2.putText``.

    Returns:
        A new image wide enough for the screenshot plus the legend and tall
        enough for whichever of the two is taller; unused area is black.
    """
    screenshot_height, screenshot_width = screenshot.shape[:2]

    # One row per class plus 20 px of padding
    menu_height = len(detected_classes) * item_height + 20
    menu_width = min(max_menu_width, screenshot_width // 4)

    menu_img = np.zeros((menu_height, menu_width, 3), dtype=np.uint8)

    for i, class_name in enumerate(detected_classes):
        row_top = i * item_height
        # Colour swatch followed by the class name
        cv2.rectangle(menu_img, (10, row_top + 10), (40, row_top + item_height), _class_color(class_name), -1)
        cv2.putText(menu_img, class_name, (50, row_top + item_height // 2 + 5), cv2.FONT_HERSHEY_SIMPLEX, font_scale, (255, 255, 255), 1, cv2.LINE_AA)

    total_height = max(screenshot_height, menu_height)
    new_image = np.zeros((total_height, screenshot_width + menu_width, 3), dtype=np.uint8)

    new_image[:screenshot_height, :screenshot_width] = screenshot
    # Index from the screenshot's right edge: a negative slice would select
    # the whole row when menu_width is 0.
    new_image[:menu_height, screenshot_width:] = menu_img

    return new_image


# The helpers above live at module level so they exist whether or not a cached
# model is present; the rest of the script calls them unconditionally.
try:
    clf = load('contour.joblib')
except Exception as exc:
    # A missing, corrupt or incompatible model file all mean the same thing
    # here: there is no usable cached model yet.
    print(f'Could not load cached model from contour.joblib: {exc}')
    clf = None


def save_image(image, file_path):
    """Write ``image`` to ``file_path`` and report whether it worked.

    ``cv2.imwrite`` reports failure through its return value, so the result is
    checked before success is announced.

    Returns:
        True if the image was written, False otherwise.
    """
    written = cv2.imwrite(file_path, image)
    if written:
        print(f"Image saved to {file_path}")
    else:
        print(f"Failed to save image to {file_path}")
    return written


def display_image(image, file_path='finalle.png', window_name='Processed Image'):
    """Save ``image``, show it in a window and wait for a key press.

    Args:
        image: Image array to save and display.
        file_path: Where the image is written before it is shown.
        window_name: Title of the display window.

    Returns:
        The key code returned by ``cv2.waitKey(0)``.
    """
    save_image(image, file_path)
    cv2.imshow(window_name, image)
    # waitKey(0) blocks until a key is pressed in the window
    return cv2.waitKey(0)



# Load data in batches and train the classifier. Iterating load_data() is also
# what fills the module-level dir_names list used below to turn class indices
# back into names.
batch_generator = load_data(batch_size=100)  # Adjust batch size as needed
train_classifier_in_batches(batch_generator)

# Load the trained model
clf = load('contour.joblib')

while True:
    num_classes_to_detect = int(input('Enter the number of top classes to detect: '))
    min_size = int(input('Enter minimum size for ROIs (width*height): '))
    max_size = int(input('Enter maximum size for ROIs (width*height): '))
    image_path = input('Enter the path of the image you want to process: ')
    screenshot, avg_scores, details = process_image(clf, dir_names, min_size, max_size, image_path, num_classes_to_detect)

    # Only display the detected classes, best-scoring first
    top_indices = np.argsort(avg_scores)[-num_classes_to_detect:][::-1]
    detected_classes = [dir_names[i] for i in top_indices]

    # Rely on draw_menu's default geometry. Its third and fourth positional
    # parameters are max_menu_width and item_height; small values there make
    # the legend narrower than the x-offsets it draws swatches and text at.
    screenshot = draw_menu(screenshot, detected_classes)
    key = display_image(screenshot)

    # 'r' runs another image; any other key ends the session
    if key != ord('r'):
        break

cv2.destroyAllWindows()