# File size: 7,324 Bytes
#
# 76da613

import os
import random
import zlib
from concurrent.futures import ThreadPoolExecutor

import cv2
import numpy as np
import pytesseract
from joblib import dump, load
from sklearn import svm
from tqdm import tqdm

# Point pytesseract at the Homebrew-installed binary only when it is actually
# present; otherwise pytesseract's own default command is left untouched so the
# script still works on machines without this path.
HOMEBREW_TESSERACT = '/opt/homebrew/bin/tesseract'
if os.path.isfile(HOMEBREW_TESSERACT):
    pytesseract.pytesseract.tesseract_cmd = HOMEBREW_TESSERACT

# Class directories, in label order. Rebound by load_data() once it starts
# iterating; index i of this list is the label i used for training.
dir_names = []

# Target size handed to cv2.resize for every training image and every ROI.
# cv2.resize interprets the tuple as (width, height). Training and inference
# must use the same value so the flattened feature vectors have equal length.
IMG_SHAPE = (300, 700)


def class_color(class_name):
    """Return a BGR colour for *class_name* that is stable across runs.

    The built-in ``hash`` of a string is salted per interpreter process, so
    colours derived from it change every time the script is started. A CRC32
    of the UTF-8 encoded name yields the same 24-bit value on every run.

    Returns:
        A 3-tuple of ints, each in ``0..255``.
    """
    digest = zlib.crc32(class_name.encode('utf-8')) & 0xffffff
    return (digest >> 16) & 0xff, (digest >> 8) & 0xff, digest & 0xff


def expand_decision_scores(raw_scores, n_classes):
    """Return one score per class as a 1-D float array.

    For a two-class model ``SVC.decision_function`` yields a single signed
    value per sample (positive favours ``classes_[1]``) instead of one column
    per class. That value ``s`` is expanded to ``[-s, s]`` so callers can
    always index scores by class column.
    """
    scores = np.atleast_1d(np.asarray(raw_scores, dtype=float))
    if scores.size == 1 and n_classes == 2:
        return np.array([-scores[0], scores[0]])
    return scores


def _read_training_image(path):
    """Read *path* as grayscale and resize it to IMG_SHAPE; None if unreadable."""
    image = cv2.imread(path, 0)
    if image is None:
        # cv2.imread signals failure by returning None rather than raising.
        print(f'Warning: could not read image {path}')
        return None
    return cv2.resize(image, IMG_SHAPE)


def load_data(batch_size=100):
    """Yield ``(X, y)`` training batches read from ``organized_spectrograms/``.

    Every sub-directory is one class; its position in the (sorted) directory
    listing is the integer label. As a side effect the module-level
    ``dir_names`` list is rebound to the class directories as soon as the
    generator starts running.

    Args:
        batch_size: Minimum number of samples to collect before a batch is
            yielded. Whole directories are added at a time, so a batch can be
            larger than this.

    Yields:
        Tuples ``(X, y)`` where each row of ``X`` is a flattened grayscale
        image and ``y`` holds the matching labels.
    """
    global dir_names
    root = 'organized_spectrograms'
    # Sorted so that label numbers do not depend on os.listdir ordering;
    # non-directories are skipped because they cannot be listed below.
    dir_names = [
        'organized_spectrograms/' + d
        for d in sorted(os.listdir(root))
        if d != ".DS_Store" and '.py' not in d and os.path.isdir(os.path.join(root, d))
    ]
    X, y = [], []

    # One pool for all directories instead of a fresh pool per directory.
    with ThreadPoolExecutor(max_workers=20) as executor:
        for i, dir_name in tqdm(enumerate(dir_names), desc="Loading Directories", total=len(dir_names)):
            paths = [
                os.path.join(dir_name, f)
                for f in os.listdir(dir_name)
                if f.endswith(('.jpg', '.png'))
            ]
            images = [img for img in executor.map(_read_training_image, paths) if img is not None]

            if not images:
                print(f'Error: No images found in {dir_name}')
                continue

            X.extend(img.flatten() for img in images)
            y.extend([i] * len(images))

            if len(X) >= batch_size:
                yield np.array(X), np.array(y)
                X, y = [], []

    if X:
        yield np.array(X), np.array(y)


def train_classifier_in_batches(batch_generator, batch_size=100):
    """Train an SVM on every batch from *batch_generator* and save it.

    ``SVC.fit`` starts from scratch on each call, so fitting batch by batch
    would keep only the last batch (and fails outright on a batch that
    contains a single class). The batches are therefore concatenated and the
    classifier is fitted once on the full data set.

    Args:
        batch_generator: Iterable of ``(X, y)`` array pairs.
        batch_size: Unused; kept so existing callers keep working.

    Returns:
        The fitted classifier, which is also written to ``contour.joblib``.

    Raises:
        ValueError: If the generator produced no samples at all.
    """
    features, labels = [], []
    for X_batch, y_batch in tqdm(batch_generator, desc="Training SVM in Batches"):
        if X_batch.size > 0 and y_batch.size > 0:
            features.append(X_batch)
            labels.append(y_batch)

    if not features:
        raise ValueError('No training data was produced; cannot train the classifier')

    clf = svm.SVC()
    clf.fit(np.concatenate(features), np.concatenate(labels))

    dump(clf, 'contour.joblib')
    return clf


def process_image(clf, dir_names, min_size, max_size, image_path, num_classes_to_detect):
    """Classify contour regions of an image and draw a box around each one.

    Each contour whose bounding-box area lies in ``[min_size, max_size]`` is
    cropped from the grayscale image, resized to IMG_SHAPE and scored with
    ``clf.decision_function``. The crop is also written as a PNG into
    ``<best class dir>_negative/``.

    Args:
        clf: Fitted classifier exposing ``decision_function`` and ``classes_``.
        dir_names: Class directories; the labels in ``clf.classes_`` index
            into this list.
        min_size: Smallest accepted bounding-box area (width * height).
        max_size: Largest accepted bounding-box area (width * height).
        image_path: Path of the image to analyse.
        num_classes_to_detect: Number of best-scoring classes per region that
            contribute to the averages (0 selects every class, because the
            slice ``[-0:]`` is the whole array).

    Returns:
        ``(annotated_image, avg_scores, details)``: the BGR image with boxes
        drawn on it, the per-class score sums divided by the number of
        processed regions, and one dict per box with the keys ``x``, ``y``,
        ``w``, ``h``, ``class`` and ``score``.

    Raises:
        FileNotFoundError: If the image cannot be read.
        ValueError: If the classifier's labels do not fit into *dir_names*.
    """
    screenshot = cv2.imread(image_path)
    if screenshot is None:
        raise FileNotFoundError(f'Could not read image: {image_path}')
    screenshot_gray = cv2.cvtColor(screenshot, cv2.COLOR_BGR2GRAY)

    _, thresh = cv2.threshold(screenshot_gray, 127, 255, cv2.THRESH_BINARY)
    # findContours returns (contours, hierarchy) or (image, contours,
    # hierarchy) depending on the OpenCV version; [-2] is the contour list
    # in both layouts.
    contours = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]

    # Column k of the decision function belongs to label clf.classes_[k],
    # which differs from k whenever a directory was skipped during training.
    class_ids = np.asarray(clf.classes_).astype(int)
    if class_ids.size and class_ids.max() >= len(dir_names):
        raise ValueError(
            f'Classifier knows label {class_ids.max()} but only '
            f'{len(dir_names)} class directories were supplied'
        )

    def score_contour(contour):
        """Return ((x, y, w, h), roi, scores) for an accepted contour, else None."""
        x, y, w, h = cv2.boundingRect(contour)
        if not min_size <= w * h <= max_size:
            return None
        roi = screenshot_gray[y:y + h, x:x + w]
        sample = cv2.resize(roi, IMG_SHAPE).reshape(1, -1)
        scores = expand_decision_scores(clf.decision_function(sample)[0], class_ids.size)
        return (x, y, w, h), roi, scores

    # Workers only compute; list() forces completion and re-raises any worker
    # exception. All shared state is updated below on the calling thread.
    with ThreadPoolExecutor(max_workers=5) as executor:
        results = list(executor.map(score_contour, contours))

    avg_scores = np.zeros(len(dir_names))
    num_rois, details = 0, []

    for result in results:
        if result is None:
            continue
        (x, y, w, h), roi, scores = result

        top_columns = np.argsort(scores)[-num_classes_to_detect:]
        avg_scores[class_ids[top_columns]] += scores[top_columns]
        num_rois += 1

        best_column = int(np.argmax(scores))
        best_dir = dir_names[class_ids[best_column]]
        cv2.rectangle(screenshot, (x, y), (x + w, y + h), class_color(best_dir), 2)
        details.append({
            'x': x, 'y': y, 'w': w, 'h': h,
            'class': best_dir, 'score': float(scores[best_column]),
        })

        negative_dir_name = f"{best_dir}_negative"
        os.makedirs(negative_dir_name, exist_ok=True)
        cv2.imwrite(os.path.join(negative_dir_name, str(random.randint(1, 999999999)) + '.png'), roi)

    if num_rois > 0:
        avg_scores /= num_rois
    else:
        print('Warning: No ROIs were processed')

    return screenshot, avg_scores, details


def draw_menu(screenshot, detected_classes, max_menu_width=200, item_height=30, font_scale=0.5):
    """Return *screenshot* with a colour legend attached on its right side.

    Args:
        screenshot: BGR image (a 2-D grayscale image is converted to BGR).
        detected_classes: Class names to list, one legend row each.
        max_menu_width: Upper bound for the legend width in pixels; the
            legend is never wider than a quarter of the screenshot.
        item_height: Height of one legend row in pixels.
        font_scale: Font scale passed to cv2.putText.

    Returns:
        A new image containing the screenshot and, to its right, the legend.
    """
    if screenshot.ndim == 2:
        screenshot = cv2.cvtColor(screenshot, cv2.COLOR_GRAY2BGR)
    screenshot_height, screenshot_width = screenshot.shape[:2]

    menu_height = len(detected_classes) * item_height + 20  # 20 px of padding
    # At least one pixel wide so the legend image is never empty.
    menu_width = max(1, min(max_menu_width, screenshot_width // 4))
    menu_img = np.zeros((menu_height, menu_width, 3), dtype=np.uint8)

    for row, class_name in enumerate(detected_classes):
        top = row * item_height
        # The colour comes from the full class name so it matches the boxes
        # drawn by process_image.
        cv2.rectangle(menu_img, (10, top + 10), (40, top + item_height), class_color(class_name), -1)
        # Only the last path component is shown: the directory prefix is the
        # same for every class and wastes the limited legend width.
        label = os.path.basename(os.path.normpath(class_name)) or class_name
        cv2.putText(menu_img, label, (50, top + item_height // 2 + 5), cv2.FONT_HERSHEY_SIMPLEX, font_scale, (255, 255, 255), 1, cv2.LINE_AA)

    total_height = max(screenshot_height, menu_height)
    new_image = np.zeros((total_height, screenshot_width + menu_width, 3), dtype=np.uint8)
    new_image[:screenshot_height, :screenshot_width] = screenshot
    # Explicit start column: a negative slice such as [-menu_width:] would
    # select the whole row if the width were ever 0.
    new_image[:menu_height, screenshot_width:] = menu_img

    return new_image


# Reuse a previously trained model when one is cached on disk. The functions
# above are defined regardless of the outcome, so later code can always train.
try:
    clf = load('contour.joblib')
except Exception as exc:  # missing, corrupt or incompatible cache file
    print(f'No usable cached model in contour.joblib ({exc!r})')
    clf = None


def save_image(image, file_path):
    """Write *image* to *file_path* with OpenCV and report the outcome.

    ``cv2.imwrite`` reports an unsuccessful write through its boolean return
    value, so that value is checked instead of always claiming success.

    Returns:
        True if the image was written, False otherwise.
    """
    saved = bool(cv2.imwrite(file_path, image))
    if saved:
        print(f"Image saved to {file_path}")
    else:
        print(f"Failed to save image to {file_path}")
    return saved


def display_image(image, file_path='finalle.png', window_name='Processed Image'):
    """Save *image*, show it in a window and wait for a key press.

    Args:
        image: Image to save and display.
        file_path: Where the image is written before it is shown.
        window_name: Title of the OpenCV window.

    Returns:
        The key code returned by ``cv2.waitKey(0)``.
    """
    save_image(image, file_path)
    cv2.imshow(window_name, image)
    try:
        # A delay of 0 blocks until a key is pressed.
        return cv2.waitKey(0)
    finally:
        # Close the window once the key has been read; otherwise it stays on
        # screen without being serviced while the caller does other work.
        cv2.destroyAllWindows()



def prompt_int(prompt, minimum=0):
    """Ask with *prompt* until the user enters an integer >= *minimum*."""
    while True:
        try:
            value = int(input(prompt))
        except ValueError:
            print('Please enter a whole number.')
            continue
        if value >= minimum:
            return value
        print(f'Please enter a value of at least {minimum}.')


def select_top_classes(avg_scores, class_names, count):
    """Return the names of the *count* best-scoring classes, best first.

    Args:
        avg_scores: One score per entry of *class_names*.
        class_names: Class names, indexed like *avg_scores*.
        count: How many names to return; values <= 0 give an empty list.
    """
    if count <= 0:
        return []
    best_first = np.argsort(avg_scores)[::-1][:count]
    return [class_names[i] for i in best_first]


def main():
    """Train the classifier, then annotate images chosen interactively."""
    # Load data in batches and train the classifier. The model is retrained
    # on every run and then read back from the file the training step wrote.
    batch_generator = load_data(batch_size=100)  # Adjust batch size as needed
    train_classifier_in_batches(batch_generator)
    clf = load('contour.joblib')

    while True:
        num_classes_to_detect = prompt_int('Enter the number of top classes to detect: ', minimum=1)
        min_size = prompt_int('Enter minimum size for ROIs (width*height): ')
        max_size = prompt_int('Enter maximum size for ROIs (width*height): ')
        image_path = input('Enter the path of the image you want to process: ')
        # dir_names is the module-level list that load_data() fills in.
        screenshot, avg_scores, _ = process_image(clf, dir_names, min_size, max_size, image_path, num_classes_to_detect)

        # Only display the detected classes
        detected_classes = select_top_classes(avg_scores, dir_names, num_classes_to_detect)

        # draw_menu's third and fourth parameters are max_menu_width and
        # item_height; its defaults are used rather than passing unrelated
        # values positionally.
        screenshot = draw_menu(screenshot, detected_classes)
        key = display_image(screenshot)

        # Compare only the low byte of the key code.
        # NOTE(review): some HighGUI backends reportedly set modifier bits
        # above it; confirm for the OpenCV build in use.
        if (key & 0xFF) != ord('r'):
            break


if __name__ == '__main__':
    main()