"""Train (or load) an SVM on spectrogram images and label contours in an image.

Workflow:
1. Load the cached classifier from ``contour.joblib``; if that fails, train a
   new one from the images under ``organized_spectrograms/<class>/``.
2. Repeatedly prompt for an image, classify every contour bounding box whose
   area lies in the requested range, draw colour-coded boxes plus a legend,
   save the result to ``finalle.png`` and show it in a window.
"""
import os
import random
import zlib
from concurrent.futures import ThreadPoolExecutor

import cv2
import numpy as np
import pytesseract
from joblib import dump, load
from sklearn import svm
from tqdm import tqdm

pytesseract.pytesseract.tesseract_cmd = '/opt/homebrew/bin/tesseract'

DATA_ROOT = 'organized_spectrograms'
MODEL_PATH = 'contour.joblib'
# Sidecar holding the label-index -> directory mapping used at training time,
# so a cached model can be interpreted even if directories change later.
CLASSES_PATH = 'contour_classes.joblib'
# Passed to cv2.resize as dsize, i.e. (width, height). Training and inference
# must use the same value because the flattened pixels are the feature vector.
IMG_SIZE = (300, 700)

# Class directories; index i in this list is training label i.
dir_names = []


def _list_class_dirs(root=DATA_ROOT):
    """Return the class directories under *root* in a deterministic order.

    ``.DS_Store`` and any entry containing ``.py`` are skipped (as in the
    original filter), and non-directories are ignored. Sorting matters: the
    position in this list is the numeric label, and ``os.listdir`` order is
    unspecified.
    """
    return [
        root + '/' + d
        for d in sorted(os.listdir(root))
        if d != ".DS_Store" and '.py' not in d and os.path.isdir(os.path.join(root, d))
    ]


def _load_and_resize_image(path):
    """Read *path* as grayscale and resize to IMG_SIZE; None if unreadable."""
    img = cv2.imread(path, 0)
    if img is None:
        # cv2.imread signals failure by returning None rather than raising.
        return None
    return cv2.resize(img, IMG_SIZE)


def _class_color(name):
    """Map a class name to a BGR colour that is stable across runs.

    ``hash(str)`` is salted per process, so CRC32 is used instead.
    """
    value = zlib.crc32(name.encode('utf-8')) & 0xffffff
    return (value >> 16) & 0xff, (value >> 8) & 0xff, value & 0xff


def load_data(batch_size=100):
    """Yield ``(X, y)`` batches of flattened training images and labels.

    Sets the module-level ``dir_names`` as a side effect (when the generator
    is first advanced). A batch is emitted once at least *batch_size* samples
    have accumulated after finishing a directory; any remainder is emitted at
    the end. Unreadable image files are skipped.
    """
    global dir_names
    dir_names = _list_class_dirs()
    X, y = [], []
    for i, dir_name in tqdm(enumerate(dir_names), desc="Loading Directories", total=len(dir_names)):
        paths = [
            os.path.join(dir_name, f)
            for f in sorted(os.listdir(dir_name))
            if f.endswith(('.jpg', '.png'))
        ]
        with ThreadPoolExecutor(max_workers=20) as executor:
            images = [img for img in executor.map(_load_and_resize_image, paths) if img is not None]
        if not images:
            print(f'Error: No images found in {dir_name}')
            continue
        X.extend(img.flatten() for img in images)
        y.extend([i] * len(images))
        if len(X) >= batch_size:
            yield np.array(X), np.array(y)
            X, y = [], []
    if X and y:
        yield np.array(X), np.array(y)


def train_classifier_in_batches(batch_generator, batch_size=100):
    """Train an SVC on every batch from *batch_generator* and save it.

    ``SVC.fit`` is not incremental: each call discards the previous fit, so
    fitting per batch would keep only the last batch (and fail on a batch
    containing a single class). All batches are therefore collected and the
    model is fitted once. *batch_size* is unused and kept for compatibility.

    Returns the fitted classifier, which is also dumped to ``contour.joblib``.
    Raises ValueError if the generator produced no samples.
    """
    features, labels = [], []
    for X_batch, y_batch in tqdm(batch_generator, desc="Training SVM in Batches"):
        if X_batch.size > 0 and y_batch.size > 0:
            features.append(X_batch)
            labels.append(y_batch)
    if not features:
        raise ValueError('No training data found; cannot train the classifier')
    clf = svm.SVC()
    clf.fit(np.concatenate(features), np.concatenate(labels))
    dump(clf, MODEL_PATH)
    return clf


def _roi_scores(clf, roi):
    """Return one decision score per entry of ``clf.classes_`` for *roi*."""
    sample = cv2.resize(roi, IMG_SIZE).flatten().reshape(1, -1)
    raw = np.atleast_1d(clf.decision_function(sample)[0])
    if raw.size == 1 and len(clf.classes_) == 2:
        # Binary SVC returns a single signed margin (positive favours
        # classes_[1]); expand it to one score per class.
        raw = np.array([-raw[0], raw[0]])
    if raw.size != len(clf.classes_):
        raise ValueError('Classifier does not produce one score per class')
    return raw


def process_image(clf, dir_names, min_size, max_size, image_path, num_classes_to_detect):
    """Classify contour regions of an image and draw labelled boxes on it.

    Every contour whose bounding-box area is within ``[min_size, max_size]``
    is cropped, scored by *clf*, outlined in its best class's colour and
    written to ``<best class dir>_negative/<random>.png``.

    Returns ``(annotated_bgr_image, avg_scores, details)`` where *avg_scores*
    has one entry per *dir_names* item (sum of scores over the ROIs in which
    that class ranked in the top *num_classes_to_detect*, divided by the
    number of ROIs) and *details* lists one dict per drawn box.

    Raises FileNotFoundError if the image cannot be read and ValueError if
    the classifier's labels do not fit *dir_names*.
    """
    screenshot = cv2.imread(image_path)
    if screenshot is None:
        raise FileNotFoundError(f'Could not read image: {image_path}')
    screenshot_gray = cv2.cvtColor(screenshot, cv2.COLOR_BGR2GRAY)
    _, thresh = cv2.threshold(screenshot_gray, 127, 255, 0)
    # [-2] picks the contour list under both the 2-value (OpenCV 4) and
    # 3-value (OpenCV 3) return conventions of findContours.
    contours = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]

    # Columns of decision_function follow clf.classes_, which can skip label
    # indices (directories that had no images), so map columns explicitly.
    class_ids = np.asarray(clf.classes_, dtype=int)
    if class_ids.size and class_ids.max() >= len(dir_names):
        raise ValueError('Classifier labels do not match the known class directories')

    top_n = max(1, int(num_classes_to_detect))
    avg_scores = np.zeros(len(dir_names))
    num_rois, details = 0, []

    # Sequential on purpose: the work mutates shared state (image, counters),
    # and a discarded executor.map result would hide any exception.
    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)
        if not min_size <= w * h <= max_size:
            continue
        roi = screenshot_gray[y:y + h, x:x + w]
        scores = _roi_scores(clf, roi)

        for col in np.argsort(scores)[-top_n:]:
            avg_scores[class_ids[col]] += scores[col]
        num_rois += 1

        best_dir = dir_names[class_ids[int(np.argmax(scores))]]
        cv2.rectangle(screenshot, (x, y), (x + w, y + h), _class_color(best_dir), 2)
        details.append({'x': x, 'y': y, 'w': w, 'h': h, 'class': best_dir})

        negative_dir_name = f"{best_dir}_negative"
        os.makedirs(negative_dir_name, exist_ok=True)
        cv2.imwrite(os.path.join(negative_dir_name, str(random.randint(1, 999999999)) + '.png'), roi)

    if num_rois > 0:
        avg_scores /= num_rois
    else:
        print('Warning: No ROIs were processed')
    return screenshot, avg_scores, details


def draw_menu(screenshot, detected_classes, max_menu_width=200, item_height=30, font_scale=0.5):
    """Return *screenshot* with a colour legend appended on its right side.

    The legend has one row per entry of *detected_classes* (colour swatch and
    name). Its width is the smaller of *max_menu_width* and a quarter of the
    screenshot width; the output is tall enough for both image and legend.
    *screenshot* is expected to be a 3-channel image.
    """
    screenshot_height, screenshot_width = screenshot.shape[:2]
    menu_height = len(detected_classes) * item_height + 20  # 20 px of padding
    menu_width = min(max_menu_width, screenshot_width // 4)

    menu_img = np.zeros((menu_height, menu_width, 3), dtype=np.uint8)
    for i, class_name in enumerate(detected_classes):
        top = i * item_height
        cv2.rectangle(menu_img, (10, top + 10), (40, top + item_height), _class_color(class_name), -1)
        cv2.putText(menu_img, class_name, (50, top + item_height // 2 + 5),
                    cv2.FONT_HERSHEY_SIMPLEX, font_scale, (255, 255, 255), 1, cv2.LINE_AA)

    total_height = max(screenshot_height, menu_height)
    new_image = np.zeros((total_height, screenshot_width + menu_width, 3), dtype=np.uint8)
    new_image[:screenshot_height, :screenshot_width] = screenshot
    # Explicit start column: a negative slice (-menu_width:) would select the
    # whole row when menu_width is 0.
    new_image[:menu_height, screenshot_width:] = menu_img
    return new_image


def save_image(image, file_path):
    """Saves the given image to the specified file path."""
    cv2.imwrite(file_path, image)
    print(f"Image saved to {file_path}")


def display_image(image):
    """Save *image* to ``finalle.png``, show it, and return the key pressed."""
    save_image(image, 'finalle.png')
    cv2.imshow('Processed Image', image)
    key = cv2.waitKey(0)
    return key


def _load_or_train_model():
    """Return a fitted classifier and make sure ``dir_names`` is populated.

    Uses the cached model when it loads; otherwise trains from the image
    directories and records the class list next to the model.
    """
    global dir_names
    try:
        clf = load(MODEL_PATH)
    except Exception as exc:  # best effort: any load failure means retrain
        print(f'Could not load {MODEL_PATH} ({exc}); training a new model')
        clf = train_classifier_in_batches(load_data(batch_size=100))
        dump(dir_names, CLASSES_PATH)
        return clf
    try:
        dir_names = load(CLASSES_PATH)
    except Exception:
        # Model saved without the sidecar: fall back to the current listing.
        print(f'Warning: {CLASSES_PATH} not found; class names are taken from '
              f'the current contents of {DATA_ROOT}')
        dir_names = _list_class_dirs()
    return clf


def _prompt_int(prompt):
    """Ask until the user enters a valid integer."""
    while True:
        try:
            return int(input(prompt))
        except ValueError:
            print('Please enter a whole number.')


def main():
    """Interactive loop: prompt for parameters, process, display, repeat on 'r'."""
    clf = _load_or_train_model()
    while True:
        # At least one class; [-0:] would otherwise select every class.
        num_classes_to_detect = max(1, _prompt_int('Enter the number of top classes to detect: '))
        min_size = _prompt_int('Enter minimum size for ROIs (width*height): ')
        max_size = _prompt_int('Enter maximum size for ROIs (width*height): ')
        image_path = input('Enter the path of the image you want to process: ')
        try:
            screenshot, avg_scores, details = process_image(
                clf, dir_names, min_size, max_size, image_path, num_classes_to_detect)
        except (FileNotFoundError, ValueError) as exc:
            print(f'Error: {exc}')
            continue

        # Only display the detected classes
        top_indices = np.argsort(avg_scores)[-num_classes_to_detect:]
        detected_classes = [dir_names[i] for i in top_indices]
        # Use draw_menu's default sizing; passing 10/10 positionally would set
        # max_menu_width and item_height to 10 px and make the legend unreadable.
        screenshot = draw_menu(screenshot, detected_classes)
        key = display_image(screenshot)
        # Mask to the low byte: waitKey may set higher bits on some platforms.
        if key & 0xFF == ord('r'):
            continue
        break
    cv2.destroyAllWindows()


if __name__ == "__main__":
    main()