File size: 7,324 Bytes
76da613 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 |
import cv2
import numpy as np
from sklearn import svm
import os
import random
from concurrent.futures import ThreadPoolExecutor
from joblib import dump, load
import pytesseract
from tqdm import tqdm
# Hard-coded location of the tesseract executable that pytesseract should invoke.
pytesseract.pytesseract.tesseract_cmd = '/opt/homebrew/bin/tesseract'

# Class directory paths; a class's integer label is its position in this list.
dir_names = []

# Best-effort load of a previously trained classifier.
# NOTE: joblib.load unpickles the file, so only load model files you trust.
try:
    clf = load('contour.joblib')
except Exception as exc:
    # A missing or unreadable cache is not fatal: the model can be trained
    # again.  Catching Exception (rather than a bare ``except``) keeps
    # Ctrl-C and SystemExit working.
    print(f'Could not load contour.joblib ({exc}); a model has to be trained')
    clf = None
def load_data(batch_size=100):
    """Yield ``(X, y)`` training batches read from 'organized_spectrograms'.

    Every sub-directory is one class; its label is its index in the global
    ``dir_names`` list, which this generator (re)builds when iteration
    starts.  Images are read as grayscale, resized and flattened to one row
    each.  A batch is emitted once at least *batch_size* rows have been
    collected, checked after each directory, so batches may be larger than
    *batch_size*.

    Args:
        batch_size: minimum number of rows to accumulate before yielding.

    Yields:
        Tuple of two NumPy arrays: the flattened images and their labels.
    """
    global dir_names
    # cv2.resize takes the target size as (width, height).
    img_shape = (300, 700)
    root = 'organized_spectrograms'
    # Sorted so that label numbers map to the same directories on every run
    # (os.listdir order is arbitrary); plain files are skipped because they
    # cannot be listed as class directories.
    dir_names = sorted(
        'organized_spectrograms/' + d
        for d in os.listdir(root)
        if d != ".DS_Store" and '.py' not in d
        and os.path.isdir(os.path.join(root, d))
    )
    X, y = [], []

    def load_and_resize_image(path):
        # cv2.imread signals an unreadable file by returning None.
        img = cv2.imread(path, 0)
        if img is None:
            print(f'Warning: could not read image {path}')
            return None
        return cv2.resize(img, img_shape)

    for i, dir_name in tqdm(enumerate(dir_names), desc="Loading Directories", total=len(dir_names)):
        # Full paths are built here so the worker does not depend on the
        # loop variable.
        paths = [
            os.path.join(dir_name, f)
            for f in os.listdir(dir_name)
            if f.endswith(('.jpg', '.png'))
        ]
        with ThreadPoolExecutor(max_workers=20) as executor:
            images = [
                img for img in executor.map(load_and_resize_image, paths)
                if img is not None
            ]
        if not images:
            print(f'Error: No images found in {dir_name}')
            continue
        X.extend(img.flatten() for img in images)
        y.extend([i] * len(images))
        if len(X) >= batch_size:
            yield np.array(X), np.array(y)
            X, y = [], []
    # Emit whatever is left over after the last directory.
    if X:
        yield np.array(X), np.array(y)
def train_classifier_in_batches(batch_generator, batch_size=100):
    """Train an SVM on all batches and save it to 'contour.joblib'.

    ``SVC.fit`` trains from scratch on every call, so fitting once per batch
    would keep only the last batch (and fails on a batch that contains a
    single class).  The batches are therefore gathered first and the
    classifier is fitted once on their concatenation.

    Args:
        batch_generator: iterable of ``(X, y)`` NumPy array pairs.
        batch_size: unused; kept so existing callers keep working.

    Raises:
        ValueError: if the generator produced no non-empty batch.
    """
    feature_batches, label_batches = [], []
    for X_batch, y_batch in tqdm(batch_generator, desc="Training SVM in Batches"):
        if X_batch.size > 0 and y_batch.size > 0:
            feature_batches.append(X_batch)
            label_batches.append(y_batch)
    if not feature_batches:
        # Saving an unfitted model would only fail later, at prediction time.
        raise ValueError('No training data was produced; cannot train the classifier')
    clf = svm.SVC()
    clf.fit(np.concatenate(feature_batches), np.concatenate(label_batches))
    dump(clf, 'contour.joblib')
def _score_contour(clf, screenshot_gray, contour, min_size, max_size, img_shape):
    """Score one contour's bounding box; return None if its area is out of range."""
    x, y, w, h = cv2.boundingRect(contour)
    if not min_size <= w * h <= max_size:
        return None
    roi = screenshot_gray[y:y + h, x:x + w]
    # Resize the ROI to match the training shape
    roi_resized = cv2.resize(roi, img_shape).flatten().reshape(1, -1)
    scores = np.atleast_1d(clf.decision_function(roi_resized)[0])
    if scores.size == 1:
        # A two-class model returns a single signed value (positive favours
        # the second class); expand it to one score per class.
        scores = np.array([-scores[0], scores[0]])
    return x, y, w, h, roi, scores


def process_image(clf, dir_names, min_size, max_size, image_path, num_classes_to_detect):
    """Classify contour regions of an image and draw a box around each one.

    The image is thresholded, its contours are extracted, and every contour
    whose bounding-box area lies in ``[min_size, max_size]`` is scored with
    *clf*.  Each such region is outlined in a colour derived from its best
    class name and its crop is written to ``<class dir>_negative/``.

    Args:
        clf: fitted classifier exposing ``decision_function`` and ``classes_``.
        dir_names: class directory names, indexed by class label.
        min_size: smallest accepted bounding-box area (width * height).
        max_size: largest accepted bounding-box area (width * height).
        image_path: path of the image to analyse.
        num_classes_to_detect: how many top-scoring classes of each region
            contribute to the accumulated scores.

    Returns:
        ``(image, avg_scores, details)``: the annotated BGR image, the
        per-class score sums divided by the number of scored regions, and a
        list with one ``{'x', 'y', 'w', 'h', 'class'}`` dict per drawn box.

    Raises:
        OSError: if the image cannot be read.
    """
    img_shape = (300, 700)  # This is the shape used during training
    # Load the image from the file
    screenshot = cv2.imread(image_path)
    if screenshot is None:
        # cv2.imread returns None instead of raising when reading fails.
        raise OSError(f'Could not read image: {image_path}')
    screenshot_gray = cv2.cvtColor(screenshot, cv2.COLOR_BGR2GRAY)
    _, thresh = cv2.threshold(screenshot_gray, 127, 255, 0)
    contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    avg_scores = np.zeros(len(dir_names))
    # Column j of the decision scores belongs to label clf.classes_[j], which
    # is the index into dir_names.
    class_ids = [int(label) for label in clf.classes_]
    num_rois, details = 0, []

    def score(contour):
        return _score_contour(clf, screenshot_gray, contour, min_size, max_size, img_shape)

    # Only the scoring runs in the pool.  Collecting the results re-raises
    # any worker exception here and keeps all shared state on this thread.
    with ThreadPoolExecutor(max_workers=5) as executor:
        results = list(executor.map(score, contours))

    for result in results:
        if result is None:
            continue
        x, y, w, h, roi, scores = result
        # Get the indices of the top-N classes
        for col in np.argsort(scores)[-num_classes_to_detect:]:
            avg_scores[class_ids[col]] += scores[col]
        num_rois += 1
        color_str = dir_names[class_ids[int(np.argmax(scores))]]
        # NOTE: str hashes change between interpreter runs unless
        # PYTHONHASHSEED is fixed, so colours are stable only within a run.
        color_hash = hash(color_str) & 0xffffff
        color_bgr = ((color_hash >> 16) & 0xff), ((color_hash >> 8) & 0xff), (color_hash & 0xff)
        cv2.rectangle(screenshot, (x, y), (x + w, y + h), color_bgr, 2)
        details.append({'x': x, 'y': y, 'w': w, 'h': h, 'class': color_str})
        negative_dir_name = f"{color_str}_negative"
        os.makedirs(negative_dir_name, exist_ok=True)
        cv2.imwrite(os.path.join(negative_dir_name, str(random.randint(1, 999999999)) + '.png'), roi)

    if num_rois > 0:
        avg_scores /= num_rois
    else:
        print('Warning: No ROIs were processed')
    return screenshot, avg_scores, details
def draw_menu(screenshot, detected_classes, max_menu_width=200, item_height=30, font_scale=0.5):
    """Return a copy of *screenshot* with a colour legend attached on the right.

    Args:
        screenshot: BGR image array to extend.
        detected_classes: class names to list, one legend row each.
        max_menu_width: upper bound for the legend width in pixels; the
            legend is never wider than a quarter of the screenshot.
        item_height: height of one legend row in pixels.
        font_scale: font scale passed to ``cv2.putText``.

    Returns:
        A new image holding the screenshot on the left and the legend in the
        top-right corner.
    """
    shot_h, shot_w = screenshot.shape[:2]

    # Legend size: one row per class plus 20 px of padding, width capped.
    legend_w = min(max_menu_width, shot_w // 4)
    legend_h = len(detected_classes) * item_height + 20
    legend = np.zeros((legend_h, legend_w, 3), dtype=np.uint8)

    white = (255, 255, 255)
    for row, class_name in enumerate(detected_classes):
        row_top = row * item_height
        # The swatch colour is the low 24 bits of the name's hash, split
        # into three 8-bit channels.
        digest = hash(class_name) & 0xffffff
        swatch = (digest >> 16) & 0xff, (digest >> 8) & 0xff, digest & 0xff
        cv2.rectangle(legend, (10, row_top + 10), (40, row_top + item_height), swatch, -1)
        text_origin = (50, row_top + item_height // 2 + 5)
        cv2.putText(legend, class_name, text_origin, cv2.FONT_HERSHEY_SIMPLEX, font_scale, white, 1, cv2.LINE_AA)

    # Black canvas large enough for the screenshot and the legend side by side.
    canvas = np.zeros((max(shot_h, legend_h), shot_w + legend_w, 3), dtype=np.uint8)
    canvas[:shot_h, :shot_w] = screenshot
    canvas[:legend_h, -legend_w:] = legend
    return canvas
def save_image(image, file_path):
    """Saves the given image to the specified file path.

    ``cv2.imwrite`` can return False without raising when the file could not
    be written, so success is only reported when it returns True.
    """
    if cv2.imwrite(file_path, image):
        print(f"Image saved to {file_path}")
    else:
        print(f"Failed to save image to {file_path}")
def display_image(image):
    """Save *image* as 'finalle.png', show it in a window and return the key code pressed."""
    save_image(image, 'finalle.png')
    window_title = 'Processed Image'
    cv2.imshow(window_title, image)
    # A delay of 0 makes waitKey block until a key is pressed.
    return cv2.waitKey(0)
# Reuse the saved model when there is one; otherwise load the data in batches,
# train the classifier and load the model that training wrote to disk.
if not os.path.exists('contour.joblib'):
    batch_generator = load_data(batch_size=100)  # Adjust batch size as needed
    train_classifier_in_batches(batch_generator)
# Load the trained model
clf = load('contour.joblib')

if not dir_names:
    # dir_names is only filled while training data is being read, so rebuild
    # it when the model came from disk.
    # NOTE(review): the order must match the label order used when the saved
    # model was trained - confirm before relying on the class names.
    dir_names = sorted(
        'organized_spectrograms/' + d
        for d in os.listdir('organized_spectrograms')
        if d != ".DS_Store" and '.py' not in d
        and os.path.isdir(os.path.join('organized_spectrograms', d))
    )

while True:
    # A count below 1 would make the [-n:] slices select every class.
    num_classes_to_detect = max(1, int(input('Enter the number of top classes to detect: ')))
    min_size = int(input('Enter minimum size for ROIs (width*height): '))
    max_size = int(input('Enter maximum size for ROIs (width*height): '))
    image_path = input('Enter the path of the image you want to process: ')
    screenshot, avg_scores, details = process_image(clf, dir_names, min_size, max_size, image_path, num_classes_to_detect)
    # Only display the detected classes
    top_indices = np.argsort(avg_scores)[-num_classes_to_detect:]
    detected_classes = [dir_names[i] for i in top_indices]
    # Use draw_menu's default layout: the former positional 10, 10 arguments
    # were bound to max_menu_width and item_height and gave a 10 px legend.
    screenshot = draw_menu(screenshot, detected_classes)
    key = display_image(screenshot)
    # 'r' processes another image; any other key ends the program.
    if key != ord('r'):
        break
|