|
|
import os |
|
|
import cv2 |
|
|
import numpy as np |
|
|
import pickle |
|
|
import sys |
|
|
import threading |
|
|
import concurrent.futures |
|
|
|
|
|
from sklearn.model_selection import train_test_split |
|
|
from sklearn.svm import SVC |
|
|
from sklearn.metrics import classification_report, confusion_matrix |
|
|
|
|
|
|
|
|
from feature_extractor import extract_features_from_image |
|
|
|
|
|
# Append-only progress log for the training run.  train_model_thread appends
# status lines here while running on a background thread so another thread
# (e.g. a UI) can poll progress.  NOTE: plain `+=` on a str is not guarded by
# a lock; fine for a single writer, as used here.
training_log = ""
|
|
|
|
|
|
|
|
def process_image(file_path, class_name):
    """Load a single image from disk and convert it to a training sample.

    Parameters
    ----------
    file_path : str
        Path of the image file to read.
    class_name : str
        Label associated with the image's class subfolder.

    Returns
    -------
    tuple
        ``(feature_vector, class_name)`` on success, or ``(None, None)``
        when the file cannot be decoded as an image.
    """
    img = cv2.imread(file_path, cv2.IMREAD_COLOR)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising.
        print(f"Warning: Could not read {file_path}")
        return None, None

    extracted = extract_features_from_image(img)
    return extracted['combined_features'], class_name
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def load_dataset(dataset_folder, max_workers=4):
    """Build feature/label arrays from a folder-per-class image dataset.

    ``dataset_folder`` must contain one subdirectory per class, each holding
    image files.  Images are read and featurized concurrently on a thread
    pool of ``max_workers`` workers.

    Parameters
    ----------
    dataset_folder : str
        Root folder whose immediate subdirectories name the classes.
    max_workers : int, optional
        Size of the thread pool used per class.

    Returns
    -------
    X : np.ndarray (float32)
        One feature vector per successfully read image.
    y : np.ndarray
        Class label for each row of X.
    classes : list of str
        Class subdirectory names, in directory-listing order.
    """
    valid_exts = ('.jpg', '.jpeg', '.png', '.bmp', '.tiff')
    X, y, classes = [], [], []

    print(f"Scanning dataset folder: {dataset_folder}")
    for class_name in os.listdir(dataset_folder):
        class_path = os.path.join(dataset_folder, class_name)
        if not os.path.isdir(class_path):
            # Skip stray files sitting next to the class folders.
            continue
        classes.append(class_name)

        print(f"\nProcessing class: {class_name}")
        image_files = [
            name for name in os.listdir(class_path)
            if name.lower().endswith(valid_exts)
        ]
        total_images = len(image_files)
        image_count = 0

        with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
            future_to_file = {
                executor.submit(
                    process_image,
                    os.path.join(class_path, name),
                    class_name,
                ): name
                for name in image_files
            }

            for future in concurrent.futures.as_completed(future_to_file):
                features, label = future.result()
                if features is not None:
                    X.append(features)
                    y.append(label)
                # Progress counts every completed file, readable or not.
                image_count += 1
                print(
                    f"\rProcessed {image_count}/{total_images} images in '{class_name}'",
                    end="", flush=True
                )

        print(f"\nCompleted class: {class_name} with {image_count} images.")

    X = np.array(X, dtype=np.float32)
    y = np.array(y)
    print(f"Finished loading dataset. Total classes: {len(classes)}. Total images: {len(X)}.")

    return X, y, classes
|
|
|
|
|
|
|
|
from sklearn.svm import LinearSVC |
|
|
from sklearn.ensemble import BaggingClassifier |
|
|
from sklearn.metrics import classification_report, confusion_matrix |
|
|
from sklearn.model_selection import train_test_split |
|
|
from sklearn.preprocessing import StandardScaler |
|
|
from sklearn.pipeline import make_pipeline |
|
|
|
|
|
from sklearn.ensemble import VotingClassifier, BaggingClassifier, RandomForestClassifier |
|
|
from sklearn.pipeline import make_pipeline |
|
|
from sklearn.svm import LinearSVC |
|
|
from sklearn.preprocessing import StandardScaler |
|
|
from sklearn.model_selection import train_test_split |
|
|
from sklearn.metrics import classification_report, confusion_matrix |
|
|
|
|
|
def _report(title, clf, X_test, y_test):
    """Print *title*, then the classification report and confusion matrix
    for a fitted classifier on the held-out test split."""
    print(title)
    y_pred = clf.predict(X_test)
    print(classification_report(y_test, y_pred))
    print("Confusion Matrix:")
    print(confusion_matrix(y_test, y_pred))


def train_classifiers(X, y):
    """
    Splits data into training/test sets, trains:
      - A Bagging ensemble of LinearSVC classifiers (with scaling)
      - A RandomForestClassifier
      - A VotingClassifier that combines both

    Parameters
    ----------
    X : np.ndarray
        Feature matrix, one row per sample.
    y : np.ndarray
        Class labels aligned with X.

    Returns:
        models (dict): A dictionary with keys 'svm', 'rf', 'combined'
        test_data (tuple): (X_test, y_test)
    """
    # stratify=y keeps each class's proportion identical in both splits;
    # without it, per-class metrics on imbalanced datasets are unreliable.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=31, shuffle=True, stratify=y
    )

    # Scale inside the pipeline so each bagged estimator fits its own
    # scaler on its own bootstrap sample (no test-set leakage).
    svm_pipeline = make_pipeline(StandardScaler(),
                                 LinearSVC(random_state=31))

    svm_ensemble = BaggingClassifier(
        estimator=svm_pipeline,
        n_estimators=10,
        n_jobs=-1,
        verbose=1
    )

    print("Training SVM ensemble classifier...")
    svm_ensemble.fit(X_train, y_train)
    _report("\nSVM Ensemble Classification Report:", svm_ensemble, X_test, y_test)

    rf = RandomForestClassifier(
        n_estimators=100,
        random_state=31,
        n_jobs=-1
    )

    print("\nTraining RandomForest classifier...")
    rf.fit(X_train, y_train)
    _report("\nRandomForest Classification Report:", rf, X_test, y_test)

    # NOTE(review): LinearSVC has no predict_proba, so BaggingClassifier
    # falls back to per-estimator vote fractions for its probabilities.
    # 'soft' voting therefore averages real RF probabilities with coarse
    # SVM vote shares -- confirm this mixing is intended.
    combined_clf = VotingClassifier(
        estimators=[('svm', svm_ensemble), ('rf', rf)],
        voting='soft'
    )

    print("\nTraining Combined Voting classifier...")
    combined_clf.fit(X_train, y_train)
    _report("\nCombined Voting Classifier Report:", combined_clf, X_test, y_test)

    models = {
        'svm': svm_ensemble,
        'rf': rf,
        'combined': combined_clf
    }

    return models, (X_test, y_test)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def train_model_thread(dataset_folder, model_filename, max_workers=4):
    """Load the dataset, train the classifiers, and pickle the result.

    Designed to run on a background thread: progress (and now failures)
    are appended to the module-level ``training_log`` string so another
    thread can poll it.

    Parameters
    ----------
    dataset_folder : str
        Folder containing one subdirectory of images per class.
    model_filename : str
        Destination path for the pickled ``{'models', 'class_names'}`` dict.
    max_workers : int, optional
        Thread-pool size used while loading/featurizing images.
    """
    global training_log

    training_log += "Starting training...\n"
    print("Starting training...")

    try:
        X, y, classes = load_dataset(dataset_folder, max_workers=max_workers)
        models, _ = train_classifiers(X, y)
    except Exception as exc:
        # An exception on a worker thread dies with the thread; record it
        # in training_log so pollers see the failure instead of hanging.
        training_log += f"Training failed: {exc}\n"
        print(f"Training failed: {exc}")
        raise

    print("Training complete.")
    training_log += "Training complete.\n"

    model_data = {'models': models, 'class_names': classes}
    with open(model_filename, "wb") as f:
        pickle.dump(model_data, f)
    training_log += f"Model saved to {model_filename}\n"
    print(f"Model saved to {model_filename}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Dataset lives one directory above this script.
    dataset_folder = "./../images_dataset"
    model_filename = "svm_rf_combined.pkl"

    # Thread-pool size for image loading/feature extraction; image reads
    # are I/O-bound so this can exceed the CPU count.
    max_workers = 32

    # NOTE(review): starting the thread and immediately join()-ing it is
    # functionally equivalent to calling train_model_thread directly;
    # presumably kept so the same entry point matches a UI-driven,
    # non-blocking usage elsewhere -- confirm.
    training_thread = threading.Thread(
        target=train_model_thread,
        args=(dataset_folder, model_filename, max_workers)
    )
    training_thread.start()

    training_thread.join()
|
|
|