import os
import pickle
import threading
import concurrent.futures

import cv2
import numpy as np
from sklearn.ensemble import BaggingClassifier, RandomForestClassifier, VotingClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC

# Import the feature extraction function (from feature_extractor.py)
from feature_extractor import extract_features_from_image
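
# Assumed contract (feature_extractor.py is not shown here):
# extract_features_from_image takes a BGR image array and returns a dict
# whose 'combined_features' entry is a fixed-length 1-D numeric vector,
# e.g. something along the lines of:
#
#   def extract_features_from_image(image):
#       hist = colour_histogram(image)       # hypothetical helpers
#       texture = texture_descriptor(image)
#       return {'combined_features': np.concatenate([hist, texture])}
#
# Every image must yield a vector of the same length so that np.array(X)
# in load_dataset() produces a 2-D feature matrix.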
training_log = ""  # Status buffer appended to by the training thread (e.g. for a UI to poll)
# Helper function for parallel processing
def process_image(file_path, class_name):
    """
    Reads the image, extracts combined features, and returns (features, label).
    Returns (None, None) if image reading fails.
    """
    image = cv2.imread(file_path, cv2.IMREAD_COLOR)
    if image is None:
        print(f"Warning: Could not read {file_path}")
        return None, None
    feats = extract_features_from_image(image)
    return feats['combined_features'], class_name
# ---------------------------------------------------------------------
# 1. Data Loading with Parallel Feature Extraction
# ---------------------------------------------------------------------
def load_dataset(dataset_folder, max_workers=4):
    """
    Expects dataset_folder to contain subfolders (one per class).
    Each subfolder has images of that class. This function:
      - Reads each image (in parallel)
      - Extracts a feature vector
      - Returns arrays of feature vectors (X) and labels (y).
    """
    X = []
    y = []
    classes = []  # list of class names
    print(f"Scanning dataset folder: {dataset_folder}")
    for class_name in os.listdir(dataset_folder):
        class_path = os.path.join(dataset_folder, class_name)
        if not os.path.isdir(class_path):
            continue
        classes.append(class_name)
        print(f"\nProcessing class: {class_name}")
        image_files = [
            f for f in os.listdir(class_path)
            if f.lower().endswith(('.jpg', '.jpeg', '.png', '.bmp', '.tiff'))
        ]
        total_images = len(image_files)
        image_count = 0
        # Use ThreadPoolExecutor for parallel extraction
        with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
            future_to_file = {}
            for filename in image_files:
                file_path = os.path.join(class_path, filename)
                # Submit tasks
                future = executor.submit(process_image, file_path, class_name)
                future_to_file[future] = filename
            for future in concurrent.futures.as_completed(future_to_file):
                filename = future_to_file[future]
                features, label = future.result()
                if features is not None:
                    X.append(features)
                    y.append(label)
                image_count += 1
                print(
                    f"\rProcessed {image_count}/{total_images} images in '{class_name}'",
                    end="", flush=True
                )
        print(f"\nCompleted class: {class_name} with {image_count} images.")
    X = np.array(X, dtype=np.float32)
    y = np.array(y)
    print(f"Finished loading dataset. Total classes: {len(classes)}. Total images: {len(X)}.")
    return X, y, classes
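
# Example (hypothetical layout): load_dataset expects one subfolder per class,
#
#   images_dataset/
#       roses/   rose_001.jpg, rose_002.png, ...
#       tulips/  tulip_001.jpg, ...
#
# and would be called as:
#
#   X, y, classes = load_dataset("./images_dataset", max_workers=8)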
# ---------------------------------------------------------------------
# 2. Classifier Training
# ---------------------------------------------------------------------
def train_classifiers(X, y):
    """
    Splits data into training/test sets, trains:
      - A Bagging ensemble of LinearSVC classifiers (with scaling)
      - A RandomForestClassifier
      - A VotingClassifier that combines both
    Returns:
        models (dict): A dictionary with keys 'svm', 'rf', 'combined'
        test_data (tuple): (X_test, y_test)
    """
    # Split dataset: 80% train, 20% test
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=31, shuffle=True
    )
    # --- Train SVM Ensemble ---
    svm_pipeline = make_pipeline(StandardScaler(),
                                 LinearSVC(random_state=31))
    svm_ensemble = BaggingClassifier(
        estimator=svm_pipeline,
        n_estimators=10,  # Adjust for speed/accuracy trade-off
        n_jobs=-1,
        verbose=1
    )
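    # LinearSVC trains much faster than a kernel SVC on high-dimensional
    # feature vectors; bagging several of them on bootstrap samples trades
    # extra training time (parallelised via n_jobs=-1) for variance reduction.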
print("Training SVM ensemble classifier...")
svm_ensemble.fit(X_train, y_train)
print("\nSVM Ensemble Classification Report:")
y_pred_svm = svm_ensemble.predict(X_test)
print(classification_report(y_test, y_pred_svm))
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_svm))
# --- Train RandomForest ---
rf = RandomForestClassifier(
n_estimators=100, # Adjust as needed
random_state=31,
n_jobs=-1
)
print("\nTraining RandomForest classifier...")
rf.fit(X_train, y_train)
print("\nRandomForest Classification Report:")
y_pred_rf = rf.predict(X_test)
print(classification_report(y_test, y_pred_rf))
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_rf))
# --- Train Combined Voting Classifier ---
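    # Note on voting='soft': soft voting averages each estimator's
    # predict_proba output. LinearSVC itself has no predict_proba, but
    # BaggingClassifier falls back to per-class vote fractions when its base
    # estimator lacks one (per the scikit-learn docs), so the ensemble still
    # exposes (coarse) probabilities and soft voting works here.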
    combined_clf = VotingClassifier(
        estimators=[('svm', svm_ensemble), ('rf', rf)],
        voting='soft'
    )
    print("\nTraining Combined Voting classifier...")
    combined_clf.fit(X_train, y_train)
    print("\nCombined Voting Classifier Report:")
    y_pred_combined = combined_clf.predict(X_test)
    print(classification_report(y_test, y_pred_combined))
    print("Confusion Matrix:")
    print(confusion_matrix(y_test, y_pred_combined))
    models = {
        'svm': svm_ensemble,
        'rf': rf,
        'combined': combined_clf
    }
    return models, (X_test, y_test)
# ---------------------------------------------------------------------
# 3. Training Thread
# ---------------------------------------------------------------------
def train_model_thread(dataset_folder, model_filename, max_workers=4):
    global training_log
    training_log += "Starting training...\n"
    print("Starting training...")
    # (A) Load Data (in parallel)
    X, y, classes = load_dataset(dataset_folder, max_workers=max_workers)
    # (B) Train Classifiers and Print Metrics
    models, _ = train_classifiers(X, y)
    print("Training complete.")
    training_log += "Training complete.\n"
    # (C) Save the Model
    model_data = {'models': models, 'class_names': classes}
    with open(model_filename, "wb") as f:
        pickle.dump(model_data, f)
    training_log += f"Model saved to {model_filename}\n"
    print(f"Model saved to {model_filename}")
# ---------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------
if __name__ == "__main__":
    dataset_folder = "./../images_dataset"  # Adjust path as needed
    model_filename = "svm_rf_combined.pkl"
    # Launch training in a separate thread.
    # Tune 'max_workers' to the desired number of feature-extraction threads.
    max_workers = 32
    training_thread = threading.Thread(
        target=train_model_thread,
        args=(dataset_folder, model_filename, max_workers)
    )
    training_thread.start()
    # Wait until training finishes
    training_thread.join()
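
# Example (hypothetical): once training has finished, the saved pickle can be
# loaded for inference on a new image. A minimal sketch, assuming the same
# feature_extractor module is importable and "some_image.jpg" stands in for a
# real file:
#
#   with open(model_filename, "rb") as f:
#       model_data = pickle.load(f)
#   clf = model_data['models']['combined']
#   image = cv2.imread("some_image.jpg", cv2.IMREAD_COLOR)
#   feats = extract_features_from_image(image)['combined_features']
#   # y was trained on class-name strings, so predict() returns the name directly
#   print("Predicted class:", clf.predict(np.array([feats], dtype=np.float32))[0])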