# Grass_Wood_Classification / classification.py
# Author: Siyun He
# Last change: update the documentation (commit 6cd8c15)
# save the resized image to ./grass_resized/ folder
import os
import cv2
import numpy as np
# Resize the image to 128x128
def resize_image(image_path, save_path):
    """Load the image at *image_path*, resize it to 128x128, and write it to *save_path*.

    Raises:
        FileNotFoundError: if the file cannot be read as an image.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread returns None (no exception) for missing/corrupt files;
        # fail loudly here instead of crashing inside cv2.resize.
        raise FileNotFoundError('Could not read image: ' + image_path)
    img = cv2.resize(img, (128, 128))
    cv2.imwrite(save_path, img)
# Do data augmentation by flipping the images horizontally on train data
# Save the augmented data to the same folders
def augment_image(image_path, save_path):
    """Write a randomly augmented copy of the image at *image_path* to *save_path*.

    Two transforms are each applied independently with probability ~0.5:
    a horizontal flip and a 90-degree clockwise rotation.
    """
    augmented = cv2.imread(image_path)
    # Coin-toss: mirror left-right.
    if np.random.rand() > 0.5:
        augmented = cv2.flip(augmented, 1)
    # Independent coin-toss: quarter-turn clockwise.
    if np.random.rand() > 0.5:
        augmented = cv2.rotate(augmented, cv2.ROTATE_90_CLOCKWISE)
    cv2.imwrite(save_path, augmented)
# Compute the GLCM for each image.
# Extract features like contrast, correlaton, energy, and homogeneity.
# Save the features to a CSV file.
# Label each feature vector with the correct class (grass or wood).
import pandas as pd
from skimage.feature import graycomatrix, graycoprops
def compute_glcm(image_path, ispath=True):
    '''Compute mean GLCM texture features for an image.

    Parameters:
        image_path: path to an image file, or (when ispath is False) a
            grayscale image already loaded as a 2-D uint8 array.
        ispath: when True, load the image from disk in grayscale.

    Returns:
        [mean_contrast, mean_correlation, mean_energy, mean_homogeneity],
        each averaged over the 4 GLCM angles.
    '''
    if ispath:
        img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    else:
        img = image_path
    # compute the GLCM. Distance = 3, and 4 angles: 0, 45, 90, 135 degrees
    glcm = graycomatrix(img, [3], [0, np.pi/4, np.pi/2, 3*np.pi/4], 256,
                        symmetric=True, normed=True)
    # graycoprops returns a (num_distances, num_angles) array per property;
    # np.mean over it (it flattens implicitly) gives one scalar per property.
    return [np.mean(graycoprops(glcm, prop))
            for prop in ('contrast', 'correlation', 'energy', 'homogeneity')]
# Apply the LBP operator to each image.
# Generate histograms of LBP codes to create feature vectors.
# Save the features to a CSV file.
# Label each feature vector with the correct class (grass or wood).
from skimage.feature import local_binary_pattern
import pickle
import warnings
def compute_lbp(image_path, ispath=True):
    """Return a 10-bin normalized histogram of uniform LBP codes for an image.

    Parameters:
        image_path: path to an image file, or (when ispath is False) a
            grayscale image already loaded as a 2-D array.
        ispath: when True, load the image from disk in grayscale.
    """
    if ispath:
        gray = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    else:
        gray = image_path
    # Uniform LBP with 8 neighbors at radius 1 produces codes 0..9,
    # so bins [0, 1, ..., 10] yield exactly 10 buckets.
    codes = local_binary_pattern(gray, 8, 1, 'uniform')
    # density=True normalizes the histogram, making the feature vector
    # independent of image size.
    histogram, _ = np.histogram(codes, bins=np.arange(0, 11), density=True)
    return histogram
def classify_image(image, algorithm):
    """Classify an image as grass or wood with a pre-trained SVM.

    Parameters:
        image: a loaded BGR image as a NumPy array, or a path to an image file.
        algorithm: 'GLCM' selects the GLCM-feature classifier (clf_glcm.pkl);
            any other value selects the LBP-feature classifier (clf_lbp.pkl).

    Returns:
        The predicted class label from the loaded classifier.

    Raises:
        FileNotFoundError: if a path is given and the image cannot be read.
    """
    # Suppress the warning about feature names
    warnings.filterwarnings("ignore", message="X does not have valid feature names")
    # Accept either a loaded array or a file path. (The original left
    # img_gray undefined — NameError — for non-array input.)
    if isinstance(image, np.ndarray):
        img = image
    else:
        img = cv2.imread(image)
        if img is None:
            raise FileNotFoundError('Could not read image: ' + str(image))
    img = cv2.resize(img, (128, 128))
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Compute features and pick the matching model path; only the needed
    # pickle is loaded, and the file handle is closed via the context manager
    # (the original leaked both open file objects).
    if algorithm == 'GLCM':
        features = compute_glcm(img_gray, ispath=False)
        model_path = 'clf_glcm.pkl'
    else:
        features = compute_lbp(img_gray, ispath=False)
        model_path = 'clf_lbp.pkl'
    with open(model_path, 'rb') as f:
        clf = pickle.load(f)
    # Wrap features in a DataFrame to match the training-time input format.
    features_df = pd.DataFrame([features])
    return clf.predict(features_df)[0]
# If the script is run directly, perform the classification, training, and testing steps.
if __name__ == '__main__':
    # If the images are available, resize them and save them to the appropriate folders.
    # Stage toggles: has_pics rebuilds resized/augmented images and feature CSVs
    # from raw photos; has_csv trains and evaluates from the existing CSVs.
    has_pics = False
    has_csv = True
    if has_pics:
        # read image data from ./grass/ folder
        if not os.path.exists('./grass_resized/'):
            os.makedirs('./grass_resized/')
        # rename the image file to 1.jpg, 2.jpg, 3.jpg, ...
        count = 1
        for file in os.listdir('./grass/'):
            if file.endswith('.jpg') or file.endswith('.jpeg') or file.endswith('.png'):
                resize_image('./grass/' + file, './grass_resized/' + str(count) + '.jpg')
                count += 1
        print('Done!')
        # save the resized image to ./wood_resized/ folder
        if not os.path.exists('./wood_resized/'):
            os.makedirs('./wood_resized/')
        # rename the image file to 1.jpg, 2.jpg, 3.jpg, ...
        count = 1
        for file in os.listdir('./wood/'):
            if file.endswith('.jpg') or file.endswith('.jpeg') or file.endswith('.png'):
                resize_image('./wood/' + file, './wood_resized/' + str(count) + '.jpg')
                count += 1
        print('Done!')
        # Divide the data into training and testing data: 70% training, 30% testing
        # Merge grass and wood data into training and testing data
        # Save the training data to ./train/ folder
        # Save the testing data to ./test/ folder
        import shutil
        if not os.path.exists('./train/'):
            os.makedirs('./train/')
        if not os.path.exists('./test/'):
            os.makedirs('./test/')
        # Rename files so that they do not overwrite each other:
        # train/1-35 = grass, train/36-70 = wood; test/1-15 = grass, test/16-30 = wood.
        # NOTE(review): ranges assume each class has exactly 50 source images — confirm.
        for i in range(1, 36):
            shutil.copy('./grass_resized/' + str(i) + '.jpg', './train/' + str(i) + '.jpg')
        for i in range(36, 51):
            shutil.copy('./grass_resized/' +
                        str(i) + '.jpg', './test/' + str(i - 35) + '.jpg')
        for i in range(1, 36):
            shutil.copy('./wood_resized/' + str(i) + '.jpg', './train/' + str(i + 35) + '.jpg')
        for i in range(36, 51):
            shutil.copy('./wood_resized/' +
                        str(i) + '.jpg', './test/' + str(i - 20) + '.jpg')
        # NOTE(review): these two loops augment only train/1-50 (train/51-70 are
        # skipped) and write copies to train/71-120, but the feature loops below
        # read only train/1-70 — the augmented images are never used. Confirm intent.
        for i in range(1, 36):
            augment_image('./train/' + str(i) + '.jpg', './train/' + str(i + 70) + '.jpg')
        for i in range(36, 51):
            augment_image('./train/' + str(i) + '.jpg', './train/' + str(i + 70) + '.jpg')
        # Compute the LBP for each image in the training data
        data = []
        for i in range(1, 71):
            data.append(compute_lbp('./train/' + str(i) + '.jpg'))
        df = pd.DataFrame(data, columns=['lbp_' + str(i) for i in range(10)])
        # Labels follow the file-numbering scheme above (1-35 grass, 36-70 wood).
        df['class'] = ['grass']*35 + ['wood']*35
        df.to_csv('train_lbp.csv', index=False)
        # Compute the LBP for each image in the testing data
        data = []
        for i in range(1, 31):
            data.append(compute_lbp('./test/' + str(i) + '.jpg'))
        df = pd.DataFrame(data, columns=['lbp_' + str(i) for i in range(10)])
        df['class'] = ['grass']*15 + ['wood']*15
        df.to_csv('test_lbp.csv', index=False)
        # Compute the GLCM for each image in the training data
        data = []
        for i in range(1, 71):
            data.append(compute_glcm('./train/' + str(i) + '.jpg'))
        df = pd.DataFrame(data, columns=['contrast', 'correlation', 'energy', 'homogeneity'])
        df['class'] = ['grass']*35 + ['wood']*35
        df.to_csv('train_glcm.csv', index=False)
        # Compute the GLCM for each image in the testing data
        data = []
        for i in range(1, 31):
            data.append(compute_glcm('./test/' + str(i) + '.jpg'))
        df = pd.DataFrame(data, columns=['contrast', 'correlation', 'energy', 'homogeneity'])
        df['class'] = ['grass']*15 + ['wood']*15
        df.to_csv('test_glcm.csv', index=False)
    if has_csv:
        # Select Support Vector Machines (SVM) as the classifier.
        # Train the classifier using the training data.
        # Test the classifier using the testing data.
        from sklearn.svm import SVC
        from sklearn.model_selection import GridSearchCV
        from sklearn.metrics import accuracy_score
        from sklearn.metrics import precision_score
        import pandas as pd
        train_glcm = pd.read_csv('train_glcm.csv')
        test_glcm = pd.read_csv('test_glcm.csv')
        train_lbp = pd.read_csv('train_lbp.csv')
        test_lbp = pd.read_csv('test_lbp.csv')
        # Split each CSV into a feature matrix (X) and a label column (y).
        X_train_glcm = train_glcm.drop('class', axis=1)
        y_train_glcm = train_glcm['class']
        X_test_glcm = test_glcm.drop('class', axis=1)
        y_test_glcm = test_glcm['class']
        X_train_lbp = train_lbp.drop('class', axis=1)
        y_train_lbp = train_lbp['class']
        X_test_lbp = test_lbp.drop('class', axis=1)
        y_test_lbp = test_lbp['class']
        # Define the parameter grid for tuning (shared by both classifiers)
        param_grid = {
            'C': [0.1, 1, 10, 100],  # Regularization parameter
            'kernel': ['linear', 'rbf'],  # Kernels to explore: linear and RBF
            'gamma': [1, 0.1, 0.01, 0.001]  # Gamma values for RBF kernel
        }
        clf_glcm = SVC()
        # Set up GridSearchCV with 5-fold cross-validation
        grid_search = GridSearchCV(clf_glcm, param_grid, cv=5, scoring='accuracy', verbose=2, n_jobs=-1)
        # Perform the grid search to find the best hyperparameters
        grid_search.fit(X_train_glcm, y_train_glcm)
        # Output the best parameters from the search
        print("Best parameters for clf_glcm: ", grid_search.best_params_)
        # Use the best estimator found by GridSearchCV to make predictions
        clf_glcm = grid_search.best_estimator_
        clf_glcm.fit(X_train_glcm, y_train_glcm)
        y_pred_glcm = clf_glcm.predict(X_test_glcm)
        # calculate the accuracy
        print('Accuracy for GLCM features:', accuracy_score(y_test_glcm, y_pred_glcm))
        # calculate the precision
        precision = precision_score(y_test_glcm, y_pred_glcm, average='weighted')
        print('Precision for GLCM features:', precision)
        clf_lbp = SVC()
        # Set up GridSearchCV with 5-fold cross-validation
        grid_search = GridSearchCV(clf_lbp, param_grid, cv=5, scoring='accuracy', verbose=2, n_jobs=-1)
        # Perform the grid search to find the best hyperparameters
        grid_search.fit(X_train_lbp, y_train_lbp)
        # Output the best parameters from the search
        print("Best parameters for clf_lbp: ", grid_search.best_params_)
        # Use the best estimator found by GridSearchCV to make predictions
        clf_lbp = grid_search.best_estimator_
        clf_lbp.fit(X_train_lbp, y_train_lbp)
        y_pred_lbp = clf_lbp.predict(X_test_lbp)
        # calculate the accuracy
        print('Accuracy for LBP features:', accuracy_score(y_test_lbp, y_pred_lbp))
        # calculate the precision
        precision = precision_score(y_test_lbp, y_pred_lbp, average='weighted')
        print('Precision for LBP features:', precision)
        # Evaluate each classifier on the testing set.
        # Compare the results.
        # Save the results to a CSV file.
        results = pd.DataFrame({'GLCM_accuracy': [accuracy_score(y_test_glcm, y_pred_glcm)], 'LBP_accuracy': [accuracy_score(y_test_lbp, y_pred_lbp)]})
        # Add the precision to the results
        results['GLCM_precision'] = precision_score(y_test_glcm, y_pred_glcm, average='weighted')
        results['LBP_precision'] = precision_score(y_test_lbp, y_pred_lbp, average='weighted')
        results.to_csv('results.csv', index=False)
        # save clf_glcm and clf_lbp as pickle files (these are what
        # classify_image loads at prediction time)
        with open('clf_glcm.pkl', 'wb') as f:
            pickle.dump(clf_glcm, f)
        with open('clf_lbp.pkl', 'wb') as f:
            pickle.dump(clf_lbp, f)
        # Use plots to visualize feature distributions and decision boundaries of the classifiers clf_glcm, clf_lbp
        import matplotlib.pyplot as plt
        import seaborn as sns
        sns.pairplot(train_glcm, hue='class')
        # save the plot to a file
        plt.savefig('train_glcm_distribution.png')
        plt.close()
        sns.pairplot(train_lbp, hue='class')
        # save the plot to a file
        plt.savefig('train_lbp_distribution.png')
        plt.close()
        # Use plots to visualize feature distributions and decision boundaries of the classifiers clf_glcm, clf_lbp using t-sne
        from sklearn.manifold import TSNE
        tsne = TSNE(n_components=2)
        X_train_glcm_tsne = tsne.fit_transform(X_train_glcm)
        X_train_lbp_tsne = tsne.fit_transform(X_train_lbp)
        # Boolean-Series masks pick out the rows of each class in the 2-D embedding.
        plt.scatter(X_train_glcm_tsne[y_train_glcm == 'grass', 0], X_train_glcm_tsne[y_train_glcm == 'grass', 1], color='red', label='grass')
        plt.scatter(X_train_glcm_tsne[y_train_glcm == 'wood', 0], X_train_glcm_tsne[y_train_glcm == 'wood', 1], color='blue', label='wood')
        plt.legend()
        plt.title('GLCM features')
        # save the plot to a file
        plt.savefig('train_glcm_tsne.png')
        plt.close()
        plt.scatter(X_train_lbp_tsne[y_train_lbp == 'grass', 0], X_train_lbp_tsne[y_train_lbp == 'grass', 1], color='red', label='grass')
        plt.scatter(X_train_lbp_tsne[y_train_lbp == 'wood', 0], X_train_lbp_tsne[y_train_lbp == 'wood', 1], color='blue', label='wood')
        plt.legend()
        plt.title('LBP features')
        # save the plot to a file
        plt.savefig('train_lbp_tsne.png')
        plt.close()
        # plot t-sne for the testing data
        # NOTE(review): perplexity=5 is presumably chosen because the test set has
        # only 30 rows (t-SNE requires perplexity < n_samples) — confirm.
        tsne = TSNE(n_components=2, perplexity=5)
        X_test_glcm_tsne = tsne.fit_transform(X_test_glcm)
        X_test_lbp_tsne = tsne.fit_transform(X_test_lbp)
        plt.scatter(X_test_glcm_tsne[y_test_glcm == 'grass', 0], X_test_glcm_tsne[y_test_glcm == 'grass', 1], color='red', label='grass')
        plt.scatter(X_test_glcm_tsne[y_test_glcm == 'wood', 0], X_test_glcm_tsne[y_test_glcm == 'wood', 1], color='blue', label='wood')
        plt.legend()
        plt.title('GLCM features')
        plt.savefig('test_glcm_tsne.png')
        plt.close()
        plt.scatter(X_test_lbp_tsne[y_test_lbp == 'grass', 0], X_test_lbp_tsne[y_test_lbp == 'grass', 1], color='red', label='grass')
        plt.scatter(X_test_lbp_tsne[y_test_lbp == 'wood', 0], X_test_lbp_tsne[y_test_lbp == 'wood', 1], color='blue', label='wood')
        plt.legend()
        plt.title('LBP features')
        plt.savefig('test_lbp_tsne.png')
        plt.close()