# Grass_Wood_Classification / classification.py
# Author: Siyun He
# Last change: update the documentation (commit 6cd8c15)
# save the resized image to ./grass_resized/ folder
import os
import cv2
import numpy as np
# Resize the image to 128x128
def resize_image(image_path, save_path):
    """Load the image at *image_path*, resize it to 128x128, and write it to *save_path*.

    Raises:
        FileNotFoundError: if the file cannot be read as an image.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread returns None (no exception) for missing/corrupt files;
        # fail loudly here instead of crashing inside cv2.resize.
        raise FileNotFoundError('Could not read image: ' + image_path)
    img = cv2.resize(img, (128, 128))
    cv2.imwrite(save_path, img)
# Do data augmentation by flipping the images horizontally on train data
# Save the augmented data to the same folders
def augment_image(image_path, save_path):
    """Write a randomly augmented copy of the image at *image_path* to *save_path*.

    Two transforms are each applied independently with probability ~0.5:
    a horizontal flip and a 90-degree clockwise rotation.
    """
    augmented = cv2.imread(image_path)
    # Coin-toss: mirror left-right.
    if np.random.rand() > 0.5:
        augmented = cv2.flip(augmented, 1)
    # Independent coin-toss: quarter-turn clockwise.
    if np.random.rand() > 0.5:
        augmented = cv2.rotate(augmented, cv2.ROTATE_90_CLOCKWISE)
    cv2.imwrite(save_path, augmented)
# Compute the GLCM for each image.
# Extract features like contrast, correlaton, energy, and homogeneity.
# Save the features to a CSV file.
# Label each feature vector with the correct class (grass or wood).
import pandas as pd
from skimage.feature import graycomatrix, graycoprops
def compute_glcm(image_path, ispath=True):
    '''Compute mean GLCM texture features for an image.

    Parameters:
        image_path: path to an image file, or (when ispath is False) a
            grayscale image already loaded as a 2-D uint8 array.
        ispath: when True, load the image from disk in grayscale.

    Returns:
        [mean_contrast, mean_correlation, mean_energy, mean_homogeneity],
        each averaged over the 4 GLCM angles.
    '''
    if ispath:
        img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    else:
        img = image_path
    # compute the GLCM. Distance = 3, and 4 angles: 0, 45, 90, 135 degrees
    glcm = graycomatrix(img, [3], [0, np.pi/4, np.pi/2, 3*np.pi/4], 256,
                        symmetric=True, normed=True)
    # graycoprops returns a (num_distances, num_angles) array per property;
    # np.mean over it (it flattens implicitly) gives one scalar per property.
    return [np.mean(graycoprops(glcm, prop))
            for prop in ('contrast', 'correlation', 'energy', 'homogeneity')]
# Apply the LBP operator to each image.
# Generate histograms of LBP codes to create feature vectors.
# Save the features to a CSV file.
# Label each feature vector with the correct class (grass or wood).
from skimage.feature import local_binary_pattern
import pickle
import warnings
def compute_lbp(image_path, ispath=True):
    """Return a 10-bin normalized histogram of uniform LBP codes for an image.

    Parameters:
        image_path: path to an image file, or (when ispath is False) a
            grayscale image already loaded as a 2-D array.
        ispath: when True, load the image from disk in grayscale.
    """
    if ispath:
        gray = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    else:
        gray = image_path
    # Uniform LBP with 8 neighbors at radius 1 produces codes 0..9,
    # so bins [0, 1, ..., 10] yield exactly 10 buckets.
    codes = local_binary_pattern(gray, 8, 1, 'uniform')
    # density=True normalizes the histogram, making the feature vector
    # independent of image size.
    histogram, _ = np.histogram(codes, bins=np.arange(0, 11), density=True)
    return histogram
def classify_image(image, algorithm):
    """Classify an image as grass or wood with a pre-trained SVM.

    Parameters:
        image: a loaded BGR image as a NumPy array, or a path to an image file.
        algorithm: 'GLCM' selects the GLCM-feature classifier (clf_glcm.pkl);
            any other value selects the LBP-feature classifier (clf_lbp.pkl).

    Returns:
        The predicted class label from the loaded classifier.

    Raises:
        FileNotFoundError: if a path is given and the image cannot be read.
    """
    # Suppress the warning about feature names
    warnings.filterwarnings("ignore", message="X does not have valid feature names")
    # Accept either a loaded array or a file path. (The original left
    # img_gray undefined — NameError — for non-array input.)
    if isinstance(image, np.ndarray):
        img = image
    else:
        img = cv2.imread(image)
        if img is None:
            raise FileNotFoundError('Could not read image: ' + str(image))
    img = cv2.resize(img, (128, 128))
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Compute features and pick the matching model path; only the needed
    # pickle is loaded, and the file handle is closed via the context manager
    # (the original leaked both open file objects).
    if algorithm == 'GLCM':
        features = compute_glcm(img_gray, ispath=False)
        model_path = 'clf_glcm.pkl'
    else:
        features = compute_lbp(img_gray, ispath=False)
        model_path = 'clf_lbp.pkl'
    with open(model_path, 'rb') as f:
        clf = pickle.load(f)
    # Wrap features in a DataFrame to match the training-time input format.
    features_df = pd.DataFrame([features])
    return clf.predict(features_df)[0]
# If the script is run directly, perform the classification, training, and testing steps.
if __name__ == '__main__':
    # If the images are available, resize them and save them to the appropriate folders.
    # Stage toggles: has_pics rebuilds resized/augmented images and feature CSVs
    # from raw photos; has_csv trains and evaluates from the existing CSVs.
    has_pics = False
    has_csv = True
    if has_pics:
        # read image data from ./grass/ folder
        if not os.path.exists('./grass_resized/'):
            os.makedirs('./grass_resized/')
        # rename the image file to 1.jpg, 2.jpg, 3.jpg, ...
        count = 1
        for file in os.listdir('./grass/'):
            if file.endswith('.jpg') or file.endswith('.jpeg') or file.endswith('.png'):
                resize_image('./grass/' + file, './grass_resized/' + str(count) + '.jpg')
                count += 1
        print('Done!')
        # save the resized image to ./wood_resized/ folder
        if not os.path.exists('./wood_resized/'):
            os.makedirs('./wood_resized/')
        # rename the image file to 1.jpg, 2.jpg, 3.jpg, ...
        count = 1
        for file in os.listdir('./wood/'):
            if file.endswith('.jpg') or file.endswith('.jpeg') or file.endswith('.png'):
                resize_image('./wood/' + file, './wood_resized/' + str(count) + '.jpg')
                count += 1
        print('Done!')
        # Divide the data into training and testing data: 70% training, 30% testing
        # Merge grass and wood data into training and testing data
        # Save the training data to ./train/ folder
        # Save the testing data to ./test/ folder
        import shutil
        if not os.path.exists('./train/'):
            os.makedirs('./train/')
        if not os.path.exists('./test/'):
            os.makedirs('./test/')
        # Rename files so that they do not overwrite each other:
        # train/1-35 = grass, train/36-70 = wood; test/1-15 = grass, test/16-30 = wood.
        # NOTE(review): ranges assume each class has exactly 50 source images — confirm.
        for i in range(1, 36):
            shutil.copy('./grass_resized/' + str(i) + '.jpg', './train/' + str(i) + '.jpg')
        for i in range(36, 51):
            shutil.copy('./grass_resized/' +
                        str(i) + '.jpg', './test/' + str(i - 35) + '.jpg')
        for i in range(1, 36):
            shutil.copy('./wood_resized/' + str(i) + '.jpg', './train/' + str(i + 35) + '.jpg')
        for i in range(36, 51):
            shutil.copy('./wood_resized/' +
                        str(i) + '.jpg', './test/' + str(i - 20) + '.jpg')
        # NOTE(review): these two loops augment only train/1-50 (train/51-70 are
        # skipped) and write copies to train/71-120, but the feature loops below
        # read only train/1-70 — the augmented images are never used. Confirm intent.
        for i in range(1, 36):
            augment_image('./train/' + str(i) + '.jpg', './train/' + str(i + 70) + '.jpg')
        for i in range(36, 51):
            augment_image('./train/' + str(i) + '.jpg', './train/' + str(i + 70) + '.jpg')
        # Compute the LBP for each image in the training data
        data = []
        for i in range(1, 71):
            data.append(compute_lbp('./train/' + str(i) + '.jpg'))
        df = pd.DataFrame(data, columns=['lbp_' + str(i) for i in range(10)])
        # Labels follow the file-numbering scheme above (1-35 grass, 36-70 wood).
        df['class'] = ['grass']*35 + ['wood']*35
        df.to_csv('train_lbp.csv', index=False)
        # Compute the LBP for each image in the testing data
        data = []
        for i in range(1, 31):
            data.append(compute_lbp('./test/' + str(i) + '.jpg'))
        df = pd.DataFrame(data, columns=['lbp_' + str(i) for i in range(10)])
        df['class'] = ['grass']*15 + ['wood']*15
        df.to_csv('test_lbp.csv', index=False)
        # Compute the GLCM for each image in the training data
        data = []
        for i in range(1, 71):
            data.append(compute_glcm('./train/' + str(i) + '.jpg'))
        df = pd.DataFrame(data, columns=['contrast', 'correlation', 'energy', 'homogeneity'])
        df['class'] = ['grass']*35 + ['wood']*35
        df.to_csv('train_glcm.csv', index=False)
        # Compute the GLCM for each image in the testing data
        data = []
        for i in range(1, 31):
            data.append(compute_glcm('./test/' + str(i) + '.jpg'))
        df = pd.DataFrame(data, columns=['contrast', 'correlation', 'energy', 'homogeneity'])
        df['class'] = ['grass']*15 + ['wood']*15
        df.to_csv('test_glcm.csv', index=False)
    if has_csv:
        # Select Support Vector Machines (SVM) as the classifier.
        # Train the classifier using the training data.
        # Test the classifier using the testing data.
        from sklearn.svm import SVC
        from sklearn.model_selection import GridSearchCV
        from sklearn.metrics import accuracy_score
        from sklearn.metrics import precision_score
        import pandas as pd
        train_glcm = pd.read_csv('train_glcm.csv')
        test_glcm = pd.read_csv('test_glcm.csv')
        train_lbp = pd.read_csv('train_lbp.csv')
        test_lbp = pd.read_csv('test_lbp.csv')
        # Split each CSV into a feature matrix (X) and a label column (y).
        X_train_glcm = train_glcm.drop('class', axis=1)
        y_train_glcm = train_glcm['class']
        X_test_glcm = test_glcm.drop('class', axis=1)
        y_test_glcm = test_glcm['class']
        X_train_lbp = train_lbp.drop('class', axis=1)
        y_train_lbp = train_lbp['class']
        X_test_lbp = test_lbp.drop('class', axis=1)
        y_test_lbp = test_lbp['class']
        # Define the parameter grid for tuning (shared by both classifiers)
        param_grid = {
            'C': [0.1, 1, 10, 100],  # Regularization parameter
            'kernel': ['linear', 'rbf'],  # Kernels to explore: linear and RBF
            'gamma': [1, 0.1, 0.01, 0.001]  # Gamma values for RBF kernel
        }
        clf_glcm = SVC()
        # Set up GridSearchCV with 5-fold cross-validation
        grid_search = GridSearchCV(clf_glcm, param_grid, cv=5, scoring='accuracy', verbose=2, n_jobs=-1)
        # Perform the grid search to find the best hyperparameters
        grid_search.fit(X_train_glcm, y_train_glcm)
        # Output the best parameters from the search
        print("Best parameters for clf_glcm: ", grid_search.best_params_)
        # Use the best estimator found by GridSearchCV to make predictions
        clf_glcm = grid_search.best_estimator_
        clf_glcm.fit(X_train_glcm, y_train_glcm)
        y_pred_glcm = clf_glcm.predict(X_test_glcm)
        # calculate the accuracy
        print('Accuracy for GLCM features:', accuracy_score(y_test_glcm, y_pred_glcm))
        # calculate the precision
        precision = precision_score(y_test_glcm, y_pred_glcm, average='weighted')
        print('Precision for GLCM features:', precision)
        clf_lbp = SVC()
        # Set up GridSearchCV with 5-fold cross-validation
        grid_search = GridSearchCV(clf_lbp, param_grid, cv=5, scoring='accuracy', verbose=2, n_jobs=-1)
        # Perform the grid search to find the best hyperparameters
        grid_search.fit(X_train_lbp, y_train_lbp)
        # Output the best parameters from the search
        print("Best parameters for clf_lbp: ", grid_search.best_params_)
        # Use the best estimator found by GridSearchCV to make predictions
        clf_lbp = grid_search.best_estimator_
        clf_lbp.fit(X_train_lbp, y_train_lbp)
        y_pred_lbp = clf_lbp.predict(X_test_lbp)
        # calculate the accuracy
        print('Accuracy for LBP features:', accuracy_score(y_test_lbp, y_pred_lbp))
        # calculate the precision
        precision = precision_score(y_test_lbp, y_pred_lbp, average='weighted')
        print('Precision for LBP features:', precision)
        # Evaluate each classifier on the testing set.
        # Compare the results.
        # Save the results to a CSV file.
        results = pd.DataFrame({'GLCM_accuracy': [accuracy_score(y_test_glcm, y_pred_glcm)], 'LBP_accuracy': [accuracy_score(y_test_lbp, y_pred_lbp)]})
        # Add the precision to the results
        results['GLCM_precision'] = precision_score(y_test_glcm, y_pred_glcm, average='weighted')
        results['LBP_precision'] = precision_score(y_test_lbp, y_pred_lbp, average='weighted')
        results.to_csv('results.csv', index=False)
        # save clf_glcm and clf_lbp as pickle files (these are what
        # classify_image loads at prediction time)
        with open('clf_glcm.pkl', 'wb') as f:
            pickle.dump(clf_glcm, f)
        with open('clf_lbp.pkl', 'wb') as f:
            pickle.dump(clf_lbp, f)
        # Use plots to visualize feature distributions and decision boundaries of the classifiers clf_glcm, clf_lbp
        import matplotlib.pyplot as plt
        import seaborn as sns
        sns.pairplot(train_glcm, hue='class')
        # save the plot to a file
        plt.savefig('train_glcm_distribution.png')
        plt.close()
        sns.pairplot(train_lbp, hue='class')
        # save the plot to a file
        plt.savefig('train_lbp_distribution.png')
        plt.close()
        # Use plots to visualize feature distributions and decision boundaries of the classifiers clf_glcm, clf_lbp using t-sne
        from sklearn.manifold import TSNE
        tsne = TSNE(n_components=2)
        X_train_glcm_tsne = tsne.fit_transform(X_train_glcm)
        X_train_lbp_tsne = tsne.fit_transform(X_train_lbp)
        # Boolean-Series masks pick out the rows of each class in the 2-D embedding.
        plt.scatter(X_train_glcm_tsne[y_train_glcm == 'grass', 0], X_train_glcm_tsne[y_train_glcm == 'grass', 1], color='red', label='grass')
        plt.scatter(X_train_glcm_tsne[y_train_glcm == 'wood', 0], X_train_glcm_tsne[y_train_glcm == 'wood', 1], color='blue', label='wood')
        plt.legend()
        plt.title('GLCM features')
        # save the plot to a file
        plt.savefig('train_glcm_tsne.png')
        plt.close()
        plt.scatter(X_train_lbp_tsne[y_train_lbp == 'grass', 0], X_train_lbp_tsne[y_train_lbp == 'grass', 1], color='red', label='grass')
        plt.scatter(X_train_lbp_tsne[y_train_lbp == 'wood', 0], X_train_lbp_tsne[y_train_lbp == 'wood', 1], color='blue', label='wood')
        plt.legend()
        plt.title('LBP features')
        # save the plot to a file
        plt.savefig('train_lbp_tsne.png')
        plt.close()
        # plot t-sne for the testing data
        # NOTE(review): perplexity=5 is presumably chosen because the test set has
        # only 30 rows (t-SNE requires perplexity < n_samples) — confirm.
        tsne = TSNE(n_components=2, perplexity=5)
        X_test_glcm_tsne = tsne.fit_transform(X_test_glcm)
        X_test_lbp_tsne = tsne.fit_transform(X_test_lbp)
        plt.scatter(X_test_glcm_tsne[y_test_glcm == 'grass', 0], X_test_glcm_tsne[y_test_glcm == 'grass', 1], color='red', label='grass')
        plt.scatter(X_test_glcm_tsne[y_test_glcm == 'wood', 0], X_test_glcm_tsne[y_test_glcm == 'wood', 1], color='blue', label='wood')
        plt.legend()
        plt.title('GLCM features')
        plt.savefig('test_glcm_tsne.png')
        plt.close()
        plt.scatter(X_test_lbp_tsne[y_test_lbp == 'grass', 0], X_test_lbp_tsne[y_test_lbp == 'grass', 1], color='red', label='grass')
        plt.scatter(X_test_lbp_tsne[y_test_lbp == 'wood', 0], X_test_lbp_tsne[y_test_lbp == 'wood', 1], color='blue', label='wood')
        plt.legend()
        plt.title('LBP features')
        plt.savefig('test_lbp_tsne.png')
        plt.close()