# Image preprocessing and GLCM feature extraction for the grass/wood
# texture-classification pipeline.
import os

import cv2
import numpy as np
import pandas as pd
from skimage.feature import graycomatrix, graycoprops


def resize_image(image_path, save_path):
    """Resize the image at *image_path* to 128x128 and write it to *save_path*.

    Raises:
        FileNotFoundError: if the image cannot be read. cv2.imread returns
            None instead of raising, which would otherwise surface later as
            a cryptic cv2.resize error.
    """
    img = cv2.imread(image_path)
    if img is None:
        raise FileNotFoundError('Could not read image: ' + image_path)
    img = cv2.resize(img, (128, 128))
    cv2.imwrite(save_path, img)


def augment_image(image_path, save_path):
    """Randomly augment one training image and save the result to *save_path*.

    The image is flipped horizontally with 50% probability and, independently,
    rotated 90 degrees clockwise with 50% probability — so the saved copy may
    be identical to the input.

    Raises:
        FileNotFoundError: if the image cannot be read.
    """
    img = cv2.imread(image_path)
    if img is None:
        raise FileNotFoundError('Could not read image: ' + image_path)
    # flip with 50% probability
    if np.random.rand() > 0.5:
        img = cv2.flip(img, 1)
    # rotate by 90 degrees with 50% probability
    if np.random.rand() > 0.5:
        img = cv2.rotate(img, cv2.ROTATE_90_CLOCKWISE)
    cv2.imwrite(save_path, img)


def compute_glcm(image_path, ispath=True):
    """Compute mean GLCM texture features for a grayscale image.

    The gray-level co-occurrence matrix is built with distance 3 and the four
    standard angles (0, 45, 90, 135 degrees) over 256 gray levels, symmetric
    and normalized.

    Args:
        image_path: a file path (when *ispath* is True) or an already-loaded
            grayscale uint8 NumPy array.
        ispath: whether *image_path* is a path or an array.

    Returns:
        List of four floats: [contrast, correlation, energy, homogeneity],
        each averaged over the four angles.

    Raises:
        FileNotFoundError: if *ispath* is True and the image cannot be read.
    """
    if ispath:
        img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        if img is None:
            raise FileNotFoundError('Could not read image: ' + image_path)
    else:
        img = image_path
    # Distance = 3, and 4 angles: 0, 45, 90, 135 degrees.
    glcm = graycomatrix(img, [3], [0, np.pi / 4, np.pi / 2, 3 * np.pi / 4],
                        256, symmetric=True, normed=True)
    # One value per property: the mean over all (distance, angle) pairs.
    return [float(np.mean(graycoprops(glcm, prop)))
            for prop in ('contrast', 'correlation', 'energy', 'homogeneity')]
from skimage.feature import local_binary_pattern
import pickle
import warnings


def compute_lbp(image_path, ispath=True):
    """Compute a 10-bin density histogram of uniform LBP codes (P=8, R=1).

    Args:
        image_path: a file path (when *ispath* is True) or an already-loaded
            grayscale NumPy array.
        ispath: whether *image_path* is a path or an array.

    Returns:
        1-D NumPy array of length 10 (normalized histogram).
    """
    if ispath:
        img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    else:
        img = image_path
    lbp = local_binary_pattern(img, 8, 1, 'uniform')
    # 'uniform' LBP with P=8 produces codes 0..9, hence 10 bins.
    hist, _ = np.histogram(lbp, bins=np.arange(0, 11), density=True)
    return hist


def classify_image(image, algorithm):
    """Classify a BGR image as grass or wood with a pre-trained SVM.

    Args:
        image: the image as a BGR NumPy array (any size; resized internally).
        algorithm: 'GLCM' to classify on GLCM features; any other value
            uses LBP features.

    Returns:
        The predicted class label from the corresponding pickled classifier.

    Raises:
        TypeError: if *image* is not a NumPy array. (Previously a non-array
            input fell through to an undefined-variable NameError.)
    """
    # Suppress the warning about feature names
    warnings.filterwarnings("ignore", message="X does not have valid feature names")
    # Load the pre-trained classifiers
    clf_glcm = pickle.load(open('clf_glcm.pkl', 'rb'))
    clf_lbp = pickle.load(open('clf_lbp.pkl', 'rb'))
    if not isinstance(image, np.ndarray):
        raise TypeError('classify_image expects a NumPy array image')
    # Normalize to the 128x128 grayscale format used during training.
    img = cv2.resize(image, (128, 128))
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Extract features with the selected algorithm.
    if algorithm == 'GLCM':
        features = compute_glcm(img_gray, ispath=False)
    else:
        features = compute_lbp(img_gray, ispath=False)
    # Convert features to a DataFrame to match the format used in training.
    features_df = pd.DataFrame([features])
    if algorithm == 'GLCM':
        return clf_glcm.predict(features_df)[0]
    return clf_lbp.predict(features_df)[0]


# If the script is run directly, perform preprocessing (has_pics) and/or
# training + evaluation + plotting (has_csv).
if __name__ == '__main__':
    # Stage toggles: image preprocessing vs. training on existing CSVs.
    has_pics = False
    has_csv = True

    if has_pics:
        image_exts = ('.jpg', '.jpeg', '.png')

        # Resize the raw images and save them renamed as 1.jpg, 2.jpg, ...
        # NOTE(review): os.listdir order is arbitrary, so the numbering is
        # not tied to the original filenames — confirm that is acceptable.
        for src_dir, dst_dir in (('./grass/', './grass_resized/'),
                                 ('./wood/', './wood_resized/')):
            if not os.path.exists(dst_dir):
                os.makedirs(dst_dir)
            count = 1
            for file in os.listdir(src_dir):
                if file.endswith(image_exts):
                    resize_image(src_dir + file, dst_dir + str(count) + '.jpg')
                    count += 1
            print('Done!')

        # 70/30 split, merged into ./train/ and ./test/ with renumbered
        # files so the classes do not overwrite each other:
        #   train: 1-35 grass, 36-70 wood;  test: 1-15 grass, 16-30 wood.
        import shutil
        if not os.path.exists('./train/'):
            os.makedirs('./train/')
        if not os.path.exists('./test/'):
            os.makedirs('./test/')
        for i in range(1, 36):
            shutil.copy('./grass_resized/%d.jpg' % i, './train/%d.jpg' % i)
        for i in range(36, 51):
            shutil.copy('./grass_resized/%d.jpg' % i, './test/%d.jpg' % (i - 35))
        for i in range(1, 36):
            shutil.copy('./wood_resized/%d.jpg' % i, './train/%d.jpg' % (i + 35))
        for i in range(36, 51):
            shutil.copy('./wood_resized/%d.jpg' % i, './test/%d.jpg' % (i - 20))

        # Augment every training image once, saved as files 71-140.
        # BUG FIX: the original second loop ran range(36, 51) and therefore
        # augmented only 15 of the 35 wood training images (train files
        # 36-70); a single loop over all 70 covers both classes.
        for i in range(1, 71):
            augment_image('./train/%d.jpg' % i, './train/%d.jpg' % (i + 70))
        # NOTE(review): the augmented files (71+) are never included in the
        # feature CSVs below — confirm whether that is intentional.

        # Compute LBP features for train/test and save labelled CSVs.
        # Labels follow the file numbering above.
        data = [compute_lbp('./train/%d.jpg' % i) for i in range(1, 71)]
        df = pd.DataFrame(data, columns=['lbp_%d' % i for i in range(10)])
        df['class'] = ['grass'] * 35 + ['wood'] * 35
        df.to_csv('train_lbp.csv', index=False)

        data = [compute_lbp('./test/%d.jpg' % i) for i in range(1, 31)]
        df = pd.DataFrame(data, columns=['lbp_%d' % i for i in range(10)])
        df['class'] = ['grass'] * 15 + ['wood'] * 15
        df.to_csv('test_lbp.csv', index=False)

        # Same for the GLCM features.
        glcm_cols = ['contrast', 'correlation', 'energy', 'homogeneity']
        data = [compute_glcm('./train/%d.jpg' % i) for i in range(1, 71)]
        df = pd.DataFrame(data, columns=glcm_cols)
        df['class'] = ['grass'] * 35 + ['wood'] * 35
        df.to_csv('train_glcm.csv', index=False)

        data = [compute_glcm('./test/%d.jpg' % i) for i in range(1, 31)]
        df = pd.DataFrame(data, columns=glcm_cols)
        df['class'] = ['grass'] * 15 + ['wood'] * 15
        df.to_csv('test_glcm.csv', index=False)

    if has_csv:
        # Train one SVM per feature set, tuning hyperparameters with a
        # 5-fold grid search, then evaluate on the held-out test CSVs.
        from sklearn.svm import SVC
        from sklearn.model_selection import GridSearchCV
        from sklearn.metrics import accuracy_score, precision_score

        train_glcm = pd.read_csv('train_glcm.csv')
        test_glcm = pd.read_csv('test_glcm.csv')
        train_lbp = pd.read_csv('train_lbp.csv')
        test_lbp = pd.read_csv('test_lbp.csv')

        X_train_glcm = train_glcm.drop('class', axis=1)
        y_train_glcm = train_glcm['class']
        X_test_glcm = test_glcm.drop('class', axis=1)
        y_test_glcm = test_glcm['class']
        X_train_lbp = train_lbp.drop('class', axis=1)
        y_train_lbp = train_lbp['class']
        X_test_lbp = test_lbp.drop('class', axis=1)
        y_test_lbp = test_lbp['class']

        # Hyperparameter grid shared by both classifiers.
        param_grid = {
            'C': [0.1, 1, 10, 100],          # regularization parameter
            'kernel': ['linear', 'rbf'],     # kernels to explore
            'gamma': [1, 0.1, 0.01, 0.001],  # gamma values for the RBF kernel
        }

        def _tune_svm(name, X_train, y_train):
            """Grid-search an SVC with 5-fold CV; return the refit best estimator."""
            grid_search = GridSearchCV(SVC(), param_grid, cv=5,
                                       scoring='accuracy', verbose=2, n_jobs=-1)
            grid_search.fit(X_train, y_train)
            print("Best parameters for " + name + ": ", grid_search.best_params_)
            # GridSearchCV(refit=True) already refits the best estimator on
            # the full training set, so no extra .fit() is needed here.
            return grid_search.best_estimator_

        clf_glcm = _tune_svm('clf_glcm', X_train_glcm, y_train_glcm)
        y_pred_glcm = clf_glcm.predict(X_test_glcm)
        print('Accuracy for GLCM features:', accuracy_score(y_test_glcm, y_pred_glcm))
        print('Precision for GLCM features:',
              precision_score(y_test_glcm, y_pred_glcm, average='weighted'))

        clf_lbp = _tune_svm('clf_lbp', X_train_lbp, y_train_lbp)
        y_pred_lbp = clf_lbp.predict(X_test_lbp)
        print('Accuracy for LBP features:', accuracy_score(y_test_lbp, y_pred_lbp))
        print('Precision for LBP features:',
              precision_score(y_test_lbp, y_pred_lbp, average='weighted'))

        # Save the test-set results side by side for comparison.
        results = pd.DataFrame({
            'GLCM_accuracy': [accuracy_score(y_test_glcm, y_pred_glcm)],
            'LBP_accuracy': [accuracy_score(y_test_lbp, y_pred_lbp)],
        })
        results['GLCM_precision'] = precision_score(y_test_glcm, y_pred_glcm,
                                                    average='weighted')
        results['LBP_precision'] = precision_score(y_test_lbp, y_pred_lbp,
                                                   average='weighted')
        results.to_csv('results.csv', index=False)

        # Persist both classifiers for later use by classify_image().
        with open('clf_glcm.pkl', 'wb') as f:
            pickle.dump(clf_glcm, f)
        with open('clf_lbp.pkl', 'wb') as f:
            pickle.dump(clf_lbp, f)

        # Visualize the feature distributions per class.
        import matplotlib.pyplot as plt
        import seaborn as sns

        sns.pairplot(train_glcm, hue='class')
        plt.savefig('train_glcm_distribution.png')
        plt.close()
        sns.pairplot(train_lbp, hue='class')
        plt.savefig('train_lbp_distribution.png')
        plt.close()

        # 2-D t-SNE embeddings of the feature vectors, coloured by class.
        from sklearn.manifold import TSNE

        def _plot_tsne(X_2d, y, title, out_path):
            """Scatter-plot a 2-D embedding, grass in red and wood in blue."""
            for cls, colour in (('grass', 'red'), ('wood', 'blue')):
                mask = (y == cls)
                plt.scatter(X_2d[mask, 0], X_2d[mask, 1],
                            color=colour, label=cls)
            plt.legend()
            plt.title(title)
            plt.savefig(out_path)
            plt.close()

        tsne = TSNE(n_components=2)
        _plot_tsne(tsne.fit_transform(X_train_glcm), y_train_glcm,
                   'GLCM features', 'train_glcm_tsne.png')
        _plot_tsne(tsne.fit_transform(X_train_lbp), y_train_lbp,
                   'LBP features', 'train_lbp_tsne.png')

        # Smaller perplexity for the 30-sample test set (t-SNE requires
        # perplexity below the number of samples).
        tsne = TSNE(n_components=2, perplexity=5)
        _plot_tsne(tsne.fit_transform(X_test_glcm), y_test_glcm,
                   'GLCM features', 'test_glcm_tsne.png')
        _plot_tsne(tsne.fit_transform(X_test_lbp), y_test_lbp,
                   'LBP features', 'test_lbp_tsne.png')