# Grass vs. wood texture classification: GLCM / LBP feature extraction + SVM.
| # save the resized image to ./grass_resized/ folder | |
| import os | |
| import cv2 | |
| import numpy as np | |
# Resize an image to a fixed size (default 128x128)
def resize_image(image_path, save_path, size=(128, 128)):
    """Read the image at *image_path*, resize it to *size*, and write it to *save_path*.

    Args:
        image_path: path of the image to read.
        save_path: path the resized image is written to.
        size: (width, height) target size; defaults to (128, 128).

    Raises:
        FileNotFoundError: if the image cannot be read.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread returns None instead of raising; fail loudly with a
        # clear message rather than crashing inside cv2.resize.
        raise FileNotFoundError(f"Could not read image: {image_path}")
    img = cv2.resize(img, size)
    cv2.imwrite(save_path, img)
# Do data augmentation by random flips/rotations (used on train data).
# Save the augmented data to the same folders.
def augment_image(image_path, save_path):
    """Randomly transform the image at *image_path* and save it to *save_path*.

    Each transform is applied independently with probability 0.5:
    a horizontal flip, then a 90-degree clockwise rotation.
    Uses the global NumPy RNG, so results are reproducible only if the
    caller seeds np.random.

    Raises:
        FileNotFoundError: if the image cannot be read.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread signals failure by returning None
        raise FileNotFoundError(f"Could not read image: {image_path}")
    # flip horizontally with 50% probability
    if np.random.rand() > 0.5:
        img = cv2.flip(img, 1)
    # rotate by 90 degrees with 50% probability
    if np.random.rand() > 0.5:
        img = cv2.rotate(img, cv2.ROTATE_90_CLOCKWISE)
    cv2.imwrite(save_path, img)
| # Compute the GLCM for each image. | |
| # Extract features like contrast, correlaton, energy, and homogeneity. | |
| # Save the features to a CSV file. | |
| # Label each feature vector with the correct class (grass or wood). | |
| import pandas as pd | |
| from skimage.feature import graycomatrix, graycoprops | |
def compute_glcm(image_path, ispath=True):
    """Compute mean GLCM texture features for a grayscale image.

    Args:
        image_path: path to an image file, or an already-loaded grayscale
            image array when *ispath* is False.
        ispath: whether *image_path* is a filesystem path.

    Returns:
        [mean_contrast, mean_correlation, mean_energy, mean_homogeneity],
        each averaged over the four measured angles.

    Raises:
        FileNotFoundError: if *ispath* is True and the image cannot be read.
    """
    if ispath:
        img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        if img is None:
            # cv2.imread signals failure by returning None
            raise FileNotFoundError(f"Could not read image: {image_path}")
    else:
        img = image_path
    # GLCM with distance 3 at four angles (0, 45, 90, 135 degrees),
    # 256 gray levels (assumes 8-bit input — TODO confirm for other dtypes)
    glcm = graycomatrix(img, [3], [0, np.pi/4, np.pi/2, 3*np.pi/4], 256,
                        symmetric=True, normed=True)
    # graycoprops returns a (1, 4) array per property (1 distance x 4 angles);
    # np.mean reduces it directly — no need to flatten first.
    return [
        float(np.mean(graycoprops(glcm, prop)))
        for prop in ('contrast', 'correlation', 'energy', 'homogeneity')
    ]
| # Apply the LBP operator to each image. | |
| # Generate histograms of LBP codes to create feature vectors. | |
| # Save the features to a CSV file. | |
| # Label each feature vector with the correct class (grass or wood). | |
| from skimage.feature import local_binary_pattern | |
| import pickle | |
| import warnings | |
def compute_lbp(image_path, ispath=True, points=8, radius=1):
    """Compute a normalized uniform-LBP histogram feature vector.

    Args:
        image_path: path to an image file, or an already-loaded grayscale
            image array when *ispath* is False.
        ispath: whether *image_path* is a filesystem path.
        points: number of circularly symmetric neighbor points (P).
        radius: radius of the sampling circle (R).

    Returns:
        A density histogram with P + 2 bins (the 'uniform' method yields
        P + 2 distinct codes). Defaults reproduce the original 10-bin vector.

    Raises:
        FileNotFoundError: if *ispath* is True and the image cannot be read.
    """
    if ispath:
        img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        if img is None:
            # cv2.imread signals failure by returning None
            raise FileNotFoundError(f"Could not read image: {image_path}")
    else:
        img = image_path
    lbp = local_binary_pattern(img, points, radius, 'uniform')
    # 'uniform' LBP with P points produces integer codes 0..P+1, hence the
    # P+2 bins (edges 0..P+2); density=True normalizes to a distribution.
    hist, _ = np.histogram(lbp, bins=np.arange(0, points + 3), density=True)
    return hist
def classify_image(image, algorithm):
    """Classify an image as 'grass' or 'wood' using a pre-trained SVM.

    Args:
        image: a BGR image as a NumPy array (as returned by cv2.imread).
        algorithm: 'GLCM' selects the GLCM-feature classifier; any other
            value selects the LBP-feature classifier.

    Returns:
        The predicted class label from the selected classifier.

    Raises:
        TypeError: if *image* is not a NumPy array. (Previously a non-array
            input fell through to an unbound variable and raised NameError.)
    """
    # Suppress the sklearn warning about a plain array lacking feature names
    warnings.filterwarnings("ignore", message="X does not have valid feature names")
    if not isinstance(image, np.ndarray):
        raise TypeError("image must be a NumPy array (e.g. from cv2.imread)")
    # Match the preprocessing used at training time: 128x128 grayscale
    img = cv2.resize(image, (128, 128))
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Extract features and pick the matching model file; loading only the
    # classifier we need avoids unpickling both on every call.
    if algorithm == 'GLCM':
        features = compute_glcm(img_gray, ispath=False)
        model_file = 'clf_glcm.pkl'
    else:
        features = compute_lbp(img_gray, ispath=False)
        model_file = 'clf_lbp.pkl'
    # 'with' closes the file handle (the old open(...) call leaked it)
    with open(model_file, 'rb') as f:
        clf = pickle.load(f)
    # Wrap in a DataFrame to match the format used in training
    features_df = pd.DataFrame([features])
    return clf.predict(features_df)[0]
# If the script is run directly, perform the classification, training, and testing steps.
if __name__ == '__main__':
    # Pipeline switches: has_pics re-runs image preprocessing + feature
    # extraction; has_csv trains and evaluates from the saved feature CSVs.
    has_pics = False
    has_csv = True
    # If the images are available, resize them and save them to the appropriate folders
    if has_pics:
        # read image data from ./grass/ folder
        if not os.path.exists('./grass_resized/'):
            os.makedirs('./grass_resized/')
        # rename the image files to 1.jpg, 2.jpg, 3.jpg, ...
        count = 1
        for file in os.listdir('./grass/'):
            if file.endswith('.jpg') or file.endswith('.jpeg') or file.endswith('.png'):
                resize_image('./grass/' + file, './grass_resized/' + str(count) + '.jpg')
                count += 1
        print('Done!')
        # save the resized images to the ./wood_resized/ folder
        if not os.path.exists('./wood_resized/'):
            os.makedirs('./wood_resized/')
        # rename the image files to 1.jpg, 2.jpg, 3.jpg, ...
        count = 1
        for file in os.listdir('./wood/'):
            if file.endswith('.jpg') or file.endswith('.jpeg') or file.endswith('.png'):
                resize_image('./wood/' + file, './wood_resized/' + str(count) + '.jpg')
                count += 1
        print('Done!')
        # Divide the data into training and testing data: 70% training, 30% testing.
        # Merge grass and wood data into ./train/ and ./test/ folders.
        # NOTE(review): the hard-coded ranges below assume exactly 50 images
        # per class — confirm against the source folders.
        import shutil
        if not os.path.exists('./train/'):
            os.makedirs('./train/')
        if not os.path.exists('./test/'):
            os.makedirs('./test/')
        # Rename files on copy so the two classes do not overwrite each other:
        # train/1-35 = grass, train/36-70 = wood; test/1-15 = grass, test/16-30 = wood.
        for i in range(1, 36):
            shutil.copy('./grass_resized/' + str(i) + '.jpg', './train/' + str(i) + '.jpg')
        for i in range(36, 51):
            shutil.copy('./grass_resized/' + str(i) + '.jpg', './test/' + str(i - 35) + '.jpg')
        for i in range(1, 36):
            shutil.copy('./wood_resized/' + str(i) + '.jpg', './train/' + str(i + 35) + '.jpg')
        for i in range(36, 51):
            shutil.copy('./wood_resized/' + str(i) + '.jpg', './test/' + str(i - 20) + '.jpg')
        # Augment training images, saving the copies as train/71.jpg onward.
        # NOTE(review): only train/1-50 are augmented (train/51-70 are never
        # touched), and the augmented files train/71-120 are not read by the
        # feature-extraction loops below (they stop at 70) — confirm intent.
        for i in range(1, 36):
            augment_image('./train/' + str(i) + '.jpg', './train/' + str(i + 70) + '.jpg')
        for i in range(36, 51):
            augment_image('./train/' + str(i) + '.jpg', './train/' + str(i + 70) + '.jpg')
        # Compute the LBP features for each image in the training data
        data = []
        for i in range(1, 71):
            data.append(compute_lbp('./train/' + str(i) + '.jpg'))
        df = pd.DataFrame(data, columns=['lbp_' + str(i) for i in range(10)])
        # Labels follow the copy order above: train/1-35 grass, train/36-70 wood
        df['class'] = ['grass']*35 + ['wood']*35
        df.to_csv('train_lbp.csv', index=False)
        # Compute the LBP features for each image in the testing data
        data = []
        for i in range(1, 31):
            data.append(compute_lbp('./test/' + str(i) + '.jpg'))
        df = pd.DataFrame(data, columns=['lbp_' + str(i) for i in range(10)])
        # test/1-15 grass, test/16-30 wood (see the copy loops above)
        df['class'] = ['grass']*15 + ['wood']*15
        df.to_csv('test_lbp.csv', index=False)
        # Compute the GLCM features for each image in the training data
        data = []
        for i in range(1, 71):
            data.append(compute_glcm('./train/' + str(i) + '.jpg'))
        df = pd.DataFrame(data, columns=['contrast', 'correlation', 'energy', 'homogeneity'])
        df['class'] = ['grass']*35 + ['wood']*35
        df.to_csv('train_glcm.csv', index=False)
        # Compute the GLCM features for each image in the testing data
        data = []
        for i in range(1, 31):
            data.append(compute_glcm('./test/' + str(i) + '.jpg'))
        df = pd.DataFrame(data, columns=['contrast', 'correlation', 'energy', 'homogeneity'])
        df['class'] = ['grass']*15 + ['wood']*15
        df.to_csv('test_glcm.csv', index=False)
    if has_csv:
        # Select Support Vector Machines (SVM) as the classifier.
        # Train the classifier using the training data.
        # Test the classifier using the testing data.
        from sklearn.svm import SVC
        from sklearn.model_selection import GridSearchCV
        from sklearn.metrics import accuracy_score
        from sklearn.metrics import precision_score
        import pandas as pd
        # Load the feature CSVs produced by the has_pics stage
        train_glcm = pd.read_csv('train_glcm.csv')
        test_glcm = pd.read_csv('test_glcm.csv')
        train_lbp = pd.read_csv('train_lbp.csv')
        test_lbp = pd.read_csv('test_lbp.csv')
        # Separate feature matrices from the class labels
        X_train_glcm = train_glcm.drop('class', axis=1)
        y_train_glcm = train_glcm['class']
        X_test_glcm = test_glcm.drop('class', axis=1)
        y_test_glcm = test_glcm['class']
        X_train_lbp = train_lbp.drop('class', axis=1)
        y_train_lbp = train_lbp['class']
        X_test_lbp = test_lbp.drop('class', axis=1)
        y_test_lbp = test_lbp['class']
        # Define the hyperparameter grid for tuning (shared by both classifiers)
        param_grid = {
            'C': [0.1, 1, 10, 100],  # Regularization parameter
            'kernel': ['linear', 'rbf'],  # Kernels to explore: linear and RBF
            'gamma': [1, 0.1, 0.01, 0.001]  # Gamma values for the RBF kernel
        }
        clf_glcm = SVC()
        # Set up GridSearchCV with 5-fold cross-validation
        grid_search = GridSearchCV(clf_glcm, param_grid, cv=5, scoring='accuracy', verbose=2, n_jobs=-1)
        # Perform the grid search to find the best hyperparameters
        grid_search.fit(X_train_glcm, y_train_glcm)
        # Output the best parameters from the search
        print("Best parameters for clf_glcm: ", grid_search.best_params_)
        # Use the best estimator found by GridSearchCV to make predictions.
        # NOTE(review): GridSearchCV refits the best estimator on the full
        # training set by default (refit=True), so the fit below is redundant.
        clf_glcm = grid_search.best_estimator_
        clf_glcm.fit(X_train_glcm, y_train_glcm)
        y_pred_glcm = clf_glcm.predict(X_test_glcm)
        # calculate the accuracy
        print('Accuracy for GLCM features:', accuracy_score(y_test_glcm, y_pred_glcm))
        # calculate the precision (weighted average over the two classes)
        precision = precision_score(y_test_glcm, y_pred_glcm, average='weighted')
        print('Precision for GLCM features:', precision)
        clf_lbp = SVC()
        # Set up GridSearchCV with 5-fold cross-validation
        grid_search = GridSearchCV(clf_lbp, param_grid, cv=5, scoring='accuracy', verbose=2, n_jobs=-1)
        # Perform the grid search to find the best hyperparameters
        grid_search.fit(X_train_lbp, y_train_lbp)
        # Output the best parameters from the search
        print("Best parameters for clf_lbp: ", grid_search.best_params_)
        # Use the best estimator found by GridSearchCV to make predictions
        # (same redundant refit as for clf_glcm above)
        clf_lbp = grid_search.best_estimator_
        clf_lbp.fit(X_train_lbp, y_train_lbp)
        y_pred_lbp = clf_lbp.predict(X_test_lbp)
        # calculate the accuracy
        print('Accuracy for LBP features:', accuracy_score(y_test_lbp, y_pred_lbp))
        # calculate the precision
        precision = precision_score(y_test_lbp, y_pred_lbp, average='weighted')
        print('Precision for LBP features:', precision)
        # Evaluate each classifier on the testing set, compare the results,
        # and save them to a CSV file.
        results = pd.DataFrame({'GLCM_accuracy': [accuracy_score(y_test_glcm, y_pred_glcm)], 'LBP_accuracy': [accuracy_score(y_test_lbp, y_pred_lbp)]})
        # Add the precision scores to the results
        results['GLCM_precision'] = precision_score(y_test_glcm, y_pred_glcm, average='weighted')
        results['LBP_precision'] = precision_score(y_test_lbp, y_pred_lbp, average='weighted')
        results.to_csv('results.csv', index=False)
        # save clf_glcm and clf_lbp as pickle files so classify_image() can load them
        with open('clf_glcm.pkl', 'wb') as f:
            pickle.dump(clf_glcm, f)
        with open('clf_lbp.pkl', 'wb') as f:
            pickle.dump(clf_lbp, f)
        # Use plots to visualize the feature distributions for clf_glcm, clf_lbp
        import matplotlib.pyplot as plt
        import seaborn as sns
        sns.pairplot(train_glcm, hue='class')
        # save the plot to a file
        plt.savefig('train_glcm_distribution.png')
        plt.close()
        sns.pairplot(train_lbp, hue='class')
        # save the plot to a file
        plt.savefig('train_lbp_distribution.png')
        plt.close()
        # Visualize the training features in 2-D using t-SNE
        from sklearn.manifold import TSNE
        tsne = TSNE(n_components=2)
        X_train_glcm_tsne = tsne.fit_transform(X_train_glcm)
        X_train_lbp_tsne = tsne.fit_transform(X_train_lbp)
        # Boolean-mask the embedded points by class label for the scatter plots
        plt.scatter(X_train_glcm_tsne[y_train_glcm == 'grass', 0], X_train_glcm_tsne[y_train_glcm == 'grass', 1], color='red', label='grass')
        plt.scatter(X_train_glcm_tsne[y_train_glcm == 'wood', 0], X_train_glcm_tsne[y_train_glcm == 'wood', 1], color='blue', label='wood')
        plt.legend()
        plt.title('GLCM features')
        # save the plot to a file
        plt.savefig('train_glcm_tsne.png')
        plt.close()
        plt.scatter(X_train_lbp_tsne[y_train_lbp == 'grass', 0], X_train_lbp_tsne[y_train_lbp == 'grass', 1], color='red', label='grass')
        plt.scatter(X_train_lbp_tsne[y_train_lbp == 'wood', 0], X_train_lbp_tsne[y_train_lbp == 'wood', 1], color='blue', label='wood')
        plt.legend()
        plt.title('LBP features')
        # save the plot to a file
        plt.savefig('train_lbp_tsne.png')
        plt.close()
        # plot t-SNE for the testing data; perplexity is lowered to 5,
        # presumably because t-SNE requires perplexity < n_samples and the
        # test set generated above has only 30 rows — TODO confirm
        tsne = TSNE(n_components=2, perplexity=5)
        X_test_glcm_tsne = tsne.fit_transform(X_test_glcm)
        X_test_lbp_tsne = tsne.fit_transform(X_test_lbp)
        plt.scatter(X_test_glcm_tsne[y_test_glcm == 'grass', 0], X_test_glcm_tsne[y_test_glcm == 'grass', 1], color='red', label='grass')
        plt.scatter(X_test_glcm_tsne[y_test_glcm == 'wood', 0], X_test_glcm_tsne[y_test_glcm == 'wood', 1], color='blue', label='wood')
        plt.legend()
        plt.title('GLCM features')
        plt.savefig('test_glcm_tsne.png')
        plt.close()
        plt.scatter(X_test_lbp_tsne[y_test_lbp == 'grass', 0], X_test_lbp_tsne[y_test_lbp == 'grass', 1], color='red', label='grass')
        plt.scatter(X_test_lbp_tsne[y_test_lbp == 'wood', 0], X_test_lbp_tsne[y_test_lbp == 'wood', 1], color='blue', label='wood')
        plt.legend()
        plt.title('LBP features')
        plt.savefig('test_lbp_tsne.png')
        plt.close()