File size: 14,511 Bytes
a1ee05b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f7d9130
a1ee05b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54f0f05
 
a1ee05b
 
 
 
 
 
 
 
 
 
 
 
 
 
ce867e1
 
 
 
a1ee05b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54f0f05
 
6cd8c15
54f0f05
6cd8c15
47fc75d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c456c52
47fc75d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c456c52
 
 
 
 
 
 
47fc75d
c456c52
 
 
 
 
 
 
 
 
 
 
 
 
47fc75d
 
c456c52
 
47fc75d
c456c52
47fc75d
 
 
 
 
c456c52
 
 
 
 
 
 
 
 
 
 
 
47fc75d
c456c52
47fc75d
c456c52
 
47fc75d
c456c52
47fc75d
 
 
 
 
 
 
f7d9130
47fc75d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6a6f1d4
47fc75d
 
 
 
6a6f1d4
47fc75d
c456c52
47fc75d
 
 
 
 
 
 
 
 
 
 
 
6a6f1d4
47fc75d
 
 
 
 
 
6a6f1d4
c456c52
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6a6f1d4
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
# save the resized image to ./grass_resized/ folder
import os
import cv2
import numpy as np

# Resize the image to 128x128 (default) and save it.
def resize_image(image_path, save_path, size=(128, 128)):
    """Load the image at image_path, resize it to `size`, and write it to save_path.

    Raises:
        FileNotFoundError: if the image cannot be read. cv2.imread returns
            None for missing/unreadable files instead of raising, which would
            otherwise surface as a confusing cv2.resize assertion.
    """
    img = cv2.imread(image_path)
    if img is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")
    img = cv2.resize(img, size)
    cv2.imwrite(save_path, img)

# Do data augmentation by flipping the images horizontally on train data
# Save the augmented data to the same folders
def augment_image(image_path, save_path):
    """Randomly augment an image on disk and write the result to save_path.

    Two transforms are applied, each independently with probability 0.5:
    a horizontal flip, followed by a 90-degree clockwise rotation.
    """
    augmented = cv2.imread(image_path)
    # Horizontal mirror, 50% of the time.
    if np.random.rand() > 0.5:
        augmented = cv2.flip(augmented, 1)
    # Quarter-turn clockwise, 50% of the time.
    if np.random.rand() > 0.5:
        augmented = cv2.rotate(augmented, cv2.ROTATE_90_CLOCKWISE)
    cv2.imwrite(save_path, augmented)
    
# Compute the GLCM for each image.
# Extract features like contrast, correlaton, energy, and homogeneity.
# Save the features to a CSV file.
# Label each feature vector with the correct class (grass or wood).
import pandas as pd
from skimage.feature import graycomatrix, graycoprops

def compute_glcm(image_path, ispath=True):
    '''Compute mean GLCM features for an image.

    Accepts either a file path (ispath=True) or an already-loaded grayscale
    array (ispath=False). Returns [contrast, correlation, energy, homogeneity],
    each averaged over all distance/angle combinations.
    '''
    if ispath:
        img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    else:
        img = image_path
    # GLCM at distance 3 over four angles: 0, 45, 90, 135 degrees.
    angles = [0, np.pi / 4, np.pi / 2, 3 * np.pi / 4]
    glcm = graycomatrix(img, [3], angles, 256, symmetric=True, normed=True)
    # Each graycoprops call yields a (n_distances, n_angles) array;
    # averaging it directly equals averaging its flattened copy.
    properties = ('contrast', 'correlation', 'energy', 'homogeneity')
    return [np.mean(graycoprops(glcm, prop)) for prop in properties]

# Apply the LBP operator to each image.
# Generate histograms of LBP codes to create feature vectors.
# Save the features to a CSV file.
# Label each feature vector with the correct class (grass or wood).
from skimage.feature import local_binary_pattern
import pickle
import warnings

def compute_lbp(image_path, ispath=True):
    """Return a normalized 10-bin histogram of uniform LBP codes (P=8, R=1).

    Accepts either a file path (ispath=True) or an already-loaded grayscale
    array (ispath=False).
    """
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE) if ispath else image_path
    codes = local_binary_pattern(img, 8, 1, 'uniform')
    # 'uniform' LBP with P=8 produces code values 0..9 -> bins [0, 10].
    histogram, _ = np.histogram(codes, bins=np.arange(0, 11), density=True)
    return histogram

def classify_image(image, algorithm):
    """Classify an image as grass or wood with a pre-trained SVM.

    Parameters
    ----------
    image : np.ndarray or str
        A loaded BGR image array, or a path to an image file.
        (The original code silently fell through to a NameError on
        ``img_gray`` for any non-ndarray input.)
    algorithm : str
        'GLCM' to use the GLCM classifier; any other value uses LBP.

    Returns
    -------
    The predicted class label ('grass' or 'wood', as stored in the
    pickled classifiers clf_glcm.pkl / clf_lbp.pkl).

    Raises
    ------
    FileNotFoundError
        If ``image`` is a path that cannot be read.
    """
    # Suppress the warning about feature names
    warnings.filterwarnings("ignore", message="X does not have valid feature names")

    # Load the pre-trained classifiers; use context managers so the
    # file handles are closed promptly (the originals were leaked).
    with open('clf_glcm.pkl', 'rb') as f:
        clf_glcm = pickle.load(f)
    with open('clf_lbp.pkl', 'rb') as f:
        clf_lbp = pickle.load(f)

    # Accept both an in-memory array and a file path.
    if isinstance(image, np.ndarray):
        img = image
    else:
        img = cv2.imread(image)
        if img is None:
            raise FileNotFoundError(f"Could not read image: {image}")
    img = cv2.resize(img, (128, 128))
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Extract features with the selected algorithm.
    if algorithm == 'GLCM':
        features = compute_glcm(img_gray, ispath=False)
    else:
        features = compute_lbp(img_gray, ispath=False)

    # Convert features to a DataFrame to match the format used in training.
    features_df = pd.DataFrame([features])

    # Make predictions using the pre-trained classifiers
    if algorithm == 'GLCM':
        prediction = clf_glcm.predict(features_df)[0]
    else:
        prediction = clf_lbp.predict(features_df)[0]

    return prediction

# If the script is run directly, perform the preprocessing, training, and testing steps.
if __name__ == '__main__':
    # Pipeline toggles:
    #   has_pics — run image resizing / split / augmentation / feature
    #              extraction (requires ./grass/ and ./wood/ source folders).
    #   has_csv  — train and evaluate the SVMs from the feature CSV files.
    has_pics = False
    has_csv = True
    if has_pics:
        # read image data from ./grass/ folder
        if not os.path.exists('./grass_resized/'):
            os.makedirs('./grass_resized/')

        # rename the image file to 1.jpg, 2.jpg, 3.jpg, ...
        count = 1
        for file in os.listdir('./grass/'):
            if file.endswith('.jpg') or file.endswith('.jpeg') or file.endswith('.png'):
                resize_image('./grass/' + file, './grass_resized/' + str(count) + '.jpg')
                count += 1

        print('Done!')

        # save the resized image to ./wood_resized/ folder
        if not os.path.exists('./wood_resized/'):
            os.makedirs('./wood_resized/')

        # rename the image file to 1.jpg, 2.jpg, 3.jpg, ...
        count = 1
        for file in os.listdir('./wood/'):
            if file.endswith('.jpg') or file.endswith('.jpeg') or file.endswith('.png'):
                resize_image('./wood/' + file, './wood_resized/' + str(count) + '.jpg')
                count += 1
            
        print('Done!')

        # Divide the data into training and testing data: 70% training, 30% testing
        # (assumes exactly 50 images per class: 1..35 train, 36..50 test).
        # Merge grass and wood data into training and testing data
        # Save the training data to ./train/ folder
        # Save the testing data to ./test/ folder
        import shutil

        if not os.path.exists('./train/'):
            os.makedirs('./train/')
        if not os.path.exists('./test/'):
            os.makedirs('./test/')

        # Rename files so that they do not overwrite each other:
        # train/1..35 = grass, train/36..70 = wood (+35 offset);
        # test/1..15  = grass (-35 offset), test/16..30 = wood (-20 offset).
        for i in range(1, 36):
            shutil.copy('./grass_resized/' + str(i) + '.jpg', './train/' + str(i) + '.jpg')
        for i in range(36, 51):
            shutil.copy('./grass_resized/' +
                        str(i) + '.jpg', './test/' + str(i - 35) + '.jpg')
        for i in range(1, 36):
            shutil.copy('./wood_resized/' + str(i) + '.jpg', './train/' + str(i + 35) + '.jpg')
        for i in range(36, 51):
            shutil.copy('./wood_resized/' +
                        str(i) + '.jpg', './test/' + str(i - 20) + '.jpg')

        # NOTE(review): augmentation below writes train/71..120 from
        # train/1..50, so train/51..70 are never augmented, and the
        # feature-extraction loops further down read only train/1..70 —
        # the augmented copies appear to be unused. Confirm intent.
        for i in range(1, 36):
            augment_image('./train/' + str(i) + '.jpg', './train/' + str(i + 70) + '.jpg')
        for i in range(36, 51):
            augment_image('./train/' + str(i) + '.jpg', './train/' + str(i + 70) + '.jpg')

        # Compute the LBP for each image in the training data
        data = []
        for i in range(1, 71):
            data.append(compute_lbp('./train/' + str(i) + '.jpg'))
        df = pd.DataFrame(data, columns=['lbp_' + str(i) for i in range(10)])
        # Labels follow the copy order above: 1..35 grass, 36..70 wood.
        df['class'] = ['grass']*35 + ['wood']*35
        df.to_csv('train_lbp.csv', index=False)

        # Compute the LBP for each image in the testing data
        data = []
        for i in range(1, 31):
            data.append(compute_lbp('./test/' + str(i) + '.jpg'))
        df = pd.DataFrame(data, columns=['lbp_' + str(i) for i in range(10)])
        df['class'] = ['grass']*15 + ['wood']*15
        df.to_csv('test_lbp.csv', index=False)

        # Compute the GLCM for each image in the training data
        data = []
        for i in range(1, 71):
            data.append(compute_glcm('./train/' + str(i) + '.jpg'))
        df = pd.DataFrame(data, columns=['contrast', 'correlation', 'energy', 'homogeneity'])
        df['class'] = ['grass']*35 + ['wood']*35
        df.to_csv('train_glcm.csv', index=False)

        # Compute the GLCM for each image in the testing data
        data = []
        for i in range(1, 31):
            data.append(compute_glcm('./test/' + str(i) + '.jpg'))
        df = pd.DataFrame(data, columns=['contrast', 'correlation', 'energy', 'homogeneity'])
        df['class'] = ['grass']*15 + ['wood']*15
        df.to_csv('test_glcm.csv', index=False)

    if has_csv:

        # Select Support Vector Machines (SVM) as the classifier.
        # Train the classifier using the training data.
        # Test the classifier using the testing data.
        from sklearn.svm import SVC
        from sklearn.model_selection import GridSearchCV
        from sklearn.metrics import accuracy_score
        from sklearn.metrics import precision_score
        import pandas as pd

        train_glcm = pd.read_csv('train_glcm.csv')
        test_glcm = pd.read_csv('test_glcm.csv')
        train_lbp = pd.read_csv('train_lbp.csv')
        test_lbp = pd.read_csv('test_lbp.csv')

        # Split each CSV into feature matrix X and label vector y.
        X_train_glcm = train_glcm.drop('class', axis=1)
        y_train_glcm = train_glcm['class']
        X_test_glcm = test_glcm.drop('class', axis=1)
        y_test_glcm = test_glcm['class']

        X_train_lbp = train_lbp.drop('class', axis=1)
        y_train_lbp = train_lbp['class']
        X_test_lbp = test_lbp.drop('class', axis=1)
        y_test_lbp = test_lbp['class']

        # Define the parameter grid for tuning (shared by both classifiers).
        param_grid = {
            'C': [0.1, 1, 10, 100],         # Regularization parameter
            'kernel': ['linear', 'rbf'],     # Kernels to explore: linear and RBF
            'gamma': [1, 0.1, 0.01, 0.001]  # Gamma values for RBF kernel
        }

        clf_glcm = SVC()

        # Set up GridSearchCV with 5-fold cross-validation
        grid_search = GridSearchCV(clf_glcm, param_grid, cv=5, scoring='accuracy', verbose=2, n_jobs=-1)

        # Perform the grid search to find the best hyperparameters
        grid_search.fit(X_train_glcm, y_train_glcm)

        # Output the best parameters from the search
        print("Best parameters for clf_glcm: ", grid_search.best_params_)

        # Use the best estimator found by GridSearchCV to make predictions
        clf_glcm = grid_search.best_estimator_

        # Refit on the full training set (best_estimator_ is already fitted
        # when refit=True, the GridSearchCV default; this refit is harmless).
        clf_glcm.fit(X_train_glcm, y_train_glcm)
        y_pred_glcm = clf_glcm.predict(X_test_glcm)

        # calculate the accuracy
        print('Accuracy for GLCM features:', accuracy_score(y_test_glcm, y_pred_glcm))
        
        # calculate the precision (weighted by per-class support)
        precision = precision_score(y_test_glcm, y_pred_glcm, average='weighted')
        print('Precision for GLCM features:', precision)

        clf_lbp = SVC()
        # Set up GridSearchCV with 5-fold cross-validation
        grid_search = GridSearchCV(clf_lbp, param_grid, cv=5, scoring='accuracy', verbose=2, n_jobs=-1)

        # Perform the grid search to find the best hyperparameters
        grid_search.fit(X_train_lbp, y_train_lbp)

        # Output the best parameters from the search
        print("Best parameters for clf_lbp: ", grid_search.best_params_)

        # Use the best estimator found by GridSearchCV to make predictions
        clf_lbp = grid_search.best_estimator_

        clf_lbp.fit(X_train_lbp, y_train_lbp)

        y_pred_lbp = clf_lbp.predict(X_test_lbp)

        # calculate the accuracy
        print('Accuracy for LBP features:', accuracy_score(y_test_lbp, y_pred_lbp))
        
        # calculate the precision (weighted by per-class support)
        precision = precision_score(y_test_lbp, y_pred_lbp, average='weighted')
        print('Precision for LBP features:', precision)

        # Evaluate each classifier on the testing set.
        # Compare the results.
        # Save the results to a CSV file.
        results = pd.DataFrame({'GLCM_accuracy': [accuracy_score(y_test_glcm, y_pred_glcm)], 'LBP_accuracy': [accuracy_score(y_test_lbp, y_pred_lbp)]})
        # Add the precision to the results
        results['GLCM_precision'] = precision_score(y_test_glcm, y_pred_glcm, average='weighted')
        results['LBP_precision'] = precision_score(y_test_lbp, y_pred_lbp, average='weighted')
        results.to_csv('results.csv', index=False)
        # save clf_glcm and clf_lbp as pickle files (consumed by classify_image)
        with open('clf_glcm.pkl', 'wb') as f:
            pickle.dump(clf_glcm, f)
        with open('clf_lbp.pkl', 'wb') as f:
            pickle.dump(clf_lbp, f)        


        # Use plots to visualize feature distributions and decision boundaries of the classifiers clf_glcm, clf_lbp
        import matplotlib.pyplot as plt
        import seaborn as sns

        # Pairwise scatter/KDE of raw GLCM features, colored by class.
        sns.pairplot(train_glcm, hue='class')
        # save the plot to a file
        plt.savefig('train_glcm_distribution.png')
        plt.close()

        sns.pairplot(train_lbp, hue='class')
        # save the plot to a file
        plt.savefig('train_lbp_distribution.png')
        plt.close()

        # Use plots to visualize feature distributions and decision boundaries of the classifiers clf_glcm, clf_lbp using t-sne
        from sklearn.manifold import TSNE

        # 2-D t-SNE embedding of the training features for visualization.
        tsne = TSNE(n_components=2)
        X_train_glcm_tsne = tsne.fit_transform(X_train_glcm)
        X_train_lbp_tsne = tsne.fit_transform(X_train_lbp)

        plt.scatter(X_train_glcm_tsne[y_train_glcm == 'grass', 0], X_train_glcm_tsne[y_train_glcm == 'grass', 1], color='red', label='grass')
        plt.scatter(X_train_glcm_tsne[y_train_glcm == 'wood', 0], X_train_glcm_tsne[y_train_glcm == 'wood', 1], color='blue', label='wood')
        plt.legend()
        plt.title('GLCM features')
        # save the plot to a file
        plt.savefig('train_glcm_tsne.png')
        plt.close()

        plt.scatter(X_train_lbp_tsne[y_train_lbp == 'grass', 0], X_train_lbp_tsne[y_train_lbp == 'grass', 1], color='red', label='grass')
        plt.scatter(X_train_lbp_tsne[y_train_lbp == 'wood', 0], X_train_lbp_tsne[y_train_lbp == 'wood', 1], color='blue', label='wood')
        plt.legend()
        plt.title('LBP features')
        # save the plot to a file
        plt.savefig('train_lbp_tsne.png')
        plt.close()

        # plot t-sne for the testing data
        # (perplexity=5 because the test set has only 30 samples; t-SNE
        # requires perplexity < n_samples)
        tsne = TSNE(n_components=2, perplexity=5)
        X_test_glcm_tsne = tsne.fit_transform(X_test_glcm)
        X_test_lbp_tsne = tsne.fit_transform(X_test_lbp)

        plt.scatter(X_test_glcm_tsne[y_test_glcm == 'grass', 0], X_test_glcm_tsne[y_test_glcm == 'grass', 1], color='red', label='grass')
        plt.scatter(X_test_glcm_tsne[y_test_glcm == 'wood', 0], X_test_glcm_tsne[y_test_glcm == 'wood', 1], color='blue', label='wood')
        plt.legend()
        plt.title('GLCM features')
        plt.savefig('test_glcm_tsne.png')
        plt.close()

        plt.scatter(X_test_lbp_tsne[y_test_lbp == 'grass', 0], X_test_lbp_tsne[y_test_lbp == 'grass', 1], color='red', label='grass')
        plt.scatter(X_test_lbp_tsne[y_test_lbp == 'wood', 0], X_test_lbp_tsne[y_test_lbp == 'wood', 1], color='blue', label='wood')
        plt.legend()
        plt.title('LBP features')
        plt.savefig('test_lbp_tsne.png')
        plt.close()