File size: 6,101 Bytes
c971a0f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
from PIL import Image
import numpy as np
from skimage.feature import local_binary_pattern, graycomatrix, graycoprops
from sklearn.svm import LinearSVC
import os
from sklearn.metrics import accuracy_score, precision_score, \
    classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
import joblib

IMAGE_SIZE_GLCM = 256  # square side (px) images are resized to for GLCM features
IMAGE_SIZE_LBP = 128   # square side (px) images are resized to for LBP features

# LBP parameters
RADIUS = 1              # neighborhood radius (pixels) for local binary patterns
N_POINTS = 8 * RADIUS   # number of circularly-symmetric sampling points
LBP_METHOD = "uniform"  # "uniform" LBP -> N_POINTS + 2 distinct codes


def compute_glcm_histogram_pil(image, distances=None, angles=None, levels=8,
                               symmetric=True):
    """Compute a 2-element GLCM texture feature vector for a grayscale image.

    Parameters
    ----------
    image : PIL.Image.Image
        Grayscale ("L" mode) image to describe.
    distances : list of int, optional
        Pixel-pair distances for the co-occurrence matrix (default ``[1]``).
    angles : list of float, optional
        Pixel-pair angles in radians (default ``[0]``).
    levels : int, optional
        Number of gray levels the image is quantized to before building
        the GLCM.
    symmetric : bool, optional
        Whether the GLCM is accumulated symmetrically.

    Returns
    -------
    numpy.ndarray
        ``[homogeneity, correlation]`` for the first distance/angle pair.
    """
    # Mutable default arguments are shared between calls; use None sentinels
    # and create fresh lists here instead.
    if distances is None:
        distances = [1]
    if angles is None:
        angles = [0]

    # Convert the PIL image to a NumPy array
    image_np = np.array(image)

    # Quantize the 8-bit grayscale range [0, 255] down to `levels` bins
    image_np = (image_np * (levels - 1) / 255).astype(np.uint8)

    # Compute the normalized (and optionally symmetric) GLCM
    glcm = graycomatrix(image_np,
                        distances=distances,
                        angles=angles,
                        levels=levels,
                        symmetric=symmetric,
                        normed=True)

    # Scalar texture properties for the first (distance, angle) pair
    homogeneity = graycoprops(glcm, 'homogeneity')[0, 0]
    correlation = graycoprops(glcm, 'correlation')[0, 0]

    # Create the feature vector
    feature_vector = np.array([homogeneity, correlation])

    return feature_vector


def image_resize(img, n):
    """Center-crop *img* to a square and resize it to ``n`` x ``n`` pixels."""
    # The square's side is limited by the smaller of the two dimensions.
    side = min(img.size)
    # Offsets that center the crop box; PIL accepts fractional coordinates.
    x_off = (img.width - side) / 2
    y_off = (img.height - side) / 2
    box = (x_off, y_off, x_off + side, y_off + side)
    return img.crop(box).resize((n, n))


def get_lbp_hist(gray_image, n_points, radius, method):
    """Return the normalized LBP code histogram of a grayscale image array."""
    # Per-pixel local binary pattern codes.
    codes = local_binary_pattern(gray_image, n_points, radius, method)

    # With the "uniform" method there are n_points + 2 distinct codes,
    # so bin each code value individually.
    bin_edges = np.arange(0, n_points + 3)
    hist, _ = np.histogram(codes.ravel(), bins=bin_edges,
                           range=(0, n_points + 2))

    # Turn counts into frequencies; the epsilon guards against an
    # all-zero histogram.
    hist = hist.astype("float")
    hist /= (hist.sum() + 1e-6)
    return hist


def get_features(input_folder, class_label, method):
    """Extract texture features for every image in *input_folder*.

    Parameters
    ----------
    input_folder : str
        Directory scanned (non-recursively) for image files.
    class_label : str
        Label attached to every successfully processed image.
    method : str
        Feature extractor to use: ``"GLCM"`` or ``"LBP"``.

    Returns
    -------
    tuple of (list, list, list)
        Parallel lists of feature vectors, class labels, and file names.
        Unreadable or corrupt images are skipped with a printed warning.

    Raises
    ------
    ValueError
        If *method* is not ``"GLCM"`` or ``"LBP"``.
    """
    # Fail fast on an unsupported method. Previously an unknown method left
    # `hist` undefined, producing a NameError that was swallowed per-file by
    # the broad except clause.
    if method not in ("GLCM", "LBP"):
        raise ValueError(f"Unknown feature method: {method!r}")

    data = []
    labels = []
    filenames = []
    image_files = [f for f in os.listdir(input_folder) if f.lower().endswith((
        '.png', '.jpg', '.jpeg', '.bmp', '.tiff'))]

    print(f"Total images found: {len(image_files)}")

    for file_name in sorted(image_files):
        img_path = os.path.join(input_folder, file_name)
        try:
            # verify() checks file integrity but leaves the handle unusable,
            # so the image must be reopened afterwards. Context managers
            # ensure both file handles are closed (the originals leaked).
            with Image.open(img_path) as img:
                img.verify()
            with Image.open(img_path) as img:
                img_gray = img.convert("L")

            if method == "GLCM":
                img_resized = image_resize(img_gray, IMAGE_SIZE_GLCM)
                hist = compute_glcm_histogram_pil(img_resized)
            else:  # method == "LBP" (validated above)
                img_resized = image_resize(img_gray, IMAGE_SIZE_LBP)
                hist = get_lbp_hist(np.array(img_resized), N_POINTS, RADIUS,
                                    LBP_METHOD)

            data.append(hist)
            labels.append(class_label)
            filenames.append(file_name)  # Store the filenames

        except (FileNotFoundError, PermissionError) as file_err:
            print(f"File error with {file_name}: {file_err}")
        except Image.UnidentifiedImageError:
            print(f"Unidentified image file: {file_name}. Skipping this file.")
        except Exception as e:
            print(f"Unexpected error processing {file_name}: {e}")

    return data, labels, filenames


def main():
    """Train and evaluate a Grass-vs-Wood texture classifier."""
    # Feature extractor to use: "LBP" or "GLCM".
    method = "LBP"

    # Extract features for both classes and merge them into one dataset.
    grass_data, grass_labels, grass_filenames = get_features(
        "./raw_data/raw_grass_dataset", "Grass", method)
    wood_data, wood_labels, wood_filenames = get_features(
        "./raw_data/raw_wood_dataset", "Wood", method)
    data = grass_data + wood_data
    labels = grass_labels + wood_labels
    filenames = grass_filenames + wood_filenames

    # Stratified 70/30 split; the fixed seed keeps runs reproducible.
    (X_train, X_test, y_train, y_test,
     train_filenames, test_filenames) = train_test_split(
        data, labels, filenames, test_size=0.3, random_state=9,
        stratify=labels)

    # Fit a linear SVM and predict on the held-out set.
    model = LinearSVC(C=100, loss="squared_hinge")
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Headline metrics.
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='macro')
    print(f"Accuracy: {accuracy:.2f}")
    print(f"Precision: {precision:.2f}")

    # Per-class precision/recall/F1 breakdown.
    print("\nClassification Report:")
    print(classification_report(y_test, y_pred))

    conf_matrix = confusion_matrix(y_test, y_pred)
    print("Confusion Matrix:")
    print(conf_matrix)

    # Heatmap visualization of the confusion matrix
    # (sklearn orders labels alphabetically: Grass, Wood).
    plt.figure(figsize=(6, 4))
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues',
                xticklabels=["Grass", "Wood"], yticklabels=["Grass", "Wood"])
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.title('Confusion Matrix')
    plt.show()

    # Report the test images the model got wrong.
    print("Misclassified Images:")
    for fname, truth, guess in zip(test_filenames, y_test, y_pred):
        if truth != guess:
            print(fname)

    # Persist the trained model for deployment.
    joblib.dump(model, method + '_model.joblib')


# Run the full train/evaluate pipeline only when executed as a script.
if __name__ == "__main__":
    main()