"""Texture classification (grass vs. wood) using GLCM or LBP features and a linear SVM."""

from PIL import Image
import numpy as np
from skimage.feature import local_binary_pattern, graycomatrix, graycoprops
from sklearn.svm import LinearSVC
import os
from sklearn.metrics import accuracy_score, precision_score, \
    classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
import joblib

# Side length (pixels) images are resized to before feature extraction.
IMAGE_SIZE_GLCM = 256
IMAGE_SIZE_LBP = 128

# LBP parameters
RADIUS = 1
N_POINTS = 8 * RADIUS
LBP_METHOD = "uniform"


def compute_glcm_histogram_pil(image, distances=(1,), angles=(0,), levels=8,
                               symmetric=True):
    """Compute a 2-element GLCM feature vector [homogeneity, correlation].

    Args:
        image: grayscale PIL image (mode "L").
        distances: pixel-pair distances for the co-occurrence matrix.
        angles: pixel-pair angles in radians.
        levels: number of gray levels the image is quantized to.
        symmetric: whether the GLCM is accumulated symmetrically.

    Returns:
        np.ndarray of shape (2,): [homogeneity, correlation].
    """
    image_np = np.asarray(image)
    # Quantize to `levels` gray levels. Multiply in float first: doing
    # `image_np * (levels - 1)` directly on a uint8 array can overflow
    # (255 * 7 > 255) under NumPy's value-preserving promotion rules,
    # which would silently corrupt the gray-level bins.
    image_np = (image_np.astype(np.float64) * (levels - 1) / 255).astype(np.uint8)

    glcm = graycomatrix(image_np, distances=list(distances), angles=list(angles),
                        levels=levels, symmetric=symmetric, normed=True)

    homogeneity = graycoprops(glcm, 'homogeneity')[0, 0]
    correlation = graycoprops(glcm, 'correlation')[0, 0]
    return np.array([homogeneity, correlation])


def image_resize(img, n):
    """Center-crop `img` to a square and resize it to n x n pixels.

    Args:
        img: PIL image.
        n: target side length in pixels.

    Returns:
        A new n x n PIL image.
    """
    side = min(img.size)
    # Integer crop coordinates: PIL crop boxes are pixel offsets, so avoid
    # passing float halves from `/ 2`.
    left = (img.width - side) // 2
    top = (img.height - side) // 2
    img = img.crop((left, top, left + side, top + side))
    return img.resize((n, n))


def get_lbp_hist(gray_image, n_points, radius, method):
    """Compute a normalized LBP histogram for a grayscale image array.

    Args:
        gray_image: 2-D numpy array of gray values.
        n_points: number of circularly symmetric neighbor points.
        radius: radius of the LBP circle.
        method: LBP method string (e.g. "uniform").

    Returns:
        1-D float histogram of length n_points + 2, summing to ~1.
    """
    lbp = local_binary_pattern(gray_image, n_points, radius, method)
    # "uniform" LBP yields codes in [0, n_points + 1]; bin each code.
    lbp_hist, _ = np.histogram(lbp.ravel(),
                               bins=np.arange(0, n_points + 3),
                               range=(0, n_points + 2))
    lbp_hist = lbp_hist.astype("float")
    # Epsilon guards against division by zero on a degenerate image.
    lbp_hist /= (lbp_hist.sum() + 1e-6)
    return lbp_hist


def get_features(input_folder, class_label, method):
    """Extract one feature vector per readable image in `input_folder`.

    Args:
        input_folder: directory containing image files.
        class_label: label assigned to every image in the folder.
        method: "GLCM" or "LBP".

    Returns:
        (data, labels, filenames) — parallel lists of feature vectors,
        class labels, and source file names. Unreadable files are
        skipped with a message.

    Raises:
        ValueError: if `method` is not "GLCM" or "LBP".
    """
    # Fail fast on an unknown method; previously `hist` would simply be
    # unbound and every image would die in the broad exception handler.
    if method not in ("GLCM", "LBP"):
        raise ValueError(f"Unknown feature method: {method}")

    data = []
    labels = []
    filenames = []
    image_files = [f for f in os.listdir(input_folder)
                   if f.lower().endswith(('.png', '.jpg', '.jpeg',
                                          '.bmp', '.tiff'))]
    print(f"Total images found: {len(image_files)}")

    for file_name in sorted(image_files):
        img_path = os.path.join(input_folder, file_name)
        try:
            # verify() invalidates the file object, so reopen afterwards.
            # Context managers close the underlying file handles, which
            # the original code leaked.
            with Image.open(img_path) as probe:
                probe.verify()
            with Image.open(img_path) as img:
                img_gray = img.convert("L")

            if method == "GLCM":
                img_resized = image_resize(img_gray, IMAGE_SIZE_GLCM)
                hist = compute_glcm_histogram_pil(img_resized)
            else:  # "LBP" — guaranteed by the check above
                img_resized = image_resize(img_gray, IMAGE_SIZE_LBP)
                hist = get_lbp_hist(np.array(img_resized),
                                    N_POINTS, RADIUS, LBP_METHOD)

            data.append(hist)
            labels.append(class_label)
            filenames.append(file_name)  # Store the filenames
        except (FileNotFoundError, PermissionError) as file_err:
            print(f"File error with {file_name}: {file_err}")
        except Image.UnidentifiedImageError:
            print(f"Unidentified image file: {file_name}. Skipping this file.")
        except Exception as e:
            print(f"Unexpected error processing {file_name}: {e}")

    return data, labels, filenames


def main():
    """Train and evaluate a grass/wood texture classifier, then save it."""
    # Feature extraction method: "GLCM" or "LBP".
    method = "LBP"

    # Load both classes from their raw-data folders.
    grass_data, grass_labels, grass_filenames = get_features(
        "./raw_data/raw_grass_dataset", "Grass", method)
    wood_data, wood_labels, wood_filenames = get_features(
        "./raw_data/raw_wood_dataset", "Wood", method)

    data = grass_data + wood_data
    labels = grass_labels + wood_labels
    filenames = grass_filenames + wood_filenames  # Combine filenames

    # Stratified train/test split; filenames ride along so misclassified
    # test images can be reported by name.
    X_train, X_test, y_train, y_test, train_filenames, test_filenames = \
        train_test_split(data, labels, filenames,
                         test_size=0.3, random_state=9, stratify=labels)

    # Train the model
    model = LinearSVC(C=100, loss="squared_hinge")
    model.fit(X_train, y_train)

    # Make predictions on the test set
    y_pred = model.predict(X_test)

    # Calculate accuracy and precision
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='macro')

    # Print the results
    print(f"Accuracy: {accuracy:.2f}")
    print(f"Precision: {precision:.2f}")

    # Get a classification report for additional metrics
    print("\nClassification Report:")
    print(classification_report(y_test, y_pred))

    # Calculate the confusion matrix
    conf_matrix = confusion_matrix(y_test, y_pred)
    print("Confusion Matrix:")
    print(conf_matrix)

    # Create a heatmap for visualization
    plt.figure(figsize=(6, 4))
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues',
                xticklabels=["Grass", "Wood"],
                yticklabels=["Grass", "Wood"])
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.title('Confusion Matrix')
    plt.show()

    # Identify misclassified images by name.
    misclassified = [fname
                     for fname, true, pred in zip(test_filenames, y_test, y_pred)
                     if true != pred]
    print("Misclassified Images:")
    for fname in misclassified:
        print(fname)

    # Save model parameters for deployment
    joblib.dump(model, method + '_model.joblib')


if __name__ == "__main__":
    main()