"""Texture classification of grass vs. wood images.

Extracts either GLCM or LBP texture features from two image folders,
trains a linear SVM, reports accuracy/precision/confusion matrix, and
saves the fitted model with joblib.
"""
from PIL import Image
import numpy as np
from skimage.feature import local_binary_pattern, graycomatrix, graycoprops
from sklearn.svm import LinearSVC
import os
from sklearn.metrics import accuracy_score, precision_score, \
classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
import joblib
# Side length (pixels) of the square image fed to each feature extractor.
IMAGE_SIZE_GLCM = 256
IMAGE_SIZE_LBP = 128
# LBP parameters
RADIUS = 1  # Neighborhood radius for the LBP operator
N_POINTS = 8 * RADIUS  # Sampling points around each pixel
LBP_METHOD = "uniform"  # Variant passed to skimage's local_binary_pattern
def compute_glcm_histogram_pil(image, distances=(1,), angles=(0,), levels=8,
                               symmetric=True):
    """Compute a 2-element GLCM texture feature vector from a PIL image.

    Args:
        image: Grayscale PIL image (pixel values assumed in 0-255).
        distances: Pixel-pair distances for the co-occurrence matrix.
        angles: Pixel-pair angles in radians.
        levels: Number of gray levels the image is quantized into.
        symmetric: Whether the GLCM counts both orderings of each pair.

    Returns:
        np.ndarray of shape (2,): [homogeneity, correlation] for the
        first distance/angle combination.
    """
    # NOTE: defaults are tuples, not lists — mutable default arguments
    # are shared across calls and are a classic Python pitfall.
    # Convert the PIL image to a NumPy array
    image_np = np.array(image)
    # Quantize the 0-255 grayscale range down to `levels` bins
    image_np = (image_np * (levels - 1) / 255).astype(np.uint8)
    # Compute the normalized gray-level co-occurrence matrix
    glcm = graycomatrix(image_np,
                        distances=distances,
                        angles=angles,
                        levels=levels,
                        symmetric=symmetric,
                        normed=True)
    # Summarize the matrix with two scalar texture properties
    homogeneity = graycoprops(glcm, 'homogeneity')[0, 0]
    correlation = graycoprops(glcm, 'correlation')[0, 0]
    return np.array([homogeneity, correlation])
def image_resize(img, n):
    """Center-crop a PIL image to a square and resize it to n x n pixels."""
    # The square side is the smaller of the two dimensions.
    side = min(img.size)
    # Offsets that center the square crop within the original frame.
    x_off = (img.width - side) / 2
    y_off = (img.height - side) / 2
    cropped = img.crop((x_off, y_off, x_off + side, y_off + side))
    return cropped.resize((n, n))
def get_lbp_hist(gray_image, n_points, radius, method):
    """Return the normalized LBP-code histogram of a grayscale image array."""
    # Per-pixel local binary pattern codes.
    codes = local_binary_pattern(gray_image, n_points, radius, method)
    # One bin per code value 0..n_points+1 (uniform patterns + the rest).
    edges = np.arange(0, n_points + 3)
    hist, _ = np.histogram(codes.ravel(), bins=edges,
                           range=(0, n_points + 2))
    # Normalize to unit sum; epsilon guards against an all-zero histogram.
    hist = hist.astype("float")
    return hist / (hist.sum() + 1e-6)
def get_features(input_folder, class_label, method):
    """Extract texture features for every readable image in a folder.

    Args:
        input_folder: Directory containing the image files.
        class_label: Label assigned to every image from this folder.
        method: Feature extractor to use, either "GLCM" or "LBP".

    Returns:
        Tuple (data, labels, filenames) of parallel lists: one feature
        vector, one class label and one filename per processed image.

    Raises:
        ValueError: If `method` is not "GLCM" or "LBP".
    """
    # Fail fast on an unknown method: previously an unrecognized value
    # left `hist` unbound, raising a NameError per file that was then
    # swallowed by the broad except below.
    if method not in ("GLCM", "LBP"):
        raise ValueError(f"Unknown feature method: {method!r}")
    data = []
    labels = []
    filenames = []
    image_files = [f for f in os.listdir(input_folder) if f.lower().endswith((
        '.png', '.jpg', '.jpeg', '.bmp', '.tiff'))]
    print(f"Total images found: {len(image_files)}")
    for file_name in sorted(image_files):
        img_path = os.path.join(input_folder, file_name)
        try:
            # verify() detects corrupt files but leaves the parser
            # unusable, so the image is reopened afterwards.
            img = Image.open(img_path)
            img.verify()
            img = Image.open(img_path)
            img_gray = img.convert("L")
            if method == "GLCM":
                img_resized = image_resize(img_gray, IMAGE_SIZE_GLCM)
                hist = compute_glcm_histogram_pil(img_resized)
            else:  # method == "LBP", guaranteed by the check above
                img_resized = image_resize(img_gray, IMAGE_SIZE_LBP)
                hist = get_lbp_hist(np.array(img_resized), N_POINTS, RADIUS,
                                    LBP_METHOD)
            data.append(hist)
            labels.append(class_label)
            filenames.append(file_name)  # Keep filenames aligned with features
        except (FileNotFoundError, PermissionError) as file_err:
            print(f"File error with {file_name}: {file_err}")
        except Image.UnidentifiedImageError:
            print(f"Unidentified image file: {file_name}. Skipping this file.")
        except Exception as e:
            print(f"Unexpected error processing {file_name}: {e}")
    return data, labels, filenames
def main():
    """Train and evaluate a grass-vs-wood texture classifier."""
    # Feature extraction method for this run.
    method = "LBP"

    # Extract features for both classes from their dataset folders.
    grass_data, grass_labels, grass_filenames = get_features(
        "./raw_data/raw_grass_dataset", "Grass", method)
    wood_data, wood_labels, wood_filenames = get_features(
        "./raw_data/raw_wood_dataset", "Wood", method)

    data = grass_data + wood_data
    labels = grass_labels + wood_labels
    filenames = grass_filenames + wood_filenames

    # Hold out 30% for testing; stratify so both classes keep their ratio.
    split = train_test_split(data, labels, filenames, test_size=0.3,
                             random_state=9, stratify=labels)
    X_train, X_test, y_train, y_test, train_filenames, test_filenames = split

    # Fit a linear SVM and predict on the held-out set.
    classifier = LinearSVC(C=100, loss="squared_hinge")
    classifier.fit(X_train, y_train)
    y_pred = classifier.predict(X_test)

    # Report summary metrics.
    print(f"Accuracy: {accuracy_score(y_test, y_pred):.2f}")
    print(f"Precision: {precision_score(y_test, y_pred, average='macro'):.2f}")
    print("\nClassification Report:")
    print(classification_report(y_test, y_pred))

    conf_matrix = confusion_matrix(y_test, y_pred)
    print("Confusion Matrix:")
    print(conf_matrix)

    # Visualize the confusion matrix as a heatmap.
    plt.figure(figsize=(6, 4))
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues',
                xticklabels=["Grass", "Wood"], yticklabels=["Grass", "Wood"])
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.title('Confusion Matrix')
    plt.show()

    # List every test image the model got wrong, in test-set order.
    print("Misclassified Images:")
    for fname, truth, guess in zip(test_filenames, y_test, y_pred):
        if truth != guess:
            print(fname)

    # Persist the trained model for deployment.
    joblib.dump(classifier, method + '_model.joblib')
# Entry point: run the full feature-extraction/training pipeline as a script.
if __name__ == "__main__":
    main()