Jawedg committed on
Commit
be0426e
·
verified ·
1 Parent(s): 940ba45

Upload 8 files

Browse files
multiclass_model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f251836551ff7f51f9d600a3fc2c7880c6d02c9d650bc962d1ef1c3290be702
3
+ size 5590092
script.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import pickle
3
+ import cv2
4
+ import pandas as pd
5
+ import numpy as np
6
+ from utils.utils import extract_features_from_image, perform_pca, train_svm_model,standardize_features
7
+
8
+
9
def run_inference(TEST_IMAGE_PATH, svm_model, k, SUBMISSION_CSV_SAVE_PATH):
    """Run the multiclass SVM on every image in a directory and write a submission CSV.

    Parameters:
    - TEST_IMAGE_PATH: directory containing the test images.
    - svm_model: trained classifier exposing a ``.predict`` method.
    - k: number of principal components to keep before prediction.
    - SUBMISSION_CSV_SAVE_PATH: destination for a CSV with columns
      "file_name" and "category_id" (no index column).

    Raises:
    - ValueError: if any file in the directory cannot be decoded as an image.
    """
    # Sorted for a deterministic row order in the submission file.
    test_images = sorted(os.listdir(TEST_IMAGE_PATH))

    image_feature_list = []
    for test_image in test_images:
        path_to_image = os.path.join(TEST_IMAGE_PATH, test_image)
        image = cv2.imread(path_to_image)
        if image is None:
            # cv2.imread returns None (no exception) for unreadable files;
            # fail loudly here instead of crashing inside feature extraction.
            raise ValueError(f"Could not read image: {path_to_image}")
        image_feature_list.append(extract_features_from_image(image))

    features_multiclass = np.array(image_feature_list)
    # NOTE(review): the scaler and PCA are re-fit on the test set here rather
    # than reusing transforms fitted on the training data — confirm this
    # matches how the pickled model was trained.
    features_multiclass_standardized = standardize_features(features_multiclass)
    features_multiclass_reduced = perform_pca(features_multiclass_standardized, k)

    multiclass_predictions = svm_model.predict(features_multiclass_reduced)

    # Build the frame in one shot instead of pd.concat inside a loop, which
    # re-copies the accumulated frame on every iteration (O(n^2)).
    df_predictions = pd.DataFrame({
        "file_name": test_images,
        "category_id": multiclass_predictions,
    })
    df_predictions.to_csv(SUBMISSION_CSV_SAVE_PATH, index=False)
41
+
42
+
43
+
44
+
45
if __name__ == "__main__":
    # All artifacts (model, submission) live next to this script; the test
    # images are expected under /tmp/data.
    script_dir = os.path.dirname(os.path.abspath(__file__))

    test_image_dir = "/tmp/data/test_images"
    model_path = os.path.join(script_dir, "multiclass_model.pkl")
    submission_path = os.path.join(script_dir, "submission.csv")
    num_components = 200  # PCA dimensionality used when the model was built

    # Deserialize the trained SVM classifier.
    with open(model_path, 'rb') as fh:
        model = pickle.load(fh)

    run_inference(test_image_dir, model, num_components, submission_path)
utils/__init__.py ADDED
File without changes
utils/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (171 Bytes). View file
 
utils/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (171 Bytes). View file
 
utils/__pycache__/utils.cpython-312.pyc ADDED
Binary file (8.98 kB). View file
 
utils/__pycache__/utils.cpython-39.pyc ADDED
Binary file (4.09 kB). View file
 
utils/utils.py ADDED
@@ -0,0 +1,185 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cv2
2
+ import numpy as np
3
+ from skimage.feature.texture import graycomatrix, graycoprops
4
+ from skimage.feature import local_binary_pattern
5
+ from sklearn.decomposition import PCA
6
+ from sklearn.svm import SVC
7
+ from sklearn.model_selection import train_test_split
8
+ from sklearn.metrics import accuracy_score
9
+ from sklearn.preprocessing import StandardScaler
10
+
11
def rgb_histogram(image, bins=256):
    """Per-channel intensity histograms (density-normalized), concatenated.

    Parameters:
    - image: H x W x 3 array of 8-bit intensities (values binned over [0, 256)).
    - bins: number of histogram bins per channel.

    Returns a 1-D array of length 3 * bins.
    """
    return np.concatenate([
        np.histogram(image[:, :, channel], bins=bins, range=(0, 256), density=True)[0]
        for channel in range(3)  # one histogram per color channel
    ])
17
+
18
def hu_moments(image):
    """Seven Hu invariant moments of the grayscale version of `image`.

    Returns a flat array of length 7.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    return cv2.HuMoments(cv2.moments(grayscale)).flatten()
24
+
25
def glcm_features(image, distances=[1], angles=[0], levels=256, symmetric=True, normed=True):
    """Haralick texture statistics from a gray-level co-occurrence matrix.

    Computes contrast, dissimilarity, homogeneity, energy, correlation and
    ASM for each (distance, angle) pair and concatenates them, in that order,
    into one flat vector.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    glcm = graycomatrix(gray, distances=distances, angles=angles, levels=levels,
                        symmetric=symmetric, normed=normed)
    props = ('contrast', 'dissimilarity', 'homogeneity', 'energy', 'correlation', 'ASM')
    return np.concatenate([graycoprops(glcm, prop).flatten() for prop in props])
35
+
36
def local_binary_pattern_features(image, P=8, R=1):
    """Normalized histogram of uniform LBP codes.

    Uniform LBP with P neighbors yields P + 2 distinct codes, hence P + 2 bins.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    codes = local_binary_pattern(gray, P, R, method='uniform')
    hist, _ = np.histogram(codes.ravel(), bins=np.arange(0, P + 3),
                           range=(0, P + 2), density=True)
    return hist
41
+
42
# Function to compute Edge Detection Features
def edge_detection(image):
    """Edge density of the image: fraction of pixels Canny marks as edges.

    Returns a 1-element array so it concatenates with the other descriptors.
    """
    # Convert to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Apply Canny edge detection (edge pixels are set to 255, others to 0)
    edges = cv2.Canny(gray, 100, 200)

    # Bug fix: summing the raw 0/255 pixel values overstated the documented
    # "proportion of edge pixels" by a factor of 255; count edge pixels
    # instead. (Downstream features are z-scored, so this constant rescale
    # does not change standardized features fed to the model.)
    edge_density = np.count_nonzero(edges) / edges.size
    return np.array([edge_density])
53
+
54
# Function to compute Color Moments
def color_moments(image):
    """Mean, variance and skewness of each HSV channel (9 values total).

    A constant channel has zero standard deviation; its skewness is defined
    as 0.0 here instead of the NaN the naive 0/0 formula would produce.
    """
    # Convert to HSV color space
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

    moments = []
    for i in range(3):  # H, S, V channels
        channel = hsv[:, :, i]
        mean = np.mean(channel)
        var = np.var(channel)
        std = np.std(channel)
        # Third standardized moment; guard the zero-variance case.
        skew = np.mean((channel - mean) ** 3) / (std ** 3) if std > 0 else 0.0
        moments.extend([mean, var, skew])

    return np.array(moments)
68
+
69
# Function to compute Fourier Transform Features
def fourier_transform(image):
    """Summary statistics of the centered FFT magnitude spectrum.

    Returns [mean, variance, entropy] of the magnitude spectrum of the
    grayscale image.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # 2-D FFT with the zero-frequency component shifted to the center,
    # then take magnitudes.
    spectrum = np.abs(np.fft.fftshift(np.fft.fft2(gray)))

    # Shannon-style entropy of the (unnormalized) spectrum; the epsilon
    # keeps log() finite at zero magnitudes.
    entropy_freq = -np.sum(spectrum * np.log(spectrum + 1e-10))

    return np.array([np.mean(spectrum), np.var(spectrum), entropy_freq])
87
+
88
def extract_features_from_image(image):
    """Concatenate every hand-crafted descriptor of `image` into one vector.

    Applies, in order: RGB histogram, Hu moments, GLCM statistics, LBP
    histogram, edge density, HSV color moments and Fourier statistics.
    """
    extractors = (
        rgb_histogram,
        hu_moments,
        glcm_features,
        local_binary_pattern_features,
        edge_detection,
        color_moments,
        fourier_transform,
    )
    # Stack all descriptors into a single flat feature vector.
    return np.concatenate([extract(image) for extract in extractors])
102
+
103
def standardize_features(features):
    """Scale each feature column to zero mean and unit variance.

    Note: a fresh StandardScaler is fit on `features` itself on every call —
    the fitted scaler is not retained for reuse.
    """
    return StandardScaler().fit_transform(features)
109
+
110
def perform_pca(data, num_components):
    """Project `data` onto its top principal components.

    Parameters:
    - data (numpy.ndarray): input of shape (n_samples, n_features).
    - num_components (int): number of principal components to retain.

    Returns:
    - data_reduced (numpy.ndarray): shape (n_samples, num_components),
      the real-valued projection onto the top components.
      (Previous docstring claimed eigenvectors/eigenvalues were also
      returned; only the projection is.)
    """
    # Step 1: Standardize the data. Guard zero-variance columns, which
    # would otherwise produce NaNs from a 0/0 division.
    mean = np.mean(data, axis=0)
    std_dev = np.std(data, axis=0)
    safe_std = np.where(std_dev == 0, 1.0, std_dev)
    data_standardized = (data - mean) / safe_std

    # Step 2: Covariance matrix of the standardized data.
    covariance_matrix = np.cov(data_standardized, rowvar=False)

    # Step 3: Eigendecomposition. np.linalg.eig may report complex values
    # for a numerically-symmetric matrix; imaginary parts are dropped below.
    eigenvalues, eigenvectors = np.linalg.eig(covariance_matrix)

    # Step 4: Order components by decreasing eigenvalue.
    sorted_indices = np.argsort(eigenvalues)[::-1]
    sorted_eigenvectors = eigenvectors[:, sorted_indices]

    # Steps 5-6: Keep the top-k eigenvectors and project the data onto them.
    top_k_eigenvectors = sorted_eigenvectors[:, :num_components]
    data_reduced = np.dot(data_standardized, top_k_eigenvectors)

    # Discard spurious imaginary parts introduced by numerical imprecision.
    return np.real(data_reduced)
150
+
151
+
152
def train_svm_model(features, labels, test_size=0.2):
    """Fit an RBF-kernel SVM on a train split and report held-out accuracy.

    Parameters:
    - features: feature matrix of shape (B, F).
    - labels: (B,) integer class ids, or (B, C) one-hot rows (converted
      internally to single labels).
    - test_size: fraction of samples held out for evaluation (default 0.2).

    Returns:
    - svm_model: the fitted SVC instance.
    """
    # Collapse one-hot rows to integer class ids when necessary.
    if labels.ndim > 1 and labels.shape[1] > 1:
        labels = np.argmax(labels, axis=1)

    # Fixed random_state keeps the split reproducible across runs.
    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_size=test_size, random_state=42)

    # RBF kernel with default regularization; tune kernel/C as needed.
    svm_model = SVC(kernel='rbf', C=1.0)
    svm_model.fit(X_train, y_train)

    # Report held-out accuracy as a quick sanity check.
    accuracy = accuracy_score(y_test, svm_model.predict(X_test))
    print(f'Test Accuracy: {accuracy:.2f}')

    return svm_model