Jawedg committed on
Commit
3439540
·
verified ·
1 Parent(s): 9a53dfc

Upload 8 files

Browse files
multiclass_model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:370de33c15c7ab48760b081b281e9019c8f44ce10b5db38aa53f5771a61083f8
3
+ size 800800
script.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import pickle
3
+ import cv2
4
+ import pandas as pd
5
+ import numpy as np
6
+ from utils.utils import extract_features_from_image, perform_pca, train_svm_model
7
+
8
+
9
def run_inference(TEST_IMAGE_PATH, svm_model, k, SUBMISSION_CSV_SAVE_PATH):
    """Run the multiclass SVM on every image in a directory and write a submission CSV.

    Parameters:
    - TEST_IMAGE_PATH: directory containing the test images.
    - svm_model: trained multiclass SVM classifier (sklearn SVC-like, has .predict).
    - k: number of principal components to keep before classification.
    - SUBMISSION_CSV_SAVE_PATH: destination for the CSV (columns: file_name, category_id).

    Raises:
    - ValueError: if a file in TEST_IMAGE_PATH cannot be decoded as an image.
    """
    # Sort for a deterministic row order in the submission file.
    test_images = sorted(os.listdir(TEST_IMAGE_PATH))

    image_feature_list = []
    for test_image in test_images:
        path_to_image = os.path.join(TEST_IMAGE_PATH, test_image)
        image = cv2.imread(path_to_image)
        if image is None:
            # cv2.imread silently returns None for unreadable/non-image files;
            # fail loudly here instead of with a cryptic error inside feature
            # extraction.
            raise ValueError(f"Could not read image file: {path_to_image}")
        image_feature_list.append(extract_features_from_image(image))

    features_multiclass = np.array(image_feature_list)
    # NOTE(review): PCA is fit on the test set alone here; the projection must
    # match the one used at training time for the predictions to be meaningful.
    features_multiclass_reduced = perform_pca(features_multiclass, k)

    multiclass_predictions = svm_model.predict(features_multiclass_reduced)

    # Build the DataFrame in one shot instead of pd.concat inside a loop,
    # which is O(n^2) in the number of rows.
    df_predictions = pd.DataFrame({
        "file_name": test_images,
        "category_id": multiclass_predictions,
    })
    df_predictions.to_csv(SUBMISSION_CSV_SAVE_PATH, index=False)
40
+
41
+
42
+
43
+
44
if __name__ == "__main__":

    # Resolve all paths relative to this script so it runs from any CWD.
    current_directory = os.path.dirname(os.path.abspath(__file__))
    TEST_IMAGE_PATH = "/tmp/data/test_images"
    MODEL_PATH = os.path.join(current_directory, "multiclass_model.pkl")
    SUBMISSION_CSV_SAVE_PATH = os.path.join(current_directory, "submission.csv")

    # Number of principal components retained before classification.
    k = 100

    # Load the trained SVM classifier from its pickle file.
    with open(MODEL_PATH, 'rb') as file:
        svm_model = pickle.load(file)

    run_inference(TEST_IMAGE_PATH, svm_model, k, SUBMISSION_CSV_SAVE_PATH)
utils/__init__.py ADDED
File without changes
utils/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (171 Bytes). View file
 
utils/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (171 Bytes). View file
 
utils/__pycache__/utils.cpython-312.pyc ADDED
Binary file (8.98 kB). View file
 
utils/__pycache__/utils.cpython-39.pyc ADDED
Binary file (4.09 kB). View file
 
utils/utils.py ADDED
@@ -0,0 +1,179 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cv2
2
+ import numpy as np
3
+ from skimage.feature.texture import graycomatrix, graycoprops
4
+ from skimage.feature import local_binary_pattern
5
+ from sklearn.decomposition import PCA
6
+ from sklearn.svm import SVC
7
+ from sklearn.model_selection import train_test_split
8
+ from sklearn.metrics import accuracy_score
9
+ from sklearn.preprocessing import StandardScaler
10
+
11
def rgb_histogram(image, bins=256):
    """Per-channel normalized intensity histogram, concatenated over 3 channels.

    Parameters:
    - image: H x W x 3 array.
    - bins: number of histogram bins per channel (default 256).

    Returns a 1-D array of length 3 * bins.
    """
    channel_hists = [
        np.histogram(image[:, :, channel], bins=bins, range=(0, 256), density=True)[0]
        for channel in range(3)  # RGB Channels
    ]
    return np.concatenate(channel_hists)
17
+
18
def hu_moments(image):
    """Seven Hu moment invariants of the grayscale version of *image*."""
    # Moments are defined on 2-D images, so collapse to a single channel first.
    grayscale = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    return cv2.HuMoments(cv2.moments(grayscale)).flatten()
24
+
25
def glcm_features(image, distances=[1], angles=[0], levels=256, symmetric=True, normed=True):
    """Haralick texture statistics from a grey-level co-occurrence matrix.

    Returns the flattened contrast, dissimilarity, homogeneity, energy,
    correlation and ASM properties, concatenated into one 1-D vector.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    glcm = graycomatrix(grayscale, distances=distances, angles=angles,
                        levels=levels, symmetric=symmetric, normed=normed)
    # Order matters: it defines the layout of the returned feature vector.
    properties = ('contrast', 'dissimilarity', 'homogeneity',
                  'energy', 'correlation', 'ASM')
    return np.concatenate([graycoprops(glcm, prop).flatten() for prop in properties])
35
+
36
def local_binary_pattern_features(image, P=8, R=1):
    """Normalized histogram of uniform local binary patterns (P + 2 bins)."""
    grayscale = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    codes = local_binary_pattern(grayscale, P, R, method='uniform')
    # The 'uniform' method yields integer codes in [0, P + 1]; one bin per code.
    hist, _ = np.histogram(codes.ravel(), bins=np.arange(0, P + 3),
                           range=(0, P + 2), density=True)
    return hist
41
+
42
# Function to compute Edge Detection Features
def edge_detection(image):
    """Single-element feature vector: mean value of the Canny edge map.

    NOTE(review): the edge map holds values 0/255, so this is 255x the
    fraction of edge pixels, not a true proportion. Confirm this matches the
    feature used at training time before "fixing" the scale.
    """
    # Canny operates on a single-channel image.
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    edge_map = cv2.Canny(grayscale, 100, 200)

    edge_density = np.sum(edge_map) / edge_map.size
    return np.array([edge_density])
53
+
54
# Function to compute Color Moments
def color_moments(image):
    """First three statistical moments (mean, variance, skewness) of each HSV channel.

    Parameters:
    - image: BGR image array (OpenCV convention).

    Returns a 1-D array of 9 values: (mean, var, skew) for H, S, V in order.
    """
    # Convert to HSV color space
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

    moments = []
    for i in range(3):  # H, S, V channels
        channel = hsv[:, :, i]
        mean = np.mean(channel)
        var = np.var(channel)
        std = np.std(channel)
        # Guard against a constant channel (std == 0), which previously
        # produced NaN via a 0/0 division; define the skewness as 0 there.
        skew = np.mean((channel - mean) ** 3) / (std ** 3) if std > 0 else 0.0
        moments.extend([mean, var, skew])

    return np.array(moments)
68
+
69
# Function to compute Fourier Transform Features
def fourier_transform(image):
    """Summary statistics (mean, variance, entropy) of the 2-D FFT magnitude spectrum."""
    # The FFT is taken over the single-channel grayscale image.
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Shift the zero-frequency component to the center, then take magnitudes.
    spectrum = np.abs(np.fft.fftshift(np.fft.fft2(grayscale)))

    mean_freq = np.mean(spectrum)
    var_freq = np.var(spectrum)
    # Shannon-style entropy; the epsilon avoids log(0).
    entropy_freq = -np.sum(spectrum * np.log(spectrum + 1e-10))

    return np.array([mean_freq, var_freq, entropy_freq])
87
+
88
def extract_features_from_image(image):
    """Build the feature vector for one image.

    Concatenates the RGB histogram, Hu moments, GLCM texture statistics and
    LBP histogram into a single 1-D array.

    NOTE(review): edge_detection, color_moments and fourier_transform used to
    be computed here but were never included in the concatenated output; the
    dead calls have been removed (the returned vector is unchanged). If they
    were meant to be features, the model must be retrained with them before
    adding them to the concatenation.
    """
    hist_features = rgb_histogram(image)
    hu_features = hu_moments(image)
    glcm_features_vector = glcm_features(image)
    lbp_features = local_binary_pattern_features(image)

    # Combine all features into one array (order defines the model's input layout).
    return np.concatenate([hist_features, hu_features,
                           glcm_features_vector, lbp_features])
102
+
103
+
104
def perform_pca(data, num_components):
    """
    Perform Principal Component Analysis (PCA) on the input data.

    Parameters:
    - data (numpy.ndarray): The input data with shape (n_samples, n_features).
    - num_components (int): The number of principal components to retain.

    Returns:
    - data_reduced (numpy.ndarray): Real-valued array of shape
      (n_samples, num_components) — the data in the reduced PCA space.
      (The docstring previously claimed eigenvectors/eigenvalues were also
      returned; only the reduced data ever was.)
    """

    # Step 1: Standardize the Data. A zero-variance feature would cause a
    # 0/0 division producing NaNs; its std is replaced by 1, which leaves
    # the (already all-zero) centered column all-zero after division.
    mean = np.mean(data, axis=0)
    std_dev = np.std(data, axis=0)
    std_dev = np.where(std_dev == 0, 1.0, std_dev)
    data_standardized = (data - mean) / std_dev

    # Step 2: Compute the Covariance Matrix
    covariance_matrix = np.cov(data_standardized, rowvar=False)

    # Step 3: Calculate Eigenvalues and Eigenvectors.
    # NOTE(review): np.linalg.eigh would be preferable for a symmetric matrix,
    # but eig is kept to reproduce the exact training-time projection.
    eigenvalues, eigenvectors = np.linalg.eig(covariance_matrix)

    # Step 4: Sort Eigenvalues and Eigenvectors in descending order
    sorted_indices = np.argsort(eigenvalues)[::-1]
    sorted_eigenvectors = eigenvectors[:, sorted_indices]

    # Step 5: Select the top k Eigenvectors
    top_k_eigenvectors = sorted_eigenvectors[:, :num_components]

    # Step 6: Transform the Data using the top k eigenvectors
    data_reduced = np.dot(data_standardized, top_k_eigenvectors)

    # Return the real part of the data (in case of numerical imprecision)
    return np.real(data_reduced)
144
+
145
+
146
def train_svm_model(features, labels, test_size=0.2):
    """
    Trains an SVM model and returns the trained model.

    Parameters:
    - features: Feature matrix of shape (B, F)
    - labels: Labels of shape (B,), or (B, C) one-hot (converted internally)
    - test_size: Fraction of the data held out for evaluation (default 0.2)

    Returns:
    - svm_model: Trained SVM model
    """
    # Collapse one-hot label matrices to a single class index per sample.
    if labels.ndim > 1 and labels.shape[1] > 1:
        labels = np.argmax(labels, axis=1)

    # Hold out a test split for the accuracy report below.
    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_size=test_size, random_state=42)

    # RBF-kernel classifier with default regularization (tune kernel/C as needed).
    svm_model = SVC(kernel='rbf', C=1.0)
    svm_model.fit(X_train, y_train)

    # Quick sanity check: accuracy on the held-out split.
    accuracy = accuracy_score(y_test, svm_model.predict(X_test))
    print(f'Test Accuracy: {accuracy:.2f}')

    return svm_model