sharvari0b26 committed on
Commit
fc858b4
·
verified ·
1 Parent(s): ce5acd6

Upload 3 files

Browse files
Files changed (3) hide show
  1. multiclass_model.pkl +2 -2
  2. script.py +13 -23
  3. utils.py +180 -0
multiclass_model.pkl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4d799eecd128c540ab311a7cb77db6ae088d9b8159a2a6d7f04238ea7859e4d6
3
- size 1178808
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a97e0d9147fd9f3a5750bf863d4fc36eb3de0a60dd4b8952cb7daca408acdc6
3
+ size 665737
script.py CHANGED
@@ -3,39 +3,29 @@ import pickle
3
  import cv2
4
  import pandas as pd
5
  import numpy as np
6
- from utils.utils import extract_features_from_image, perform_pca, train_svm_model
7
 
8
 
9
- def run_inference(TEST_IMAGE_PATH, svm_model, k, SUBMISSION_CSV_SAVE_PATH):
10
-
11
- test_images = os.listdir(TEST_IMAGE_PATH)
12
- test_images.sort()
13
 
14
  image_feature_list = []
15
-
16
  for test_image in test_images:
17
-
18
  path_to_image = os.path.join(TEST_IMAGE_PATH, test_image)
19
-
20
  image = cv2.imread(path_to_image)
21
- image_features = extract_features_from_image(image)
22
-
23
- image_feature_list.append(image_features)
24
-
25
  features_multiclass = np.array(image_feature_list)
26
-
27
- features_multiclass_reduced = perform_pca(features_multiclass, k)
28
-
29
- multiclass_predictions = svm_model.predict(features_multiclass_reduced)
30
 
31
- df_predictions = pd.DataFrame(columns=["file_name", "category_id"])
 
 
 
 
 
32
 
33
- for i in range(len(test_images)):
34
- file_name = test_images[i]
35
- new_row = pd.DataFrame({"file_name": file_name,
36
- "category_id": multiclass_predictions[i]}, index=[0])
37
- df_predictions = pd.concat([df_predictions, new_row], ignore_index=True)
38
-
39
  df_predictions.to_csv(SUBMISSION_CSV_SAVE_PATH, index=False)
40
 
41
 
 
3
  import cv2
4
  import pandas as pd
5
  import numpy as np
6
+ from utils.utils import extract_features_from_image
7
 
8
 
9
def run_inference(TEST_IMAGE_PATH, pipeline_model, SUBMISSION_CSV_SAVE_PATH):
    """Run the trained pipeline over a directory of test images and write a
    submission CSV.

    Args:
        TEST_IMAGE_PATH: directory containing the test images.
        pipeline_model: fitted sklearn estimator/pipeline exposing ``predict``.
        SUBMISSION_CSV_SAVE_PATH: output path for the submission CSV
            (columns: ``file_name``, ``category_id``).

    Raises:
        IOError: if an image file cannot be read by OpenCV.
    """
    # Deterministic ordering so predictions line up with file names.
    test_images = sorted(os.listdir(TEST_IMAGE_PATH))

    image_feature_list = []
    for test_image in test_images:
        path_to_image = os.path.join(TEST_IMAGE_PATH, test_image)
        image = cv2.imread(path_to_image)
        if image is None:
            # cv2.imread silently returns None on unreadable/non-image files;
            # fail loudly here instead of deep inside feature extraction.
            raise IOError(f"Could not read image: {path_to_image}")
        features = extract_features_from_image(image)
        image_feature_list.append(features)

    features_multiclass = np.array(image_feature_list)

    multiclass_predictions = pipeline_model.predict(features_multiclass)

    # Build the whole frame at once instead of row-by-row concatenation.
    df_predictions = pd.DataFrame({
        "file_name": test_images,
        "category_id": multiclass_predictions
    })
    df_predictions.to_csv(SUBMISSION_CSV_SAVE_PATH, index=False)
30
 
31
 
utils.py ADDED
@@ -0,0 +1,180 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cv2
2
+ import numpy as np
3
+ from skimage.feature.texture import graycomatrix, graycoprops
4
+ from skimage.feature import local_binary_pattern ,hog
5
+ from sklearn.decomposition import PCA
6
+ from sklearn.svm import SVC
7
+ from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold
8
+ from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
9
+ from sklearn.feature_selection import SelectKBest, f_classif
10
+ from sklearn.preprocessing import StandardScaler
11
+ from sklearn.pipeline import Pipeline
12
+
13
+
14
def rgb_histogram(image, bins=32):
    """Color descriptor: per-channel RGB and HSV histograms plus color moments.

    Returns a 1-D vector of 6*bins normalized histogram entries followed by
    (mean, std, skew) for each RGB channel.
    """
    # float32 keeps the moment computations numerically stable
    image = image.astype(np.float32)

    feats = []

    # Normalized histogram for each RGB channel.
    for ch in range(3):
        h = cv2.calcHist([image], [ch], None, [bins], [0, 256])
        feats.extend(cv2.normalize(h, h).flatten())

    # HSV histograms; hue spans [0, 180) in OpenCV's 8-bit representation.
    hsv = cv2.cvtColor(image.astype(np.uint8), cv2.COLOR_RGB2HSV)
    for ch, upper in enumerate((180, 256, 256)):
        h = cv2.calcHist([hsv], [ch], None, [bins], [0, upper])
        feats.extend(cv2.normalize(h, h).flatten())

    # First three color moments (mean, std, skew) per RGB channel.
    for ch in range(3):
        channel = image[:, :, ch]
        mu = np.mean(channel)
        sigma = np.std(channel)
        # cube root preserves the sign of the third central moment
        skewness = np.cbrt(np.mean((channel - mu) ** 3))
        feats.extend([mu, sigma, skewness])

    return np.array(feats)
42
+
43
+
44
def hu_moments(image):
    """Log-scaled, clipped Hu moment invariants of the grayscale image."""
    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    hu = cv2.HuMoments(cv2.moments(gray)).flatten()
    # Hu moments span many orders of magnitude; log-scale while keeping sign.
    # The epsilon guards log10 against exact zeros.
    hu = -np.sign(hu) * np.log10(np.abs(hu) + 1e-10)
    # Clip extremes so noisy images don't dominate the feature vector.
    return np.clip(hu, -10, 10)
52
+
53
+
54
def glcm_features(image, distances=(1, 2), angles=(0, np.pi/4, np.pi/2), levels=64):
    """Haralick texture features from gray-level co-occurrence matrices.

    Args:
        image: RGB image array.
        distances: pixel-pair offsets to evaluate. (Tuples instead of the
            original list defaults: mutable default arguments are a Python
            anti-pattern.)
        angles: pixel-pair directions in radians.
        levels: number of gray levels after quantization; keeps the GLCM
            small (levels x levels) and less sparse.

    Returns:
        1-D array of 5 properties per (distance, angle) combination.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    # Quantize 0-255 intensities down to `levels` bins.
    gray = (gray // (256 // levels)).astype(np.uint8)

    # Loop-invariant: same property list for every matrix.
    props = ('contrast', 'dissimilarity', 'homogeneity', 'energy', 'correlation')

    features = []
    for d in distances:
        for a in angles:
            glcm = graycomatrix(gray, distances=[d], angles=[a], levels=levels,
                                symmetric=True, normed=True)
            for p in props:
                features.extend(graycoprops(glcm, p).flatten())

    return np.array(features)
68
+
69
+
70
def local_binary_pattern_features(image, P=8, R=1):
    """Normalized histogram of uniform LBP codes (P + 2 bins)."""
    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    codes = local_binary_pattern(gray, P, R, method='uniform')
    # 'uniform' LBP yields codes in [0, P + 1]; density=True normalizes
    # the histogram so it is independent of image size.
    bin_edges = np.arange(0, P + 3)
    hist, _ = np.histogram(codes.ravel(), bins=bin_edges,
                           range=(0, P + 2), density=True)
    return hist
75
+
76
+
77
# Edge Density (Canny-based)
def edge_density(image, low_threshold=50, high_threshold=150):
    """Fraction of pixels the Canny detector marks as edges, as a 1-element array."""
    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    edge_map = cv2.Canny(gray, low_threshold, high_threshold)
    fraction = np.count_nonzero(edge_map) / edge_map.size
    return np.array([fraction])
83
+
84
+
85
def hog_features(image, pixels_per_cell=(16,16), cells_per_block=(2,2), orientations=9):
    """HOG descriptor of the image resized to a fixed 128x128 grayscale.

    The fixed resize guarantees every image produces a descriptor of
    identical length, so vectors can be stacked into one feature matrix.
    """
    gray = cv2.cvtColor(cv2.resize(image, (128, 128)), cv2.COLOR_RGB2GRAY)
    return hog(
        gray,
        orientations=orientations,
        pixels_per_cell=pixels_per_cell,
        cells_per_block=cells_per_block,
        block_norm='L2-Hys',
        transform_sqrt=True,
        feature_vector=True,
    )
96
+
97
+
98
def extract_features_from_image(image):
    """Concatenate all hand-crafted descriptors into one 1-D feature vector.

    Order: color histograms/moments, Hu moments, GLCM texture, LBP,
    edge density, HOG.
    """
    descriptor_parts = (
        rgb_histogram(image),
        hu_moments(image),
        glcm_features(image),
        local_binary_pattern_features(image),
        edge_density(image),
        hog_features(image),
    )
    return np.concatenate(descriptor_parts)
107
+
108
def perform_pca(data, num_components):
    """Standardize `data` and project it onto its top principal components.

    Args:
        data: 2-D array of shape (n_samples, n_features); NaN/inf entries
            are zeroed before fitting.
        num_components: requested number of components; clamped to what
            PCA can actually fit.

    Returns:
        Array of shape (n_samples, k) with k <= num_components.
    """
    # Replace NaN/inf so StandardScaler and PCA don't propagate them.
    data = np.nan_to_num(data, nan=0.0, posinf=0.0, neginf=0.0)

    # Standardize
    scaler = StandardScaler()
    data_standardized = scaler.fit_transform(data)

    # PCA requires n_components <= min(n_samples, n_features); the
    # original only clamped to n_features and crashed on small batches.
    k = min(num_components, data.shape[0], data.shape[1])
    pca = PCA(n_components=k)
    data_reduced = pca.fit_transform(data_standardized)

    print(f"PCA: Reduced from {data.shape[1]} to {k} components")
    print(f"Explained variance: {np.sum(pca.explained_variance_ratio_):.4f}")

    return data_reduced
125
+
126
def train_svm_model(features, labels,
                    test_size=0.2,
                    random_state=42,
                    use_selectkbest=True,
                    k_best=500,
                    n_pca_components=100,
                    do_gridsearch=False):
    """Train a scaler -> (optional SelectKBest) -> PCA -> linear SVC pipeline.

    Args:
        features: 2-D array of shape (n_samples, n_features).
        labels: 1-D class labels, or one-hot rows (collapsed via argmax).
        test_size: fraction of data held out for evaluation.
        random_state: seed for the split, the SVC, and CV shuffling.
        use_selectkbest: insert an ANOVA-F feature-selection step.
        k_best: number of features kept by SelectKBest.
        n_pca_components: requested PCA dimensionality.
        do_gridsearch: run a stratified 5-fold grid search instead of a
            single fit.

    Returns:
        pipeline: trained sklearn Pipeline (best estimator when grid
            searching).
        (X_test, y_test, y_pred): hold-out data and predictions for quick
            evaluation.
        grid_search: the fitted GridSearchCV if do_gridsearch, else None.
    """
    # Accept one-hot labels by collapsing to class indices.
    if labels.ndim > 1 and labels.shape[1] > 1:
        labels = np.argmax(labels, axis=1)

    # Stratified split keeps class proportions in both halves.
    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_size=test_size, random_state=random_state,
        stratify=labels)

    # Build pipeline steps, tracking how many features PCA will actually see.
    steps = [('scaler', StandardScaler())]
    n_features_in = X_train.shape[1]
    if use_selectkbest:
        k = min(k_best, n_features_in)
        steps.append(('select', SelectKBest(score_func=f_classif, k=k)))
        n_features_after_select = k
    else:
        n_features_after_select = n_features_in
    # BUGFIX: PCA must not request more components than the features it
    # receives (post-SelectKBest, not the raw count) or the number of
    # training samples — the original bound only on the raw feature count,
    # so e.g. k_best < n_pca_components made pipeline.fit() raise.
    pca_k = min(n_pca_components, n_features_after_select, X_train.shape[0])
    steps.append(('pca', PCA(n_components=pca_k)))
    steps.append(('svc', SVC(kernel='linear', probability=True,
                             class_weight='balanced',
                             random_state=random_state)))
    pipeline = Pipeline(steps)

    grid_search = None
    if do_gridsearch:
        param_grid = {
            'select__k': [int(min(200, n_features_in)),
                          int(min(500, n_features_in)),
                          int(min(1000, n_features_in))] if use_selectkbest else [],
            'pca__n_components': [50, 100, 200],
            'svc__C': [0.1, 1, 5, 10]
        }
        # Drop the 'select__k' entry when SelectKBest isn't in the pipeline.
        param_grid = {k: v for k, v in param_grid.items() if v}
        cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=random_state)
        grid_search = GridSearchCV(pipeline, param_grid, cv=cv, n_jobs=-1,
                                   scoring='accuracy', verbose=2)
        grid_search.fit(X_train, y_train)
        pipeline = grid_search.best_estimator_
    else:
        pipeline.fit(X_train, y_train)

    # Hold-out evaluation.
    y_pred = pipeline.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    print(f"Test Accuracy: {acc:.4f}")
    print(classification_report(y_test, y_pred))
    print("Confusion matrix:\n", confusion_matrix(y_test, y_pred))

    return pipeline, (X_test, y_test, y_pred), grid_search
180
+