sharvari0b26 commited on
Commit
bfff813
·
1 Parent(s): 617684f

Add changes

Browse files
multiclass_model.pkl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4d799eecd128c540ab311a7cb77db6ae088d9b8159a2a6d7f04238ea7859e4d6
3
- size 1178808
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a97e0d9147fd9f3a5750bf863d4fc36eb3de0a60dd4b8952cb7daca408acdc6
3
+ size 665737
phase_1a_sample_solution_multiclass.ipynb CHANGED
@@ -9,7 +9,7 @@
9
  },
10
  {
11
  "cell_type": "code",
12
- "execution_count": 23,
13
  "metadata": {},
14
  "outputs": [
15
  {
@@ -18,7 +18,7 @@
18
  "<module 'submission.utils.utils' from 'c:\\\\Users\\\\sharv\\\\Documents\\\\TUHH\\\\sem-3\\\\intelligent systems in medicine\\\\project\\\\baselines\\\\phase_1a\\\\submission\\\\utils\\\\utils.py'>"
19
  ]
20
  },
21
- "execution_count": 23,
22
  "metadata": {},
23
  "output_type": "execute_result"
24
  }
@@ -39,14 +39,14 @@
39
  },
40
  {
41
  "cell_type": "code",
42
- "execution_count": 24,
43
  "metadata": {},
44
  "outputs": [
45
  {
46
  "name": "stdout",
47
  "output_type": "stream",
48
  "text": [
49
- "Features shape: (2845, 2213)\n",
50
  "Labels shape: (2845,)\n",
51
  "[1 1 1 ... 1 2 1]\n"
52
  ]
@@ -96,40 +96,12 @@
96
  "cell_type": "markdown",
97
  "metadata": {},
98
  "source": [
99
- "## B.2. Use Principal Component Analysis to reduce dimensionality"
100
  ]
101
  },
102
  {
103
  "cell_type": "code",
104
- "execution_count": null,
105
- "metadata": {},
106
- "outputs": [
107
- {
108
- "name": "stdout",
109
- "output_type": "stream",
110
- "text": [
111
- "PCA: Reduced from 433 to 100 components\n",
112
- "Explained variance: 0.9929\n"
113
- ]
114
- }
115
- ],
116
- "source": [
117
- "# k = 100\n",
118
- "# features_multiclass_reduced = utils.perform_pca(features_multiclass, k)\n",
119
- "\n",
120
- "# did not perform pca for training"
121
- ]
122
- },
123
- {
124
- "cell_type": "markdown",
125
- "metadata": {},
126
- "source": [
127
- "# C. Train Classification Model for Multiclass"
128
- ]
129
- },
130
- {
131
- "cell_type": "code",
132
- "execution_count": 25,
133
  "metadata": {},
134
  "outputs": [
135
  {
@@ -139,18 +111,18 @@
139
  "Test Accuracy: 0.9666\n",
140
  " precision recall f1-score support\n",
141
  "\n",
142
- " 0 0.98 0.95 0.96 167\n",
143
- " 1 0.95 0.98 0.97 253\n",
144
- " 2 0.99 0.96 0.97 149\n",
145
  "\n",
146
  " accuracy 0.97 569\n",
147
- " macro avg 0.97 0.96 0.97 569\n",
148
  "weighted avg 0.97 0.97 0.97 569\n",
149
  "\n",
150
  "Confusion matrix:\n",
151
  " [[158 9 0]\n",
152
- " [ 2 249 2]\n",
153
- " [ 1 5 143]]\n"
154
  ]
155
  }
156
  ],
@@ -176,12 +148,12 @@
176
  }
177
  ],
178
  "source": [
179
- "print(multiclass_model)\n"
180
  ]
181
  },
182
  {
183
  "cell_type": "code",
184
- "execution_count": 26,
185
  "metadata": {},
186
  "outputs": [],
187
  "source": [
@@ -193,13 +165,6 @@
193
  "with open(os.path.join(SAVE_PATH, \"multiclass_model.pkl\"), \"wb\") as f:\n",
194
  " pickle.dump(multiclass_model, f)\n"
195
  ]
196
- },
197
- {
198
- "cell_type": "code",
199
- "execution_count": null,
200
- "metadata": {},
201
- "outputs": [],
202
- "source": []
203
  }
204
  ],
205
  "metadata": {
 
9
  },
10
  {
11
  "cell_type": "code",
12
+ "execution_count": 39,
13
  "metadata": {},
14
  "outputs": [
15
  {
 
18
  "<module 'submission.utils.utils' from 'c:\\\\Users\\\\sharv\\\\Documents\\\\TUHH\\\\sem-3\\\\intelligent systems in medicine\\\\project\\\\baselines\\\\phase_1a\\\\submission\\\\utils\\\\utils.py'>"
19
  ]
20
  },
21
+ "execution_count": 39,
22
  "metadata": {},
23
  "output_type": "execute_result"
24
  }
 
39
  },
40
  {
41
  "cell_type": "code",
42
+ "execution_count": 40,
43
  "metadata": {},
44
  "outputs": [
45
  {
46
  "name": "stdout",
47
  "output_type": "stream",
48
  "text": [
49
+ "Features shape: (2845, 2013)\n",
50
  "Labels shape: (2845,)\n",
51
  "[1 1 1 ... 1 2 1]\n"
52
  ]
 
96
  "cell_type": "markdown",
97
  "metadata": {},
98
  "source": [
99
+ "# B. Train Classification Model for Multiclass"
100
  ]
101
  },
102
  {
103
  "cell_type": "code",
104
+ "execution_count": 41,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
  "metadata": {},
106
  "outputs": [
107
  {
 
111
  "Test Accuracy: 0.9666\n",
112
  " precision recall f1-score support\n",
113
  "\n",
114
+ " 0 0.97 0.95 0.96 167\n",
115
+ " 1 0.95 0.98 0.96 253\n",
116
+ " 2 0.99 0.97 0.98 149\n",
117
  "\n",
118
  " accuracy 0.97 569\n",
119
+ " macro avg 0.97 0.97 0.97 569\n",
120
  "weighted avg 0.97 0.97 0.97 569\n",
121
  "\n",
122
  "Confusion matrix:\n",
123
  " [[158 9 0]\n",
124
+ " [ 5 247 1]\n",
125
+ " [ 0 4 145]]\n"
126
  ]
127
  }
128
  ],
 
148
  }
149
  ],
150
  "source": [
151
+ "print(multiclass_model)"
152
  ]
153
  },
154
  {
155
  "cell_type": "code",
156
+ "execution_count": 43,
157
  "metadata": {},
158
  "outputs": [],
159
  "source": [
 
165
  "with open(os.path.join(SAVE_PATH, \"multiclass_model.pkl\"), \"wb\") as f:\n",
166
  " pickle.dump(multiclass_model, f)\n"
167
  ]
 
 
 
 
 
 
 
168
  }
169
  ],
170
  "metadata": {
utils/utils.py CHANGED
@@ -10,26 +10,33 @@ from sklearn.feature_selection import SelectKBest, f_classif
10
  from sklearn.preprocessing import StandardScaler
11
  from sklearn.pipeline import Pipeline
12
 
 
13
  def rgb_histogram(image, bins=32):
14
  features = []
 
 
 
 
 
15
  for i in range(3):
16
  hist = cv2.calcHist([image], [i], None, [bins], [0, 256])
17
  hist = cv2.normalize(hist, hist).flatten()
18
  features.extend(hist)
19
-
20
- hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
21
- h_hist = cv2.calcHist([hsv], [0], None, [bins], [0, 180])
22
- s_hist = cv2.calcHist([hsv], [1], None, [bins], [0, 256])
23
- v_hist = cv2.calcHist([hsv], [2], None, [bins], [0, 256])
24
- for hist in (h_hist, s_hist, v_hist):
25
  hist = cv2.normalize(hist, hist).flatten()
26
  features.extend(hist)
27
-
 
28
  for i in range(3):
29
- channel = image[:, :, i].astype(np.float32)
30
- features.append(np.mean(channel))
31
- features.append(np.std(channel))
32
- features.append(np.median(channel))
 
33
 
34
  return np.array(features)
35
 
@@ -37,108 +44,66 @@ def rgb_histogram(image, bins=32):
37
  def hu_moments(image):
38
  gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
39
  moments = cv2.moments(gray)
40
- hu_moments = cv2.HuMoments(moments).flatten()
41
- hu_moments = -np.sign(hu_moments) * np.log10(np.abs(hu_moments) + 1e-10)
42
- return hu_moments
 
 
 
43
 
44
- def glcm_features_improved(image):
45
  gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
46
-
47
- gray = (gray // 4).astype(np.uint8)
48
-
49
  features = []
50
- for distance in [1, 3]:
51
- for angle in [0, np.pi/4, np.pi/2, 3*np.pi/4]:
52
- glcm = graycomatrix(gray, distances=[distance], angles=[angle],
53
- levels=64, symmetric=True, normed=True)
54
-
55
  props = ['contrast', 'dissimilarity', 'homogeneity', 'energy', 'correlation']
56
- for prop in props:
57
- feature_val = graycoprops(glcm, prop).flatten()
58
- features.extend(feature_val)
59
-
60
  return np.array(features)
61
 
 
62
  def local_binary_pattern_features(image, P=8, R=1):
63
  gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
64
  lbp = local_binary_pattern(gray, P, R, method='uniform')
65
- (hist, _) = np.histogram(lbp.ravel(), bins=np.arange(0, P + 3), range=(0, P + 2), density=True)
66
  return hist
67
 
 
 
68
  def edge_density(image, low_threshold=50, high_threshold=150):
69
  gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
70
  edges = cv2.Canny(gray, low_threshold, high_threshold)
71
  density = np.sum(edges > 0) / edges.size
72
  return np.array([density])
73
 
74
- def hog_features(image, pixels_per_cell=(32, 32), cells_per_block=(1, 1), orientations=8):
 
75
  image_resized = cv2.resize(image, (128, 128))
76
  gray = cv2.cvtColor(image_resized, cv2.COLOR_RGB2GRAY)
77
-
78
- # More detailed HOG parameters
79
  hog_feat = hog(gray,
80
- orientations=9,
81
- pixels_per_cell=(16, 16),
82
- cells_per_block=(2, 2),
83
  block_norm='L2-Hys',
84
- feature_vector=True,
85
- transform_sqrt=True)
86
  return hog_feat
87
 
88
- def spatial_pyramid_features(image, levels=2):
89
- features = []
90
- h, w = image.shape[:2]
91
-
92
- for level in range(levels):
93
- num_rows = 2 ** level
94
- num_cols = 2 ** level
95
-
96
- for i in range(num_rows):
97
- for j in range(num_cols):
98
- row_start = int(i * h / num_rows)
99
- row_end = int((i + 1) * h / num_rows)
100
- col_start = int(j * w / num_cols)
101
- col_end = int((j + 1) * w / num_cols)
102
-
103
- patch = image[row_start:row_end, col_start:col_end]
104
- if patch.size > 0:
105
- patch_features = rgb_histogram(patch, bins=32)
106
- features.extend(patch_features)
107
-
108
- return np.array(features)
109
 
110
  def extract_features_from_image(image):
111
- """
112
- Select best features using correlation removal and ANOVA F-test
113
- """
114
- #1. RGB Histogram
115
- hist_features = rgb_histogram(image)
116
-
117
- # 2. Hu Moments
118
- hu_features = hu_moments(image)
119
-
120
- # 3. GLCM Features with multiple distances/angles
121
- glcm_features_vector = glcm_features_improved(image)
122
-
123
- # 4. Improved HOG
124
- hog_feat = hog_features(image)
125
-
126
- # 5. Spatial pyramid (level 1 only for efficiency)
127
- spatial_feat = spatial_pyramid_features(image, levels=1)
128
-
129
- # Remove less important features to reduce noise
130
- # Consider removing edge_density or LBP if they don't help
131
-
132
- # Concatenate selected features
133
- image_features = np.concatenate([
134
- hist_features,
135
- hu_features,
136
- glcm_features_vector,
137
- hog_feat,
138
- spatial_feat
139
- ])
140
-
141
- return image_features
142
 
143
  def perform_pca(data, num_components):
144
  # Clean data
 
10
  from sklearn.preprocessing import StandardScaler
11
  from sklearn.pipeline import Pipeline
12
 
13
+
14
def rgb_histogram(image, bins=32):
    """Color feature vector for an H x W x 3 RGB image.

    Concatenates, in order: per-channel RGB histograms, per-channel HSV
    histograms, and the first three color moments (mean, std, skew) of
    each RGB channel.

    NOTE(review): assumes `image` is RGB-ordered with values in
    [0, 255] — confirm against the caller.
    """
    # Work in float32 so the moment statistics are computed stably.
    image = image.astype(np.float32)

    feats = []

    # Normalized per-channel RGB histograms.
    for ch in range(3):
        h = cv2.calcHist([image], [ch], None, [bins], [0, 256])
        feats.extend(cv2.normalize(h, h).flatten())

    # HSV histograms; OpenCV stores hue in [0, 180), S/V in [0, 256).
    hsv = cv2.cvtColor(image.astype(np.uint8), cv2.COLOR_RGB2HSV)
    upper_bounds = (180, 256, 256)
    for ch in range(3):
        h = cv2.calcHist([hsv], [ch], None, [bins], [0, upper_bounds[ch]])
        feats.extend(cv2.normalize(h, h).flatten())

    # Color moments per channel. The cube root keeps the skew term on a
    # scale comparable to mean/std while preserving its sign.
    for ch in range(3):
        channel = image[:, :, ch]
        mu = np.mean(channel)
        feats.extend([mu,
                      np.std(channel),
                      np.cbrt(np.mean((channel - mu) ** 3))])

    return np.array(feats)
42
 
 
44
def hu_moments(image):
    """Log-scaled, sign-preserving Hu moment invariants of the grayscale image.

    Returns the seven Hu moments, log10-compressed and clipped to
    [-10, 10] to limit the influence of noise on downstream models.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    hu = cv2.HuMoments(cv2.moments(gray)).flatten()
    # The log transform compresses the moments' huge dynamic range; the
    # sign factor keeps orientation information. Epsilon avoids log10(0).
    scaled = -np.sign(hu) * np.log10(np.abs(hu) + 1e-10)
    return np.clip(scaled, -10, 10)
52
+
53
 
54
def glcm_features(image, distances=(1, 2), angles=(0, np.pi/4, np.pi/2), levels=64):
    """Compute GLCM texture statistics over multiple distances and angles.

    Parameters
    ----------
    image : RGB image array (converted to grayscale internally).
    distances : pixel offsets for co-occurrence pairs.
    angles : orientations in radians for co-occurrence pairs.
    levels : number of gray levels after quantization; must divide 256.

    Returns
    -------
    np.ndarray with contrast, dissimilarity, homogeneity, energy and
    correlation values for each (distance, angle) pair.
    """
    # NOTE: defaults changed from mutable lists to tuples — mutable
    # default arguments are shared across calls and a classic pitfall.
    # Backward compatible: callers passing lists are unaffected.
    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    # Quantize to `levels` bins so the co-occurrence matrix stays small.
    gray = (gray // (256 // levels)).astype(np.uint8)

    features = []
    for d in distances:
        for a in angles:
            glcm = graycomatrix(gray, distances=[d], angles=[a],
                                levels=levels, symmetric=True, normed=True)
            for prop in ('contrast', 'dissimilarity', 'homogeneity',
                         'energy', 'correlation'):
                features.extend(graycoprops(glcm, prop).flatten())

    return np.array(features)
68
 
69
+
70
  def local_binary_pattern_features(image, P=8, R=1):
71
  gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
72
  lbp = local_binary_pattern(gray, P, R, method='uniform')
73
+ hist, _ = np.histogram(lbp.ravel(), bins=np.arange(0, P + 3), range=(0, P + 2), density=True)
74
  return hist
75
 
76
+
77
+ # Edge Density (Canny-based)
78
def edge_density(image, low_threshold=50, high_threshold=150):
    """Fraction of pixels marked as edges by a Canny detector.

    Returned as a 1-element array so it concatenates cleanly with the
    other feature vectors.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    edges = cv2.Canny(gray, low_threshold, high_threshold)
    fraction = np.count_nonzero(edges) / edges.size
    return np.array([fraction])
83
 
84
+
85
def hog_features(image, pixels_per_cell=(16, 16), cells_per_block=(2, 2), orientations=9):
    """HOG descriptor of the image, resized to a fixed 128x128 grayscale.

    Resizing first guarantees a constant-length descriptor regardless of
    the input image dimensions.
    """
    gray = cv2.cvtColor(cv2.resize(image, (128, 128)), cv2.COLOR_RGB2GRAY)
    return hog(
        gray,
        orientations=orientations,
        pixels_per_cell=pixels_per_cell,
        cells_per_block=cells_per_block,
        block_norm='L2-Hys',
        transform_sqrt=True,
        feature_vector=True,
    )
96
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
 
98
def extract_features_from_image(image):
    """Concatenate every hand-crafted descriptor into one feature vector.

    Order matters for any model trained on these features: color
    histograms, Hu moments, GLCM, LBP, edge density, then HOG.
    """
    extractors = (
        rgb_histogram,
        hu_moments,
        glcm_features,
        local_binary_pattern_features,
        edge_density,
        hog_features,
    )
    return np.concatenate([extract(image) for extract in extractors])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
 
108
  def perform_pca(data, num_components):
109
  # Clean data