Kalhar.Pandya committed on
Commit
600cada
·
1 Parent(s): 61bb710
__pycache__/feature_extractor.cpython-312.pyc ADDED
Binary file (6.93 kB). View file
 
app.py CHANGED
@@ -7,8 +7,8 @@ import gradio as gr
7
  # Import the feature extraction function from feature_extractor.py
8
  from feature_extractor import extract_features_from_image
9
 
10
- # Global variables for the classifier, class names, and training log
11
- classifier = None
12
  class_names = []
13
  training_log = ""
14
 
@@ -16,26 +16,34 @@ training_log = ""
16
  # Model Loading
17
  # ---------------------------------------------------------------------
18
  def load_model(model_filename):
19
- global classifier, class_names, training_log
20
  if os.path.exists(model_filename):
21
- print("Found existing SVM model. Loading...")
22
  with open(model_filename, "rb") as f:
23
  model_data = pickle.load(f)
24
- classifier = model_data['classifier']
25
  class_names = model_data['class_names']
26
  training_log += "Loaded model from disk.\n"
27
- print("Loaded SVM model from disk.")
28
  else:
29
  print(f"Model file {model_filename} not found. Please train the model first.")
30
 
31
- def classify_new_image(input_image_path):
 
 
 
32
  """
33
- Expects input_image_path as a file path. Loads the image,
34
- processes it, and returns the final prediction and probabilities.
 
35
  """
36
- global classifier, training_log, class_names
37
  progress_log = training_log + "\nStarting classification...\n"
38
 
 
 
 
 
39
  # Load image using OpenCV from file path
40
  image = cv2.imread(input_image_path)
41
  if image is None:
@@ -114,43 +122,30 @@ def classify_new_image(input_image_path):
114
  print(prob_dict)
115
  return final_prediction, prob_dict
116
 
117
-
118
- # Gradio Interface Setup using file paths
119
- if __name__ == "__main__":
120
- model_filename = "svm_model_color.pkl"
121
- load_model(model_filename)
122
-
123
- iface = gr.Interface(
124
- fn=classify_new_image,
125
- inputs=gr.Image(type="filepath"),
126
- outputs=[
127
- gr.Label(label="Predicted Class"),
128
- gr.Label(label="Probabilities")
129
- ],
130
- title="Stone, Wood, Brick Classifier",
131
- description=("Upload an image of stone, wood, or brick to classify it.\n\n"
132
- "The image is processed by subdividing it into patches and aggregating the predictions. "
133
- "Progress logs are printed to the terminal.")
134
- )
135
- iface.launch(share=True)
136
-
137
  # ---------------------------------------------------------------------
138
- # Gradio Interface Setup
139
  # ---------------------------------------------------------------------
140
  if __name__ == "__main__":
141
- model_filename = "svm_model2.pkl"
142
  load_model(model_filename)
143
 
 
 
 
 
144
  iface = gr.Interface(
145
  fn=classify_new_image,
146
- inputs=gr.Image(type="filepath"),
 
 
 
147
  outputs=[
148
  gr.Label(label="Predicted Class"),
149
  gr.Label(label="Probabilities")
150
  ],
151
  title="Stone, Wood, Brick Classifier",
152
- description=("Upload an image of stone, wood, or brick to classify it.\n\n"
153
  "The image is processed by subdividing it into patches and aggregating the predictions. "
154
  "Progress logs are printed to the terminal.")
155
  )
156
- iface.launch()
 
7
  # Import the feature extraction function from feature_extractor.py
8
  from feature_extractor import extract_features_from_image
9
 
10
+ # Global variables for the models, class names, and training log
11
+ models = {} # This will be a dictionary with keys: 'svm', 'rf', 'combined'
12
  class_names = []
13
  training_log = ""
14
 
 
16
  # Model Loading
17
  # ---------------------------------------------------------------------
18
def load_model(model_filename):
    """Populate the module-level ``models`` and ``class_names`` from a pickle.

    The pickle is expected to hold {'models': {...}, 'class_names': [...]},
    where 'models' maps 'svm' / 'rf' / 'combined' to fitted classifiers.
    When the file is missing, a hint is printed and the globals are left
    untouched.
    """
    global models, class_names, training_log
    if not os.path.exists(model_filename):
        print(f"Model file {model_filename} not found. Please train the model first.")
        return
    print("Found existing model file. Loading...")
    with open(model_filename, "rb") as f:
        payload = pickle.load(f)
    # Expecting a dict: {'svm': ..., 'rf': ..., 'combined': ...}
    models = payload['models']
    class_names = payload['class_names']
    training_log += "Loaded model from disk.\n"
    print("Loaded models from disk.")
30
 
31
+ # ---------------------------------------------------------------------
32
+ # Gradio Classification Function with Model Selection
33
+ # ---------------------------------------------------------------------
34
+ def classify_new_image(input_image_path, model_choice):
35
  """
36
+ Expects input_image_path as a file path and model_choice as one of the keys in models.
37
+ Loads the image, processes it by extracting patches and computing predictions on each patch,
38
+ aggregates patch predictions, and returns the final predicted class and probabilities.
39
  """
40
+ global models, training_log, class_names
41
  progress_log = training_log + "\nStarting classification...\n"
42
 
43
+ if model_choice not in models:
44
+ raise ValueError(f"Model choice '{model_choice}' not found. Available choices: {list(models.keys())}")
45
+ classifier = models[model_choice]
46
+
47
  # Load image using OpenCV from file path
48
  image = cv2.imread(input_image_path)
49
  if image is None:
 
122
  print(prob_dict)
123
  return final_prediction, prob_dict
124
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
125
  # ---------------------------------------------------------------------
126
+ # Gradio Interface Setup using file paths and model selection
127
  # ---------------------------------------------------------------------
128
  if __name__ == "__main__":
129
+ model_filename = "./svm_rf_combined.pkl" # Adjust filename as needed
130
  load_model(model_filename)
131
 
132
+ # Create a dropdown for model selection.
133
+ # If models dict is not yet populated, we set default choices.
134
+ model_choices = list(models.keys()) if models else ['svm', 'rf', 'combined']
135
+
136
  iface = gr.Interface(
137
  fn=classify_new_image,
138
+ inputs=[
139
+ gr.Image(type="filepath", label="Input Image"),
140
+ gr.Dropdown(choices=model_choices, label="Select Model", value=model_choices[0])
141
+ ],
142
  outputs=[
143
  gr.Label(label="Predicted Class"),
144
  gr.Label(label="Probabilities")
145
  ],
146
  title="Stone, Wood, Brick Classifier",
147
+ description=("Upload an image and select a classifier model (svm, rf, combined) to classify it.\n\n"
148
  "The image is processed by subdividing it into patches and aggregating the predictions. "
149
  "Progress logs are printed to the terminal.")
150
  )
151
+ iface.launch(share=True)
svm_model_color.pkl → svm_rf_combined.pkl RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:421f593486e03e780e4376677331aa39bc65dc7d128152e19f9f6178ad9e4a69
3
- size 23294
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b503cc65a0f39fb172da810a87a81b2bdc62c578f5fed681a16b472116d7733
3
+ size 11463562
train.py ADDED
@@ -0,0 +1,224 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import cv2
3
+ import numpy as np
4
+ import pickle
5
+ import sys
6
+ import threading
7
+ import concurrent.futures
8
+
9
+ from sklearn.model_selection import train_test_split
10
+ from sklearn.svm import SVC
11
+ from sklearn.metrics import classification_report, confusion_matrix
12
+
13
+ # Import the feature extraction function (from feature_extractor.py)
14
+ from feature_extractor import extract_features_from_image
15
+
16
+ training_log = ""
17
+
18
# Helper function for parallel processing
def process_image(file_path, class_name):
    """Read one image file and turn it into a (feature_vector, label) pair.

    Returns (None, None) when OpenCV cannot decode the file, so callers can
    skip unreadable images instead of raising.
    """
    img = cv2.imread(file_path, cv2.IMREAD_COLOR)
    if img is None:
        print(f"Warning: Could not read {file_path}")
        return None, None

    extracted = extract_features_from_image(img)
    return extracted['combined_features'], class_name
31
+
32
# ---------------------------------------------------------------------
# 1. Data Loading with Parallel Feature Extraction
# ---------------------------------------------------------------------
def load_dataset(dataset_folder, max_workers=4):
    """Scan ``dataset_folder`` (one subfolder per class) into feature arrays.

    Each subfolder name is treated as a class label; every image inside it is
    read and converted to a feature vector on a thread pool. Unreadable
    images are skipped.

    Returns:
        X: float32 array of feature vectors.
        y: array of class labels aligned with X.
        classes: list of class (subfolder) names encountered.
    """
    X, y = [], []
    classes = []  # list of class names

    print(f"Scanning dataset folder: {dataset_folder}")
    valid_exts = ('.jpg', '.jpeg', '.png', '.bmp', '.tiff')
    for class_name in os.listdir(dataset_folder):
        class_path = os.path.join(dataset_folder, class_name)
        if not os.path.isdir(class_path):
            continue
        classes.append(class_name)

        print(f"\nProcessing class: {class_name}")
        image_files = [
            fn for fn in os.listdir(class_path)
            if fn.lower().endswith(valid_exts)
        ]
        total_images = len(image_files)
        image_count = 0

        # Fan the per-image work out across worker threads.
        with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
            future_to_file = {
                executor.submit(process_image, os.path.join(class_path, fn), class_name): fn
                for fn in image_files
            }
            for future in concurrent.futures.as_completed(future_to_file):
                features, label = future.result()
                if features is not None:
                    X.append(features)
                    y.append(label)
                    image_count += 1
                    # \r keeps the progress counter on one terminal line.
                    print(
                        f"\rProcessed {image_count}/{total_images} images in '{class_name}'",
                        end="", flush=True
                    )

        print(f"\nCompleted class: {class_name} with {image_count} images.")

    X = np.array(X, dtype=np.float32)
    y = np.array(y)
    print(f"Finished loading dataset. Total classes: {len(classes)}. Total images: {len(X)}.")

    return X, y, classes
90
+
91
+
92
+ from sklearn.svm import LinearSVC
93
+ from sklearn.ensemble import BaggingClassifier
94
+ from sklearn.metrics import classification_report, confusion_matrix
95
+ from sklearn.model_selection import train_test_split
96
+ from sklearn.preprocessing import StandardScaler
97
+ from sklearn.pipeline import make_pipeline
98
+
99
+ from sklearn.ensemble import VotingClassifier, BaggingClassifier, RandomForestClassifier
100
+ from sklearn.pipeline import make_pipeline
101
+ from sklearn.svm import LinearSVC
102
+ from sklearn.preprocessing import StandardScaler
103
+ from sklearn.model_selection import train_test_split
104
+ from sklearn.metrics import classification_report, confusion_matrix
105
+
106
def train_classifiers(X, y):
    """
    Splits data into training/test sets, trains:
      - A Bagging ensemble of LinearSVC classifiers (with scaling)
      - A RandomForestClassifier
      - A VotingClassifier that combines both

    Parameters:
        X: 2-D array of feature vectors.
        y: 1-D array of class labels aligned with X.

    Returns:
        models (dict): A dictionary with keys 'svm', 'rf', 'combined'
        test_data (tuple): (X_test, y_test)
    """
    # Split dataset: 80% train, 20% test
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=31, shuffle=True
    )

    def _report(header, y_pred):
        # Single place for the report / confusion-matrix printing that was
        # repeated verbatim for each of the three models.
        print(header)
        print(classification_report(y_test, y_pred))
        print("Confusion Matrix:")
        print(confusion_matrix(y_test, y_pred))

    # --- Train SVM Ensemble ---
    svm_pipeline = make_pipeline(StandardScaler(),
                                 LinearSVC(random_state=31))

    svm_ensemble = BaggingClassifier(
        estimator=svm_pipeline,
        n_estimators=10,   # Adjust for speed/accuracy trade-off
        random_state=31,   # FIX: seed the bootstrap sampling so runs are
                           # reproducible, consistent with the random_state=31
                           # used everywhere else in this file
        n_jobs=-1,
        verbose=1
    )

    print("Training SVM ensemble classifier...")
    svm_ensemble.fit(X_train, y_train)
    _report("\nSVM Ensemble Classification Report:", svm_ensemble.predict(X_test))

    # --- Train RandomForest ---
    rf = RandomForestClassifier(
        n_estimators=100,  # Adjust as needed
        random_state=31,
        n_jobs=-1
    )

    print("\nTraining RandomForest classifier...")
    rf.fit(X_train, y_train)
    _report("\nRandomForest Classification Report:", rf.predict(X_test))

    # --- Train Combined Voting Classifier ---
    # NOTE(review): LinearSVC has no predict_proba; voting='soft' works here
    # only via BaggingClassifier.predict_proba's fallback to majority voting
    # over base-estimator predictions — confirm this matches the intent.
    combined_clf = VotingClassifier(
        estimators=[('svm', svm_ensemble), ('rf', rf)],
        voting='soft'
    )

    print("\nTraining Combined Voting classifier...")
    combined_clf.fit(X_train, y_train)
    _report("\nCombined Voting Classifier Report:", combined_clf.predict(X_test))

    models = {
        'svm': svm_ensemble,
        'rf': rf,
        'combined': combined_clf
    }

    return models, (X_test, y_test)
180
+
181
+
182
+
183
# ---------------------------------------------------------------------
# 3. Training Thread
# ---------------------------------------------------------------------
def train_model_thread(dataset_folder, model_filename, max_workers=4):
    """Load the dataset, train all classifiers, and pickle them to disk.

    Progress messages are appended to the module-level ``training_log`` and
    echoed to stdout. The saved pickle layout matches what app.py's
    load_model expects: {'models': {...}, 'class_names': [...]}.
    """
    global training_log

    training_log += "Starting training...\n"
    print("Starting training...")

    # (A) Parallel feature extraction over the dataset folder
    X, y, classes = load_dataset(dataset_folder, max_workers=max_workers)

    # (B) Fit SVM ensemble, random forest, and the voting combination
    models, _ = train_classifiers(X, y)
    print("Training complete.")
    training_log += "Training complete.\n"

    # (C) Persist everything inference needs in one pickle
    payload = {'models': models, 'class_names': classes}
    with open(model_filename, "wb") as f:
        pickle.dump(payload, f)
    training_log += f"Model saved to {model_filename}\n"
    print(f"Model saved to {model_filename}")
206
+
207
# ---------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------
if __name__ == "__main__":
    dataset_folder = "./../images_dataset"  # Adjust path as needed
    model_filename = "svm_rf_combined.pkl"

    # 'max_workers' controls how many feature-extraction threads
    # load_dataset spins up per class folder.
    max_workers = 32

    # NOTE(review): start() followed immediately by join() blocks exactly
    # like a direct function call would; kept as-is to preserve behavior.
    worker = threading.Thread(
        target=train_model_thread,
        args=(dataset_folder, model_filename, max_workers),
    )
    worker.start()
    worker.join()