Vinh.Vu committed on
Commit
88c1060
·
1 Parent(s): 686e5bb

Update the train_cnn

Browse files
Files changed (1) hide show
  1. 03-train_cnn.py +106 -53
03-train_cnn.py CHANGED
@@ -1,13 +1,56 @@
1
  import os
2
  import numpy as np
3
-
4
  # TensorFlow and tf.keras
5
  import tensorflow as tf
6
  print('TensorFlow version: ', tf.__version__)
7
 
8
- dataset_path = '.\\split_dataset\\'
9
 
10
- tmp_debug_path = '.\\tmp_debug'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  print('Creating Directory: ' + tmp_debug_path)
12
  os.makedirs(tmp_debug_path, exist_ok=True)
13
 
@@ -88,25 +131,26 @@ test_generator = test_datagen.flow_from_directory(
88
  shuffle = False
89
  )
90
 
91
- # Build model with frozen base for Phase 1
92
- efficient_net = EfficientNetB0(
93
- weights = 'imagenet',
94
- input_shape = (input_size, input_size, 3),
95
- include_top = False,
96
- pooling = None # We'll add our own pooling
97
- )
 
98
 
99
- # Freeze the base model for Phase 1
100
- efficient_net.trainable = False
101
 
102
- model = Sequential()
103
- model.add(efficient_net)
104
- model.add(GlobalAveragePooling2D())
105
- model.add(BatchNormalization())
106
- model.add(Dense(units = 256, activation = 'relu'))
107
- model.add(Dropout(0.5))
108
- model.add(Dense(units = 1, activation = 'sigmoid'))
109
- model.summary()
110
 
111
  checkpoint_filepath = '.\\tmp_checkpoint'
112
  print('Creating Directory: ' + checkpoint_filepath)
@@ -116,11 +160,13 @@ os.makedirs(checkpoint_filepath, exist_ok=True)
116
  # Phase 1: Train head only (base frozen), higher learning rate
117
  # ============================================================
118
  print('\n=== Phase 1: Training head (base frozen) ===')
119
- model.compile(
120
- optimizer = Adam(learning_rate=1e-3),
121
- loss='binary_crossentropy',
122
- metrics=['accuracy']
123
- )
 
 
124
 
125
  phase1_callbacks = [
126
  EarlyStopping(
@@ -146,27 +192,29 @@ phase1_callbacks = [
146
  )
147
  ]
148
 
149
- history_phase1 = model.fit(
150
- train_generator,
151
- epochs = 15,
152
- steps_per_epoch = len(train_generator),
153
- validation_data = val_generator,
154
- validation_steps = len(val_generator),
155
- class_weight = class_weight,
156
- callbacks = phase1_callbacks
157
- )
 
158
 
159
  # ============================================================
160
  # Phase 2: Unfreeze all layers, fine-tune with very low lr
161
  # ============================================================
162
  print('\n=== Phase 2: Fine-tuning entire model ===')
163
  efficient_net.trainable = True
164
-
165
- model.compile(
166
- optimizer = Adam(learning_rate=1e-5),
167
- loss='binary_crossentropy',
168
- metrics=['accuracy']
169
- )
 
170
 
171
  phase2_callbacks = [
172
  EarlyStopping(
@@ -192,30 +240,35 @@ phase2_callbacks = [
192
  )
193
  ]
194
 
195
- history_phase2 = model.fit(
196
- train_generator,
197
- epochs = 30,
198
- steps_per_epoch = len(train_generator),
199
- validation_data = val_generator,
200
- validation_steps = len(val_generator),
201
- class_weight = class_weight,
202
- callbacks = phase2_callbacks
203
- )
 
204
 
205
  # Load the best model from Phase 2
206
- best_model = load_model(os.path.join(checkpoint_filepath, 'best_model.keras'))
 
207
 
208
  # Also save a copy for the app
209
  best_model.save('best_model.keras')
210
 
211
  # Evaluate on test set
212
  print('\n=== Evaluation on Test Set ===')
 
213
  test_generator.reset()
214
- test_loss, test_accuracy = best_model.evaluate(test_generator, steps=len(test_generator), verbose=1)
 
215
 
216
  # Generate predictions
217
  test_generator.reset()
218
- preds = best_model.predict(test_generator, verbose=1)
 
219
  pred_labels = (preds.flatten() > 0.5).astype(int)
220
  true_labels = test_generator.classes
221
 
@@ -251,4 +304,4 @@ test_results = pd.DataFrame({
251
  "Predicted_Label": pred_labels,
252
  "True_Label": true_labels
253
  })
254
- print(test_results)
 
1
  import os
2
  import numpy as np
3
+ import pandas as pd
4
  # TensorFlow and tf.keras
5
  import tensorflow as tf
6
  print('TensorFlow version: ', tf.__version__)
7
 
 
8
 
9
+
10
+ def configure_training_device():  # Print TensorFlow build/device diagnostics and return the device string to use: '/GPU:0' or '/CPU:0'.
11
+ print('\n=== Device Check ===')
12
+ print('Built with CUDA:', tf.test.is_built_with_cuda())
13
+ print('Built with GPU support:', tf.test.is_built_with_gpu_support())
14
+ build_info = tf.sysconfig.get_build_info()  # looked up with .get() below, so missing keys print 'unknown'
15
+ print('TensorFlow CUDA version:', build_info.get('cuda_version', 'unknown'))
16
+ print('TensorFlow cuDNN version:', build_info.get('cudnn_version', 'unknown'))
17
+
18
+ gpus = tf.config.list_physical_devices('GPU')
19
+ cpus = tf.config.list_physical_devices('CPU')  # only used for the 'CPUs available' count
20
+
21
+ if gpus:  # GPU path: report every GPU, then select '/GPU:0'
22
+ print(f'Physical GPUs detected: {len(gpus)}')
23
+ for index, gpu in enumerate(gpus):
24
+ print(f' GPU {index}: {gpu}')
25
+ try:
26
+ tf.config.experimental.set_memory_growth(gpu, True)  # NOTE(review): presumably must run before the GPUs are initialized - confirm against TF docs
27
+ print(f' Memory growth enabled for GPU {index}')
28
+ except RuntimeError as exc:  # failure is only reported; the remaining GPUs are still processed
29
+ print(f' Could not enable memory growth for GPU {index}: {exc}')
30
+
31
+ logical_gpus = tf.config.list_logical_devices('GPU')
32
+ print(f'Logical GPUs available: {len(logical_gpus)}')
33
+ for index, gpu in enumerate(logical_gpus):
34
+ print(f' Logical GPU {index}: {gpu}')
35
+ print(f'CPUs available: {len(cpus)}')
36
+ print('Training device selected: /GPU:0')
37
+ print('GPU training enabled: YES')
38
+ return '/GPU:0'  # always the first GPU, regardless of how many were detected
39
+
40
+ print('Physical GPUs detected: 0')  # CPU fallback: apparently reached only when no physical GPU was listed (indentation is lost in this view)
41
+ print('Logical GPUs available: 0')
42
+ print(f'CPUs available: {len(cpus)}')
43
+ print('Training device selected: /CPU:0')
44
+ print('GPU training enabled: NO')
45
+ print('WARNING: No NVIDIA GPU is visible to TensorFlow. Training will run on CPU.')
46
+ return '/CPU:0'
47
+
48
+
49
+ TRAINING_DEVICE = configure_training_device()
50
+
51
+ dataset_path = './split_dataset/'
52
+
53
+ tmp_debug_path = './tmp_debug'
54
  print('Creating Directory: ' + tmp_debug_path)
55
  os.makedirs(tmp_debug_path, exist_ok=True)
56
 
 
131
  shuffle = False
132
  )
133
 
134
+ with tf.device(TRAINING_DEVICE):
135
+ # Build model with frozen base for Phase 1
136
+ efficient_net = EfficientNetB0(
137
+ weights = 'imagenet',
138
+ input_shape = (input_size, input_size, 3),
139
+ include_top = False,
140
+ pooling = None # We'll add our own pooling
141
+ )
142
 
143
+ # Freeze the base model for Phase 1
144
+ efficient_net.trainable = False
145
 
146
+ model = Sequential()
147
+ model.add(efficient_net)
148
+ model.add(GlobalAveragePooling2D())
149
+ model.add(BatchNormalization())
150
+ model.add(Dense(units = 256, activation = 'relu'))
151
+ model.add(Dropout(0.5))
152
+ model.add(Dense(units = 1, activation = 'sigmoid'))
153
+ model.summary()
154
 
155
  checkpoint_filepath = '.\\tmp_checkpoint'
156
  print('Creating Directory: ' + checkpoint_filepath)
 
160
  # Phase 1: Train head only (base frozen), higher learning rate
161
  # ============================================================
162
  print('\n=== Phase 1: Training head (base frozen) ===')
163
+ print('Phase 1 device:', TRAINING_DEVICE)
164
+ with tf.device(TRAINING_DEVICE):
165
+ model.compile(
166
+ optimizer = Adam(learning_rate=1e-3),
167
+ loss='binary_crossentropy',
168
+ metrics=['accuracy']
169
+ )
170
 
171
  phase1_callbacks = [
172
  EarlyStopping(
 
192
  )
193
  ]
194
 
195
+ with tf.device(TRAINING_DEVICE):
196
+ history_phase1 = model.fit(
197
+ train_generator,
198
+ epochs = 15,
199
+ steps_per_epoch = len(train_generator),
200
+ validation_data = val_generator,
201
+ validation_steps = len(val_generator),
202
+ class_weight = class_weight,
203
+ callbacks = phase1_callbacks
204
+ )
205
 
206
  # ============================================================
207
  # Phase 2: Unfreeze all layers, fine-tune with very low lr
208
  # ============================================================
209
  print('\n=== Phase 2: Fine-tuning entire model ===')
210
  efficient_net.trainable = True
211
+ print('Phase 2 device:', TRAINING_DEVICE)
212
+ with tf.device(TRAINING_DEVICE):
213
+ model.compile(
214
+ optimizer = Adam(learning_rate=1e-5),
215
+ loss='binary_crossentropy',
216
+ metrics=['accuracy']
217
+ )
218
 
219
  phase2_callbacks = [
220
  EarlyStopping(
 
240
  )
241
  ]
242
 
243
+ with tf.device(TRAINING_DEVICE):
244
+ history_phase2 = model.fit(
245
+ train_generator,
246
+ epochs = 30,
247
+ steps_per_epoch = len(train_generator),
248
+ validation_data = val_generator,
249
+ validation_steps = len(val_generator),
250
+ class_weight = class_weight,
251
+ callbacks = phase2_callbacks
252
+ )
253
 
254
  # Load the best model from Phase 2
255
+ with tf.device(TRAINING_DEVICE):
256
+ best_model = load_model(os.path.join(checkpoint_filepath, 'best_model.keras'))
257
 
258
  # Also save a copy for the app
259
  best_model.save('best_model.keras')
260
 
261
  # Evaluate on test set
262
  print('\n=== Evaluation on Test Set ===')
263
+ print('Evaluation device:', TRAINING_DEVICE)
264
  test_generator.reset()
265
+ with tf.device(TRAINING_DEVICE):
266
+ test_loss, test_accuracy = best_model.evaluate(test_generator, steps=len(test_generator), verbose=1)
267
 
268
  # Generate predictions
269
  test_generator.reset()
270
+ with tf.device(TRAINING_DEVICE):
271
+ preds = best_model.predict(test_generator, verbose=1)
272
  pred_labels = (preds.flatten() > 0.5).astype(int)
273
  true_labels = test_generator.classes
274
 
 
304
  "Predicted_Label": pred_labels,
305
  "True_Label": true_labels
306
  })
307
+ print(test_results)