Vinh Vu committed on
Commit
a879ae6
·
1 Parent(s): b06ef27

Update train cnn to improve accuracy

Browse files
01-crop_faces_with_mtcnn.py CHANGED
@@ -1,8 +1,7 @@
1
  import cv2
2
  from mtcnn import MTCNN
3
  import csv
4
- import sys, os.path
5
- from keras import backend as K
6
  import tensorflow as tf
7
  print(tf.__version__)
8
  tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
 
1
  import cv2
2
  from mtcnn import MTCNN
3
  import csv
4
+ import os
 
5
  import tensorflow as tf
6
  print(tf.__version__)
7
  tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
02-prepare_fake_real_dataset.py CHANGED
@@ -1,7 +1,6 @@
1
  import csv
2
  import os
3
  import shutil
4
- import numpy as np
5
  import splitfolders as split_folders
6
  from PIL import Image
7
 
 
1
  import csv
2
  import os
3
  import shutil
 
4
  import splitfolders as split_folders
5
  from PIL import Image
6
 
03-train_cnn.py CHANGED
@@ -1,36 +1,18 @@
1
- import json
2
  import os
3
- from distutils.dir_util import copy_tree
4
- import shutil
5
  import pandas as pd
 
6
 
7
  # TensorFlow and tf.keras
8
  import tensorflow as tf
9
- from tensorflow.keras import backend as K
10
  print('TensorFlow version: ', tf.__version__)
11
 
12
- # Set to force CPU
13
- #os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
14
- #if tf.test.gpu_device_name():
15
- # print('GPU found')
16
- #else:
17
- # print("No GPU found")
18
-
19
  dataset_path = '.\\split_dataset\\'
20
 
21
  tmp_debug_path = '.\\tmp_debug'
22
  print('Creating Directory: ' + tmp_debug_path)
23
  os.makedirs(tmp_debug_path, exist_ok=True)
24
 
25
- def get_filename_only(file_path):
26
- file_basename = os.path.basename(file_path)
27
- filename_only = file_basename.split('.')[0]
28
- return filename_only
29
-
30
- import numpy as np
31
- from sklearn.utils.class_weight import compute_class_weight
32
  from tensorflow.keras.preprocessing.image import ImageDataGenerator
33
- from tensorflow.keras import applications
34
  from tensorflow.keras.applications import EfficientNetB0
35
  from tensorflow.keras.applications.efficientnet import preprocess_input
36
  from tensorflow.keras.models import Sequential, load_model
@@ -44,16 +26,12 @@ train_path = os.path.join(dataset_path, 'train')
44
  val_path = os.path.join(dataset_path, 'val')
45
  test_path = os.path.join(dataset_path, 'test')
46
 
 
47
  train_datagen = ImageDataGenerator(
48
  preprocessing_function = preprocess_input,
49
- rotation_range = 15,
50
- width_shift_range = 0.15,
51
- height_shift_range = 0.15,
52
- shear_range = 0.2,
53
- zoom_range = 0.15,
54
  horizontal_flip = True,
55
- brightness_range = [0.8, 1.2],
56
- channel_shift_range = 30,
57
  fill_mode = 'nearest'
58
  )
59
 
@@ -66,11 +44,7 @@ train_generator = train_datagen.flow_from_directory(
66
  shuffle = True
67
  )
68
 
69
- # Compute class weights to handle imbalance
70
- class_weights = compute_class_weight('balanced', classes=np.unique(train_generator.classes), y=train_generator.classes)
71
- class_weight_dict = dict(enumerate(class_weights))
72
  print(f'Class mapping: {train_generator.class_indices}')
73
- print(f'Class weights: {class_weight_dict}')
74
  print(f'Train samples - fake: {np.sum(train_generator.classes == 0)}, real: {np.sum(train_generator.classes == 1)}')
75
 
76
  val_datagen = ImageDataGenerator(
@@ -100,110 +74,61 @@ test_generator = test_datagen.flow_from_directory(
100
  shuffle = False
101
  )
102
 
103
- # --- Phase 1: Train with frozen base ---
104
  efficient_net = EfficientNetB0(
105
  weights = 'imagenet',
106
  input_shape = (input_size, input_size, 3),
107
  include_top = False,
108
  pooling = 'max'
109
  )
110
- efficient_net.trainable = False # freeze base initially
111
 
112
  model = Sequential()
113
  model.add(efficient_net)
114
  model.add(Dense(units = 512, activation = 'relu'))
115
  model.add(Dropout(0.5))
116
  model.add(Dense(units = 128, activation = 'relu'))
117
- model.add(Dropout(0.3))
118
  model.add(Dense(units = 1, activation = 'sigmoid'))
119
  model.summary()
120
 
121
- model.compile(optimizer = Adam(learning_rate=0.0001), loss='binary_crossentropy', metrics=['accuracy'])
122
 
123
  checkpoint_filepath = '.\\tmp_checkpoint'
124
  print('Creating Directory: ' + checkpoint_filepath)
125
  os.makedirs(checkpoint_filepath, exist_ok=True)
126
 
127
- custom_callbacks = [
128
  EarlyStopping(
129
- monitor = 'val_accuracy',
130
- mode = 'max',
131
  patience = 5,
132
  verbose = 1,
133
  restore_best_weights = True
134
  ),
135
  ModelCheckpoint(
136
  filepath = os.path.join(checkpoint_filepath, 'best_model.keras'),
137
- monitor = 'val_accuracy',
138
- mode = 'max',
139
  verbose = 1,
140
  save_best_only = True
141
  ),
142
  ReduceLROnPlateau(
143
- monitor = 'val_accuracy',
144
  factor = 0.5,
145
  patience = 3,
146
  min_lr = 1e-7,
147
- verbose = 1,
148
- mode = 'max'
149
  )
150
  ]
151
 
152
- print('\n=== Phase 1: Training with frozen base ===')
153
- num_epochs = 15
154
  history = model.fit(
155
  train_generator,
156
  epochs = num_epochs,
157
  steps_per_epoch = len(train_generator),
158
  validation_data = val_generator,
159
  validation_steps = len(val_generator),
160
- callbacks = custom_callbacks,
161
- class_weight = class_weight_dict
162
- )
163
-
164
- # --- Phase 2: Fine-tune top layers of base model ---
165
- print('\n=== Phase 2: Fine-tuning top layers ===')
166
- efficient_net.trainable = True
167
- # Freeze all layers except the last 30
168
- for layer in efficient_net.layers[:-30]:
169
- layer.trainable = False
170
-
171
- model.compile(optimizer = Adam(learning_rate=1e-5), loss='binary_crossentropy', metrics=['accuracy'])
172
-
173
- fine_tune_callbacks = [
174
- EarlyStopping(
175
- monitor = 'val_accuracy',
176
- mode = 'max',
177
- patience = 5,
178
- verbose = 1,
179
- restore_best_weights = True
180
- ),
181
- ModelCheckpoint(
182
- filepath = os.path.join(checkpoint_filepath, 'best_model.keras'),
183
- monitor = 'val_accuracy',
184
- mode = 'max',
185
- verbose = 1,
186
- save_best_only = True
187
- ),
188
- ReduceLROnPlateau(
189
- monitor = 'val_accuracy',
190
- factor = 0.5,
191
- patience = 3,
192
- min_lr = 1e-8,
193
- verbose = 1,
194
- mode = 'max'
195
- )
196
- ]
197
-
198
- fine_tune_epochs = 30
199
- history_fine = model.fit(
200
- train_generator,
201
- epochs = fine_tune_epochs,
202
- steps_per_epoch = len(train_generator),
203
- validation_data = val_generator,
204
- validation_steps = len(val_generator),
205
- callbacks = fine_tune_callbacks,
206
- class_weight = class_weight_dict
207
  )
208
 
209
  # Load the best model
@@ -213,8 +138,6 @@ best_model = load_model(os.path.join(checkpoint_filepath, 'best_model.keras'))
213
  print('\n=== Evaluation on Test Set ===')
214
  test_generator.reset()
215
  test_loss, test_accuracy = best_model.evaluate(test_generator, steps=len(test_generator), verbose=1)
216
- print(f'Test Loss: {test_loss:.4f}')
217
- print(f'Test Accuracy: {test_accuracy:.4f}')
218
 
219
  # Generate predictions
220
  test_generator.reset()
@@ -222,11 +145,31 @@ preds = best_model.predict(test_generator, verbose=1)
222
  pred_labels = (preds.flatten() > 0.5).astype(int)
223
  true_labels = test_generator.classes
224
 
225
- from sklearn.metrics import classification_report, confusion_matrix
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
226
  print('\nClassification Report:')
227
  print(classification_report(true_labels, pred_labels, target_names=['fake', 'real']))
228
  print('Confusion Matrix:')
229
- print(confusion_matrix(true_labels, pred_labels))
230
 
231
  test_results = pd.DataFrame({
232
  "Filename": test_generator.filenames,
 
 
1
  import os
 
 
2
  import pandas as pd
3
+ import numpy as np
4
 
5
  # TensorFlow and tf.keras
6
  import tensorflow as tf
 
7
  print('TensorFlow version: ', tf.__version__)
8
 
 
 
 
 
 
 
 
9
  dataset_path = '.\\split_dataset\\'
10
 
11
  tmp_debug_path = '.\\tmp_debug'
12
  print('Creating Directory: ' + tmp_debug_path)
13
  os.makedirs(tmp_debug_path, exist_ok=True)
14
 
 
 
 
 
 
 
 
15
  from tensorflow.keras.preprocessing.image import ImageDataGenerator
 
16
  from tensorflow.keras.applications import EfficientNetB0
17
  from tensorflow.keras.applications.efficientnet import preprocess_input
18
  from tensorflow.keras.models import Sequential, load_model
 
26
  val_path = os.path.join(dataset_path, 'val')
27
  test_path = os.path.join(dataset_path, 'test')
28
 
29
+ # preprocess_input is a pass-through for EfficientNet; the model itself rescales raw [0, 255] pixel inputs
30
  train_datagen = ImageDataGenerator(
31
  preprocessing_function = preprocess_input,
32
+ rotation_range = 10,
 
 
 
 
33
  horizontal_flip = True,
34
+ zoom_range = 0.1,
 
35
  fill_mode = 'nearest'
36
  )
37
 
 
44
  shuffle = True
45
  )
46
 
 
 
 
47
  print(f'Class mapping: {train_generator.class_indices}')
 
48
  print(f'Train samples - fake: {np.sum(train_generator.classes == 0)}, real: {np.sum(train_generator.classes == 1)}')
49
 
50
  val_datagen = ImageDataGenerator(
 
74
  shuffle = False
75
  )
76
 
77
+ # Build model - entire EfficientNetB0 is trainable
78
  efficient_net = EfficientNetB0(
79
  weights = 'imagenet',
80
  input_shape = (input_size, input_size, 3),
81
  include_top = False,
82
  pooling = 'max'
83
  )
 
84
 
85
  model = Sequential()
86
  model.add(efficient_net)
87
  model.add(Dense(units = 512, activation = 'relu'))
88
  model.add(Dropout(0.5))
89
  model.add(Dense(units = 128, activation = 'relu'))
 
90
  model.add(Dense(units = 1, activation = 'sigmoid'))
91
  model.summary()
92
 
93
+ model.compile(optimizer = Adam(learning_rate=1e-4), loss='binary_crossentropy', metrics=['accuracy'])
94
 
95
  checkpoint_filepath = '.\\tmp_checkpoint'
96
  print('Creating Directory: ' + checkpoint_filepath)
97
  os.makedirs(checkpoint_filepath, exist_ok=True)
98
 
99
+ callbacks = [
100
  EarlyStopping(
101
+ monitor = 'val_loss',
102
+ mode = 'min',
103
  patience = 5,
104
  verbose = 1,
105
  restore_best_weights = True
106
  ),
107
  ModelCheckpoint(
108
  filepath = os.path.join(checkpoint_filepath, 'best_model.keras'),
109
+ monitor = 'val_loss',
110
+ mode = 'min',
111
  verbose = 1,
112
  save_best_only = True
113
  ),
114
  ReduceLROnPlateau(
115
+ monitor = 'val_loss',
116
  factor = 0.5,
117
  patience = 3,
118
  min_lr = 1e-7,
119
+ verbose = 1
 
120
  )
121
  ]
122
 
123
+ print('\n=== Training ===')
124
+ num_epochs = 20
125
  history = model.fit(
126
  train_generator,
127
  epochs = num_epochs,
128
  steps_per_epoch = len(train_generator),
129
  validation_data = val_generator,
130
  validation_steps = len(val_generator),
131
+ callbacks = callbacks
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
132
  )
133
 
134
  # Load the best model
 
138
  print('\n=== Evaluation on Test Set ===')
139
  test_generator.reset()
140
  test_loss, test_accuracy = best_model.evaluate(test_generator, steps=len(test_generator), verbose=1)
 
 
141
 
142
  # Generate predictions
143
  test_generator.reset()
 
145
  pred_labels = (preds.flatten() > 0.5).astype(int)
146
  true_labels = test_generator.classes
147
 
148
+ from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
149
+
150
+ overall_accuracy = accuracy_score(true_labels, pred_labels)
151
+ cm = confusion_matrix(true_labels, pred_labels)
152
+
153
+ print(f'\n{"="*60}')
154
+ print(f' MODEL ACCURACY REPORT')
155
+ print(f'{"="*60}')
156
+ print(f' Overall Accuracy: {overall_accuracy:.4f} ({overall_accuracy*100:.2f}%)')
157
+ print(f' Test Loss: {test_loss:.4f}')
158
+ print(f'{"="*60}')
159
+
160
+ # Per-class accuracy
161
+ fake_correct = cm[0][0]
162
+ fake_total = cm[0].sum()
163
+ real_correct = cm[1][1]
164
+ real_total = cm[1].sum()
165
+ print(f' Fake Accuracy: {fake_correct}/{fake_total} = {fake_correct/fake_total:.4f} ({fake_correct/fake_total*100:.2f}%)')
166
+ print(f' Real Accuracy: {real_correct}/{real_total} = {real_correct/real_total:.4f} ({real_correct/real_total*100:.2f}%)')
167
+ print(f'{"="*60}')
168
+
169
  print('\nClassification Report:')
170
  print(classification_report(true_labels, pred_labels, target_names=['fake', 'real']))
171
  print('Confusion Matrix:')
172
+ print(cm)
173
 
174
  test_results = pd.DataFrame({
175
  "Filename": test_generator.filenames,
App/app.py CHANGED
@@ -1,6 +1,4 @@
1
  import os
2
- import sys
3
- import io
4
  import base64
5
  import math
6
  import logging
@@ -13,7 +11,6 @@ from flask import Flask, request, render_template, send_from_directory, jsonify
13
  from werkzeug.utils import secure_filename
14
  import uuid
15
  import threading
16
- import tensorflow as tf
17
  from tensorflow.keras.models import load_model
18
 
19
  logging.basicConfig(
@@ -30,13 +27,10 @@ ALLOWED_EXTENSIONS = {'mp4', 'avi', 'mov', 'mkv', 'wmv'}
30
 
31
  os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
32
 
33
- # Load the trained model (suppress lz4 I/O warnings)
34
- MODEL_PATH = os.path.join(os.path.dirname(__file__), '..', 'tmp_checkpoint', 'best_model.h5')
35
  logger.info('Loading model from %s', MODEL_PATH)
36
- _stderr = sys.stderr
37
- sys.stderr = io.StringIO()
38
  model = load_model(MODEL_PATH)
39
- sys.stderr = _stderr
40
  logger.info('Model loaded successfully')
41
  INPUT_SIZE = 128
42
 
 
1
  import os
 
 
2
  import base64
3
  import math
4
  import logging
 
11
  from werkzeug.utils import secure_filename
12
  import uuid
13
  import threading
 
14
  from tensorflow.keras.models import load_model
15
 
16
  logging.basicConfig(
 
27
 
28
  os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
29
 
30
+ # Load the trained model
31
+ MODEL_PATH = os.path.join(os.path.dirname(__file__), '..', 'tmp_checkpoint', 'best_model.keras')
32
  logger.info('Loading model from %s', MODEL_PATH)
 
 
33
  model = load_model(MODEL_PATH)
 
34
  logger.info('Model loaded successfully')
35
  INPUT_SIZE = 128
36
 
App/blaze_face_short_range.tflite DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:b4578f35940bf5a1a655214a1cce5cab13eba73c1297cd78e1a04c2380b0152f
3
- size 229746