pavankumarvk committed on
Commit
be37324
·
verified ·
1 Parent(s): 0caada5

Update pipeline.py

Browse files
Files changed (1) hide show
  1. pipeline.py +30 -41
pipeline.py CHANGED
@@ -29,22 +29,11 @@ efficientnet_model = tf.keras.layers.TFSMLayer(
29
 
30
  # ─────────────────────────────────────────────────────────────────────────────
31
  # Audio Model: Wav2Vec2 fine-tuned for deepfake detection
32
- #
33
- # Why replace RawNet2?
34
- # RawNet2 was trained on ASVspoof 2019 — a dataset that predates modern TTS
35
- # systems (ElevenLabs, Vall-E, XTTS, Bark, etc.). It has never seen this
36
- # class of audio and consistently misclassifies it as "Real".
37
- #
38
- # Why Wav2Vec2?
39
- # "mo-thecreator/deepfake-audio-detection" is a Wav2Vec2-base model
40
- # fine-tuned on FakeAVCeleb + ASVspoof 2021 LA, covering:
41
- # - Genuine human speech
42
- # - Neural TTS (modern AI voices)
43
- # - Voice conversion / cloning
44
- # - Replay / splicing attacks
45
  # ─────────────────────────────────────────────────────────────────────────────
46
  AUDIO_MODEL_ID = "mo-thecreator/deepfake-audio-detection"
47
- AUDIO_SAMPLE_RATE = 16000 # Wav2Vec2 expects 16kHz
48
 
49
  print(f"Loading audio model: {AUDIO_MODEL_ID} ...")
50
  audio_feature_extractor = AutoFeatureExtractor.from_pretrained(AUDIO_MODEL_ID)
@@ -61,13 +50,18 @@ LABEL_MAP = {
61
  }
62
 
63
  # ─── Confidence thresholds ────────────────────────────────────────────────────
64
- # High confidence real → Genuine Human Voice
65
- # High confidence fake → Fake / Manipulated Audio
66
- # Low confidence both → AI Synthesized / Voice Cloned
67
- # Modern TTS confuses the model — it sits in the uncertain middle zone.
68
- # That low-confidence signature IS the AI synthesis detection signal.
69
- REAL_THRESHOLD = 0.75
70
- FAKE_THRESHOLD = 0.70
 
 
 
 
 
71
 
72
 
73
  def convert_to_mp4(input_path):
@@ -173,12 +167,12 @@ def deepfakes_video_predict(input_video):
173
 
174
  real_mean = np.mean(real_res)
175
  fake_mean = np.mean(fake_res)
176
- print(f"Real Faces: {real_mean:.4f} | Fake Faces: {fake_mean:.4f}")
177
 
178
  if real_mean >= 0.5:
179
- return "The video is REAL.\nDeepfakes Confidence: " + str(round(100 - real_mean * 100, 3)) + "%"
180
  else:
181
- return "The video is FAKE.\nDeepfakes Confidence: " + str(round(fake_mean * 100, 3)) + "%"
182
 
183
 
184
  def deepfakes_image_predict(input_image):
@@ -187,36 +181,31 @@ def deepfakes_image_predict(input_image):
187
  pred = efficientnet_model(np.expand_dims(face2, axis=0))
188
  pred = list(pred.values())[0].numpy()[0]
189
  real, fake = pred[0], pred[1]
 
 
190
  if real > 0.5:
191
- return "The image is REAL.\nDeepfakes Confidence: " + str(round(100 - real * 100, 3)) + "%"
192
  else:
193
- return "The image is FAKE.\nDeepfakes Confidence: " + str(round(fake * 100, 3)) + "%"
194
 
195
 
196
  def classify_audio_3class(real_prob: float, fake_prob: float) -> str:
197
  """
198
- Map 2-class probabilities → 3-class human-readable result.
199
 
200
- real_prob >= REAL_THRESHOLD → Genuine Human Voice
201
- fake_prob >= FAKE_THRESHOLD → Fake / Manipulated Audio
202
- both below threshold → AI Synthesized / Voice Cloned
 
203
  """
204
  print(f"[Audio] real_prob={real_prob:.4f} fake_prob={fake_prob:.4f}")
205
 
206
  if real_prob >= REAL_THRESHOLD:
207
- return f"✅ Real Human Voice\nConfidence: {round(real_prob * 100, 2)}%"
208
-
209
  elif fake_prob >= FAKE_THRESHOLD:
210
- return f"🚨 Fake / Manipulated Audio\nConfidence: {round(fake_prob * 100, 2)}%"
211
-
212
  else:
213
- # Neither class wins confidently → hallmark of modern TTS / voice cloning
214
- ai_conf = round(max(fake_prob, 1 - real_prob) * 100, 2)
215
- return (
216
- f"🤖 AI Synthesized / Voice Cloned\n"
217
- f"Confidence: {ai_conf}%\n"
218
- f"(Model uncertainty indicates modern neural TTS or voice cloning)"
219
- )
220
 
221
 
222
  def deepfakes_audio_predict(input_audio):
 
29
 
30
  # ─────────────────────────────────────────────────────────────────────────────
31
  # Audio Model: Wav2Vec2 fine-tuned for deepfake detection
32
+ # "mo-thecreator/deepfake-audio-detection"
33
+ # Fine-tuned on FakeAVCeleb + ASVspoof 2021 LA
 
 
 
 
 
 
 
 
 
 
 
34
  # ─────────────────────────────────────────────────────────────────────────────
35
  AUDIO_MODEL_ID = "mo-thecreator/deepfake-audio-detection"
36
+ AUDIO_SAMPLE_RATE = 16000
37
 
38
  print(f"Loading audio model: {AUDIO_MODEL_ID} ...")
39
  audio_feature_extractor = AutoFeatureExtractor.from_pretrained(AUDIO_MODEL_ID)
 
50
  }
51
 
52
  # ─── Confidence thresholds ────────────────────────────────────────────────────
53
+ # REAL_THRESHOLD = 0.55 (loose)
54
+ # Lowered so genuine human voices are not incorrectly rejected.
55
+ # The model only needs to be 55% confident to call it real.
56
+ #
57
+ # FAKE_THRESHOLD = 0.90 (strict)
58
+ # Raised so real voices are never falsely flagged as fake.
59
+ # The model must be 90% confident before labelling audio as manipulated.
60
+ #
61
+ # Zone between the two → AI Synthesized / Voice Cloned
62
+ # ─────────────────────────────────────────────────────────────────────────────
63
+ REAL_THRESHOLD = 0.55
64
+ FAKE_THRESHOLD = 0.90
65
 
66
 
67
  def convert_to_mp4(input_path):
 
167
 
168
  real_mean = np.mean(real_res)
169
  fake_mean = np.mean(fake_res)
170
+ print(f"[Video] Real={real_mean:.4f} | Fake={fake_mean:.4f}")
171
 
172
  if real_mean >= 0.5:
173
+ return "✅ The video is REAL."
174
  else:
175
+ return "🚨 The video is FAKE."
176
 
177
 
178
  def deepfakes_image_predict(input_image):
 
181
  pred = efficientnet_model(np.expand_dims(face2, axis=0))
182
  pred = list(pred.values())[0].numpy()[0]
183
  real, fake = pred[0], pred[1]
184
+ print(f"[Image] Real={real:.4f} | Fake={fake:.4f}")
185
+
186
  if real > 0.5:
187
+ return "✅ The image is REAL."
188
  else:
189
+ return "🚨 The image is FAKE."
190
 
191
 
192
  def classify_audio_3class(real_prob: float, fake_prob: float) -> str:
193
  """
194
+ Map 2-class probabilities → 3-class result.
195
 
196
+ Threshold logic:
197
+ real_prob >= 0.55 → Real Human Voice (loose — avoids false negatives)
198
+ fake_prob >= 0.90 → Fake / Manipulated (strict — avoids false positives)
199
+ in between → AI Synthesized / Cloned
200
  """
201
  print(f"[Audio] real_prob={real_prob:.4f} fake_prob={fake_prob:.4f}")
202
 
203
  if real_prob >= REAL_THRESHOLD:
204
+ return "✅ Real Human Voice"
 
205
  elif fake_prob >= FAKE_THRESHOLD:
206
+ return "🚨 Fake / Manipulated Audio"
 
207
  else:
208
+ return "🤖 AI Synthesized / Voice Cloned"
 
 
 
 
 
 
209
 
210
 
211
  def deepfakes_audio_predict(input_audio):