S-Vetrivel committed on
Commit
cb14a1d
·
1 Parent(s): 0d64f25

Upgrade: Switch to MMS-300M (XLS-R) for robust multilingual deepfake detection

Browse files
Files changed (2) hide show
  1. app/infer.py +15 -17
  2. verify_nii_model.py +41 -0
app/infer.py CHANGED
@@ -14,15 +14,16 @@ class VoiceClassifier:
14
  self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
15
  print(f"Loading Deepfake Detection model on {self.device}...")
16
 
17
- # Load Fine-Tuned Deepfake Detection Model
18
- self.model_name = "mo-thecreator/Deepfake-audio-detection"
 
19
 
20
  try:
21
- self.feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(self.model_name)
22
  self.model = AutoModelForAudioClassification.from_pretrained(self.model_name)
23
  self.model.to(self.device)
24
  self.model.eval()
25
- print(f"Model {self.model_name} loaded successfully.")
26
  # Labels: {0: 'fake', 1: 'real'} usually for this model
27
  print(f"Labels: {self.model.config.id2label}")
28
  except Exception as e:
@@ -154,31 +155,28 @@ class VoiceClassifier:
154
  is_english = language.lower() in ["english", "en"]
155
 
156
  # 3. Final Decision
157
- # We demand HIGHER evidence for AI (Conservatism)
158
 
159
  # Base threshold
160
- threshold = 0.65
161
 
162
  # Dynamic Thresholding based on Heuristics
163
  if len(ai_flags) >= 2:
164
  # Strong heuristic evidence (e.g. robotic pitch + flat spectrum)
165
- # We lower the bar for the model
166
  threshold = 0.50
167
  elif len(ai_flags) == 1:
168
  # Some heuristic evidence
169
- threshold = 0.60
170
  else:
171
  # ZERO heuristic evidence (Pitch/Flatness look human)
172
  # The model is alone in its accusation.
173
  if not is_english:
174
- # Foreign language + No Heuristics = FALSE POSITIVE likely.
175
- # We force Human verdict unless we want to be extremely risky.
176
- # Current decision: Force Human to protect against bias.
177
- print("DEBUG: Non-English audio with NO heuristic AI flags. Forcing Human verdict.")
178
- prob_fake_adjusted = 0.0
179
  else:
180
  # English + No Heuristics.
181
- # Model must be overwhelmingly confident (>98%) to override heuristics.
182
  threshold = 0.98
183
 
184
  if prob_fake_adjusted > threshold:
@@ -188,14 +186,14 @@ class VoiceClassifier:
188
  prediction = "HUMAN"
189
  confidence = 1.0 - prob_fake_adjusted
190
 
191
- # 4. Language Awareness Dampening (for the resulting score)
192
  if prediction == "AI_GENERATED" and not is_english:
193
- confidence *= 0.9 # Extra caution for non-English
194
 
195
  # Construct Explanation
196
  if prediction == "AI_GENERATED":
197
  reasons = ai_flags
198
- if not reasons: reasons.append("high confidence from deepfake classifier")
199
  explanation = f"AI detected ({confidence*100:.1f}%). Indicators: {', '.join(reasons)}."
200
  else:
201
  reasons = human_flags
 
14
  self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
15
  print(f"Loading Deepfake Detection model on {self.device}...")
16
 
17
+ # Load MMS-300M Anti-Deepfake Model (XLS-R based)
18
+ self.model_name = "nii-yamagishilab/mms-300m-anti-deepfake"
19
+ self.feature_extractor_name = "facebook/mms-300m"
20
 
21
  try:
22
+ self.feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(self.feature_extractor_name)
23
  self.model = AutoModelForAudioClassification.from_pretrained(self.model_name)
24
  self.model.to(self.device)
25
  self.model.eval()
26
+ print(f"Model {self.model_name} loaded successfully (MMS Backbone).")
27
  # Labels: {0: 'fake', 1: 'real'} usually for this model
28
  print(f"Labels: {self.model.config.id2label}")
29
  except Exception as e:
 
155
  is_english = language.lower() in ["english", "en"]
156
 
157
  # 3. Final Decision
158
+ # We demand HIGHER evidence for AI (Conservatism) but trust MMS more.
159
 
160
  # Base threshold
161
+ threshold = 0.60
162
 
163
  # Dynamic Thresholding based on Heuristics
164
  if len(ai_flags) >= 2:
165
  # Strong heuristic evidence (e.g. robotic pitch + flat spectrum)
 
166
  threshold = 0.50
167
  elif len(ai_flags) == 1:
168
  # Some heuristic evidence
169
+ threshold = 0.55
170
  else:
171
  # ZERO heuristic evidence (Pitch/Flatness look human)
172
  # The model is alone in its accusation.
173
  if not is_english:
174
+ # Foreign language + No Heuristics.
175
+ # MMS is multilingual, so we don't zero it out, but we require HIGH confidence.
176
+ print("DEBUG: Non-English audio with NO heuristic AI flags. Requiring high MMS confidence.")
177
+ threshold = 0.90 # High bar, but possible (unlike previous 0.0 force)
 
178
  else:
179
  # English + No Heuristics.
 
180
  threshold = 0.98
181
 
182
  if prob_fake_adjusted > threshold:
 
186
  prediction = "HUMAN"
187
  confidence = 1.0 - prob_fake_adjusted
188
 
189
+ # 4. Language Awareness Dampening (MMS is robust, lesser dampening)
190
  if prediction == "AI_GENERATED" and not is_english:
191
+ confidence *= 0.95 # Slight caution only
192
 
193
  # Construct Explanation
194
  if prediction == "AI_GENERATED":
195
  reasons = ai_flags
196
+ if not reasons: reasons.append("high confidence from MMS (XLS-R) classifier")
197
  explanation = f"AI detected ({confidence*100:.1f}%). Indicators: {', '.join(reasons)}."
198
  else:
199
  reasons = human_flags
verify_nii_model.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Smoke-test for the nii-yamagishilab MMS-300M anti-deepfake model.

Loads the feature extractor from the MMS base checkpoint and the fine-tuned
classification model, then runs a single forward pass on random noise to
confirm the pipeline is wired up end to end.
"""

import numpy as np
import torch
# NOTE: removed unused AutoFeatureExtractor import; only Wav2Vec2FeatureExtractor
# and AutoModelForAudioClassification are actually used below.
from transformers import AutoModelForAudioClassification, Wav2Vec2FeatureExtractor


def verify_nii_model():
    """Download the model, run one dummy inference, and print the prediction.

    Prints progress, the model's label map, the softmax probabilities for a
    1-second random-noise clip, and the predicted label. All failures are
    caught and reported with a traceback rather than raised, since this is a
    standalone verification script.
    """
    model_id = "nii-yamagishilab/mms-300m-anti-deepfake"
    base_id = "facebook/mms-300m"

    print(f"Loading Feature Extractor from {base_id}...")
    try:
        # MMS uses Wav2Vec2FeatureExtractor; it is loaded from the base
        # checkpoint (the fine-tuned repo is presumably missing a
        # preprocessor config — TODO confirm against the model card).
        feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(base_id)
        print("Feature Extractor loaded.")

        print(f"Loading Model from {model_id}...")
        model = AutoModelForAudioClassification.from_pretrained(model_id)
        print("Model loaded successfully!")

        # Check standard config
        print(f"Labels: {model.config.id2label}")

        # Test with dummy audio: 1 second of uniform random noise at 16 kHz.
        dummy_audio = np.random.uniform(-1, 1, 16000)
        inputs = feature_extractor(dummy_audio, sampling_rate=16000, return_tensors="pt")

        with torch.no_grad():
            logits = model(**inputs).logits
            probs = torch.softmax(logits, dim=-1)
            print(f"Dummy output probabilities: {probs}")
            predicted_id = torch.argmax(logits, dim=-1).item()
            label = model.config.id2label.get(predicted_id, str(predicted_id))
            print(f"Prediction: {label}")

    except Exception as e:
        # Broad catch is deliberate: any failure (network, config, shape)
        # should be reported by this smoke test, not crash it.
        print(f"Error: {e}")
        import traceback
        traceback.print_exc()


if __name__ == "__main__":
    verify_nii_model()