ZeyadMostafa22 committed
Commit · 4dc47c8 · 1 Parent(s): c629c7c
finall

app.py CHANGED
@@ -3,7 +3,6 @@ import torch
 import torchaudio
 import numpy as np
 from transformers import AutoFeatureExtractor, AutoModelForAudioClassification
-import torch.nn.functional as F
 import torchaudio.transforms as T
 
 MODEL_ID = "Zeyadd-Mostaffa/wav2vec_checkpoints"
@@ -19,13 +18,15 @@ model.to(device)
 
 label_names = ["fake", "real"]  # According to your label2id = {"fake": 0, "real": 1}
 
+
 def classify_audio(audio_file):
     """
     audio_file: path to the uploaded file (WAV, MP3, etc.)
-    Returns:
+    Returns: "fake" or "real"
     """
 
     # 2) Load the audio file
+    # torchaudio returns (waveform, sample_rate)
     waveform, sr = torchaudio.load(audio_file)
 
     # If stereo, pick one channel or average
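A note on the context this hunk trails into: the diff elides old lines 32-38, which presumably downmix stereo to mono and resample to 16 kHz before feature extraction. A minimal sketch of that step, assuming mean-downmix and torchaudio.transforms.Resample (both assumptions, since those lines are not shown in the diff):

    import torchaudio
    import torchaudio.transforms as T

    waveform, sr = torchaudio.load(audio_file)         # shape: (channels, samples)
    if waveform.shape[0] > 1:
        waveform = waveform.mean(dim=0, keepdim=True)  # average channels down to mono
    if sr != 16000:
        resampler = T.Resample(orig_freq=sr, new_freq=16000)
        waveform = resampler(waveform)                 # match the model's 16 kHz rate
        sr = 16000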
@@ -39,13 +40,14 @@ def classify_audio(audio_file):
     waveform = resampler(waveform)
     sr = 16000
 
+
     # 3) Preprocess with feature_extractor
     inputs = feature_extractor(
         waveform.numpy(),
         sampling_rate=sr,
         return_tensors="pt",
         truncation=True,
-        max_length=int(16000
+        max_length=int(16000 * 6.0),  # 6 second max
     )
 
     # Move everything to device
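Worth flagging on the new max_length: 6 s at 16 kHz is 96,000 samples, and truncation=True only trims longer clips; shorter ones pass through unchanged unless padding is also requested. A hedged sketch of the call (the squeeze(0) is an addition here, since torchaudio.load returns a (channels, samples) tensor while Wav2Vec2-style extractors expect one 1-D array per example):

    inputs = feature_extractor(
        waveform.squeeze(0).numpy(),   # 1-D (samples,) array for a single example
        sampling_rate=16000,
        return_tensors="pt",
        truncation=True,
        max_length=int(16000 * 6.0),   # 6 second cap, as in the committed line
        # padding="max_length",        # uncomment to also pad short clips to 6 s
    )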
@@ -53,24 +55,20 @@ def classify_audio(audio_file):
 
     with torch.no_grad():
         logits = model(input_values).logits
+    pred_id = torch.argmax(logits, dim=-1).item()
 
-
-
-
-    # Get predicted label and confidence
-    confidence, pred_id = torch.max(probabilities, dim=-1)
-    predicted_label = label_names[pred_id.item()]
+    # 4) Return label text
+    predicted_label = label_names[pred_id]
+    return predicted_label
 
-    # 5) Return label and confidence percentage
-    return f"Prediction: {predicted_label}, Confidence: {confidence.item() * 100:.2f}%"
 
-#
+# 5) Build Gradio interface
 demo = gr.Interface(
     fn=classify_audio,
-    inputs=gr.Audio(type="filepath"),
+    inputs=gr.Audio( type="filepath"),
     outputs="text",
     title="Wav2Vec2 Deepfake Detection",
-    description="Upload an audio sample to check if it is fake or real
+    description="Upload an audio sample to check if it is fake or real."
 )
 
 if __name__ == "__main__":
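The deleted block referenced probabilities without ever computing it (there was no softmax call, and this commit also drops the torch.nn.functional import at the top of the file), so it would have raised a NameError at inference time; replacing it with a plain argmax is the safe fix. If the confidence percentage is ever wanted back, a working version would look roughly like this (a sketch; input_values is assumed to be the device-moved tensor from the elided "Move everything to device" block):

    import torch
    import torch.nn.functional as F

    with torch.no_grad():
        logits = model(input_values).logits        # shape: (1, num_labels)
    probabilities = F.softmax(logits, dim=-1)      # the step the old code skipped
    confidence, pred_id = torch.max(probabilities, dim=-1)
    result = f"Prediction: {label_names[pred_id.item()]}, Confidence: {confidence.item() * 100:.2f}%"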
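For completeness: the diff's final context line suggests the file closes with the standard Gradio entry point; the body sits outside the hunk, so this is an assumption:

    if __name__ == "__main__":
        demo.launch()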