Vansh180 committed on
Commit
93f625f
·
verified ·
1 Parent(s): 9e541f4

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. README.md +5 -4
  2. app.py +80 -0
  3. requirements.txt +6 -0
README.md CHANGED
@@ -1,12 +1,13 @@
1
  ---
2
  title: Deepfake Audio Detector
3
- emoji: 📊
4
- colorFrom: pink
5
  colorTo: blue
6
  sdk: gradio
7
- sdk_version: 6.9.0
8
  app_file: app.py
9
  pinned: false
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
1
  ---
2
  title: Deepfake Audio Detector
3
+ emoji: 🎙️
4
+ colorFrom: red
5
  colorTo: blue
6
  sdk: gradio
 
7
  app_file: app.py
8
  pinned: false
9
  ---
10
 
11
+ # Deepfake Audio Detector
12
+
13
+ Upload an audio clip and classify it as bonafide or spoof.
app.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import os
3
+ import numpy as np
4
+ import torch
5
+ import torchaudio
6
+ import gradio as gr
7
+ from transformers import AutoFeatureExtractor, AutoModelForAudioClassification
8
+
9
# Hub repository that both the feature extractor and the model are loaded from.
MODEL_REPO_ID = "Vansh180/deepfake-audio-wav2vec2"
# Access token taken from the environment; None when HF_TOKEN is not set.
HF_TOKEN = os.environ.get("HF_TOKEN")

# Run on CUDA when torch reports it as available, otherwise on the CPU.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"

feature_extractor = AutoFeatureExtractor.from_pretrained(
    MODEL_REPO_ID,
    token=HF_TOKEN,
)
model = AutoModelForAudioClassification.from_pretrained(
    MODEL_REPO_ID,
    token=HF_TOKEN,
)
model.to(device)
# Inference only: put the module in evaluation mode.
model.eval()

# Sampling rate used for resampling; falls back to 16000 when the feature
# extractor does not expose a `sampling_rate` attribute.
TARGET_SR = getattr(feature_extractor, "sampling_rate", 16000)
# Every clip is padded or cut to this many seconds before classification.
MAX_SECONDS = 5
# The same fixed clip length expressed in samples.
MAX_LEN = TARGET_SR * MAX_SECONDS
22
+
23
def predict_audio(audio_file):
    """Classify one audio clip with the loaded audio-classification model.

    The clip is loaded with torchaudio, down-mixed to mono by averaging its
    channels, resampled to ``TARGET_SR`` and then zero-padded or truncated to
    exactly ``MAX_LEN`` samples before it goes through the feature extractor
    and the model.

    Args:
        audio_file: Path of the audio file to classify, or ``None`` when
            nothing was uploaded.

    Returns:
        ``{"error": <message>}`` when there is nothing to classify (no file,
        or a file that decodes to zero samples). Otherwise a dict with
        ``"predicted_label"``, ``"confidence"`` (softmax probability of that
        label) and ``"scores"`` (label -> probability for every class).
    """
    if audio_file is None:
        return {"error": "No audio uploaded"}

    waveform, source_sr = torchaudio.load(audio_file)

    # A clip with zero samples would otherwise be padded into MAX_LEN zeros
    # and scored as if it were real audio; report it like a missing upload.
    if waveform.numel() == 0:
        return {"error": "Audio clip is empty"}

    # Down-mix multi-channel audio to a single channel.
    if waveform.shape[0] > 1:
        waveform = waveform.mean(dim=0, keepdim=True)

    if source_sr != TARGET_SR:
        waveform = torchaudio.functional.resample(waveform, source_sr, TARGET_SR)

    waveform = waveform.squeeze(0)

    # Force a fixed length: right-pad short clips with zeros, cut long ones.
    missing = MAX_LEN - waveform.numel()
    if missing > 0:
        waveform = torch.nn.functional.pad(waveform, (0, missing))
    else:
        waveform = waveform[:MAX_LEN]

    inputs = feature_extractor(
        waveform.numpy().astype(np.float32),
        sampling_rate=TARGET_SR,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=MAX_LEN,
    )

    input_values = inputs["input_values"].to(device)
    # Not every feature extractor returns a mask; pass None through if absent.
    attention_mask = inputs.get("attention_mask")
    if attention_mask is not None:
        attention_mask = attention_mask.to(device)

    with torch.no_grad():
        outputs = model(input_values=input_values, attention_mask=attention_mask)
        probs = torch.softmax(outputs.logits, dim=1)[0].cpu().numpy()

    id2label = model.config.id2label
    pred_id = int(np.argmax(probs))

    return {
        "predicted_label": id2label[pred_id],
        "confidence": float(probs[pred_id]),
        "scores": {id2label[i]: float(p) for i, p in enumerate(probs)},
    }
70
+
71
# Single-function Gradio UI: the uploaded clip is handed to predict_audio as
# a file path and the returned dict is rendered as JSON.
demo = gr.Interface(
    title="Deepfake Audio Detector",
    description="Upload an audio clip to classify it as bonafide or spoof.",
    fn=predict_audio,
    inputs=gr.Audio(label="Upload audio", type="filepath"),
    outputs=gr.JSON(label="Prediction"),
)

if __name__ == "__main__":
    # Listen on all interfaces, port 7860.
    demo.launch(server_port=7860, server_name="0.0.0.0")
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ gradio
2
+ transformers
3
+ torch
4
+ torchaudio
5
+ numpy
6
+ huggingface_hub