kimnamjoon0007 committed on
Commit
876b3e1
·
verified ·
1 Parent(s): 685cac8

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +32 -79
app.py CHANGED
@@ -1,6 +1,6 @@
1
  """
2
- AI Voice Detection - Hugging Face Spaces Demo
3
- Detects AI-generated vs Human voices in multilingual audio
4
  """
5
 
6
  import os
@@ -37,30 +37,29 @@ class W2VBertDeepfakeDetector(nn.Module):
37
  return logits
38
 
39
 
40
- # Load model
41
- print("Loading model...")
42
  backbone = Wav2Vec2Model.from_pretrained("facebook/wav2vec2-large-xlsr-53")
43
  model = W2VBertDeepfakeDetector(backbone, num_labels=2)
44
 
 
45
  try:
46
  from huggingface_hub import hf_hub_download
47
  model_path = hf_hub_download(repo_id=MODEL_REPO, filename="best_model.pt")
48
  state_dict = torch.load(model_path, map_location="cpu")
49
  model.load_state_dict(state_dict)
50
- print(f"✓ Loaded model from {MODEL_REPO}")
51
  except Exception as e:
52
- print(f"Warning: Could not load from HF Hub: {e}")
53
- if os.path.exists("best_model.pt"):
54
- model.load_state_dict(torch.load("best_model.pt", map_location="cpu"))
55
- print("✓ Loaded model from local file")
56
 
57
  model.to(DEVICE)
58
  model.eval()
59
- print(f"Model ready on {DEVICE}")
60
 
61
 
62
  def load_audio(audio_path):
63
- """Load and preprocess audio file."""
64
  audio_segment = AudioSegment.from_file(audio_path)
65
  samples = np.array(audio_segment.get_array_of_samples()).astype(np.float32)
66
 
@@ -80,93 +79,47 @@ def load_audio(audio_path):
80
  return torch.from_numpy(samples).float()
81
 
82
 
83
- def classify_audio(audio_input):
84
- """Main classification function."""
85
- if audio_input is None:
86
- return "⚠️ Please upload or record an audio file."
87
 
88
  try:
89
- # Handle tuple input from microphone (sample_rate, audio_array)
90
- if isinstance(audio_input, tuple):
91
- import scipy.io.wavfile as wav
92
- sr, audio_data = audio_input
93
- temp_file = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
94
- wav.write(temp_file.name, sr, audio_data)
95
- audio_path = temp_file.name
96
- else:
97
- audio_path = audio_input
98
-
99
- # Load and process
100
  waveform = load_audio(audio_path)
101
  input_values = waveform.unsqueeze(0).to(DEVICE)
102
 
103
- # Inference
104
  with torch.no_grad():
105
  logits = model(input_values)
106
  probs = torch.softmax(logits, dim=-1)
107
- pred_class = torch.argmax(probs, dim=-1).item()
108
- confidence = probs[0, pred_class].item()
109
 
110
- human_prob = probs[0, 0].item() * 100
111
- ai_prob = probs[0, 1].item() * 100
112
 
113
- if pred_class == 1:
114
- verdict = "🤖 AI-GENERATED"
115
- color = "red"
116
  else:
117
- verdict = "👤 HUMAN"
118
- color = "green"
 
 
 
119
 
120
- result = f"""
121
- ## Result: {verdict}
122
-
123
- **Confidence: {confidence:.1%}**
124
-
125
- ---
126
-
127
- | Category | Probability |
128
- |----------|-------------|
129
- | 👤 Human | {human_prob:.1f}% |
130
- | 🤖 AI-Generated | {ai_prob:.1f}% |
131
-
132
- ---
133
- *Model: Wav2Vec2-large-xlsr-53 fine-tuned for voice detection*
134
- """
135
- return result
136
-
137
  except Exception as e:
138
- return f"Error processing audio: {str(e)}"
139
-
140
- finally:
141
- if isinstance(audio_input, tuple) and 'audio_path' in locals():
142
- try:
143
- os.remove(audio_path)
144
- except:
145
- pass
146
 
147
 
148
- # Simple Gradio Interface
149
  demo = gr.Interface(
150
- fn=classify_audio,
151
- inputs=gr.Audio(
152
- label="Upload or Record Audio",
153
- type="filepath",
154
- sources=["upload", "microphone"]
155
- ),
156
- outputs=gr.Markdown(label="Result"),
157
  title="🎤 AI Voice Detection",
158
- description="""
159
- **Detect if audio is AI-generated or Human speech**
160
-
161
- Supported languages: Tamil, English, Hindi, Malayalam, Telugu
162
-
163
- Upload an audio file (MP3, WAV, etc.) or record directly using your microphone.
164
- """,
165
  examples=[],
166
- theme=gr.themes.Soft(),
167
- allow_flagging="never"
168
  )
169
 
170
- # Launch for HuggingFace Spaces
171
  if __name__ == "__main__":
172
  demo.launch(server_name="0.0.0.0", server_port=7860)
 
1
  """
2
+ AI Voice Detection - Hugging Face Spaces
3
+ Detects AI-generated vs Human voices
4
  """
5
 
6
  import os
 
37
  return logits
38
 
39
 
40
+ # Load model at startup
41
+ print("Loading Wav2Vec2 backbone...")
42
  backbone = Wav2Vec2Model.from_pretrained("facebook/wav2vec2-large-xlsr-53")
43
  model = W2VBertDeepfakeDetector(backbone, num_labels=2)
44
 
45
+ print(f"Loading classifier weights from {MODEL_REPO}...")
46
  try:
47
  from huggingface_hub import hf_hub_download
48
  model_path = hf_hub_download(repo_id=MODEL_REPO, filename="best_model.pt")
49
  state_dict = torch.load(model_path, map_location="cpu")
50
  model.load_state_dict(state_dict)
51
+ print("✓ Model loaded successfully")
52
  except Exception as e:
53
+ print(f"Error loading model: {e}")
54
+ raise
 
 
55
 
56
  model.to(DEVICE)
57
  model.eval()
58
+ print(f"Ready on {DEVICE}")
59
 
60
 
61
  def load_audio(audio_path):
62
+ """Load and preprocess audio."""
63
  audio_segment = AudioSegment.from_file(audio_path)
64
  samples = np.array(audio_segment.get_array_of_samples()).astype(np.float32)
65
 
 
79
  return torch.from_numpy(samples).float()
80
 
81
 
82
+ def classify(audio_path):
83
+ """Classify audio as AI or Human."""
84
+ if audio_path is None:
85
+ return "Please upload an audio file"
86
 
87
  try:
 
 
 
 
 
 
 
 
 
 
 
88
  waveform = load_audio(audio_path)
89
  input_values = waveform.unsqueeze(0).to(DEVICE)
90
 
 
91
  with torch.no_grad():
92
  logits = model(input_values)
93
  probs = torch.softmax(logits, dim=-1)
94
+ pred = torch.argmax(probs, dim=-1).item()
95
+ conf = probs[0, pred].item()
96
 
97
+ human_pct = probs[0, 0].item() * 100
98
+ ai_pct = probs[0, 1].item() * 100
99
 
100
+ if pred == 1:
101
+ result = f"🤖 **AI-GENERATED** ({conf:.1%} confidence)"
 
102
  else:
103
+ result = f"👤 **HUMAN** ({conf:.1%} confidence)"
104
+
105
+ details = f"\n\n**Scores:** Human {human_pct:.1f}% | AI {ai_pct:.1f}%"
106
+
107
+ return result + details
108
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
  except Exception as e:
110
+ return f"Error: {str(e)}"
 
 
 
 
 
 
 
111
 
112
 
113
+ # Create Gradio app
114
  demo = gr.Interface(
115
+ fn=classify,
116
+ inputs=gr.Audio(type="filepath", label="Upload Audio"),
117
+ outputs=gr.Textbox(label="Result", lines=3),
 
 
 
 
118
  title="🎤 AI Voice Detection",
119
+ description="Upload an audio file to detect if it's AI-generated or human speech.\n\nSupports: Tamil, English, Hindi, Malayalam, Telugu",
 
 
 
 
 
 
120
  examples=[],
121
+ cache_examples=False,
 
122
  )
123
 
 
124
  if __name__ == "__main__":
125
  demo.launch(server_name="0.0.0.0", server_port=7860)