detection_final2

Sleeping

App Files Files Community

kimnamjoon0007 committed on Jan 30

Commit

876b3e1

verified ·

1 Parent(s): 685cac8

Upload app.py with huggingface_hub

Browse files

Files changed (1) hide show

app.py +32 -79

app.py CHANGED Viewed

@@ -1,6 +1,6 @@
 """
-AI Voice Detection - Hugging Face Spaces Demo
-Detects AI-generated vs Human voices in multilingual audio
 """
 import os
@@ -37,30 +37,29 @@ class W2VBertDeepfakeDetector(nn.Module):
         return logits
-# Load model
-print("Loading model...")
 backbone = Wav2Vec2Model.from_pretrained("facebook/wav2vec2-large-xlsr-53")
 model = W2VBertDeepfakeDetector(backbone, num_labels=2)
 try:
     from huggingface_hub import hf_hub_download
     model_path = hf_hub_download(repo_id=MODEL_REPO, filename="best_model.pt")
     state_dict = torch.load(model_path, map_location="cpu")
     model.load_state_dict(state_dict)
-    print(f"✓ Loaded model from {MODEL_REPO}")
 except Exception as e:
-    print(f"Warning: Could not load from HF Hub: {e}")
-    if os.path.exists("best_model.pt"):
-        model.load_state_dict(torch.load("best_model.pt", map_location="cpu"))
-        print("✓ Loaded model from local file")
 model.to(DEVICE)
 model.eval()
-print(f"Model ready on {DEVICE}")
 def load_audio(audio_path):
-    """Load and preprocess audio file."""
     audio_segment = AudioSegment.from_file(audio_path)
     samples = np.array(audio_segment.get_array_of_samples()).astype(np.float32)
@@ -80,93 +79,47 @@ def load_audio(audio_path):
     return torch.from_numpy(samples).float()
-def classify_audio(audio_input):
-    """Main classification function."""
-    if audio_input is None:
-        return "⚠️ Please upload or record an audio file."
     try:
-        # Handle tuple input from microphone (sample_rate, audio_array)
-        if isinstance(audio_input, tuple):
-            import scipy.io.wavfile as wav
-            sr, audio_data = audio_input
-            temp_file = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
-            wav.write(temp_file.name, sr, audio_data)
-            audio_path = temp_file.name
-        else:
-            audio_path = audio_input
-        # Load and process
         waveform = load_audio(audio_path)
         input_values = waveform.unsqueeze(0).to(DEVICE)
-        # Inference
         with torch.no_grad():
             logits = model(input_values)
             probs = torch.softmax(logits, dim=-1)
-            pred_class = torch.argmax(probs, dim=-1).item()
-            confidence = probs[0, pred_class].item()
-        human_prob = probs[0, 0].item() * 100
-        ai_prob = probs[0, 1].item() * 100
-        if pred_class == 1:
-            verdict = "🤖 AI-GENERATED"
-            color = "red"
         else:
-            verdict = "👤 HUMAN"
-            color = "green"
-        result = f"""
-## Result: {verdict}
-**Confidence: {confidence:.1%}**
----
-| Category | Probability |
-|----------|-------------|
-| 👤 Human | {human_prob:.1f}% |
-| 🤖 AI-Generated | {ai_prob:.1f}% |
----
-*Model: Wav2Vec2-large-xlsr-53 fine-tuned for voice detection*
-"""
-        return result
     except Exception as e:
-        return f"❌ Error processing audio: {str(e)}"
-    finally:
-        if isinstance(audio_input, tuple) and 'audio_path' in locals():
-            try:
-                os.remove(audio_path)
-            except:
-                pass
-# Simple Gradio Interface
 demo = gr.Interface(
-    fn=classify_audio,
-    inputs=gr.Audio(
-        label="Upload or Record Audio",
-        type="filepath",
-        sources=["upload", "microphone"]
-    ),
-    outputs=gr.Markdown(label="Result"),
     title="🎤 AI Voice Detection",
-    description="""
-    **Detect if audio is AI-generated or Human speech**
-    Supported languages: Tamil, English, Hindi, Malayalam, Telugu
-    Upload an audio file (MP3, WAV, etc.) or record directly using your microphone.
-    """,
     examples=[],
-    theme=gr.themes.Soft(),
-    allow_flagging="never"
 )
-# Launch for HuggingFace Spaces
 if __name__ == "__main__":
     demo.launch(server_name="0.0.0.0", server_port=7860)

 """
+AI Voice Detection - Hugging Face Spaces
+Detects AI-generated vs Human voices
 """
 import os
         return logits
+# Load model at startup
+print("Loading Wav2Vec2 backbone...")
 backbone = Wav2Vec2Model.from_pretrained("facebook/wav2vec2-large-xlsr-53")
 model = W2VBertDeepfakeDetector(backbone, num_labels=2)
+print(f"Loading classifier weights from {MODEL_REPO}...")
 try:
     from huggingface_hub import hf_hub_download
     model_path = hf_hub_download(repo_id=MODEL_REPO, filename="best_model.pt")
     state_dict = torch.load(model_path, map_location="cpu")
     model.load_state_dict(state_dict)
+    print("✓ Model loaded successfully")
 except Exception as e:
+    print(f"Error loading model: {e}")
+    raise
 model.to(DEVICE)
 model.eval()
+print(f"Ready on {DEVICE}")
 def load_audio(audio_path):
+    """Load and preprocess audio."""
     audio_segment = AudioSegment.from_file(audio_path)
     samples = np.array(audio_segment.get_array_of_samples()).astype(np.float32)
     return torch.from_numpy(samples).float()
+def classify(audio_path):
+    """Classify audio as AI or Human."""
+    if audio_path is None:
+        return "Please upload an audio file"
     try:
         waveform = load_audio(audio_path)
         input_values = waveform.unsqueeze(0).to(DEVICE)
         with torch.no_grad():
             logits = model(input_values)
             probs = torch.softmax(logits, dim=-1)
+            pred = torch.argmax(probs, dim=-1).item()
+            conf = probs[0, pred].item()
+        human_pct = probs[0, 0].item() * 100
+        ai_pct = probs[0, 1].item() * 100
+        if pred == 1:
+            result = f"🤖 **AI-GENERATED** ({conf:.1%} confidence)"
         else:
+            result = f"👤 **HUMAN** ({conf:.1%} confidence)"
+        details = f"\n\n**Scores:** Human {human_pct:.1f}% | AI {ai_pct:.1f}%"
+        return result + details
     except Exception as e:
+        return f"Error: {str(e)}"
+# Create Gradio app
 demo = gr.Interface(
+    fn=classify,
+    inputs=gr.Audio(type="filepath", label="Upload Audio"),
+    outputs=gr.Textbox(label="Result", lines=3),
     title="🎤 AI Voice Detection",
+    description="Upload an audio file to detect if it's AI-generated or human speech.\n\nSupports: Tamil, English, Hindi, Malayalam, Telugu",
     examples=[],
+    cache_examples=False,
 )
 if __name__ == "__main__":
     demo.launch(server_name="0.0.0.0", server_port=7860)