Upload app.py with huggingface_hub
app.py
CHANGED
@@ -1,22 +1,27 @@
"""
-AI Voice Detection -
-
"""

import os
import tempfile
import numpy as np
import torch
import torch.nn as nn
-import gradio as gr
from transformers import Wav2Vec2Model
from pydub import AudioSegment
import librosa

# Configuration
MODEL_REPO = "kimnamjoon0007/lkht-v440"
TARGET_SR = 16000
MAX_DURATION = 10.0
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

@@ -37,20 +42,19 @@ class W2VBertDeepfakeDetector(nn.Module):
        return logits


-# Load model
-print("Loading
backbone = Wav2Vec2Model.from_pretrained("facebook/wav2vec2-large-xlsr-53")
model = W2VBertDeepfakeDetector(backbone, num_labels=2)

-print(f"Loading classifier weights from {MODEL_REPO}...")
try:
    from huggingface_hub import hf_hub_download
    model_path = hf_hub_download(repo_id=MODEL_REPO, filename="best_model.pt")
    state_dict = torch.load(model_path, map_location="cpu")
    model.load_state_dict(state_dict)
-    print("✓ Model loaded
except Exception as e:
-    print(f"Error
    raise

model.to(DEVICE)
@@ -58,34 +62,130 @@ model.eval()
print(f"Ready on {DEVICE}")


def load_audio(audio_path):
-    """Load and preprocess audio."""
    audio_segment = AudioSegment.from_file(audio_path)
    samples = np.array(audio_segment.get_array_of_samples()).astype(np.float32)
-
    if audio_segment.channels > 1:
        samples = samples.reshape(-1, audio_segment.channels).mean(axis=1)
-
    samples /= 32767.0
    sr = audio_segment.frame_rate
-
    if sr != TARGET_SR:
        samples = librosa.resample(samples, orig_sr=sr, target_sr=TARGET_SR)
-
    max_len = int(MAX_DURATION * TARGET_SR)
    if len(samples) > max_len:
        samples = samples[:max_len]
-
    return torch.from_numpy(samples).float()


-
-
-
-

    try:
-
        input_values = waveform.unsqueeze(0).to(DEVICE)

        with torch.no_grad():
@@ -94,32 +194,24 @@ def classify(audio_path):
        pred = torch.argmax(probs, dim=-1).item()
        conf = probs[0, pred].item()

-
-        ai_pct = probs[0, 1].item() * 100

-        if
-
        else:
-
-
-        details = f"\n\n**Scores:** Human {human_pct:.1f}% | AI {ai_pct:.1f}%"

-        return
-
-
-
-

-# Create Gradio app
-demo = gr.Interface(
-    fn=classify,
-    inputs=gr.Audio(type="filepath", label="Upload Audio"),
-    outputs=gr.Textbox(label="Result", lines=3),
-    title="🎤 AI Voice Detection",
-    description="Upload an audio file to detect if it's AI-generated or human speech.\n\nSupports: Tamil, English, Hindi, Malayalam, Telugu",
-    examples=[],
-    cache_examples=False,
-)

if __name__ == "__main__":
-
"""
+AI Voice Detection API - HuggingFace Spaces
+Pure FastAPI - No Gradio
"""

import os
+import base64
import tempfile
import numpy as np
import torch
import torch.nn as nn
+from fastapi import FastAPI, Header, HTTPException
+from fastapi.responses import HTMLResponse
+from pydantic import BaseModel
from transformers import Wav2Vec2Model
from pydub import AudioSegment
import librosa
+import uvicorn

# Configuration
MODEL_REPO = "kimnamjoon0007/lkht-v440"
TARGET_SR = 16000
MAX_DURATION = 10.0
+API_KEY = "sk_test_123456789"
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        return logits


+# Load model
+print("Loading model...")
backbone = Wav2Vec2Model.from_pretrained("facebook/wav2vec2-large-xlsr-53")
model = W2VBertDeepfakeDetector(backbone, num_labels=2)

try:
    from huggingface_hub import hf_hub_download
    model_path = hf_hub_download(repo_id=MODEL_REPO, filename="best_model.pt")
    state_dict = torch.load(model_path, map_location="cpu")
    model.load_state_dict(state_dict)
+    print(f"✓ Model loaded from {MODEL_REPO}")
except Exception as e:
+    print(f"Error: {e}")
    raise

model.to(DEVICE)

print(f"Ready on {DEVICE}")

+# FastAPI app
+app = FastAPI(title="AI Voice Detection API", version="2.0")
+
+
+class DetectionRequest(BaseModel):
+    language: str
+    audioFormat: str
+    audioBase64: str
+
+
+class DetectionResponse(BaseModel):
+    status: str
+    language: str
+    classification: str
+    confidenceScore: float
+    explanation: str
+
+
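The DetectionRequest model above expects the mp3 bytes as a base64 string. A minimal sketch of building a matching payload, assuming a local file named sample.mp3 (the filename and helper are illustrative, not part of the commit):

import base64
import json

def build_payload(path="sample.mp3", language="English"):
    # Read the mp3 bytes and base64-encode them, matching DetectionRequest's fields.
    with open(path, "rb") as f:
        audio_b64 = base64.b64encode(f.read()).decode("utf-8")
    return {"language": language, "audioFormat": "mp3", "audioBase64": audio_b64}

payload = build_payload()
print(payload["language"], payload["audioFormat"], len(payload["audioBase64"]))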
def load_audio(audio_path):
    audio_segment = AudioSegment.from_file(audio_path)
    samples = np.array(audio_segment.get_array_of_samples()).astype(np.float32)
    if audio_segment.channels > 1:
        samples = samples.reshape(-1, audio_segment.channels).mean(axis=1)
    samples /= 32767.0
    sr = audio_segment.frame_rate
    if sr != TARGET_SR:
        samples = librosa.resample(samples, orig_sr=sr, target_sr=TARGET_SR)
    max_len = int(MAX_DURATION * TARGET_SR)
    if len(samples) > max_len:
        samples = samples[:max_len]
    return torch.from_numpy(samples).float()


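For reference, load_audio returns mono float32 audio at TARGET_SR, truncated to MAX_DURATION seconds, so the handler further down can assume a 1-D tensor of at most 16000 × 10 = 160000 samples. A quick sanity-check sketch, assuming a local clip.mp3 (the filename is illustrative):

# Sanity-check load_audio's output contract on an assumed local file.
waveform = load_audio("clip.mp3")
assert waveform.dim() == 1                                 # mono
assert waveform.dtype == torch.float32                     # float32 samples
assert waveform.numel() <= int(MAX_DURATION * TARGET_SR)   # at most 160000 samples
print(tuple(waveform.shape), float(waveform.abs().max()))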
+
@app.get("/", response_class=HTMLResponse)
|
| 99 |
+
def home():
|
| 100 |
+
space_url = os.getenv("SPACE_HOST", "localhost:7860")
|
| 101 |
+
return f"""
|
| 102 |
+
<!DOCTYPE html>
|
| 103 |
+
<html>
|
| 104 |
+
<head>
|
| 105 |
+
<title>AI Voice Detection API</title>
|
| 106 |
+
<style>
|
| 107 |
+
body {{ font-family: system-ui; max-width: 800px; margin: 50px auto; padding: 20px; background: #1a1a2e; color: #eee; }}
|
| 108 |
+
h1 {{ color: #00d4ff; }}
|
| 109 |
+
.box {{ background: #16213e; padding: 20px; border-radius: 10px; margin: 20px 0; }}
|
| 110 |
+
code {{ background: #0f3460; padding: 2px 8px; border-radius: 4px; }}
|
| 111 |
+
pre {{ background: #0f3460; padding: 15px; border-radius: 8px; overflow-x: auto; white-space: pre-wrap; }}
|
| 112 |
+
.key {{ color: #00ff88; font-size: 1.2em; }}
|
| 113 |
+
</style>
|
| 114 |
+
</head>
|
| 115 |
+
<body>
|
| 116 |
+
<h1>🎤 AI Voice Detection API</h1>
|
| 117 |
+
|
| 118 |
+
<div class="box">
|
| 119 |
+
<h2>API Endpoint</h2>
|
| 120 |
+
<p><code>POST https://{space_url}/api/voice-detection</code></p>
|
| 121 |
+
</div>
|
| 122 |
+
|
| 123 |
+
<div class="box">
|
| 124 |
+
<h2>API Key</h2>
|
| 125 |
+
<p class="key"><code>{API_KEY}</code></p>
|
| 126 |
+
</div>
|
| 127 |
+
|
| 128 |
+
<div class="box">
|
| 129 |
+
<h2>CURL Example</h2>
|
| 130 |
+
<pre>curl -X POST "https://{space_url}/api/voice-detection" \\
|
| 131 |
+
-H "Content-Type: application/json" \\
|
| 132 |
+
-H "x-api-key: {API_KEY}" \\
|
| 133 |
+
-d '{{
|
| 134 |
+
"language": "English",
|
| 135 |
+
"audioFormat": "mp3",
|
| 136 |
+
"audioBase64": "YOUR_BASE64_AUDIO"
|
| 137 |
+
}}'</pre>
|
| 138 |
+
</div>
|
| 139 |
+
|
| 140 |
+
<div class="box">
|
| 141 |
+
<h2>Response Format</h2>
|
| 142 |
+
<pre>{{
|
| 143 |
+
"status": "success",
|
| 144 |
+
"language": "English",
|
| 145 |
+
"classification": "AI_GENERATED" or "HUMAN",
|
| 146 |
+
"confidenceScore": 0.97,
|
| 147 |
+
"explanation": "Detected synthetic voice characteristics"
|
| 148 |
+
}}</pre>
|
| 149 |
+
</div>
|
| 150 |
|
| 151 |
+
<div class="box">
|
| 152 |
+
<h2>Supported Languages</h2>
|
| 153 |
+
<p>Tamil, English, Hindi, Malayalam, Telugu</p>
|
| 154 |
+
</div>
|
| 155 |
+
</body>
|
| 156 |
+
</html>
|
| 157 |
+
"""
|
| 158 |
+
|
| 159 |
+
|
+@app.get("/health")
+def health():
+    return {"status": "healthy", "model_loaded": True, "device": str(DEVICE)}
+
+
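The /health route is a cheap readiness probe a client can poll before uploading audio. A sketch using the requests library (the Space hostname is a placeholder):

import requests

# "your-space.hf.space" stands in for the actual Space host.
resp = requests.get("https://your-space.hf.space/health", timeout=10)
resp.raise_for_status()
print(resp.json())  # expected: {"status": "healthy", "model_loaded": true, "device": "cpu" or "cuda"}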
+
@app.post("/api/voice-detection")
|
| 166 |
+
def detect_voice(request: DetectionRequest, x_api_key: str = Header(None)):
|
| 167 |
+
# Validate API key
|
| 168 |
+
if x_api_key != API_KEY:
|
| 169 |
+
raise HTTPException(status_code=401, detail="Invalid API key")
|
| 170 |
+
|
| 171 |
+
# Validate format
|
| 172 |
+
if request.audioFormat.lower() != "mp3":
|
| 173 |
+
raise HTTPException(status_code=400, detail="Only mp3 format supported")
|
| 174 |
+
|
| 175 |
+
# Decode audio
|
| 176 |
try:
|
| 177 |
+
audio_bytes = base64.b64decode(request.audioBase64)
|
| 178 |
+
except:
|
| 179 |
+
raise HTTPException(status_code=400, detail="Invalid base64")
|
| 180 |
+
|
| 181 |
+
# Save temp file
|
| 182 |
+
temp_file = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False)
|
| 183 |
+
temp_file.write(audio_bytes)
|
| 184 |
+
temp_file.close()
|
| 185 |
+
|
| 186 |
+
try:
|
| 187 |
+
# Process
|
| 188 |
+
waveform = load_audio(temp_file.name)
|
| 189 |
input_values = waveform.unsqueeze(0).to(DEVICE)
|
| 190 |
|
| 191 |
with torch.no_grad():
|
|
|
|
| 194 |
pred = torch.argmax(probs, dim=-1).item()
|
| 195 |
conf = probs[0, pred].item()
|
| 196 |
|
| 197 |
+
classification = "AI_GENERATED" if pred == 1 else "HUMAN"
|
|
|
|
| 198 |
|
| 199 |
+
if classification == "AI_GENERATED":
|
| 200 |
+
explanation = "Detected synthetic voice characteristics and artificial patterns"
|
| 201 |
else:
|
| 202 |
+
explanation = "Detected natural speech patterns and organic voice characteristics"
|
|
|
|
|
|
|
| 203 |
|
| 204 |
+
return DetectionResponse(
|
| 205 |
+
status="success",
|
| 206 |
+
language=request.language,
|
| 207 |
+
classification=classification,
|
| 208 |
+
confidenceScore=round(conf, 2),
|
| 209 |
+
explanation=explanation
|
| 210 |
+
)
|
| 211 |
+
|
| 212 |
+
finally:
|
| 213 |
+
os.remove(temp_file.name)
|
| 214 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|

if __name__ == "__main__":
+    uvicorn.run(app, host="0.0.0.0", port=7860)
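Putting the pieces together, a minimal end-to-end client for the new endpoint. The hostname and filename are placeholders; the route, the x-api-key header, and the request/response field names come from the handler above:

import base64
import requests

SPACE_URL = "https://your-space.hf.space"  # placeholder for the deployed Space host
API_KEY = "sk_test_123456789"              # the key shown on the Space's landing page

# Base64-encode a local mp3 (sample.mp3 is an assumed file).
with open("sample.mp3", "rb") as f:
    audio_b64 = base64.b64encode(f.read()).decode("utf-8")

resp = requests.post(
    f"{SPACE_URL}/api/voice-detection",
    headers={"x-api-key": API_KEY},
    json={"language": "English", "audioFormat": "mp3", "audioBase64": audio_b64},
    timeout=60,
)
resp.raise_for_status()  # 401 for a bad key; 400 for a non-mp3 format or invalid base64
result = resp.json()
print(result["classification"], result["confidenceScore"], result["explanation"])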