"""
AI Voice Detection API - HuggingFace Spaces
Pure FastAPI - No Gradio
"""
import base64
import hmac
import os
import tempfile

import librosa
import numpy as np
import torch
import torch.nn as nn
import uvicorn
from fastapi import FastAPI, Header, HTTPException
from fastapi.responses import HTMLResponse
from pydantic import BaseModel
from pydub import AudioSegment
from pydub.exceptions import CouldntDecodeError
from transformers import Wav2Vec2Model
# Configuration
# Hub repository the fine-tuned checkpoint is downloaded from.
MODEL_REPO = "kimnamjoon0007/lkht-v440"
# Sample rate (Hz) every clip is resampled to before inference.
TARGET_SR = 16000
# Only the first MAX_DURATION seconds of a clip are scored.
MAX_DURATION = 10.0
# Key clients must send in the `x-api-key` header. It can be overridden with
# the API_KEY environment variable; an unset or empty variable keeps the
# built-in demo key.
# NOTE: the landing page served at `/` renders this value verbatim, so do not
# put a real secret here unless that page is changed as well.
API_KEY = os.getenv("API_KEY") or "sk_test_123456789"
# Run on the GPU when one is visible to torch, otherwise on the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
class W2VBertDeepfakeDetector(nn.Module):
    """Speech-encoder backbone followed by a mean-pool + linear classifier head.

    The backbone must expose ``config.hidden_size`` and, when called with
    ``input_values``/``attention_mask`` keywords, return an object carrying a
    ``last_hidden_state`` tensor.
    """

    def __init__(self, backbone, num_labels: int = 2) -> None:
        """Wrap ``backbone`` with dropout and a ``num_labels``-way linear layer.

        Args:
            backbone: Encoder module providing ``config.hidden_size``.
            num_labels: Number of output classes (logit columns).
        """
        super().__init__()
        self.backbone = backbone
        hidden_size = backbone.config.hidden_size
        self.dropout = nn.Dropout(0.1)
        self.classifier = nn.Linear(hidden_size, num_labels)

    def forward(self, input_values: torch.Tensor, attention_mask=None) -> torch.Tensor:
        """Return unnormalised class logits for ``input_values``.

        Args:
            input_values: Input forwarded unchanged to the backbone.
            attention_mask: Optional mask forwarded to the backbone.

        Returns:
            Logits produced by the linear head, one row per batch item.
        """
        outputs = self.backbone(input_values=input_values, attention_mask=attention_mask)
        hidden_states = outputs.last_hidden_state
        # Plain average over dim 1; the attention mask is only handed to the
        # backbone and does not weight this pooling step.
        pooled = hidden_states.mean(dim=1)
        pooled = self.dropout(pooled)
        logits = self.classifier(pooled)
        return logits
# Load model
# Runs once at import time: build the architecture, pull the fine-tuned
# weights from the Hub, then move the model to DEVICE in eval mode.
print("Loading model...")
backbone = Wav2Vec2Model.from_pretrained("facebook/wav2vec2-large-xlsr-53")
model = W2VBertDeepfakeDetector(backbone, num_labels=2)
try:
    from huggingface_hub import hf_hub_download

    model_path = hf_hub_download(repo_id=MODEL_REPO, filename="best_model.pt")
    # NOTE(security): torch.load unpickles the downloaded file, which can run
    # arbitrary code if the checkpoint is not trusted. If the installed torch
    # supports it, consider passing weights_only=True -- TODO confirm the
    # checkpoint is a plain state dict before switching.
    state_dict = torch.load(model_path, map_location="cpu")
    model.load_state_dict(state_dict)
    print(f"✓ Model loaded from {MODEL_REPO}")
except Exception as e:
    # Log and re-raise: the service must not start without its weights.
    print(f"Error: {e}")
    raise
model.to(DEVICE)
model.eval()
print(f"Ready on {DEVICE}")
# FastAPI app
# Single application object; the route decorators further down register on it.
app = FastAPI(
    title="AI Voice Detection API",
    version="2.0",
)
class DetectionRequest(BaseModel):
    """JSON body accepted by ``POST /api/voice-detection``."""

    # Language label supplied by the client; required by the schema.
    language: str
    # Audio container format as declared by the client; required by the schema.
    audioFormat: str
    # The audio file contents, base64-encoded.
    audioBase64: str
class DetectionResponse(BaseModel):
    """JSON body returned by ``POST /api/voice-detection`` on success."""

    # Outcome marker; the handler sets it to "success".
    status: str
    # Either "AI_GENERATED" or "HUMAN".
    classification: str
    # Softmax probability of the predicted class, rounded to two decimals.
    confidenceScore: float
def load_audio(audio_path):
    """Decode an audio file into a mono float32 waveform sampled at TARGET_SR.

    Args:
        audio_path: Path of the audio file to decode with pydub.

    Returns:
        A 1-D ``torch.float32`` tensor holding at most
        ``MAX_DURATION * TARGET_SR`` samples. The tensor is empty when the
        file decodes to zero samples.

    Raises:
        Whatever ``AudioSegment.from_file`` raises when the file cannot be
        decoded.
    """
    audio_segment = AudioSegment.from_file(audio_path)

    # Only the first MAX_DURATION seconds are ever used, so drop the rest
    # before the (comparatively expensive) resample. pydub slices by
    # milliseconds and clamps the end to the clip length.
    audio_segment = audio_segment[: int(MAX_DURATION * 1000)]

    samples = np.array(audio_segment.get_array_of_samples()).astype(np.float32)
    if audio_segment.channels > 1:
        # Samples are interleaved per frame; average the channels to mono.
        samples = samples.reshape(-1, audio_segment.channels).mean(axis=1)

    # Scale by the positive full-scale value for the decoded sample width
    # (32767 for 16-bit PCM) instead of assuming 16-bit audio.
    full_scale = float((1 << (8 * audio_segment.sample_width - 1)) - 1)
    samples /= full_scale

    sr = audio_segment.frame_rate
    # Skip resampling for an empty clip: there is nothing to convert.
    if sr != TARGET_SR and samples.size:
        samples = librosa.resample(samples, orig_sr=sr, target_sr=TARGET_SR)

    # Guard against a resampled length that overshoots the cap.
    max_len = int(MAX_DURATION * TARGET_SR)
    if len(samples) > max_len:
        samples = samples[:max_len]
    return torch.from_numpy(samples).float()
@app.get("/", response_class=HTMLResponse)
def home():
    """Serve a static HTML landing page describing the detection endpoint.

    The page shows the endpoint URL, the API key, a curl example and the
    response format.
    """
    # SPACE_HOST is read from the environment (presumably injected by the
    # Hugging Face Spaces runtime -- confirm against the Spaces docs). Without
    # it the app is assumed to be running locally, where uvicorn serves plain
    # HTTP on port 7860, so the advertised scheme must be http, not https.
    space_host = os.getenv("SPACE_HOST")
    base_url = f"https://{space_host}" if space_host else "http://localhost:7860"
    return f"""
<!DOCTYPE html>
<html>
<head>
<title>AI Voice Detection API</title>
<style>
body {{ font-family: system-ui; max-width: 800px; margin: 50px auto; padding: 20px; background: #1a1a2e; color: #eee; }}
h1 {{ color: #00d4ff; }}
.box {{ background: #16213e; padding: 20px; border-radius: 10px; margin: 20px 0; }}
code {{ background: #0f3460; padding: 2px 8px; border-radius: 4px; }}
pre {{ background: #0f3460; padding: 15px; border-radius: 8px; overflow-x: auto; white-space: pre-wrap; }}
.key {{ color: #00ff88; font-size: 1.2em; }}
</style>
</head>
<body>
<h1>🎤 AI Voice Detection API</h1>
<div class="box">
<h2>API Endpoint</h2>
<p><code>POST {base_url}/api/voice-detection</code></p>
</div>
<div class="box">
<h2>API Key</h2>
<p class="key"><code>{API_KEY}</code></p>
</div>
<div class="box">
<h2>CURL Example</h2>
<pre>curl -X POST "{base_url}/api/voice-detection" \\
-H "Content-Type: application/json" \\
-H "x-api-key: {API_KEY}" \\
-d '{{
"language": "English",
"audioFormat": "mp3",
"audioBase64": "YOUR_BASE64_AUDIO"
}}'</pre>
</div>
<div class="box">
<h2>Response Format</h2>
<pre>{{
"status": "success",
"classification": "AI_GENERATED" or "HUMAN",
"confidenceScore": 0.97
}}</pre>
</div>
<div class="box">
<h2>Supported Languages</h2>
<p>Tamil, English, Hindi, Malayalam, Telugu</p>
</div>
</body>
</html>
"""
@app.get("/health")
def health():
    """Return a small JSON status document for liveness probes."""
    # model_loaded is a constant: module import re-raises if the checkpoint
    # fails to load, so a running app always has its model.
    return {"status": "healthy", "model_loaded": True, "device": str(DEVICE)}
@app.post("/api/voice-detection")
def detect_voice(request: DetectionRequest, x_api_key: str = Header(None)):
    """Classify a base64-encoded audio clip as AI-generated or human speech.

    Args:
        request: Parsed JSON body; only ``audioBase64`` is read here.
        x_api_key: Value of the ``x-api-key`` request header, if any.

    Returns:
        A ``DetectionResponse`` with the predicted label and the softmax
        probability of that label rounded to two decimals.

    Raises:
        HTTPException: 401 when the API key is missing or wrong; 400 when the
            payload is not valid base64, cannot be decoded as audio, or
            decodes to zero samples.
    """
    # Validate API key. compare_digest keeps the comparison time independent
    # of how many leading characters match; bytes are used because the str
    # form only accepts ASCII.
    if x_api_key is None or not hmac.compare_digest(
        x_api_key.encode("utf-8"), API_KEY.encode("utf-8")
    ):
        raise HTTPException(status_code=401, detail="Invalid API key")

    # Decode audio. binascii.Error is a ValueError subclass, so this covers
    # malformed payloads without swallowing unrelated exceptions.
    try:
        audio_bytes = base64.b64decode(request.audioBase64)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail="Invalid base64") from exc

    # Save temp file. The write happens inside the try so the file is removed
    # even if writing fails part-way.
    temp_file = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False)
    try:
        with temp_file:
            temp_file.write(audio_bytes)

        # Undecodable or empty uploads are client errors, not server faults.
        try:
            waveform = load_audio(temp_file.name)
        except CouldntDecodeError as exc:
            raise HTTPException(status_code=400, detail="Could not decode audio") from exc
        if waveform.numel() == 0:
            raise HTTPException(status_code=400, detail="Audio contains no samples")

        # Process: add a batch dimension and run a gradient-free forward pass.
        input_values = waveform.unsqueeze(0).to(DEVICE)
        with torch.no_grad():
            logits = model(input_values)
            probs = torch.softmax(logits, dim=-1)
        pred = torch.argmax(probs, dim=-1).item()
        conf = probs[0, pred].item()
        # Class index 1 is reported as AI-generated, anything else as human.
        classification = "AI_GENERATED" if pred == 1 else "HUMAN"
        return DetectionResponse(
            status="success",
            classification=classification,
            confidenceScore=round(conf, 2),
        )
    finally:
        os.remove(temp_file.name)
if __name__ == "__main__":
    # Direct execution: serve the app on all interfaces, port 7860.
    uvicorn.run(app, host="0.0.0.0", port=7860)