"""AI Voice Detection API - HuggingFace Spaces.

Pure FastAPI - No Gradio.

Serves a wav2vec2-based binary classifier that labels an uploaded audio clip
as ``AI_GENERATED`` or ``HUMAN``. The model is loaded once at import time.
"""

import base64
import os
import secrets
import tempfile
from typing import Optional

import librosa
import numpy as np
import torch
import torch.nn as nn
import uvicorn
from fastapi import FastAPI, Header, HTTPException
from fastapi.responses import HTMLResponse
from pydantic import BaseModel
from pydub import AudioSegment
from pydub.exceptions import CouldntDecodeError
from transformers import Wav2Vec2Model

# Configuration
MODEL_REPO = "kimnamjoon0007/lkht-v440"
TARGET_SR = 16000  # sample rate (Hz) every clip is resampled to
MAX_DURATION = 10.0  # seconds of audio kept per request; the rest is dropped
# NOTE(review): this key is hard-coded and is also rendered on the home page,
# so it offers no real protection. Move it to a secret before production use.
API_KEY = "sk_test_123456789"
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")


class W2VBertDeepfakeDetector(nn.Module):
    """Linear classification head on top of a mean-pooled backbone.

    Args:
        backbone: Encoder exposing ``config.hidden_size`` and returning an
            object with ``last_hidden_state`` when called.
        num_labels: Number of output classes.
    """

    def __init__(self, backbone, num_labels=2):
        super().__init__()
        self.backbone = backbone
        hidden_size = backbone.config.hidden_size
        self.dropout = nn.Dropout(0.1)
        self.classifier = nn.Linear(hidden_size, num_labels)

    def forward(self, input_values, attention_mask=None):
        """Return unnormalised class logits for a batch of waveforms."""
        outputs = self.backbone(
            input_values=input_values, attention_mask=attention_mask
        )
        hidden_states = outputs.last_hidden_state
        # Average over dim 1 (presumably the time/frame axis - TODO confirm).
        pooled = hidden_states.mean(dim=1)
        pooled = self.dropout(pooled)
        logits = self.classifier(pooled)
        return logits


# Load model
print("Loading model...")
backbone = Wav2Vec2Model.from_pretrained("facebook/wav2vec2-large-xlsr-53")
model = W2VBertDeepfakeDetector(backbone, num_labels=2)

try:
    from huggingface_hub import hf_hub_download

    model_path = hf_hub_download(repo_id=MODEL_REPO, filename="best_model.pt")
    # NOTE(review): torch.load unpickles the checkpoint. If the installed
    # torch supports it, consider weights_only=True so a tampered file
    # cannot execute arbitrary code.
    state_dict = torch.load(model_path, map_location="cpu")
    model.load_state_dict(state_dict)
    print(f"✓ Model loaded from {MODEL_REPO}")
except Exception as e:
    # Startup boundary: report the failure, then abort the import.
    print(f"Error: {e}")
    raise

model.to(DEVICE)
model.eval()
print(f"Ready on {DEVICE}")

# FastAPI app
app = FastAPI(title="AI Voice Detection API", version="2.0")


class DetectionRequest(BaseModel):
    """Request body for ``POST /api/voice-detection``."""

    language: str
    audioFormat: str
    audioBase64: str


class DetectionResponse(BaseModel):
    """Response body for ``POST /api/voice-detection``."""

    status: str
    classification: str
    confidenceScore: float


def load_audio(audio_path):
    """Decode an audio file into a mono float32 waveform at ``TARGET_SR``.

    Args:
        audio_path: Path of the audio file to decode.

    Returns:
        1-D float tensor holding at most ``MAX_DURATION`` seconds of audio.

    Raises:
        pydub.exceptions.CouldntDecodeError: If the file cannot be decoded.
    """
    audio_segment = AudioSegment.from_file(audio_path)

    # The 32767 divisor below is only correct for 16-bit PCM. Other sample
    # widths are converted first so the waveform is scaled consistently.
    if audio_segment.sample_width != 2:
        audio_segment = audio_segment.set_sample_width(2)

    samples = np.array(audio_segment.get_array_of_samples()).astype(np.float32)

    # Samples are interleaved per channel; average the channels down to mono.
    if audio_segment.channels > 1:
        samples = samples.reshape(-1, audio_segment.channels).mean(axis=1)

    samples /= 32767.0

    sr = audio_segment.frame_rate
    if sr != TARGET_SR:
        samples = librosa.resample(samples, orig_sr=sr, target_sr=TARGET_SR)

    max_len = int(MAX_DURATION * TARGET_SR)
    if len(samples) > max_len:
        samples = samples[:max_len]

    return torch.from_numpy(samples).float()


@app.get("/", response_class=HTMLResponse)
def home():
    """Return the landing page: endpoint URL, API key and a curl example."""
    space_url = os.getenv("SPACE_HOST", "localhost:7860")
    return f""" AI Voice Detection API

🎤 AI Voice Detection API

API Endpoint

POST https://{space_url}/api/voice-detection

API Key

{API_KEY}

CURL Example

curl -X POST "https://{space_url}/api/voice-detection" \\
  -H "Content-Type: application/json" \\
  -H "x-api-key: {API_KEY}" \\
  -d '{{
    "language": "English",
    "audioFormat": "mp3",
    "audioBase64": "YOUR_BASE64_AUDIO"
  }}'

Response Format

{{
  "status": "success",
  "classification": "AI_GENERATED" or "HUMAN",
  "confidenceScore": 0.97
}}

Supported Languages

Tamil, English, Hindi, Malayalam, Telugu

"""


@app.get("/health")
def health():
    """Return a static liveness payload including the compute device."""
    return {"status": "healthy", "model_loaded": True, "device": str(DEVICE)}


@app.post("/api/voice-detection")
def detect_voice(
    request: DetectionRequest, x_api_key: Optional[str] = Header(None)
):
    """Classify a base64-encoded audio clip as AI-generated or human.

    Args:
        request: Parsed request body; only ``audioBase64`` is used.
        x_api_key: Value of the ``x-api-key`` header, if present.

    Returns:
        ``DetectionResponse`` with the predicted label and its softmax
        probability rounded to two decimals.

    Raises:
        HTTPException: 401 for a missing or wrong API key; 400 for invalid
            base64, undecodable audio, or audio with no samples.
    """
    # Validate API key. Constant-time comparison avoids leaking how many
    # leading characters matched; bytes are used so non-ASCII header values
    # cannot raise TypeError.
    if x_api_key is None or not secrets.compare_digest(
        x_api_key.encode("utf-8"), API_KEY.encode("utf-8")
    ):
        raise HTTPException(status_code=401, detail="Invalid API key")

    # Decode audio. binascii.Error is a ValueError subclass, so this covers
    # both bad padding and non-ASCII input.
    try:
        audio_bytes = base64.b64decode(request.audioBase64)
    except ValueError:
        raise HTTPException(status_code=400, detail="Invalid base64") from None

    # Save temp file. delete=False because load_audio reopens it by name;
    # the finally clause removes it on every path, including a failed write.
    temp_file = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False)
    try:
        with temp_file:
            temp_file.write(audio_bytes)

        # Process
        try:
            waveform = load_audio(temp_file.name)
        except CouldntDecodeError as exc:
            raise HTTPException(status_code=400, detail="Invalid audio") from exc

        # An empty clip would only fail inside the model with an opaque 500.
        if waveform.numel() == 0:
            raise HTTPException(
                status_code=400, detail="Audio contains no samples"
            )

        input_values = waveform.unsqueeze(0).to(DEVICE)

        with torch.no_grad():
            logits = model(input_values)
            probs = torch.softmax(logits, dim=-1)
            pred = torch.argmax(probs, dim=-1).item()
            conf = probs[0, pred].item()

        # Class index 1 is mapped to AI_GENERATED; anything else to HUMAN.
        classification = "AI_GENERATED" if pred == 1 else "HUMAN"

        return DetectionResponse(
            status="success",
            classification=classification,
            confidenceScore=round(conf, 2),
        )
    finally:
        os.remove(temp_file.name)


if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=7860)