Spaces:
Running
Running
File size: 4,522 Bytes
9301dd7 f3ff9bf e8d09f3 f3ff9bf e8d09f3 4b23c1b c336244 f3ff9bf 779e17c f3ff9bf 9301dd7 18828c4 9301dd7 18828c4 c336244 9301dd7 c336244 18828c4 f3ff9bf 18828c4 f3ff9bf 18828c4 c336244 2a2e4a4 9301dd7 18828c4 9301dd7 18828c4 9301dd7 18828c4 9301dd7 18828c4 9301dd7 2a2e4a4 f3ff9bf c336244 f3ff9bf c336244 f3ff9bf 18828c4 97dd4a0 c336244 97dd4a0 c336244 97dd4a0 c336244 97dd4a0 4b23c1b 97dd4a0 c336244 97dd4a0 c336244 97dd4a0 c336244 0c8ad6a c336244 9301dd7 c336244 9301dd7 7e73c0d c336244 9301dd7 c336244 18828c4 7e73c0d f3ff9bf 97dd4a0 c336244 9301dd7 c336244 7e73c0d c336244 7e73c0d 18828c4 7e73c0d c336244 4b23c1b 9301dd7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 |
import base64
import io
import logging
import os
import secrets

import librosa
import numpy as np
import torch
import uvicorn
from fastapi import FastAPI, HTTPException, Security, Depends, Header
from pydantic import BaseModel
from transformers import AutoFeatureExtractor, AutoModelForAudioClassification
# ======================================================
# CONFIG & HACKATHON SETTINGS
# ======================================================
HF_TOKEN = os.getenv("HF_Token")
# Prefer an environment-supplied key; the literal fallback preserves the
# original hackathon default. NOTE(review): avoid shipping a hard-coded
# secret — set API_KEY in the environment for real deployments.
API_KEY_VALUE = os.getenv("API_KEY", "sk_test_123456789")
# Using the high-accuracy deepfake detection model
MODEL_ID = "Hemgg/Deepfake-audio-detection"
TARGET_SR = 16000  # sampling rate fed to librosa and the feature extractor
LABEL_MAP = {0: "AI_GENERATED", 1: "HUMAN"}  # model logit index -> API label
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("hcl-voice-detection")
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# ======================================================
# MODEL LOADING
# ======================================================
try:
    # Load the feature extractor and classifier once at import time;
    # the endpoint reuses these module-level objects for every request.
    feature_extractor = AutoFeatureExtractor.from_pretrained(MODEL_ID, token=HF_TOKEN)
    model = AutoModelForAudioClassification.from_pretrained(MODEL_ID, token=HF_TOKEN).to(DEVICE)
    model.eval()  # inference mode: disables dropout / batch-norm updates
    logger.info("Model loaded successfully.")
except Exception as e:
    logger.error(f"Critical Error: Failed to load model: {e}")
    # Define BOTH globals on failure: if the feature-extractor load raised,
    # `feature_extractor` would otherwise be undefined and any later
    # reference would NameError instead of hitting the `model is None` guard.
    feature_extractor = None
    model = None
# ======================================================
# API SETUP
# ======================================================
# Single FastAPI application instance; served by uvicorn (see __main__ block).
app = FastAPI(title="HCL AI Voice Detection API")
class VoiceRequest(BaseModel):
    """Request body for POST /api/voice-detection."""
    # Language tag; echoed back verbatim in the response payload
    language: str
    # Audio container/codec hint (e.g. "mp3"); currently not read by the
    # preprocessing pipeline — librosa auto-detects the format
    audioFormat: str
    # Audio payload, base64-encoded; may carry a "data:...;base64," prefix
    audioBase64: str
# Security Layer: Checks for 'x-api-key' in headers
async def verify_api_key(x_api_key: str = Header(None)):
    """FastAPI dependency validating the 'x-api-key' request header.

    Returns the key on success; raises HTTP 403 when the header is
    missing or does not match the configured secret.
    """
    # Constant-time comparison avoids leaking key bytes via response timing;
    # the None guard covers a missing header (compare_digest rejects None).
    if x_api_key is None or not secrets.compare_digest(str(x_api_key), API_KEY_VALUE):
        # Standard Hackathon error response for auth
        raise HTTPException(status_code=403, detail="Invalid API key or malformed request")
    return x_api_key
# ======================================================
# CORE LOGIC
# ======================================================
def preprocess_audio(b64_string: str):
    """Decode base64 audio into a mono float32 waveform at TARGET_SR.

    Accepts either a bare base64 payload or a data URI
    ("data:audio/...;base64,<payload>"). Clips shorter than one second
    are zero-padded to TARGET_SR samples so the feature extractor always
    receives a minimum-length input.

    Raises:
        ValueError: if the payload cannot be decoded or parsed as audio.
    """
    try:
        # Strip a data-URI prefix if present; split at most once so the
        # payload itself is never fragmented.
        if "," in b64_string:
            b64_string = b64_string.split(",", 1)[1]
        audio_bytes = base64.b64decode(b64_string)
        # librosa handles container/codec detection (incl. MP3) and
        # resamples to the model's expected rate.
        with io.BytesIO(audio_bytes) as bio:
            audio, _sr = librosa.load(bio, sr=TARGET_SR)
        # Zero-pad sub-second clips up to exactly one second.
        if len(audio) < TARGET_SR:
            audio = np.pad(audio, (0, TARGET_SR - len(audio)))
        return audio.astype(np.float32)
    except Exception as e:
        logger.error(f"Preprocessing error: {e}")
        # Chain the original cause so upstream logs show the real failure.
        raise ValueError("Invalid audio data") from e
def generate_explanation(classification: str, confidence: float):
    """Return a canned human-readable rationale for the given verdict.

    `confidence` is accepted for interface symmetry with the caller but
    does not currently influence the explanation text.
    """
    explanations = {
        "AI_GENERATED": "Unnatural pitch consistency and robotic speech patterns detected in the spectral analysis.",
    }
    fallback = "Natural prosody and human-like frequency variance identified."
    return explanations.get(classification, fallback)
# ======================================================
# ENDPOINTS
# ======================================================
@app.post("/api/voice-detection")
async def voice_detection(
    request: VoiceRequest,
    auth: str = Depends(verify_api_key)
):
    """Classify a base64-encoded audio clip as AI-generated or human speech.

    Auth is enforced by the verify_api_key dependency. On any processing
    failure the endpoint returns a generic error payload (HTTP 200) to
    match the hackathon response format.
    """
    # Guard: model may have failed to load at startup.
    if model is None:
        return {"status": "error", "message": "Model not available"}
    try:
        # Decode + resample the payload into a model-ready waveform.
        waveform = preprocess_audio(request.audioBase64)
        # Extract features and run the classifier without gradient tracking.
        features = feature_extractor(
            waveform, sampling_rate=TARGET_SR, return_tensors="pt"
        ).to(DEVICE)
        with torch.no_grad():
            probabilities = torch.softmax(model(**features).logits, dim=-1)
        top_prob, top_idx = torch.max(probabilities, dim=-1)
        label = LABEL_MAP.get(int(top_idx.item()), "UNKNOWN")
        score = round(float(top_prob.item()), 2)
        # Response shape matches the hackathon specification.
        return {
            "status": "success",
            "language": request.language,
            "classification": label,
            "confidenceScore": score,
            "explanation": generate_explanation(label, score),
        }
    except Exception as exc:
        logger.error(f"Inference error: {exc}")
        return {
            "status": "error",
            "message": "Malformed request or processing error"
        }
if __name__ == "__main__":
    # Local/container entrypoint. Port 7860 matches the Hugging Face Spaces
    # convention — presumably deployed there; confirm for other targets.
    # NOTE(review): "app:app" assumes this file is named app.py — verify the
    # module name matches, otherwise uvicorn will fail to import the app.
    uvicorn.run("app:app", host="0.0.0.0", port=7860)