# AudioClassifier / app.py
# (Hugging Face Space by RJ40under40 — revision c336244, verified)
import base64
import io
import logging
import os
import secrets

import librosa
import numpy as np
import torch
import uvicorn
from fastapi import FastAPI, HTTPException, Security, Depends, Header
from pydantic import BaseModel
from transformers import AutoFeatureExtractor, AutoModelForAudioClassification
# ======================================================
# CONFIG & HACKATHON SETTINGS
# ======================================================
# Hugging Face access token for model downloads (may be None for public models).
HF_TOKEN = os.getenv("HF_Token")
# API key expected in the `x-api-key` header. Prefer setting the API_KEY env
# var in deployment; the hard-coded value is kept only as a backward-compatible
# hackathon default so existing clients keep working.
API_KEY_VALUE = os.getenv("API_KEY", "sk_test_123456789")
# Using the high-accuracy deepfake detection model
MODEL_ID = "Hemgg/Deepfake-audio-detection"
# All incoming audio is resampled to 16 kHz before inference.
TARGET_SR = 16000
# Maps the model's output class indices to the hackathon's label strings.
LABEL_MAP = {0: "AI_GENERATED", 1: "HUMAN"}

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("hcl-voice-detection")

# Run inference on GPU when one is available.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# ======================================================
# MODEL LOADING
# ======================================================
# Load the feature extractor and classifier once at import time so every
# request reuses them. On any failure `model` is set to None and the endpoint
# returns an error response instead of the process crashing at startup.
try:
    feature_extractor = AutoFeatureExtractor.from_pretrained(MODEL_ID, token=HF_TOKEN)
    model = AutoModelForAudioClassification.from_pretrained(MODEL_ID, token=HF_TOKEN).to(DEVICE)
    # Inference-only mode: disables dropout / batch-norm updates.
    model.eval()
    logger.info("Model loaded successfully.")
except Exception as e:
    logger.error(f"Critical Error: Failed to load model: {e}")
    # NOTE(review): if AutoFeatureExtractor.from_pretrained itself fails,
    # `feature_extractor` is left undefined; the `model is None` guard in the
    # endpoint keeps it from ever being referenced in that case.
    model = None
# ======================================================
# API SETUP
# ======================================================
# Single FastAPI application instance served by uvicorn (see the main guard).
app = FastAPI(title="HCL AI Voice Detection API")
class VoiceRequest(BaseModel):
    """Request body for POST /api/voice-detection."""

    # Caller-declared language of the sample; echoed back in the response.
    language: str
    # Declared container/codec of the encoded audio (e.g. "mp3"); not
    # inspected by the visible processing code — librosa sniffs the format.
    audioFormat: str
    # The audio payload, base64-encoded; an optional data-URI prefix
    # ("data:audio/...;base64,") is tolerated by preprocess_audio.
    audioBase64: str
# Security Layer: Checks for 'x-api-key' in headers
async def verify_api_key(x_api_key: str = Header(None)):
    """FastAPI dependency that validates the `x-api-key` request header.

    Returns the key on success; raises HTTP 403 otherwise. Uses a
    constant-time comparison so the secret cannot be probed via timing.
    """
    # compare_digest requires two str/bytes operands, so guard against a
    # missing header (Header(None)) before comparing.
    if not x_api_key or not secrets.compare_digest(x_api_key, API_KEY_VALUE):
        # Standard Hackathon error response for auth
        raise HTTPException(status_code=403, detail="Invalid API key or malformed request")
    return x_api_key
# ======================================================
# CORE LOGIC
# ======================================================
def preprocess_audio(b64_string: str):
    """Decode a base64 audio payload into a 16 kHz mono float32 waveform.

    Accepts raw base64 or a data URI ("data:audio/mp3;base64,..."). Audio
    shorter than one second is zero-padded to TARGET_SR samples so the
    feature extractor always receives a minimum-length input.

    Raises:
        ValueError: if the payload cannot be decoded or parsed as audio.
    """
    try:
        # Strip an optional data-URI prefix. maxsplit=1 keeps the payload
        # intact even if it contains further commas (split(",")[1] would
        # silently drop everything after a second comma).
        if "," in b64_string:
            b64_string = b64_string.split(",", 1)[1]
        # Base64 Decoding
        audio_bytes = base64.b64decode(b64_string)
        # Load via librosa for robust MP3 support; sr=TARGET_SR resamples.
        with io.BytesIO(audio_bytes) as bio:
            audio, sr = librosa.load(bio, sr=TARGET_SR)
        # Zero-pad anything shorter than one second of audio.
        if len(audio) < TARGET_SR:
            audio = np.pad(audio, (0, TARGET_SR - len(audio)))
        return audio.astype(np.float32)
    except Exception as e:
        logger.error(f"Preprocessing error: {e}")
        # Chain the original cause so server logs show the real failure.
        raise ValueError("Invalid audio data") from e
def generate_explanation(classification: str, confidence: float):
    """Return a canned, human-readable rationale for the given label.

    `confidence` is accepted for interface compatibility but does not
    influence the returned text.
    """
    canned = {
        "AI_GENERATED": "Unnatural pitch consistency and robotic speech patterns detected in the spectral analysis.",
    }
    human_text = "Natural prosody and human-like frequency variance identified."
    return canned.get(classification, human_text)
# ======================================================
# ENDPOINTS
# ======================================================
@app.post("/api/voice-detection")
async def voice_detection(
    request: VoiceRequest,
    auth: str = Depends(verify_api_key)
):
    """Classify a base64-encoded voice sample as AI-generated or human.

    Auth is enforced by the `verify_api_key` dependency (x-api-key header).
    Always responds HTTP 200; success vs. failure is signalled through the
    `status` field of the JSON body, per the hackathon response contract.
    """
    # Model failed to load at startup — degrade gracefully instead of 500ing.
    if model is None:
        return {"status": "error", "message": "Model not available"}
    try:
        # 1. Audio Processing — decode base64 into a 16 kHz float32 waveform.
        waveform = preprocess_audio(request.audioBase64)
        # 2. Inference
        inputs = feature_extractor(waveform, sampling_rate=TARGET_SR, return_tensors="pt").to(DEVICE)
        with torch.no_grad():
            logits = model(**inputs).logits
            probs = torch.softmax(logits, dim=-1)
            # Highest-probability class and its probability.
            confidence, pred_idx = torch.max(probs, dim=-1)
        classification = LABEL_MAP.get(int(pred_idx.item()), "UNKNOWN")
        # Probability rounded to 2 decimals for the response payload.
        score = round(float(confidence.item()), 2)
        # 3. Response Generation (Matches Hackathon Format)
        return {
            "status": "success",
            "language": request.language,
            "classification": classification,
            "confidenceScore": score,
            "explanation": generate_explanation(classification, score)
        }
    except Exception as e:
        logger.error(f"Inference error: {e}")
        # Any preprocessing/inference failure maps to one generic message so
        # internal details are not leaked to the client.
        return {
            "status": "error",
            "message": "Malformed request or processing error"
        }
if __name__ == "__main__":
    # Bind to all interfaces on port 7860 (the conventional Hugging Face
    # Spaces port — presumably this app is deployed there; confirm).
    uvicorn.run("app:app", host="0.0.0.0", port=7860)