Zenoharsh01's picture
Upload 18 files
16350ea verified
import logging
import os
import uuid
from typing import Optional

import librosa
import numpy as np
from fastapi import FastAPI, Depends, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from pydantic import BaseModel
# Configure logging
# Root logging is set up at import time: INFO level, with timestamp, logger
# name and level prefixed to every message.
logging.basicConfig(
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
# Module-level logger used by everything below.
logger = logging.getLogger(__name__)
# Import your existing modules
# Package-relative imports are attempted first. If they raise ImportError,
# the same names are imported as top-level modules instead.
try:
    from .auth import validate_api_key
    from .models.processor import AudioProcessor
    from .models.classifier import EnhancedHybridVoiceClassifier as VoiceClassifier
    from .models.lid import LanguageDetector
except ImportError:
    # Fallback for direct execution
    try:
        from auth import validate_api_key
        from models.processor import AudioProcessor
        from models.classifier import EnhancedHybridVoiceClassifier as VoiceClassifier
        from models.lid import LanguageDetector
    except ImportError:
        # Neither layout worked: log a hint and let the original error propagate.
        logger.error("Failed to import required modules. Check your project structure.")
        raise
# Initialize FastAPI with metadata
# The title/description/version are passed to FastAPI as application metadata.
app = FastAPI(
    title="Voice Classifier API",
    description="AI vs Human voice detection for Tamil, English, Hindi, Malayalam, Telugu",
    version="1.0.0",
)

# Add CORS middleware for evaluation access
# NOTE(review): wildcard origins together with allow_credentials=True is a
# very permissive combination; the FastAPI CORS docs advise listing origins
# explicitly when credentials are enabled. Left unchanged here -- confirm
# whether credentialed browser requests are actually required.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Allow all origins for evaluation
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# --- Initialize Models (Global Load) ---
# All three components are constructed once at import time and shared by every
# request handler below. Any construction failure is logged and re-raised, so
# the module fails to import rather than serving without its models.
logger.info("=" * 70)
logger.info("πŸš€ Initializing System Models...")
logger.info("=" * 70)
try:
    processor = AudioProcessor()
    logger.info("βœ… AudioProcessor loaded")

    classifier = VoiceClassifier()
    logger.info("βœ… VoiceClassifier loaded")

    lid_detector = LanguageDetector()
    logger.info("βœ… LanguageDetector loaded")

    logger.info("=" * 70)
    logger.info("βœ… System Ready for Evaluation")
    logger.info("=" * 70)
except Exception as init_error:
    logger.error(f"❌ Failed to initialize models: {init_error}")
    raise
# --- Request Schema ---
# Body accepted by the detection endpoints. Documented with comments rather
# than a class docstring so the generated schema description is unaffected.
class DetectionRequest(BaseModel):
    # Language label supplied by the caller: Tamil, English, Hindi, Malayalam, Telugu
    language: str
    # Container/extension of the uploaded audio: mp3, wav, m4a, flac, ogg
    audioFormat: str
    # Base64-encoded audio
    audioBase64: str
    # Optional detailed output (off by default)
    return_details: bool = False

    class Config:
        # Example payload attached to the model's schema.
        # NOTE(review): `schema_extra` is the Pydantic v1 spelling; Pydantic v2
        # renamed it to `json_schema_extra` -- confirm the installed version.
        schema_extra = {
            "example": dict(
                language="English",
                audioFormat="mp3",
                audioBase64="SGVsbG8gV29ybGQ=",
                return_details=False,
            )
        }
# --- Health Check Endpoint ---
@app.get("/")
async def root():
    """Root endpoint - API status"""
    # Everything returned here is static: service identity, the supported
    # language labels, and a map of the main routes.
    languages = ["Tamil", "English", "Hindi", "Malayalam", "Telugu"]
    route_map = {
        "health": "/health",
        "detection": "/api/voice-detection",
        "docs": "/docs",
    }
    return dict(
        status="online",
        service="Voice Classifier API",
        version="1.0.0",
        supported_languages=languages,
        endpoints=route_map,
    )
@app.get("/health")
async def health_check():
    """Detailed health check for monitoring"""
    # Report, per component, whether the module-level instance is set.
    components = {
        "processor": processor,
        "classifier": classifier,
        "lid_detector": lid_detector,
    }
    loaded_flags = {label: instance is not None for label, instance in components.items()}
    return {
        "status": "healthy",
        "models_loaded": loaded_flags,
        "ready_for_evaluation": True,
    }
# --- Main Detection Endpoint ---
@app.post("/api/voice-detection", dependencies=[Depends(validate_api_key)])
async def detect_voice(payload: DetectionRequest):
    """
    Detect if a voice is AI-generated or human.

    CRITICAL FOR EVALUATION:
    - Accepts Base64-encoded audio in supported formats
    - Returns classification: AI_GENERATED or HUMAN
    - Includes confidenceScore (0.0 to 1.0)
    - Provides human-readable explanation
    - Auto-detects language (overrides input if needed)

    Request Format:
    {
        "language": "Tamil",
        "audioFormat": "mp3",
        "audioBase64": "base64_encoded_audio_data",
        "return_details": false
    }

    Response Format:
    {
        "status": "success",
        "language": "Tamil",
        "classification": "AI_GENERATED",
        "confidenceScore": 0.91,
        "explanation": "Unnatural pitch consistency detected"
    }

    Failures (invalid audioFormat, silent or empty audio, or any processing
    error) are reported as {"status": "error", "message": "..."} rather than
    raised.
    """
    # `audioFormat` comes straight from the request body and is spliced into a
    # filesystem path below. Restrict it to a short ASCII alphanumeric token so
    # it cannot carry path separators or ".." segments.
    audio_format = payload.audioFormat.strip().lstrip(".").lower()
    if not (audio_format.isascii() and audio_format.isalnum() and len(audio_format) <= 10):
        logger.warning("Rejected request with invalid audioFormat: %r", payload.audioFormat)
        return {
            "status": "error",
            "message": "Unsupported audio format"
        }

    # Create a unique temp file for this request
    temp_filename = f"temp_{uuid.uuid4()}.{audio_format}"
    try:
        logger.info(f"πŸ“₯ Processing request: language={payload.language}, format={payload.audioFormat}")

        # 1. Decode Base64 & Save to Disk
        audio_io = processor.decode_base64(payload.audioBase64)
        with open(temp_filename, "wb") as f:
            f.write(audio_io.getbuffer())
        logger.info(f"βœ… Audio decoded and saved to {temp_filename}")

        # 2. Load Audio Data (once, resampled to 16 kHz mono, and reused below)
        # NOTE(review): librosa.load and classifier.predict are synchronous
        # calls inside an async handler, so they run on the event loop --
        # consider offloading to a worker thread if concurrency matters.
        audio_array, _ = librosa.load(temp_filename, sr=16000, mono=True)

        # Validate audio is not silent. An empty array would make np.mean
        # return NaN, which never compares below the threshold, so treat
        # "no samples" as RMS 0 explicitly.
        rms = np.sqrt(np.mean(audio_array**2)) if audio_array.size else 0.0
        if rms < 0.001:
            logger.warning(f"⚠️ Silent audio detected (RMS: {rms})")
            return {
                "status": "error",
                "message": "Audio appears to be silent or nearly silent"
            }
        logger.info(f"βœ… Audio loaded: {len(audio_array)/16000:.1f}s duration, RMS: {rms:.4f}")

        # 3. Detect Language
        detected_lang = lid_detector.detect(temp_filename)
        logger.info(f"🌍 Language detected: {detected_lang}")

        # 4. Forensic Analysis (AI vs Human)
        # The classifier returns a dictionary with detailed info
        logger.info(f"πŸ€– Running classifier for {detected_lang}...")
        result = classifier.predict(
            audio_array,
            language=detected_lang,
            return_details=payload.return_details,
        )

        # Extract values from the classifier result
        classification = result.get("verdict", "UNKNOWN")  # "AI_GENERATED" or "HUMAN"
        # Coerce to a built-in float: a NumPy scalar such as np.float32 is not
        # JSON-serializable by the response encoder.
        confidence = float(result.get("confidence", 0.0))  # 0.0 to 1.0
        explanation = result.get("explanation", "")  # Detailed explanation
        method = result.get("method", "unknown")  # Detection method used
        logger.info(f"βœ… Classification: {classification} | Confidence: {confidence:.3f}")

        # CRITICAL: Ensure classification is valid for evaluation.
        # The response may only carry one of the two labels, so an unexpected
        # verdict is coerced to a label (chosen from the reported confidence)
        # and the confidence is pinned to 0.5 to signal uncertainty.
        if classification not in ("AI_GENERATED", "HUMAN"):
            logger.warning(f"⚠️ Invalid classification '{classification}', defaulting to UNCERTAIN")
            classification = "HUMAN" if confidence > 0.5 else "AI_GENERATED"
            confidence = 0.5
            explanation = "Unable to classify with high confidence"

        # 5. Construct "Smart" Explanation
        final_explanation = explanation
        # Add language mismatch note if detected
        if payload.language.lower() != detected_lang.lower():
            final_explanation += f" (Note: Input labeled as {payload.language}, but detected {detected_lang})."

        # 6. Build Response (EXACT FORMAT FOR EVALUATION)
        response = {
            "status": "success",
            "language": detected_lang,
            "classification": classification,  # MUST be "AI_GENERATED" or "HUMAN"
            "confidenceScore": round(confidence, 3),  # Round to 3 decimal places
            "explanation": final_explanation
        }

        # 7. Add Optional Method Field (not required by spec but useful)
        if payload.return_details:
            response["method"] = method
            # Copy through whichever detailed-analysis fields the classifier provided.
            for key in (
                "heuristic_score",
                "heuristic_reason",
                "model_confidence",
                "model_verdict",
                "segments_analyzed",
                "details",
            ):
                if key in result:
                    response[key] = result[key]

        logger.info(f"πŸ“€ Response sent: {classification} with confidence {confidence:.3f}")
        return response
    except Exception as e:
        # Top-level boundary for this request: log with traceback and report
        # the failure in the response body instead of raising.
        logger.error(f"❌ API Error: {str(e)}", exc_info=True)
        return {
            "status": "error",
            "message": f"Processing failed: {str(e)}"
        }
    finally:
        # 8. Cleanup: Always remove the temp file (it may not exist if
        # decoding failed before anything was written).
        try:
            os.remove(temp_filename)
            logger.debug(f"πŸ—‘οΈ Cleaned up temp file: {temp_filename}")
        except FileNotFoundError:
            pass
        except OSError as e:
            logger.warning(f"⚠️ Failed to remove temp file: {e}")
# --- Batch Detection Endpoint (Optional, for efficiency) ---
@app.post("/api/batch-detection", dependencies=[Depends(validate_api_key)])
async def batch_detect_voice(payloads: list[DetectionRequest]):
    """
    Process multiple audio files in one request (Optional for evaluation)
    Maximum 10 files per batch to prevent overload
    """
    total = len(payloads)
    # Guard clause: refuse oversized batches before doing any work.
    if total > 10:
        raise HTTPException(
            status_code=400,
            detail="Maximum 10 files per batch request",
        )

    # Items are processed one after another through the single-item handler;
    # a failure on one item is recorded in its slot and does not stop the rest.
    outcomes = []
    for position, item in enumerate(payloads, start=1):
        logger.info(f"Processing batch item {position}/{total}")
        try:
            outcome = await detect_voice(item)
        except Exception as err:
            outcome = {
                "status": "error",
                "message": str(err),
            }
        outcomes.append(outcome)
    return {"results": outcomes}
# --- Error Handlers ---
@app.exception_handler(HTTPException)
async def http_exception_handler(request, exc: HTTPException):
    """
    Handle HTTP exceptions.

    Returns the error as a JSON body of the form
    {"status": "error", "message": <detail>} while preserving the exception's
    status code and headers.

    An exception handler must return a Response object; returning a bare dict
    (as this handler previously did) is not converted automatically and also
    discards the intended status code.
    """
    return JSONResponse(
        status_code=exc.status_code,
        content={
            "status": "error",
            "message": exc.detail
        },
        # Forward any headers attached to the exception (e.g. WWW-Authenticate).
        headers=getattr(exc, "headers", None),
    )
@app.exception_handler(Exception)
async def general_exception_handler(request, exc: Exception):
    """
    Handle unexpected exceptions.

    Logs the exception with its traceback and answers with a generic HTTP 500
    JSON body, so internal details are not exposed to the client.

    An exception handler must return a Response object, so the payload is
    wrapped in a JSONResponse instead of being returned as a bare dict.
    """
    logger.error(f"Unexpected error: {exc}", exc_info=True)
    return JSONResponse(
        status_code=500,
        content={
            "status": "error",
            "message": "An unexpected error occurred"
        },
    )
# --- Main Entry Point ---
# Only runs when the file is executed as a script; uvicorn is imported here,
# inside the guard, rather than at the top of the module.
if __name__ == "__main__":
    import uvicorn

    logger.info("Starting Voice Classifier API Server...")
    # Listen on all interfaces, port 8000.
    uvicorn.run(app, host="0.0.0.0", port=8000, log_level="info")