# Spaces: Sleeping
| from fastapi import FastAPI, HTTPException, Header, Depends, Query | |
| from fastapi.responses import JSONResponse, FileResponse | |
| from fastapi.staticfiles import StaticFiles | |
| from fastapi.middleware.cors import CORSMiddleware | |
| from pydantic import BaseModel, Field | |
| from typing import Optional, Literal, Dict, Any, List | |
| from app.core.audio import decode_base64_audio, preprocess_audio | |
| from app.core.model import voice_detector | |
| from app.core.explanation import generate_explanation | |
| from app.config import settings | |
| import uvicorn | |
| import logging | |
| import os | |
# Module-level logger; used by the generic error handler and the detection
# route further down in this file.
logger = logging.getLogger("api")

# The FastAPI application object. The title, version and description are
# passed straight to the FastAPI constructor.
app = FastAPI(
    title="VoiceGuard — AI Voice Detection API",
    version="3.0.0",
    description=(
        "World-class deepfake audio detection API. "
        "Combines neural model inference with multi-analyzer forensic analysis "
        "to detect AI-generated speech with maximum accuracy."
    ),
)
# CORS Security
# Any origin, method and request header is accepted. Credentials are switched
# off: the CORS specification does not allow a wildcard origin to be combined
# with credentialed requests, and this API authenticates through the
# x-api-key request header rather than cookies, so browsers never need to
# send credentials cross-origin.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Web UI assets: expose the "static" folder that sits next to this module
# under the /static URL prefix, but only when that path exists on disk.
static_path = os.path.join(
    os.path.dirname(os.path.abspath(__file__)),
    "static",
)
if os.path.exists(static_path):
    app.mount(
        "/static",
        StaticFiles(directory=static_path),
        name="static",
    )
# Custom Error Handlers (Hackathon Specification)
@app.exception_handler(HTTPException)
async def http_exception_handler(request, exc):
    """Render a ``fastapi.HTTPException`` in the API's error envelope.

    The handler is registered on ``app`` for ``HTTPException`` so that errors
    raised by application code are returned as
    ``{"status": "error", "message": <detail>}`` with the exception's own
    status code.

    Args:
        request: The incoming request (unused).
        exc: The raised ``HTTPException``.

    Returns:
        A ``JSONResponse`` carrying the exception's status code, detail and
        any headers attached to the exception.
    """
    return JSONResponse(
        status_code=exc.status_code,
        content={"status": "error", "message": exc.detail},
        # Forward headers set on the exception (e.g. WWW-Authenticate) so
        # they are not lost by the custom envelope.
        headers=getattr(exc, "headers", None),
    )
@app.exception_handler(Exception)
async def general_exception_handler(request, exc):
    """Convert any otherwise unhandled exception into a generic 500 response.

    The handler is registered on ``app`` for ``Exception``. The exception is
    logged with its traceback; the client only receives a fixed message so
    that internal details are not leaked.

    Args:
        request: The incoming request (unused).
        exc: The unhandled exception.

    Returns:
        A ``JSONResponse`` with status 500 and the API's error envelope.
    """
    # exc_info attaches the traceback of `exc` to the log record; the message
    # text itself is unchanged.
    logger.error("Unhandled exception: %s", exc, exc_info=exc)
    return JSONResponse(
        status_code=500,
        content={"status": "error", "message": "Internal server error"},
    )
| # =============================================================== | |
| # Request / Response Models | |
| # =============================================================== | |
# Request body consumed by detect_voice. All three fields are required.
# (Documented with comments rather than a docstring so the generated schema
# description is not altered.)
class AudioRequest(BaseModel):
    # One of the five accepted language names.
    language: Literal["Tamil", "English", "Hindi", "Malayalam", "Telugu"] = Field(
        ...,
        description="Language of the audio",
    )
    # Declared container format of the payload.
    audioFormat: Literal["mp3", "wav"] = Field(
        ...,
        description="Audio format (mp3 or wav)",
    )
    # The audio file itself, Base64-encoded.
    audioBase64: str = Field(
        ...,
        description="Base64-encoded audio file",
    )
# Result shape for a single forensic analyzer.
# NOTE(review): not referenced by any other code visible in this file.
class ForensicDetail(BaseModel):
    score: float = Field(
        ...,
        description="Analyzer score (0=human, 1=AI)",
    )
    verdict: str = Field(
        ...,
        description="Analyzer verdict",
    )
    # Both collections default to a fresh empty container per instance.
    artifacts_found: List[str] = Field(default_factory=list)
    details: Dict[str, Any] = Field(default_factory=dict)
# Result shape for the neural model stage.
# NOTE(review): not referenced by any other code visible in this file.
class NeuralModelDetail(BaseModel):
    score: float = Field(
        ...,
        description="Neural model AI probability",
    )
    verdict: str = Field(
        ...,
        description="Neural model verdict",
    )
    segments_analyzed: int = Field(
        ...,
        description="Number of audio segments analyzed",
    )
    # Defaults to a fresh empty list per instance.
    per_segment_scores: List[float] = Field(default_factory=list)
# Audio quality profile returned in detailed responses. detect_voice builds it
# by unpacking the profile dict returned by preprocess_audio, so the keys of
# that dict must cover every field below (all are required).
# NOTE(review): units and value ranges are not defined in this file; the field
# names are the only hint — confirm against preprocess_audio.
class AudioProfileResponse(BaseModel):
    # Numeric measurements.
    duration_sec: float
    snr_db: float
    silence_ratio: float
    rms_energy: float
    sample_rate: int
    # Flag.
    clipping_detected: bool
# Response body produced by detect_voice.
# The first five fields are always populated by the route; the extended
# fields default to None and are filled in from the detector result
# (the last three only when detailed output is requested).
class DetectionResponse(BaseModel):
    status: Literal["success", "error"] = Field(default="success")
    language: str = Field(
        ...,
        description="Language of the audio",
    )
    classification: Literal["AI_GENERATED", "HUMAN"] = Field(
        ...,
        description="Voice classification result",
    )
    # Validated to lie within [0.0, 1.0].
    confidenceScore: float = Field(
        ...,
        ge=0.0,
        le=1.0,
        description="Confidence score (0.0–1.0)",
    )
    explanation: str = Field(
        ...,
        description="Evidence-based explanation for the decision",
    )

    # Extended fields
    inferenceTimeMs: Optional[float] = Field(
        None,
        description="Total inference time in milliseconds",
    )
    analyzersAgree: Optional[bool] = Field(
        None,
        description="Whether neural model and forensic analyzers agree",
    )
    forensics: Optional[Dict[str, Any]] = Field(
        None,
        description="Detailed forensic analysis (when detailed=true)",
    )
    audioProfile: Optional[AudioProfileResponse] = Field(
        None,
        description="Audio quality profile (when detailed=true)",
    )
    artifactsSummary: Optional[List[str]] = Field(
        None,
        description="List of all detected artifacts (when detailed=true)",
    )
# Error envelope: the same {"status": "error", "message": ...} shape that the
# custom error handlers in this file emit as JSON.
class ErrorResponse(BaseModel):
    # Human-readable error text (required).
    message: str
    # Always the literal "error".
    status: Literal["error"] = "error"
| # =============================================================== | |
| # Authentication | |
| # =============================================================== | |
def verify_api_key(
    x_api_key_lower: Optional[str] = Header(None, alias="x-api-key"),
    x_api_key_upper: Optional[str] = Header(None, alias="X-API-Key"),
):
    """Dependency that enforces the API key when one is configured.

    Args:
        x_api_key_lower: Value of the ``x-api-key`` header, or ``None``.
        x_api_key_upper: Value of the ``X-API-Key`` header, or ``None``.
            Used only when ``x_api_key_lower`` is empty.

    Returns:
        ``True`` when authentication is disabled (``settings.API_KEY`` is
        empty) or when the supplied key matches.

    Raises:
        HTTPException: 401 when a key is configured but none was supplied;
            403 when the supplied key does not match.
    """
    # Function-scope import so this block does not depend on the file-level
    # import list.
    import secrets

    expected_key = settings.API_KEY
    if not expected_key:
        # No key configured: authentication is disabled.
        return True

    x_api_key = x_api_key_lower or x_api_key_upper
    if not x_api_key:
        raise HTTPException(
            status_code=401,
            detail="API key is required. Please provide x-api-key header.",
        )

    # Constant-time comparison so response timing does not reveal how much of
    # the key matched. Both sides are encoded to bytes because
    # compare_digest raises TypeError for str values with non-ASCII
    # characters, which a client could otherwise send to trigger a 500.
    keys_match = secrets.compare_digest(
        x_api_key.encode("utf-8"),
        expected_key.encode("utf-8"),
    )
    if not keys_match:
        raise HTTPException(status_code=403, detail="Invalid API key")
    return True
| # =============================================================== | |
| # Routes | |
| # =============================================================== | |
def read_root():
    # Serves static/index.html from next to this module when it exists;
    # otherwise returns a small JSON status payload that also reports whether
    # API-key authentication is enabled.
    # NOTE(review): no route decorator is visible on this function in this
    # view — confirm it is registered on the app.
    module_dir = os.path.dirname(os.path.abspath(__file__))
    index_path = os.path.join(module_dir, "static", "index.html")
    if not os.path.exists(index_path):
        auth_state = "enabled" if settings.API_KEY else "disabled"
        return {
            "status": "healthy",
            "message": "VoiceGuard AI Voice Detection API v3.0",
            "authentication": auth_state,
        }
    return FileResponse(index_path)
def test_console():
    """Serve the Base64 Testing Console."""
    # Returns static/test.html from next to this module. When the file is
    # missing, a 404 HTTPException is raised instead of answering 200 with an
    # ad-hoc error body, so clients see a proper "not found" status and the
    # error goes through the application's normal HTTPException handling.
    # NOTE(review): no route decorator is visible on this function in this
    # view — confirm it is registered on the app.
    test_path = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "static", "test.html"
    )
    if not os.path.exists(test_path):
        raise HTTPException(status_code=404, detail="test.html not found")
    return FileResponse(test_path)
def health_check():
    # Returns a static description of the service (version, engine, analyzer
    # names) together with the configured model name and whether API-key
    # authentication is enabled. No checks are actually performed here.
    # NOTE(review): no route decorator is visible on this function in this
    # view — confirm it is registered on the app.
    analyzers = [
        "neural_model",
        "spectral_analysis",
        "temporal_analysis",
        "formant_analysis",
        "artifact_detection",
    ]
    auth_state = "enabled" if settings.API_KEY else "disabled"
    return {
        "status": "healthy",
        "version": "3.0.0",
        "engine": "multi-analyzer forensic detection",
        "model": settings.MODEL_NAME,
        "analyzers": analyzers,
        "authentication": auth_state,
    }
async def detect_voice(
    request: AudioRequest,
    authenticated: bool = Depends(verify_api_key),
    detailed: bool = Query(False, description="Return full forensic breakdown"),
):
    """
    🎙️ World-Class AI Voice Detection

    Analyzes audio using a multi-stage pipeline:

    1. **Neural Model** — Wav2Vec2 deepfake classifier (multi-segment inference)
    2. **Spectral Analyzer** — Detects unnatural spectral patterns
    3. **Temporal Analyzer** — Catches robotic timing and uniform pauses
    4. **Formant Analyzer** — Identifies synthetic formant transitions
    5. **Artifact Detector** — Finds phase discontinuities and click artifacts

    Results are fused for maximum accuracy with evidence-based explanations.
    """
    # Parameters:
    #   request       - validated request body (language, format, Base64 audio).
    #   authenticated - result of the verify_api_key dependency; present only
    #                   to force the check, the value itself is not used.
    #   detailed      - query flag; when true the forensic breakdown, artifact
    #                   summary and audio profile are added to the response.
    # Returns a DetectionResponse. Raises HTTPException: anything raised as an
    # HTTPException inside the pipeline is passed through, every other error
    # becomes a generic 500.
    # NOTE(review): no route decorator is visible on this function in this
    # view — confirm it is registered on the app.
    # NOTE(review): the pipeline calls below are synchronous and run inside an
    # async function; if they are CPU-heavy they will block the event loop —
    # confirm whether that is acceptable for the deployment.
    try:
        # 1. Decode Base64
        audio_file = decode_base64_audio(request.audioBase64)

        # 2. Preprocess Audio (now returns profile too)
        audio_array, audio_profile = preprocess_audio(audio_file)

        # 3. Full Detection Pipeline (neural + forensics + fusion)
        result = voice_detector.predict(
            audio_array,
            audio_profile=audio_profile,
            detailed=detailed,
        )

        # 4. Generate Evidence-Based Explanation
        explanation = generate_explanation(result)

        # 5. Build Response
        response = DetectionResponse(
            status="success",
            language=request.language,
            classification=result["classification"],
            confidenceScore=round(result["confidence"], 2),
            explanation=explanation,
            inferenceTimeMs=result.get("inference_time_ms"),
            analyzersAgree=result.get("analyzers_agree"),
        )

        # Include detailed forensics if requested
        if detailed:
            response.forensics = result.get("forensics")
            response.artifactsSummary = result.get("artifacts_summary")
            if audio_profile:
                response.audioProfile = AudioProfileResponse(**audio_profile)

        return response
    except HTTPException:
        # HTTP errors raised inside the pipeline are passed through untouched;
        # a bare raise keeps the original traceback.
        raise
    except Exception as e:
        # logger.exception logs at ERROR level and attaches the traceback, so
        # the failure can be diagnosed; the client still gets a generic message.
        logger.exception("Error processing request: %s", e)
        raise HTTPException(
            status_code=500,
            detail="Internal server error processing the audio",
        ) from e
# Script entry point: when this file is executed directly, start uvicorn for
# the "app.main:app" import string on all interfaces, port 8000, with
# auto-reload enabled.
if __name__ == "__main__":
    uvicorn.run(
        "app.main:app",
        host="0.0.0.0",
        port=8000,
        reload=True,
    )