diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..8855f84dfa5a66f7020f5651db5de165eabb0fc6 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,70 @@ +# ============================================================================= +# VoiceAuth API Dockerfile +# ============================================================================= +# Multi-stage build for optimized production image + +# ----------------------------------------------------------------------------- +# Stage 1: Builder +# ----------------------------------------------------------------------------- +FROM python:3.11-slim AS builder + +# Install build dependencies +RUN apt-get update && apt-get install -y --no-install-recommends \ + build-essential \ + && rm -rf /var/lib/apt/lists/* + +# Create virtual environment +RUN python -m venv /opt/venv +ENV PATH="/opt/venv/bin:$PATH" + +# Install Python dependencies +COPY requirements.txt . +RUN pip install --no-cache-dir --upgrade pip && \ + pip install --no-cache-dir -r requirements.txt + +# ----------------------------------------------------------------------------- +# Stage 2: Production +# ----------------------------------------------------------------------------- +FROM python:3.11-slim AS production + +# Install runtime dependencies +RUN apt-get update && apt-get install -y --no-install-recommends \ + ffmpeg \ + libsndfile1 \ + curl \ + && rm -rf /var/lib/apt/lists/* + +# Create non-root user +RUN useradd --create-home --uid 1000 appuser + +# Copy virtual environment from builder +COPY --from=builder /opt/venv /opt/venv +ENV PATH="/opt/venv/bin:$PATH" + +# Set working directory +WORKDIR /app + +# Copy application code +COPY --chown=appuser:appuser . . 
+ +# Create directories for models and logs +RUN mkdir -p /app/models /app/logs && \ + chown -R appuser:appuser /app + +# Switch to non-root user +USER appuser + +# Environment variables +ENV PYTHONUNBUFFERED=1 \ + PYTHONDONTWRITEBYTECODE=1 \ + PORT=7860 + +# Expose port +EXPOSE 7860 + +# Health check +HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \ + CMD curl -f http://localhost:7860/api/health || exit 1 + +# Run application +CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"] diff --git a/app/__init__.py b/app/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..2f54750c5b879ca62a33a9931d0181eb902e2aad --- /dev/null +++ b/app/__init__.py @@ -0,0 +1,4 @@ +"""VoiceAuth - AI Voice Detection API.""" + +__version__ = "1.0.0" +__author__ = "VoiceAuth Team" diff --git a/app/__pycache__/__init__.cpython-312.pyc b/app/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..013187ad5ea6b4bf790313e8348f3eaffcd33f07 Binary files /dev/null and b/app/__pycache__/__init__.cpython-312.pyc differ diff --git a/app/__pycache__/config.cpython-312.pyc b/app/__pycache__/config.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..52c7236e297fab6aa445a2d5e24ec29a9c6eb29c Binary files /dev/null and b/app/__pycache__/config.cpython-312.pyc differ diff --git a/app/__pycache__/main.cpython-312.pyc b/app/__pycache__/main.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ade6ea4926e557315516bd3fe9dae41147f9ad0d Binary files /dev/null and b/app/__pycache__/main.cpython-312.pyc differ diff --git a/app/api/__init__.py b/app/api/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..5fa0ee83874ecb0bab9ef44dd2d5479eaec47e4b --- /dev/null +++ b/app/api/__init__.py @@ -0,0 +1,9 @@ +"""API package.""" + +from app.api.routes import health +from app.api.routes import voice_detection + 
+__all__ = [ + "health", + "voice_detection", +] diff --git a/app/api/__pycache__/__init__.cpython-312.pyc b/app/api/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..176c8afafd16e3dc95d4f8d5b203c0475d911595 Binary files /dev/null and b/app/api/__pycache__/__init__.cpython-312.pyc differ diff --git a/app/api/__pycache__/dependencies.cpython-312.pyc b/app/api/__pycache__/dependencies.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b20a4461e967892690a5655a0bf5e25c8b1bad36 Binary files /dev/null and b/app/api/__pycache__/dependencies.cpython-312.pyc differ diff --git a/app/api/dependencies.py b/app/api/dependencies.py new file mode 100644 index 0000000000000000000000000000000000000000..48ca97ce61328b481ba5f3302a916a7c33d1e219 --- /dev/null +++ b/app/api/dependencies.py @@ -0,0 +1,143 @@ +""" +FastAPI dependency injection. + +Provides dependencies for route handlers. +""" + +from typing import Annotated + +from fastapi import Depends +from fastapi import Header +from fastapi import HTTPException +from fastapi import Request +from fastapi import status + +from app.config import Settings +from app.config import get_settings +from app.ml.model_loader import ModelLoader +from app.services.voice_detector import VoiceDetector +from app.utils.logger import get_logger + +logger = get_logger(__name__) + + +def get_model_loader(request: Request) -> ModelLoader: + """ + Get ModelLoader from app state. + + Args: + request: FastAPI request object + + Returns: + ModelLoader instance from app state + """ + if hasattr(request.app.state, "model_loader"): + return request.app.state.model_loader + return ModelLoader() + + +def get_voice_detector( + model_loader: Annotated[ModelLoader, Depends(get_model_loader)], +) -> VoiceDetector: + """ + Get VoiceDetector instance. 
+ + Args: + model_loader: ModelLoader from dependency + + Returns: + Configured VoiceDetector instance + """ + return VoiceDetector(model_loader=model_loader) + + +async def validate_api_key( + x_api_key: Annotated[ + str | None, + Header( + alias="x-api-key", + description="API key for authentication", + ), + ] = None, + settings: Annotated[Settings, Depends(get_settings)] = None, # type: ignore +) -> str: + """ + Validate API key from request header. + + Args: + x_api_key: API key from x-api-key header + settings: Application settings + + Returns: + Validated API key + + Raises: + HTTPException: 401 if API key is missing or invalid + """ + if settings is None: + settings = get_settings() + + if not x_api_key: + logger.warning("Request without API key") + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail="API key is required. Provide it in the x-api-key header.", + headers={"WWW-Authenticate": "ApiKey"}, + ) + + # Get valid API keys + valid_keys = settings.api_keys_list + + if not valid_keys: + logger.error("No API keys configured on server") + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="Server configuration error", + ) + + # Constant-time comparison to prevent timing attacks + key_valid = False + for valid_key in valid_keys: + if _constant_time_compare(x_api_key, valid_key): + key_valid = True + break + + if not key_valid: + logger.warning( + "Invalid API key attempt", + key_prefix=x_api_key[:8] + "..." if len(x_api_key) > 8 else "***", + ) + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail="Invalid API key", + headers={"WWW-Authenticate": "ApiKey"}, + ) + + return x_api_key + + +def _constant_time_compare(val1: str, val2: str) -> bool: + """ + Constant-time string comparison to prevent timing attacks. 
+ + Args: + val1: First string + val2: Second string + + Returns: + True if strings are equal + """ + if len(val1) != len(val2): + return False + + result = 0 + for x, y in zip(val1, val2): + result |= ord(x) ^ ord(y) + + return result == 0 + + +# Type aliases for cleaner route signatures +ValidatedApiKey = Annotated[str, Depends(validate_api_key)] +VoiceDetectorDep = Annotated[VoiceDetector, Depends(get_voice_detector)] +SettingsDep = Annotated[Settings, Depends(get_settings)] diff --git a/app/api/middleware/__init__.py b/app/api/middleware/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..60ddaff3d1ae19d79a1d2bd5413ceea11183ef43 --- /dev/null +++ b/app/api/middleware/__init__.py @@ -0,0 +1,13 @@ +"""Middleware package.""" + +from app.api.middleware.auth import APIKeyMiddleware +from app.api.middleware.error_handler import setup_exception_handlers +from app.api.middleware.rate_limiter import get_limiter +from app.api.middleware.rate_limiter import limiter + +__all__ = [ + "APIKeyMiddleware", + "setup_exception_handlers", + "limiter", + "get_limiter", +] diff --git a/app/api/middleware/__pycache__/__init__.cpython-312.pyc b/app/api/middleware/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2555a164db5067761e11aff0ad1c3c40ed9ea62d Binary files /dev/null and b/app/api/middleware/__pycache__/__init__.cpython-312.pyc differ diff --git a/app/api/middleware/__pycache__/auth.cpython-312.pyc b/app/api/middleware/__pycache__/auth.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..aa0b3b01b9264a3f8431f82ed12bb69e1d3cec9d Binary files /dev/null and b/app/api/middleware/__pycache__/auth.cpython-312.pyc differ diff --git a/app/api/middleware/__pycache__/error_handler.cpython-312.pyc b/app/api/middleware/__pycache__/error_handler.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c197c3c3eb998ca4563d1c377024c25b7ca1b72b 
Binary files /dev/null and b/app/api/middleware/__pycache__/error_handler.cpython-312.pyc differ diff --git a/app/api/middleware/__pycache__/rate_limiter.cpython-312.pyc b/app/api/middleware/__pycache__/rate_limiter.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9d3e0eed346dedbe5a5181b28381dc3c6b9cfe50 Binary files /dev/null and b/app/api/middleware/__pycache__/rate_limiter.cpython-312.pyc differ diff --git a/app/api/middleware/auth.py b/app/api/middleware/auth.py new file mode 100644 index 0000000000000000000000000000000000000000..dcb701d0112cee6aa463a7130165d217c9438283 --- /dev/null +++ b/app/api/middleware/auth.py @@ -0,0 +1,107 @@ +""" +API Key authentication middleware. + +Provides middleware for API key validation. +""" + +from starlette.middleware.base import BaseHTTPMiddleware +from starlette.requests import Request +from starlette.responses import JSONResponse + +from app.config import get_settings +from app.utils.logger import get_logger + +logger = get_logger(__name__) + + +class APIKeyMiddleware(BaseHTTPMiddleware): + """ + Middleware for API key authentication. + + This middleware checks for valid API keys on protected endpoints. + Public endpoints (health, docs) are excluded from authentication. + """ + + # Endpoints that don't require authentication + PUBLIC_PATHS: set[str] = { + "/api/health", + "/api/ready", + "/api/languages", + "/api/", + "/docs", + "/redoc", + "/openapi.json", + "/", + } + + async def dispatch(self, request: Request, call_next): + """ + Process request and validate API key for protected endpoints. 
+ + Args: + request: Incoming request + call_next: Next middleware/handler + + Returns: + Response from next handler or 401 error + """ + # Skip authentication for public paths + path = request.url.path.rstrip("/") or "/" + + # Check if path is public + is_public = path in self.PUBLIC_PATHS or any( + path.startswith(public.rstrip("/")) for public in self.PUBLIC_PATHS if public != "/" + ) + + if is_public: + return await call_next(request) + + # Get API key from header + api_key = request.headers.get("x-api-key") + + if not api_key: + logger.warning( + "Request without API key", + path=path, + method=request.method, + ) + return JSONResponse( + status_code=401, + content={ + "status": "error", + "message": "API key is required", + }, + headers={"WWW-Authenticate": "ApiKey"}, + ) + + # Validate API key + settings = get_settings() + valid_keys = settings.api_keys_list + + if not valid_keys: + logger.error("No API keys configured") + return JSONResponse( + status_code=500, + content={ + "status": "error", + "message": "Server configuration error", + }, + ) + + if api_key not in valid_keys: + logger.warning( + "Invalid API key", + path=path, + key_prefix=api_key[:8] + "..." if len(api_key) > 8 else "***", + ) + return JSONResponse( + status_code=401, + content={ + "status": "error", + "message": "Invalid API key", + }, + headers={"WWW-Authenticate": "ApiKey"}, + ) + + # Continue to next handler + return await call_next(request) diff --git a/app/api/middleware/error_handler.py b/app/api/middleware/error_handler.py new file mode 100644 index 0000000000000000000000000000000000000000..3d74d63a128c6074785db27fd288d1d548282c6f --- /dev/null +++ b/app/api/middleware/error_handler.py @@ -0,0 +1,130 @@ +""" +Global error handling for FastAPI application. + +Provides exception handlers for consistent error responses. 
+""" + +from fastapi import FastAPI +from fastapi import HTTPException +from fastapi import Request +from fastapi.exceptions import RequestValidationError +from starlette.responses import JSONResponse + +from app.utils.logger import get_logger + +logger = get_logger(__name__) + + +async def validation_exception_handler( + request: Request, + exc: RequestValidationError, +) -> JSONResponse: + """ + Handle Pydantic validation errors. + + Formats validation errors into a consistent response format. + + Args: + request: Incoming request + exc: Validation exception + + Returns: + JSON response with error details + """ + errors = exc.errors() + + # Format error messages + error_messages = [] + for error in errors: + loc = " -> ".join(str(x) for x in error.get("loc", [])) + msg = error.get("msg", "Validation error") + error_messages.append(f"{loc}: {msg}") + + logger.warning( + "Validation error", + path=request.url.path, + errors=error_messages, + ) + + return JSONResponse( + status_code=422, + content={ + "status": "error", + "message": "Validation error", + "details": { + "errors": error_messages, + }, + }, + ) + + +async def http_exception_handler( + request: Request, + exc: HTTPException, +) -> JSONResponse: + """ + Handle HTTP exceptions. + + Formats HTTP exceptions into a consistent response format. + + Args: + request: Incoming request + exc: HTTP exception + + Returns: + JSON response with error details + """ + return JSONResponse( + status_code=exc.status_code, + content={ + "status": "error", + "message": exc.detail, + }, + headers=exc.headers, + ) + + +async def general_exception_handler( + request: Request, + exc: Exception, +) -> JSONResponse: + """ + Handle unexpected exceptions. + + Logs the exception and returns a generic error response. 
+ + Args: + request: Incoming request + exc: Unexpected exception + + Returns: + JSON response with generic error message + """ + logger.exception( + "Unhandled exception", + path=request.url.path, + method=request.method, + error=str(exc), + ) + + return JSONResponse( + status_code=500, + content={ + "status": "error", + "message": "Internal server error", + }, + ) + + +def setup_exception_handlers(app: FastAPI) -> None: + """ + Register all exception handlers with the FastAPI app. + + Args: + app: FastAPI application instance + """ + app.add_exception_handler(RequestValidationError, validation_exception_handler) + app.add_exception_handler(HTTPException, http_exception_handler) + app.add_exception_handler(Exception, general_exception_handler) + + logger.debug("Exception handlers registered") diff --git a/app/api/middleware/rate_limiter.py b/app/api/middleware/rate_limiter.py new file mode 100644 index 0000000000000000000000000000000000000000..937914423a7c53e1613cc8f168503218f3902616 --- /dev/null +++ b/app/api/middleware/rate_limiter.py @@ -0,0 +1,90 @@ +""" +Rate limiting middleware using SlowAPI. + +Provides request rate limiting per API key. +""" + +from slowapi import Limiter +from slowapi.util import get_remote_address +from starlette.requests import Request + +from app.config import get_settings +from app.utils.logger import get_logger + +logger = get_logger(__name__) + + +def get_api_key_or_ip(request: Request) -> str: + """ + Extract rate limit key from request. + + Uses API key if present, otherwise falls back to IP address. + + Args: + request: Incoming request + + Returns: + Rate limit key (API key or IP) + """ + api_key = request.headers.get("x-api-key") + + if api_key: + # Use API key for per-key rate limiting + return f"key:{api_key}" + + # Fall back to IP address + return f"ip:{get_remote_address(request)}" + + +def get_limiter() -> Limiter: + """ + Create and configure rate limiter. 
+ + Returns: + Configured Limiter instance + """ + settings = get_settings() + + # Build default limit string + default_limit = f"{settings.RATE_LIMIT_REQUESTS}/minute" + + return Limiter( + key_func=get_api_key_or_ip, + default_limits=[default_limit], + # Note: Redis storage will be configured in main.py if available + ) + + +# Global limiter instance +limiter = get_limiter() + + +def rate_limit_exceeded_handler(request: Request, exc: Exception): + """ + Handle rate limit exceeded errors. + + Args: + request: Request that exceeded the limit + exc: Rate limit exception + + Returns: + JSON response with 429 status + """ + from starlette.responses import JSONResponse + + logger.warning( + "Rate limit exceeded", + path=request.url.path, + client=get_api_key_or_ip(request), + ) + + return JSONResponse( + status_code=429, + content={ + "status": "error", + "message": "Rate limit exceeded. Please try again later.", + }, + headers={ + "Retry-After": "60", + }, + ) diff --git a/app/api/routes/__init__.py b/app/api/routes/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..24ad3b1226130d38802c9a5e12d71a247b79a847 --- /dev/null +++ b/app/api/routes/__init__.py @@ -0,0 +1,9 @@ +"""Routes package.""" + +from app.api.routes import health +from app.api.routes import voice_detection + +__all__ = [ + "health", + "voice_detection", +] diff --git a/app/api/routes/__pycache__/__init__.cpython-312.pyc b/app/api/routes/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5f51ab86e0a9c98fcd54c6af8e6608919ac6debb Binary files /dev/null and b/app/api/routes/__pycache__/__init__.cpython-312.pyc differ diff --git a/app/api/routes/__pycache__/federated.cpython-312.pyc b/app/api/routes/__pycache__/federated.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f48b0efb088add27a409b2e9731c77d946b2f63b Binary files /dev/null and b/app/api/routes/__pycache__/federated.cpython-312.pyc 
differ diff --git a/app/api/routes/__pycache__/health.cpython-312.pyc b/app/api/routes/__pycache__/health.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4f17d18121960dbac2a6711a4e74a4968e04746c Binary files /dev/null and b/app/api/routes/__pycache__/health.cpython-312.pyc differ diff --git a/app/api/routes/__pycache__/voice_detection.cpython-312.pyc b/app/api/routes/__pycache__/voice_detection.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..733dd266d9c2535b8129163046f9e7662f5fdcb4 Binary files /dev/null and b/app/api/routes/__pycache__/voice_detection.cpython-312.pyc differ diff --git a/app/api/routes/federated.py b/app/api/routes/federated.py new file mode 100644 index 0000000000000000000000000000000000000000..254939eb0fc8e2bc6feff9909cadc8bd13648a85 --- /dev/null +++ b/app/api/routes/federated.py @@ -0,0 +1,100 @@ +""" +Federated Learning API endpoints. + +Provides endpoints for FL operations: +- Client registration +- Contribution submission +- Federation status +""" + +from fastapi import APIRouter +from fastapi import HTTPException +from fastapi import status +from pydantic import BaseModel +from pydantic import Field + +from app.api.dependencies import ValidatedApiKey +from app.services.federated_learning import fl_manager + +router = APIRouter() + + +class ClientRegistrationRequest(BaseModel): + """Request to register as FL client.""" + + client_id: str = Field(..., min_length=3, max_length=64) + organization: str | None = Field(None, max_length=128) + + +class ContributionRequest(BaseModel): + """Request to submit a training contribution.""" + + client_id: str = Field(..., min_length=3, max_length=64) + gradient_hash: str = Field(..., min_length=16, max_length=128) + samples_trained: int = Field(..., ge=1, le=100000) + local_accuracy: float = Field(..., ge=0.0, le=1.0) + + +@router.post( + "/federated/register", + summary="Register as Federated Client", + description="Register as a 
federated learning participant.", +) +async def register_client( + request: ClientRegistrationRequest, + api_key: ValidatedApiKey, +) -> dict: + """Register a new federated learning client.""" + client = fl_manager.register_client( + client_id=request.client_id, + organization=request.organization, + ) + + return { + "status": "registered", + "client_id": client.client_id, + "organization": client.organization, + "registered_at": client.registered_at, + } + + +@router.post( + "/federated/contribute", + summary="Submit Training Contribution", + description="Submit model gradients from local training.", +) +async def submit_contribution( + request: ContributionRequest, + api_key: ValidatedApiKey, +) -> dict: + """Submit a training contribution.""" + result = fl_manager.submit_contribution( + client_id=request.client_id, + gradient_hash=request.gradient_hash, + samples_trained=request.samples_trained, + local_accuracy=request.local_accuracy, + ) + + return result + + +@router.get( + "/federated/status", + summary="Federation Status", + description="Get current federated learning status.", +) +async def federation_status() -> dict: + """Get federation status.""" + return fl_manager.get_federation_status() + + +@router.post( + "/federated/aggregate", + summary="Trigger Aggregation", + description="Trigger federated model aggregation (admin only).", +) +async def trigger_aggregation( + api_key: ValidatedApiKey, +) -> dict: + """Trigger model aggregation.""" + return fl_manager.simulate_aggregation() diff --git a/app/api/routes/health.py b/app/api/routes/health.py new file mode 100644 index 0000000000000000000000000000000000000000..73e2d78d2b03d679e4bb92dcf2fe586fc03832ae --- /dev/null +++ b/app/api/routes/health.py @@ -0,0 +1,101 @@ +""" +Health check endpoints. + +Provides health, readiness, and information endpoints. 
+""" + +from fastapi import APIRouter + +from app.api.dependencies import VoiceDetectorDep +from app.models.enums import SupportedLanguage +from app.models.response import HealthResponse +from app.models.response import LanguagesResponse + +router = APIRouter() + + +@router.get( + "/health", + response_model=HealthResponse, + summary="Health Check", + description="Check the health status of the API and ML model.", +) +async def health_check( + voice_detector: VoiceDetectorDep, +) -> HealthResponse: + """ + Get health status of the API. + + Returns model loading status, device info, and supported languages. + """ + health = voice_detector.health_check() + + return HealthResponse( + status=health["status"], + version=health["version"], + model_loaded=health["model_loaded"], + model_name=health.get("model_name"), + device=health.get("device"), + supported_languages=health["supported_languages"], + ) + + +@router.get( + "/ready", + summary="Readiness Check", + description="Check if the API is ready to accept requests.", +) +async def readiness_check( + voice_detector: VoiceDetectorDep, +) -> dict: + """ + Check if API is ready to accept requests. + + Returns ready status based on model availability. + """ + health = voice_detector.health_check() + + if health["model_loaded"]: + return {"status": "ready", "message": "API is ready to accept requests"} + else: + return {"status": "not_ready", "message": "Model is still loading"} + + +@router.get( + "/languages", + response_model=LanguagesResponse, + summary="Supported Languages", + description="Get the list of supported languages for voice detection.", +) +async def supported_languages() -> LanguagesResponse: + """ + Get list of supported languages. + + Returns all languages supported by the voice detection API. 
+ """ + languages = SupportedLanguage.values() + + return LanguagesResponse( + languages=languages, + count=len(languages), + ) + + +@router.get( + "/", + summary="API Info", + description="Get basic API information.", +) +async def api_info() -> dict: + """ + Get basic API information. + + Returns API name, version, and documentation links. + """ + return { + "name": "VoiceAuth API", + "description": "AI-Generated Voice Detection API", + "version": "1.0.0", + "documentation": "/docs", + "supported_languages": SupportedLanguage.values(), + } diff --git a/app/api/routes/voice_detection.py b/app/api/routes/voice_detection.py new file mode 100644 index 0000000000000000000000000000000000000000..2b3d07ca11e5b42690e57a639df08d813192a64f --- /dev/null +++ b/app/api/routes/voice_detection.py @@ -0,0 +1,150 @@ +""" +Voice detection API endpoint. + +Main endpoint for detecting AI-generated vs human voice. +""" + +from fastapi import APIRouter +from fastapi import HTTPException +from fastapi import status + +from app.api.dependencies import ValidatedApiKey +from app.api.dependencies import VoiceDetectorDep +from app.models.request import VoiceDetectionRequest +from app.models.response import ErrorResponse +from app.models.response import VoiceDetectionResponse +from app.utils.exceptions import AudioDecodeError +from app.utils.exceptions import AudioDurationError +from app.utils.exceptions import AudioFormatError +from app.utils.exceptions import AudioProcessingError +from app.utils.exceptions import InferenceError +from app.utils.exceptions import ModelNotLoadedError +from app.utils.logger import get_logger + +logger = get_logger(__name__) + +router = APIRouter() + + +@router.post( + "/voice-detection", + response_model=VoiceDetectionResponse, + response_model_include={"status", "language", "classification", "confidenceScore", "explanation"}, + responses={ + 200: { + "description": "Successful voice detection", + "model": VoiceDetectionResponse, + }, + 400: { + "description": 
"Invalid audio data", + "model": ErrorResponse, + }, + 401: { + "description": "Invalid or missing API key", + "model": ErrorResponse, + }, + 422: { + "description": "Validation error", + "model": ErrorResponse, + }, + 429: { + "description": "Rate limit exceeded", + "model": ErrorResponse, + }, + 500: { + "description": "Internal server error", + "model": ErrorResponse, + }, + 503: { + "description": "Model not loaded", + "model": ErrorResponse, + }, + }, + summary="Detect AI-Generated Voice", + description=""" +Analyze a voice sample to determine if it's AI-generated or spoken by a human. + +**Supported Languages:** Tamil, English, Hindi, Malayalam, Telugu + +**Input Requirements:** +- Audio must be Base64-encoded MP3 +- Duration: 0.5s to 30s +- One audio sample per request + +**Response:** +- Classification: AI_GENERATED or HUMAN +- Confidence score: 0.0 to 1.0 +- Human-readable explanation + """, +) +async def detect_voice( + request: VoiceDetectionRequest, + voice_detector: VoiceDetectorDep, + api_key: ValidatedApiKey, +) -> VoiceDetectionResponse: + """ + Detect whether a voice sample is AI-generated or human. 
+ + Args: + request: Voice detection request with audio data + voice_detector: VoiceDetector service dependency + api_key: Validated API key from header + + Returns: + VoiceDetectionResponse with classification result + """ + try: + result = await voice_detector.detect( + audio_base64=request.audioBase64, + language=request.language, + ) + return result + + except AudioDecodeError as e: + logger.warning("Audio decode error", error=str(e)) + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=f"Failed to decode audio: {e.message}", + ) from e + + except AudioFormatError as e: + logger.warning("Audio format error", error=str(e)) + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=f"Invalid audio format: {e.message}", + ) from e + + except AudioDurationError as e: + logger.warning("Audio duration error", error=str(e), details=e.details) + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=e.message, + ) from e + + except AudioProcessingError as e: + logger.error("Audio processing error", error=str(e)) + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=f"Audio processing failed: {e.message}", + ) from e + + except ModelNotLoadedError as e: + logger.error("Model not loaded", error=str(e)) + raise HTTPException( + status_code=status.HTTP_503_SERVICE_UNAVAILABLE, + detail="Voice detection model is not available. Please try again later.", + ) from e + + except InferenceError as e: + logger.error("Inference error", error=str(e)) + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="Voice analysis failed. 
"""
Application configuration using Pydantic Settings.

Loads configuration from environment variables and .env file.
"""

from functools import lru_cache
from typing import Literal

import torch
from pydantic import Field
from pydantic import field_validator
from pydantic_settings import BaseSettings
from pydantic_settings import SettingsConfigDict


class Settings(BaseSettings):
    """Application settings loaded from environment variables (and .env)."""

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        case_sensitive=False,
        extra="ignore",
    )

    # -------------------------------------------------------------------------
    # Application Settings
    # -------------------------------------------------------------------------
    APP_NAME: str = "VoiceAuth API"
    APP_VERSION: str = "1.0.0"
    DEBUG: bool = False
    HOST: str = "0.0.0.0"  # NOTE(review): binds all interfaces by default — confirm for deployment
    PORT: int = 8000

    # -------------------------------------------------------------------------
    # Security Settings
    # -------------------------------------------------------------------------
    API_KEYS: str = Field(
        default="",
        description="Comma-separated list of valid API keys",
    )
    CORS_ORIGINS: str = Field(
        default="http://localhost:3000,http://localhost:8000",
        description="Comma-separated list of allowed CORS origins",
    )
    RATE_LIMIT_REQUESTS: int = Field(default=100, ge=1)
    RATE_LIMIT_PERIOD: int = Field(default=60, ge=1, description="Period in seconds")

    # -------------------------------------------------------------------------
    # ML Model Settings
    # -------------------------------------------------------------------------
    MODEL_NAME: str = "facebook/wav2vec2-base"
    MODEL_PATH: str = ""
    DEVICE: str = "auto"
    MAX_AUDIO_DURATION: float = Field(default=30.0, ge=1.0)
    MIN_AUDIO_DURATION: float = Field(default=0.5, ge=0.1)
    SAMPLE_RATE: int = Field(default=16000, ge=8000, le=48000)

    # -------------------------------------------------------------------------
    # Redis Settings
    # -------------------------------------------------------------------------
    REDIS_URL: str = "redis://localhost:6379"
    REDIS_DB: int = 0

    # -------------------------------------------------------------------------
    # Logging Settings
    # -------------------------------------------------------------------------
    LOG_LEVEL: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] = "INFO"
    LOG_FORMAT: Literal["json", "console"] = "json"

    # -------------------------------------------------------------------------
    # Computed Properties
    # -------------------------------------------------------------------------
    @property
    def api_keys_list(self) -> list[str]:
        """Parse comma-separated API keys into a list (empty string -> [])."""
        if not self.API_KEYS:
            return []
        return [key.strip() for key in self.API_KEYS.split(",") if key.strip()]

    @property
    def cors_origins_list(self) -> list[str]:
        """Parse comma-separated CORS origins into a list (empty string -> [])."""
        if not self.CORS_ORIGINS:
            return []
        return [origin.strip() for origin in self.CORS_ORIGINS.split(",") if origin.strip()]

    @property
    def torch_device(self) -> str:
        """Resolve 'auto' to cuda-if-available, otherwise pass DEVICE through."""
        if self.DEVICE == "auto":
            return "cuda" if torch.cuda.is_available() else "cpu"
        return self.DEVICE

    @property
    def model_identifier(self) -> str:
        """Local MODEL_PATH wins over the hub MODEL_NAME when set."""
        return self.MODEL_PATH if self.MODEL_PATH else self.MODEL_NAME

    # -------------------------------------------------------------------------
    # Validators
    # -------------------------------------------------------------------------
    @field_validator("DEVICE")
    @classmethod
    def validate_device(cls, v: str) -> str:
        """
        Validate device configuration.

        Accepts 'auto', 'cpu', 'cuda', 'mps', or 'cuda:N' with a numeric
        index. FIX: previously any 'cuda:<anything>' string was accepted
        and only failed later inside torch; reject non-numeric indices here.
        """
        valid_devices = {"auto", "cpu", "cuda", "mps"}
        if v.startswith("cuda:"):
            index = v[len("cuda:"):]
            if not index.isdigit():
                raise ValueError(f"Device must be one of {valid_devices} or 'cuda:N' format")
            return v
        if v not in valid_devices:
            raise ValueError(f"Device must be one of {valid_devices} or 'cuda:N' format")
        return v


@lru_cache
def get_settings() -> Settings:
    """
    Get cached settings instance.

    Uses lru_cache to ensure settings are only loaded once per process.
    """
    return Settings()
@asynccontextmanager
async def lifespan(app: FastAPI):
    """
    Application lifespan manager.

    Startup: construct the ModelLoader singleton, load the ML model and run
    a warmup pass. Shutdown: unload the model and release memory.

    NOTE(review): a model-load failure is deliberately swallowed so the
    health endpoints stay up; actual detection calls will then fail with a
    proper error.
    """
    logger.info("Starting VoiceAuth API...")

    loader = ModelLoader()
    app.state.model_loader = loader

    logger.info("Loading ML model...")
    try:
        await loader.load_model_async()
        logger.info("ML model loaded successfully")

        logger.info("Running model warmup...")
        loader.warmup()
        logger.info("Model warmup complete")
    except Exception as exc:
        logger.error("Failed to load ML model", error=str(exc))

    logger.info("VoiceAuth API is ready!")

    yield

    logger.info("Shutting down VoiceAuth API...")
    if hasattr(app.state, "model_loader"):
        app.state.model_loader.unload_model()
    logger.info("Shutdown complete")
+ """, + lifespan=lifespan, + docs_url="/docs", + redoc_url="/redoc", + openapi_url="/openapi.json", + ) + + # ========================================================================= + # MIDDLEWARE + # ========================================================================= + + # CORS + app.add_middleware( + CORSMiddleware, + allow_origins=settings.cors_origins_list or ["*"], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], + ) + + # Rate limiting + app.state.limiter = limiter + app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler) + + # ========================================================================= + # EXCEPTION HANDLERS + # ========================================================================= + setup_exception_handlers(app) + + # ========================================================================= + # ROUTES + # ========================================================================= + app.include_router( + health.router, + prefix="/api", + tags=["Health"], + ) + app.include_router( + voice_detection.router, + prefix="/api", + tags=["Voice Detection"], + ) + + # Federated Learning routes (Phase 2) + from app.api.routes import federated + app.include_router( + federated.router, + prefix="/api", + tags=["Federated Learning"], + ) + + return app + + +# Create application instance +app = create_app() + + +def main() -> None: + """Run the application using uvicorn.""" + import uvicorn + + settings = get_settings() + + uvicorn.run( + "app.main:app", + host=settings.HOST, + port=settings.PORT, + reload=settings.DEBUG, + log_level=settings.LOG_LEVEL.lower(), + ) + + +if __name__ == "__main__": + main() diff --git a/app/ml/__init__.py b/app/ml/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..4c57f2fe1337dd24bfd532ddc7a453633e18d9c7 --- /dev/null +++ b/app/ml/__init__.py @@ -0,0 +1,11 @@ +"""Machine Learning pipeline package.""" + +from app.ml.inference import 
InferenceEngine +from app.ml.model_loader import ModelLoader +from app.ml.preprocessing import AudioPreprocessor + +__all__ = [ + "ModelLoader", + "InferenceEngine", + "AudioPreprocessor", +] diff --git a/app/ml/__pycache__/__init__.cpython-312.pyc b/app/ml/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..80f5d2cd8c6221859612b819d313d943f7dfabbf Binary files /dev/null and b/app/ml/__pycache__/__init__.cpython-312.pyc differ diff --git a/app/ml/__pycache__/inference.cpython-312.pyc b/app/ml/__pycache__/inference.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8535952952cf54ac5693978e4dc8e932b1f958f1 Binary files /dev/null and b/app/ml/__pycache__/inference.cpython-312.pyc differ diff --git a/app/ml/__pycache__/model_loader.cpython-312.pyc b/app/ml/__pycache__/model_loader.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6309b66c273218ef843c65590b1252aa2bedaa95 Binary files /dev/null and b/app/ml/__pycache__/model_loader.cpython-312.pyc differ diff --git a/app/ml/__pycache__/preprocessing.cpython-312.pyc b/app/ml/__pycache__/preprocessing.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..71731237ece320d103a2a07ec88b57f601f6da40 Binary files /dev/null and b/app/ml/__pycache__/preprocessing.cpython-312.pyc differ diff --git a/app/ml/inference.py b/app/ml/inference.py new file mode 100644 index 0000000000000000000000000000000000000000..436e35e056e956e737d410198de713814016a448 --- /dev/null +++ b/app/ml/inference.py @@ -0,0 +1,235 @@ +""" +Inference engine for voice classification. + +Handles model inference and result processing. 
+""" + +import time +from typing import TYPE_CHECKING + +import torch +import torch.nn.functional as F + +from app.models.enums import Classification +from app.utils.constants import ID_TO_LABEL +from app.utils.exceptions import InferenceError +from app.utils.logger import get_logger + +if TYPE_CHECKING: + from transformers import Wav2Vec2ForSequenceClassification + from transformers import Wav2Vec2Processor + +logger = get_logger(__name__) + + +class InferenceEngine: + """ + Inference engine for Wav2Vec2 voice classification. + + Handles running model inference and converting outputs + to classification results. + """ + + def __init__( + self, + model: "Wav2Vec2ForSequenceClassification", + processor: "Wav2Vec2Processor", + device: str = "cpu", + ) -> None: + """ + Initialize InferenceEngine. + + Args: + model: Loaded Wav2Vec2ForSequenceClassification model + processor: Wav2Vec2Processor for preprocessing + device: Device to run inference on + """ + self.model = model + self.processor = processor + self.device = device + + def predict( + self, + input_tensors: dict[str, torch.Tensor], + ) -> tuple[Classification, float]: + """ + Run inference and return classification result. 
+ + Args: + input_tensors: Preprocessed input tensors with input_values + + Returns: + Tuple of (Classification, confidence_score) + + Raises: + InferenceError: If inference fails + """ + try: + start_time = time.perf_counter() + + # Ensure model is in eval mode + self.model.eval() + + # Run inference without gradient computation + with torch.no_grad(): + outputs = self.model(**input_tensors) + + # Get logits + logits = outputs.logits + + # Apply softmax to get probabilities + probabilities = F.softmax(logits, dim=-1) + + # Get predicted class and confidence + confidence, predicted_class = torch.max(probabilities, dim=-1) + + # Convert to Python types + predicted_class_id = predicted_class.item() + confidence_score = confidence.item() + + # Get label from model's config or fallback to our mapping + if hasattr(self.model.config, 'id2label') and self.model.config.id2label: + model_label = self.model.config.id2label.get(predicted_class_id, "HUMAN") + # Convert pretrained model labels to standard format + model_label_lower = str(model_label).lower() + + ai_keywords = ["fake", "spoof", "synthetic", "ai", "deepfake", "generated"] + is_ai = any(keyword in model_label_lower for keyword in ai_keywords) + + if is_ai: + label = "AI_GENERATED" + else: + label = "HUMAN" + else: + label = ID_TO_LABEL.get(predicted_class_id, "HUMAN") + + classification = Classification(label) + + inference_time_ms = (time.perf_counter() - start_time) * 1000 + + logger.info( + "Inference complete", + classification=classification.value, + confidence=round(confidence_score, 4), + inference_time_ms=round(inference_time_ms, 2), + ) + + return classification, confidence_score + + except Exception as e: + logger.error("Inference failed", error=str(e)) + raise InferenceError( + f"Model inference failed: {e}", + details={"error": str(e)}, + ) from e + + def predict_with_probabilities( + self, + input_tensors: dict[str, torch.Tensor], + ) -> dict: + """ + Run inference and return full probability 
distribution. + + Args: + input_tensors: Preprocessed input tensors + + Returns: + Dictionary with classification, confidence, and all probabilities + """ + try: + self.model.eval() + + with torch.no_grad(): + outputs = self.model(**input_tensors) + + logits = outputs.logits + probabilities = F.softmax(logits, dim=-1) + + # Get all probabilities + probs = probabilities.squeeze().cpu().numpy() + + # Get predicted class + confidence, predicted_class = torch.max(probabilities, dim=-1) + predicted_class_id = predicted_class.item() + label = ID_TO_LABEL.get(predicted_class_id, "HUMAN") + + return { + "classification": Classification(label), + "confidence": float(confidence.item()), + "probabilities": { + "HUMAN": float(probs[0]) if len(probs) > 0 else 0.0, + "AI_GENERATED": float(probs[1]) if len(probs) > 1 else 0.0, + }, + } + + except Exception as e: + logger.error("Inference with probabilities failed", error=str(e)) + raise InferenceError( + f"Model inference failed: {e}", + details={"error": str(e)}, + ) from e + + def batch_predict( + self, + input_tensors: dict[str, torch.Tensor], + ) -> list[tuple[Classification, float]]: + """ + Run batch inference. 
+ + Args: + input_tensors: Batched preprocessed input tensors + + Returns: + List of (Classification, confidence) tuples + """ + try: + self.model.eval() + + with torch.no_grad(): + outputs = self.model(**input_tensors) + + logits = outputs.logits + probabilities = F.softmax(logits, dim=-1) + + results = [] + for i in range(probabilities.shape[0]): + confidence, predicted_class = torch.max(probabilities[i], dim=-1) + predicted_class_id = predicted_class.item() + label = ID_TO_LABEL.get(predicted_class_id, "HUMAN") + results.append((Classification(label), float(confidence.item()))) + + return results + + except Exception as e: + logger.error("Batch inference failed", error=str(e)) + raise InferenceError( + f"Batch inference failed: {e}", + details={"error": str(e)}, + ) from e + + def get_hidden_states( + self, + input_tensors: dict[str, torch.Tensor], + ) -> torch.Tensor: + """ + Extract hidden states for explainability. + + Args: + input_tensors: Preprocessed input tensors + + Returns: + Hidden state tensor from last layer + """ + self.model.eval() + + with torch.no_grad(): + outputs = self.model( + **input_tensors, + output_hidden_states=True, + ) + + # Return last hidden state + if hasattr(outputs, "hidden_states") and outputs.hidden_states: + return outputs.hidden_states[-1] + + return torch.tensor([]) diff --git a/app/ml/model_loader.py b/app/ml/model_loader.py new file mode 100644 index 0000000000000000000000000000000000000000..7d7a61b09920995281cee5c805502a3dc8fdebe5 --- /dev/null +++ b/app/ml/model_loader.py @@ -0,0 +1,246 @@ +""" +Model loader for Wav2Vec2 voice classification model. + +Handles loading, caching, and management of the ML model. 
+""" + +import gc +import threading +from typing import TYPE_CHECKING + +import torch +from transformers import Wav2Vec2ForSequenceClassification +from transformers import Wav2Vec2Processor + +from app.config import get_settings +from app.utils.constants import ID_TO_LABEL +from app.utils.constants import LABEL_TO_ID +from app.utils.exceptions import ModelNotLoadedError +from app.utils.logger import get_logger + +if TYPE_CHECKING: + from transformers import PreTrainedModel + +logger = get_logger(__name__) + + +class ModelLoader: + """ + Singleton model loader for Wav2Vec2 classification model. + + Handles lazy loading, caching, and memory management of the ML model. + Thread-safe for production use. + """ + + _instance: "ModelLoader | None" = None + _lock: threading.Lock = threading.Lock() + + def __new__(cls) -> "ModelLoader": + """Ensure only one instance exists (Singleton pattern).""" + if cls._instance is None: + with cls._lock: + if cls._instance is None: + cls._instance = super().__new__(cls) + cls._instance._initialized = False + return cls._instance + + def __init__(self) -> None: + """Initialize ModelLoader if not already initialized.""" + if getattr(self, "_initialized", False): + return + + self.settings = get_settings() + self.model: Wav2Vec2ForSequenceClassification | None = None + self.processor: Wav2Vec2Processor | None = None + self.device: str = self.settings.torch_device + self._model_lock = threading.Lock() + self._initialized = True + + logger.info( + "ModelLoader initialized", + device=self.device, + model_identifier=self.settings.model_identifier, + ) + + @property + def is_loaded(self) -> bool: + """Check if model is loaded and ready for inference.""" + return self.model is not None and self.processor is not None + + def load_model(self) -> None: + """ + Load the Wav2Vec2 model and processor. + + Thread-safe loading with proper error handling. 
+ + Raises: + Exception: If model loading fails + """ + with self._model_lock: + if self.is_loaded: + logger.debug("Model already loaded, skipping") + return + + model_identifier = self.settings.model_identifier + + logger.info("Loading Wav2Vec2 model", model=model_identifier, device=self.device) + + try: + # Load processor - try model first, fallback to base wav2vec2 + try: + self.processor = Wav2Vec2Processor.from_pretrained( + model_identifier, + trust_remote_code=False, + ) + except Exception: + # Fine-tuned models often don't have processor, use base + logger.info("Using base wav2vec2 processor") + self.processor = Wav2Vec2Processor.from_pretrained( + "facebook/wav2vec2-base", + trust_remote_code=False, + ) + + # Load model with classification head + # For pretrained deepfake models, use their existing configuration + self.model = Wav2Vec2ForSequenceClassification.from_pretrained( + model_identifier, + trust_remote_code=False, + ignore_mismatched_sizes=True, # Allow different classifier sizes + ) + + # Move model to device + self.model = self.model.to(self.device) + + # Set to evaluation mode + self.model.eval() + + # Log memory usage + if self.device.startswith("cuda"): + memory_allocated = torch.cuda.memory_allocated() / (1024**3) + logger.info( + "Model loaded successfully", + device=self.device, + gpu_memory_gb=round(memory_allocated, 2), + ) + else: + logger.info("Model loaded successfully", device=self.device) + + except Exception as e: + self.model = None + self.processor = None + logger.error("Failed to load model", error=str(e)) + raise + + async def load_model_async(self) -> None: + """ + Async wrapper for model loading. + + Useful for FastAPI lifespan context. + """ + # Run in thread pool to avoid blocking + import asyncio + + loop = asyncio.get_event_loop() + await loop.run_in_executor(None, self.load_model) + + def get_model(self) -> tuple[Wav2Vec2ForSequenceClassification, Wav2Vec2Processor]: + """ + Get the loaded model and processor. 
+ + Returns: + Tuple of (model, processor) + + Raises: + ModelNotLoadedError: If model is not loaded + """ + if not self.is_loaded: + raise ModelNotLoadedError( + "Model not loaded. Call load_model() first.", + details={"model_identifier": self.settings.model_identifier}, + ) + + return self.model, self.processor # type: ignore + + def unload_model(self) -> None: + """ + Unload model and free memory. + + Useful for memory management in constrained environments. + """ + with self._model_lock: + if self.model is not None: + del self.model + self.model = None + + if self.processor is not None: + del self.processor + self.processor = None + + # Force garbage collection + gc.collect() + + # Clear CUDA cache if using GPU + if torch.cuda.is_available(): + torch.cuda.empty_cache() + + logger.info("Model unloaded, memory freed") + + def warmup(self) -> None: + """ + Run a warmup inference to initialize CUDA kernels. + + This reduces latency on the first real inference. + """ + if not self.is_loaded: + logger.warning("Cannot warmup - model not loaded") + return + + logger.info("Running model warmup...") + + try: + # Create dummy input + dummy_audio = torch.randn(1, 16000) # 1 second of audio + + model, processor = self.get_model() + + # Preprocess dummy audio + inputs = processor( + dummy_audio.squeeze().numpy(), + sampling_rate=16000, + return_tensors="pt", + padding=True, + ) + + inputs = {k: v.to(self.device) for k, v in inputs.items()} + + # Run warmup inference + with torch.no_grad(): + _ = model(**inputs) + + logger.info("Model warmup complete") + + except Exception as e: + logger.warning("Warmup failed (non-critical)", error=str(e)) + + def health_check(self) -> dict: + """ + Get model health status. 
+ + Returns: + Dictionary with health information + """ + status = { + "model_loaded": self.is_loaded, + "device": self.device, + "model_identifier": self.settings.model_identifier, + } + + if self.device.startswith("cuda") and torch.cuda.is_available(): + status["gpu_memory_allocated_gb"] = round( + torch.cuda.memory_allocated() / (1024**3), 2 + ) + status["gpu_memory_reserved_gb"] = round( + torch.cuda.memory_reserved() / (1024**3), 2 + ) + + return status diff --git a/app/ml/preprocessing.py b/app/ml/preprocessing.py new file mode 100644 index 0000000000000000000000000000000000000000..5ffa789cf8cf4e8cb380a79d098ffde12f06f766 --- /dev/null +++ b/app/ml/preprocessing.py @@ -0,0 +1,155 @@ +""" +Audio preprocessing for Wav2Vec2 model. + +Handles conversion from audio arrays to model input tensors. +""" + +import numpy as np +import torch +from transformers import Wav2Vec2Processor + +from app.utils.constants import TARGET_SAMPLE_RATE +from app.utils.logger import get_logger + +logger = get_logger(__name__) + + +class AudioPreprocessor: + """ + Preprocessor for preparing audio data for Wav2Vec2 model. + + Converts numpy audio arrays into the tensor format expected + by the Wav2Vec2ForSequenceClassification model. + """ + + def __init__( + self, + processor: Wav2Vec2Processor, + device: str = "cpu", + ) -> None: + """ + Initialize AudioPreprocessor. + + Args: + processor: Wav2Vec2Processor instance + device: Target device for tensors (cpu/cuda) + """ + self.processor = processor + self.device = device + self.sample_rate = TARGET_SAMPLE_RATE + + def validate_input(self, audio_array: np.ndarray) -> bool: + """ + Validate audio array for processing. 
+ + Args: + audio_array: Input audio array + + Returns: + True if valid + + Raises: + ValueError: If validation fails + """ + if not isinstance(audio_array, np.ndarray): + raise ValueError(f"Expected numpy array, got {type(audio_array)}") + + if audio_array.ndim != 1: + raise ValueError(f"Expected 1D array, got {audio_array.ndim}D") + + if len(audio_array) == 0: + raise ValueError("Audio array is empty") + + if np.isnan(audio_array).any(): + raise ValueError("Audio array contains NaN values") + + if np.isinf(audio_array).any(): + raise ValueError("Audio array contains infinite values") + + return True + + def preprocess( + self, + audio_array: np.ndarray, + return_attention_mask: bool = True, + ) -> dict[str, torch.Tensor]: + """ + Preprocess audio array for model inference. + + Args: + audio_array: 1D numpy array of audio samples (16kHz, normalized) + return_attention_mask: Whether to return attention mask + + Returns: + Dictionary with input_values and optionally attention_mask + """ + # Validate input + self.validate_input(audio_array) + + # Ensure float32 + audio_array = audio_array.astype(np.float32) + + # Process through Wav2Vec2Processor + inputs = self.processor( + audio_array, + sampling_rate=self.sample_rate, + return_tensors="pt", + padding=True, + return_attention_mask=return_attention_mask, + ) + + # Move to target device + inputs = {key: value.to(self.device) for key, value in inputs.items()} + + logger.debug( + "Audio preprocessed for model", + input_length=inputs["input_values"].shape[-1], + device=self.device, + ) + + return inputs + + def preprocess_batch( + self, + audio_arrays: list[np.ndarray], + return_attention_mask: bool = True, + ) -> dict[str, torch.Tensor]: + """ + Preprocess a batch of audio arrays. 
+ + Args: + audio_arrays: List of 1D numpy arrays + return_attention_mask: Whether to return attention mask + + Returns: + Dictionary with batched input_values and optionally attention_mask + """ + # Validate all inputs + for i, audio in enumerate(audio_arrays): + try: + self.validate_input(audio) + except ValueError as e: + raise ValueError(f"Invalid audio at index {i}: {e}") from e + + # Ensure float32 + audio_arrays = [audio.astype(np.float32) for audio in audio_arrays] + + # Process batch through Wav2Vec2Processor + inputs = self.processor( + audio_arrays, + sampling_rate=self.sample_rate, + return_tensors="pt", + padding=True, + return_attention_mask=return_attention_mask, + ) + + # Move to target device + inputs = {key: value.to(self.device) for key, value in inputs.items()} + + logger.debug( + "Batch preprocessed for model", + batch_size=len(audio_arrays), + device=self.device, + ) + + return inputs diff --git a/app/models/__init__.py b/app/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..f21c718d6e929015d81e72d31b0a982c87ed49ed --- /dev/null +++ b/app/models/__init__.py @@ -0,0 +1,19 @@ +"""Pydantic models package.""" + +from app.models.enums import AudioFormat +from app.models.enums import Classification +from app.models.enums import SupportedLanguage +from app.models.request import VoiceDetectionRequest +from app.models.response import ErrorResponse +from app.models.response import HealthResponse +from app.models.response import VoiceDetectionResponse + +__all__ = [ + "SupportedLanguage", + "Classification", + "AudioFormat", + "VoiceDetectionRequest", + "VoiceDetectionResponse", + "ErrorResponse", + "HealthResponse", +] diff --git a/app/models/__pycache__/__init__.cpython-312.pyc b/app/models/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..49d378087e2dfa862b80329e089dd3ed0362a278 Binary files /dev/null and b/app/models/__pycache__/__init__.cpython-312.pyc 
"""
Enumeration types for VoiceAuth API.

Defines supported languages, classification results, and audio formats.
"""

from enum import Enum


class SupportedLanguage(str, Enum):
    """Languages accepted by the voice-detection API (5 supported)."""

    TAMIL = "Tamil"
    ENGLISH = "English"
    HINDI = "Hindi"
    MALAYALAM = "Malayalam"
    TELUGU = "Telugu"

    @classmethod
    def values(cls) -> list[str]:
        """All language names, in declaration order."""
        return [member.value for member in cls]


class Classification(str, Enum):
    """Detection verdict: synthetic (AI-generated) or genuine human speech."""

    AI_GENERATED = "AI_GENERATED"
    HUMAN = "HUMAN"

    @property
    def is_synthetic(self) -> bool:
        """True only for the AI_GENERATED verdict."""
        return self is Classification.AI_GENERATED


class AudioFormat(str, Enum):
    """Accepted audio container formats (MP3 only, per competition rules)."""

    MP3 = "mp3"

    @classmethod
    def values(cls) -> list[str]:
        """All format identifiers, in declaration order."""
        return [member.value for member in cls]
Minimum 100 characters for valid audio", + ), + ] + + @field_validator("audioBase64") + @classmethod + def validate_base64(cls, v: str) -> str: + """ + Validate that the string is valid Base64. + + Args: + v: The base64 string to validate + + Returns: + The validated base64 string + + Raises: + ValueError: If the string is not valid base64 + """ + # Remove any whitespace + v = v.strip() + + # Check for valid base64 characters + base64_pattern = re.compile(r"^[A-Za-z0-9+/]*={0,2}$") + if not base64_pattern.match(v): + raise ValueError("Invalid Base64 encoding: contains invalid characters") + + # Try to decode to verify it's valid base64 + try: + # Add padding if needed + padding = 4 - len(v) % 4 + if padding != 4: + v += "=" * padding + + decoded = base64.b64decode(v) + if len(decoded) < 100: + raise ValueError("Decoded audio data is too small to be a valid MP3 file") + + except Exception as e: + if "Invalid Base64" in str(e) or "too small" in str(e): + raise + raise ValueError(f"Invalid Base64 encoding: {e}") from e + + return v.rstrip("=") + "=" * (4 - len(v.rstrip("=")) % 4) if len(v.rstrip("=")) % 4 else v diff --git a/app/models/response.py b/app/models/response.py new file mode 100644 index 0000000000000000000000000000000000000000..16be2aaf9d183b3bbe62c0784abd877d14589413 --- /dev/null +++ b/app/models/response.py @@ -0,0 +1,264 @@ +""" +Response models for VoiceAuth API. + +Defines Pydantic models for API responses. + +PHASE 1 ENHANCED: Includes Risk Score, Quality Score, Temporal Analysis. +""" + +from typing import Annotated +from typing import Any +from typing import Literal +from typing import Optional + +from pydantic import BaseModel +from pydantic import ConfigDict +from pydantic import Field + +from app.models.enums import Classification + + +class VoiceDetectionResponse(BaseModel): + """ + Successful voice detection response. + + Contains classification result, confidence score, explanation, + and comprehensive analysis data. 
"""
Response models for VoiceAuth API.

Defines Pydantic models for API responses.

PHASE 1 ENHANCED: Includes Risk Score, Quality Score, Temporal Analysis.
"""

from typing import Annotated
from typing import Any
from typing import Literal
from typing import Optional

from pydantic import BaseModel
from pydantic import ConfigDict
from pydantic import Field

from app.models.enums import Classification


class VoiceDetectionResponse(BaseModel):
    """
    Successful voice detection response.

    Contains classification result, confidence score, explanation,
    and comprehensive analysis data.

    PHASE 1 FEATURES:
    - deepfakeRiskScore: Business-friendly risk rating
    - audioQuality: Input quality assessment
    - temporalAnalysis: Breathing, pauses, rhythm analysis
    - audioForensics: Spectral and energy analysis
    - performanceMetrics: Processing time breakdown
    """

    # NOTE: field names are intentionally camelCase — they are the public
    # JSON contract of the API, not internal Python identifiers.
    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "status": "success",
                "language": "Tamil",
                "classification": "AI_GENERATED",
                "confidenceScore": 0.91,
                "explanation": "Strong evidence of AI-generated speech: absence of natural breathing sounds and mechanically consistent pause patterns detected",
                "deepfakeRiskScore": {
                    "score": 87,
                    "level": "HIGH",
                    "recommendation": "Manual review required before approval",
                },
                "audioQuality": {
                    "score": 85,
                    "rating": "GOOD",
                    "reliability": "High confidence in detection results",
                },
                "temporalAnalysis": {
                    "breathingDetected": False,
                    "breathingNaturalness": 0.0,
                    "pauseMechanicalScore": 0.78,
                    "rhythmConsistency": 0.85,
                    "anomalyScore": 0.72,
                    "verdict": "HIGH_ANOMALY",
                },
                "audioForensics": {
                    "spectralCentroid": 1523.45,
                    "pitchStability": 0.89,
                    "jitter": 0.0021,
                    "energyConsistency": 0.92,
                    "silenceRatio": 0.08,
                    "aiLikelihood": 0.76,
                },
                "performanceMetrics": {
                    "audioProcessingMs": 45.23,
                    "forensicsAnalysisMs": 12.87,
                    "temporalAnalysisMs": 8.45,
                    "modelInferenceMs": 127.45,
                    "totalProcessingMs": 193.00,
                },
            }
        }
    )

    # Core detection result (always present on success).
    status: Annotated[
        Literal["success"],
        Field(description="Response status, always 'success' for successful detections"),
    ] = "success"

    language: Annotated[
        str,
        Field(description="Language of the analyzed audio"),
    ]

    classification: Annotated[
        Classification,
        Field(description="Classification result: AI_GENERATED or HUMAN"),
    ]

    confidenceScore: Annotated[
        float,
        Field(
            ge=0.0,
            le=1.0,
            description="Calibrated confidence score between 0.0 and 1.0",
        ),
    ]

    explanation: Annotated[
        str,
        Field(
            max_length=250,
            description="Human-readable explanation based on comprehensive analysis",
        ),
    ]

    # Optional enrichment blocks below default to None; they are filled in
    # by the detection pipeline when the corresponding analysis ran.

    # NEW: Deepfake Risk Score
    deepfakeRiskScore: Annotated[
        Optional[dict[str, Any]],
        Field(
            default=None,
            description="Business-friendly risk score (0-100) with level and recommendation",
        ),
    ] = None

    # NEW: Audio Quality Score
    audioQuality: Annotated[
        Optional[dict[str, Any]],
        Field(
            default=None,
            description="Input audio quality assessment affecting detection reliability",
        ),
    ] = None

    # NEW: Temporal Analysis
    temporalAnalysis: Annotated[
        Optional[dict[str, Any]],
        Field(
            default=None,
            description="Temporal anomaly analysis (breathing, pauses, rhythm)",
        ),
    ] = None

    # Audio Forensics
    audioForensics: Annotated[
        Optional[dict[str, float]],
        Field(
            default=None,
            description="Detailed audio forensics analysis metrics",
        ),
    ] = None

    # Performance Metrics
    performanceMetrics: Annotated[
        Optional[dict[str, float]],
        Field(
            default=None,
            description="Performance timing breakdown",
        ),
    ] = None


class ErrorResponse(BaseModel):
    """
    Error response model.

    Returned when the API encounters an error.
    """

    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "status": "error",
                "message": "Invalid API key or malformed request",
            }
        }
    )

    status: Annotated[
        Literal["error"],
        Field(description="Response status, always 'error' for error responses"),
    ] = "error"

    message: Annotated[
        str,
        Field(description="Human-readable error message"),
    ]

    details: Annotated[
        Optional[dict[str, Any]],
        Field(default=None, description="Additional error details if available"),
    ] = None


class HealthResponse(BaseModel):
    """
    Health check response model.

    Returned by health check endpoints.
    """

    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "status": "healthy",
                "version": "1.0.0",
                "model_loaded": True,
                "model_name": "facebook/wav2vec2-base",
                "device": "cuda",
                "supported_languages": ["Tamil", "English", "Hindi", "Malayalam", "Telugu"],
                "features": [
                    "audio_forensics",
                    "temporal_anomaly_detection",
                    "deepfake_risk_score",
                    "audio_quality_score",
                ],
            }
        }
    )

    status: Annotated[
        str,
        Field(description="Health status: 'healthy' or 'unhealthy'"),
    ]

    version: Annotated[
        str,
        Field(description="API version"),
    ]

    model_loaded: Annotated[
        bool,
        Field(description="Whether the ML model is loaded and ready"),
    ]

    model_name: Annotated[
        Optional[str],
        Field(default=None, description="Name of the loaded model"),
    ] = None

    device: Annotated[
        Optional[str],
        Field(default=None, description="Device used for inference (cpu/cuda)"),
    ] = None

    supported_languages: Annotated[
        list[str],
        Field(description="List of supported languages"),
    ]

    features: Annotated[
        Optional[list[str]],
        Field(default=None, description="List of enabled features"),
    ] = None


class LanguagesResponse(BaseModel):
    """Response model for supported languages endpoint."""

    languages: Annotated[
        list[str],
        Field(description="List of supported language names"),
    ]

    count: Annotated[
        int,
        Field(description="Number of supported languages"),
    ]
import RiskScoreCalculator +from app.services.temporal_detector import TemporalAnomalyDetector +from app.services.voice_detector import VoiceDetector + +__all__ = [ + "AudioProcessor", + "AudioForensicsAnalyzer", + "ExplainabilityService", + "TemporalAnomalyDetector", + "RiskScoreCalculator", + "AudioQualityScorer", + "VoiceDetector", +] diff --git a/app/services/__pycache__/__init__.cpython-312.pyc b/app/services/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4a4fec60a15e85d68cbc851972c0536cb3d253fb Binary files /dev/null and b/app/services/__pycache__/__init__.cpython-312.pyc differ diff --git a/app/services/__pycache__/audio_forensics.cpython-312.pyc b/app/services/__pycache__/audio_forensics.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..413c8f566870546cf150450611c93eada43114fa Binary files /dev/null and b/app/services/__pycache__/audio_forensics.cpython-312.pyc differ diff --git a/app/services/__pycache__/audio_processor.cpython-312.pyc b/app/services/__pycache__/audio_processor.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cb0d902db9b46df5a8edf1e896d03b49ae9a9a5f Binary files /dev/null and b/app/services/__pycache__/audio_processor.cpython-312.pyc differ diff --git a/app/services/__pycache__/explainability.cpython-312.pyc b/app/services/__pycache__/explainability.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5612821bdf6d273053d81aa9dca6c65251ca069c Binary files /dev/null and b/app/services/__pycache__/explainability.cpython-312.pyc differ diff --git a/app/services/__pycache__/federated_learning.cpython-312.pyc b/app/services/__pycache__/federated_learning.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b27de73bf0cc8ad59f0fbea18dbcdc36ad8df821 Binary files /dev/null and b/app/services/__pycache__/federated_learning.cpython-312.pyc differ diff --git 
"""
Audio Forensics Analyzer for deepfake detection.

Extracts low-level audio features that help distinguish
AI-generated speech from human speech.
"""

import numpy as np
from scipy import signal
from scipy.fft import fft

from app.utils.logger import get_logger

logger = get_logger(__name__)


class AudioForensicsAnalyzer:
    """Forensic audio analysis for spotting AI-generated speech.

    Examines spectral characteristics, pitch stability, and silence
    patterns to surface artifacts typical of neural vocoders.
    """

    def __init__(self, sample_rate: int = 16000):
        """Store the sample rate used to interpret incoming audio."""
        self.sample_rate = sample_rate

    def analyze(self, audio_array: np.ndarray) -> dict:
        """
        Perform comprehensive forensic analysis on audio.

        Args:
            audio_array: Normalized audio samples (16kHz, mono)

        Returns:
            Dictionary with forensic metrics and AI likelihood indicators
        """
        logger.debug("Starting forensic audio analysis")

        # Run each independent analysis pass once, then fuse them.
        spectral = self._analyze_spectral(audio_array)
        temporal = self._analyze_temporal(audio_array)
        pitch = self._analyze_pitch_stability(audio_array)
        energy = self._analyze_energy_patterns(audio_array)

        report = {
            "spectral": spectral,
            "temporal": temporal,
            "pitch": pitch,
            "energy": energy,
            "ai_indicators": self._compute_ai_indicators(spectral, temporal, pitch, energy),
        }

        logger.debug("Forensic analysis complete", indicators=report["ai_indicators"])
        return report

    def _analyze_spectral(self, audio: np.ndarray) -> dict:
        """Compute spectral-shape statistics over the whole clip."""
        n = len(audio)
        # Magnitude spectrum of the positive-frequency half.
        magnitude = np.abs(fft(audio))[:n // 2]
        freqs = np.fft.fftfreq(n, 1 / self.sample_rate)[:n // 2]

        total = np.sum(magnitude) + 1e-10

        # Centroid: center of mass of the spectrum.
        centroid = np.sum(freqs * magnitude) / total

        # Flatness: geometric / arithmetic mean; noise-like spectra score
        # higher than tonal ones (AI often differs in certain bands).
        flatness = np.exp(np.mean(np.log(magnitude + 1e-10))) / (np.mean(magnitude) + 1e-10)

        # Rolloff: frequency below which 85% of the energy is contained.
        cumulative = np.cumsum(magnitude)
        rolloff_idx = np.searchsorted(cumulative, 0.85 * cumulative[-1])
        rolloff = freqs[min(rolloff_idx, len(freqs) - 1)]

        # Bandwidth: magnitude-weighted spread around the centroid.
        bandwidth = np.sqrt(np.sum(((freqs - centroid) ** 2) * magnitude) / total)

        return {
            "centroid_hz": round(float(centroid), 2),
            "flatness": round(float(flatness), 4),
            "rolloff_hz": round(float(rolloff), 2),
            "bandwidth_hz": round(float(bandwidth), 2),
        }
(np.sum(fft_vals) + 1e-10) + ) + + return { + "centroid_hz": round(float(spectral_centroid), 2), + "flatness": round(float(spectral_flatness), 4), + "rolloff_hz": round(float(spectral_rolloff), 2), + "bandwidth_hz": round(float(spectral_bandwidth), 2), + } + + def _analyze_temporal(self, audio: np.ndarray) -> dict: + """Analyze temporal characteristics.""" + # Zero crossing rate (how often signal crosses zero) + zero_crossings = np.sum(np.abs(np.diff(np.sign(audio)))) / 2 + zcr = zero_crossings / len(audio) + + # RMS energy + rms = np.sqrt(np.mean(audio ** 2)) + + # Compute short-time energy variance (humans have more variation) + frame_size = int(0.025 * self.sample_rate) # 25ms frames + hop_size = int(0.010 * self.sample_rate) # 10ms hop + + energies = [] + for i in range(0, len(audio) - frame_size, hop_size): + frame = audio[i:i + frame_size] + energies.append(np.sum(frame ** 2)) + + energy_variance = np.var(energies) if energies else 0 + + # Silence ratio (AI often has different silence patterns) + silence_threshold = 0.01 * np.max(np.abs(audio)) + silence_samples = np.sum(np.abs(audio) < silence_threshold) + silence_ratio = silence_samples / len(audio) + + return { + "zero_crossing_rate": round(float(zcr), 6), + "rms_energy": round(float(rms), 6), + "energy_variance": round(float(energy_variance), 8), + "silence_ratio": round(float(silence_ratio), 4), + } + + def _analyze_pitch_stability(self, audio: np.ndarray) -> dict: + """ + Analyze pitch stability. + + AI-generated speech often has unnaturally stable pitch. + Humans have natural pitch variations (jitter). 
+ """ + # Use autocorrelation for pitch estimation + frame_size = int(0.030 * self.sample_rate) # 30ms frames + hop_size = int(0.010 * self.sample_rate) # 10ms hop + + pitches = [] + for i in range(0, len(audio) - frame_size, hop_size): + frame = audio[i:i + frame_size] + + # Autocorrelation + corr = np.correlate(frame, frame, mode='full') + corr = corr[len(corr) // 2:] + + # Find first peak after initial decay + d = np.diff(corr) + start = np.where(d > 0)[0] + + if len(start) > 0: + start = start[0] + peak = np.argmax(corr[start:]) + start + if peak > 0 and corr[peak] > 0.3 * corr[0]: + pitch = self.sample_rate / peak + if 50 < pitch < 500: # Human voice range + pitches.append(pitch) + + if len(pitches) < 2: + return { + "mean_pitch_hz": 0, + "pitch_std": 0, + "pitch_stability": 1.0, # Unknown = assume stable + "jitter": 0, + } + + pitches = np.array(pitches) + mean_pitch = np.mean(pitches) + pitch_std = np.std(pitches) + + # Pitch stability (inverse of variation) - high = AI-like + pitch_stability = 1.0 / (1.0 + pitch_std / (mean_pitch + 1e-10)) + + # Jitter (frame-to-frame pitch variation) - low = AI-like + jitter = np.mean(np.abs(np.diff(pitches))) / (mean_pitch + 1e-10) + + return { + "mean_pitch_hz": round(float(mean_pitch), 2), + "pitch_std": round(float(pitch_std), 4), + "pitch_stability": round(float(pitch_stability), 4), + "jitter": round(float(jitter), 6), + } + + def _analyze_energy_patterns(self, audio: np.ndarray) -> dict: + """Analyze energy envelope patterns.""" + # Compute envelope using Hilbert transform + analytic_signal = signal.hilbert(audio) + envelope = np.abs(analytic_signal) + + # Envelope smoothness (AI is often smoother) + envelope_diff = np.abs(np.diff(envelope)) + envelope_roughness = np.mean(envelope_diff) + + # Attack/decay characteristics + # Find amplitude peaks + peaks, _ = signal.find_peaks(envelope, height=0.1 * np.max(envelope)) + + if len(peaks) > 1: + # Measure consistency of peaks (AI is more consistent) + peak_heights = 
def _compute_ai_indicators(
    self,
    spectral: dict,
    temporal: dict,
    pitch: dict,
    energy: dict,
) -> dict:
    """Derive AI-likelihood indicators from the per-domain feature dicts.

    Thresholds are tuned for modern TTS (which simulates breaths and
    jitter), so only "slightly too perfect" signals count as suspicious.
    ``spectral`` is accepted for interface symmetry but not currently
    consulted.
    """
    indicators = {}

    # 1. Pitch consistency: AI pitch tracks stay smoother than human
    # vocal cords even with simulated emotion. Only very high stability
    # (> 0.75) flags, so high-quality human voices stay clean.
    stability = pitch.get("pitch_stability", 0.5)
    indicators["pitch_regularity"] = min(1.0, stability / 0.75)

    # 2. Jitter: real voices carry chaotic micro-tremor; AI reproduces
    # it, but often too perfectly. Only extremely low jitter (< 0.025)
    # is treated as suspicious.
    jitter = pitch.get("jitter", 0.02)
    indicators["low_jitter"] = max(0.0, 1.0 - (jitter / 0.025))

    # 3. Envelope smoothness: neural vocoders produce smoother envelopes
    # than air pressure from lungs; < 0.03 roughness is suspicious.
    roughness = energy.get("envelope_roughness", 0.01)
    indicators["smooth_envelope"] = max(0.0, 1.0 - (roughness / 0.03))

    # 4. "Too digital" silence: only a near-zero crossing rate (< 0.01),
    # i.e. mathematically flat silence, flags.
    zcr = temporal.get("zero_crossing_rate", 0.1)
    indicators["unnatural_silence"] = 1.0 if zcr < 0.01 else 0.0

    # 5. Peak-energy regularity only counts once it is pronounced (> 0.8).
    consistency = energy.get("peak_consistency", 0.5)
    indicators["energy_consistency"] = consistency if consistency > 0.8 else 0.0

    # Aggressive aggregation: any single strong indicator should dominate,
    # so the final likelihood leans on the strongest weighted signal
    # rather than a plain average ("dead giveaway" handling).
    weighted = [
        indicators["pitch_regularity"] * 1.2,   # pitch weighted highest
        indicators["low_jitter"] * 1.0,
        indicators["smooth_envelope"] * 0.8,
        indicators["unnatural_silence"] * 0.5,
        indicators["energy_consistency"] * 0.6,
    ]
    strongest = max(weighted)
    average = sum(weighted) / len(weighted)
    indicators["combined_ai_likelihood"] = min(1.0, (strongest * 0.7) + (average * 0.3))

    return indicators
+ """ + factors = [] + indicators = forensics.get("ai_indicators", {}) + ai_likelihood = indicators.get("combined_ai_likelihood", 0.5) + + # If classified as AI, always show AI indicators + if classification == "AI_GENERATED": + # Show AI indicators based on what we found + if indicators.get("pitch_regularity", 0) > 0.4: + factors.append("unnaturally consistent pitch patterns") + if indicators.get("low_jitter", 0) > 0.4: + factors.append("absence of natural voice micro-variations") + if indicators.get("energy_consistency", 0) > 0.4: + factors.append("mechanical energy envelope patterns") + if indicators.get("smooth_envelope", 0) > 0.4: + factors.append("artificially smooth amplitude transitions") + if indicators.get("unnatural_silence", 0) > 0.3: + factors.append("irregular silence patterns") + + # If no strong indicators but still AI, give generic AI reason + if not factors: + factors.append("subtle synthetic audio artifacts") + + else: # HUMAN classification + if forensics["pitch"]["jitter"] > 0.015: + factors.append("natural pitch variations") + if forensics["energy"]["envelope_roughness"] > 0.015: + factors.append("organic voice texture") + if 0.05 < forensics["temporal"]["silence_ratio"] < 0.25: + factors.append("natural breathing patterns") + + if not factors: + factors.append("natural human voice characteristics") + + return factors if factors else ["voice characteristics analyzed"] + diff --git a/app/services/audio_processor.py b/app/services/audio_processor.py new file mode 100644 index 0000000000000000000000000000000000000000..d317ae26396e65ea513fa8e4dd6b27bc3f28e571 --- /dev/null +++ b/app/services/audio_processor.py @@ -0,0 +1,301 @@ +""" +Audio processing service for VoiceAuth API. + +Handles Base64 decoding, format conversion, and audio preprocessing. 
"""
Audio processing service for VoiceAuth API.

Handles Base64 decoding, format conversion, and audio preprocessing.
"""

import base64
import io
from typing import TYPE_CHECKING

import numpy as np
from pydub import AudioSegment

from app.config import get_settings
from app.utils.constants import MP3_MAGIC_BYTES
from app.utils.constants import TARGET_SAMPLE_RATE
from app.utils.exceptions import AudioDecodeError
from app.utils.exceptions import AudioDurationError
from app.utils.exceptions import AudioFormatError
from app.utils.exceptions import AudioProcessingError
from app.utils.logger import get_logger

if TYPE_CHECKING:
    import torch

logger = get_logger(__name__)


class AudioProcessor:
    """
    Audio processing service for preparing audio for ML inference.

    Handles the complete pipeline from Base64-encoded MP3 to
    normalized numpy arrays suitable for Wav2Vec2.
    """

    def __init__(self) -> None:
        """Initialize AudioProcessor with settings."""
        self.settings = get_settings()
        self.target_sample_rate = TARGET_SAMPLE_RATE

    def decode_base64_audio(self, base64_string: str) -> bytes:
        """
        Decode Base64 string to raw audio bytes.

        Args:
            base64_string: Base64-encoded audio data

        Returns:
            Raw audio bytes

        Raises:
            AudioDecodeError: If decoding fails or the payload is
                implausibly small (< 100 bytes).
        """
        try:
            # Tolerate missing '=' padding from sloppy encoders.
            base64_string = base64_string.strip()
            padding = 4 - len(base64_string) % 4
            if padding != 4:
                base64_string += "=" * padding

            audio_bytes = base64.b64decode(base64_string)

            if len(audio_bytes) < 100:
                raise AudioDecodeError(
                    "Decoded audio data is too small",
                    details={"size_bytes": len(audio_bytes)},
                )

            logger.debug(
                "Decoded base64 audio",
                size_bytes=len(audio_bytes),
            )
            return audio_bytes

        except AudioDecodeError:
            # Our own error above — pass through without re-wrapping.
            raise
        except Exception as e:
            raise AudioDecodeError(
                f"Failed to decode Base64 audio: {e}",
                details={"error": str(e)},
            ) from e

    def validate_mp3_format(self, audio_bytes: bytes) -> bool:
        """
        Validate that the audio bytes represent a valid MP3 file.

        Args:
            audio_bytes: Raw audio bytes

        Returns:
            True if valid MP3

        Raises:
            AudioFormatError: If not a valid MP3 file
        """
        # Header sniff only — a match against any known MP3 magic prefix.
        is_valid = any(audio_bytes.startswith(magic) for magic in MP3_MAGIC_BYTES)

        if not is_valid:
            raise AudioFormatError(
                "Invalid MP3 format: file does not have valid MP3 header",
                details={"header_bytes": audio_bytes[:10].hex()},
            )

        return True

    def convert_mp3_to_wav_array(self, mp3_bytes: bytes) -> np.ndarray:
        """
        Convert MP3 bytes to normalized WAV numpy array.

        Args:
            mp3_bytes: Raw MP3 audio bytes

        Returns:
            Mono float32 array at the target sample rate, scaled to [-1, 1]

        Raises:
            AudioProcessingError: If conversion fails
        """
        try:
            # Load MP3 using pydub
            audio_buffer = io.BytesIO(mp3_bytes)
            audio_segment = AudioSegment.from_mp3(audio_buffer)

            # Capture the channel count BEFORE forcing mono so the log
            # reflects the input (previously it always logged 1).
            original_channels = audio_segment.channels

            # Convert to mono if stereo
            if audio_segment.channels > 1:
                audio_segment = audio_segment.set_channels(1)

            # Resample to target sample rate
            if audio_segment.frame_rate != self.target_sample_rate:
                audio_segment = audio_segment.set_frame_rate(self.target_sample_rate)

            # Convert to numpy array
            samples = np.array(audio_segment.get_array_of_samples(), dtype=np.float32)

            # Normalize to [-1, 1] using the segment's actual sample width
            # instead of assuming 16-bit (2 bytes -> 32768, as before, but
            # 24/32-bit decodes now scale correctly too).
            full_scale = float(2 ** (8 * audio_segment.sample_width - 1))
            samples = samples / full_scale

            logger.debug(
                "Converted MP3 to WAV array",
                original_channels=original_channels,
                sample_rate=self.target_sample_rate,
                num_samples=len(samples),
            )

            return samples

        except Exception as e:
            raise AudioProcessingError(
                f"Failed to convert MP3 to WAV: {e}",
                details={"error": str(e)},
            ) from e

    def validate_audio_duration(
        self,
        audio_array: np.ndarray,
        sample_rate: int | None = None,
    ) -> float:
        """
        Validate audio duration is within allowed bounds.

        Args:
            audio_array: Numpy array of audio samples
            sample_rate: Sample rate (uses target_sample_rate if not provided)

        Returns:
            Duration in seconds

        Raises:
            AudioDurationError: If duration is out of bounds
        """
        if sample_rate is None:
            sample_rate = self.target_sample_rate

        duration = len(audio_array) / sample_rate

        if duration < self.settings.MIN_AUDIO_DURATION:
            raise AudioDurationError(
                f"Audio too short: {duration:.2f}s (minimum: {self.settings.MIN_AUDIO_DURATION}s)",
                duration=duration,
                min_duration=self.settings.MIN_AUDIO_DURATION,
            )

        if duration > self.settings.MAX_AUDIO_DURATION:
            raise AudioDurationError(
                f"Audio too long: {duration:.2f}s (maximum: {self.settings.MAX_AUDIO_DURATION}s)",
                duration=duration,
                max_duration=self.settings.MAX_AUDIO_DURATION,
            )

        logger.debug("Audio duration validated", duration_seconds=round(duration, 2))
        return duration
+ + Args: + audio_array: Numpy array of audio samples + sample_rate: Sample rate (uses target_sample_rate if not provided) + + Returns: + Duration in seconds + + Raises: + AudioDurationError: If duration is out of bounds + """ + if sample_rate is None: + sample_rate = self.target_sample_rate + + duration = len(audio_array) / sample_rate + + if duration < self.settings.MIN_AUDIO_DURATION: + raise AudioDurationError( + f"Audio too short: {duration:.2f}s (minimum: {self.settings.MIN_AUDIO_DURATION}s)", + duration=duration, + min_duration=self.settings.MIN_AUDIO_DURATION, + ) + + if duration > self.settings.MAX_AUDIO_DURATION: + raise AudioDurationError( + f"Audio too long: {duration:.2f}s (maximum: {self.settings.MAX_AUDIO_DURATION}s)", + duration=duration, + max_duration=self.settings.MAX_AUDIO_DURATION, + ) + + logger.debug("Audio duration validated", duration_seconds=round(duration, 2)) + return duration + + def normalize_audio(self, audio_array: np.ndarray) -> np.ndarray: + """ + Normalize audio amplitude to [-1, 1] range. + + Applies peak normalization to maximize dynamic range. + + Args: + audio_array: Input audio array + + Returns: + Normalized audio array + """ + # Avoid division by zero for silent audio + max_amplitude = np.abs(audio_array).max() + + if max_amplitude < 1e-8: + logger.warning("Audio appears to be silent or near-silent") + return audio_array + + normalized = audio_array / max_amplitude + return normalized + + def extract_audio_metadata( + self, + audio_array: np.ndarray, + sample_rate: int | None = None, + ) -> dict: + """ + Extract metadata from audio for explainability. 
def process_audio(self, audio_base64: str) -> tuple[np.ndarray, dict]:
    """Run the full pipeline: Base64 -> MP3 check -> WAV -> bounds -> metadata.

    Args:
        audio_base64: Base64-encoded MP3 audio

    Returns:
        Tuple of (normalized audio array, metadata dict)

    Raises:
        AudioDecodeError: If Base64 decoding fails
        AudioFormatError: If not valid MP3
        AudioDurationError: If duration out of bounds
        AudioProcessingError: If processing fails
    """
    logger.info("Starting audio processing pipeline")

    raw_bytes = self.decode_base64_audio(audio_base64)
    self.validate_mp3_format(raw_bytes)

    waveform = self.convert_mp3_to_wav_array(raw_bytes)
    self.validate_audio_duration(waveform)

    normalized = self.normalize_audio(waveform)
    metadata = self.extract_audio_metadata(normalized)

    logger.info(
        "Audio processing complete",
        duration=metadata["duration_seconds"],
        samples=metadata["num_samples"],
    )

    return normalized, metadata
class ExplainabilityService:
    """
    Service for generating explanations for voice detection results.

    Provides human-readable explanations based on classification
    and confidence levels.
    """

    def __init__(self) -> None:
        """Copy the shared indicator/descriptor pools so instances cannot
        mutate the module-level constants."""
        self.ai_indicators = AI_INDICATORS.copy()
        self.human_indicators = HUMAN_INDICATORS.copy()
        self.confidence_descriptors = CONFIDENCE_DESCRIPTORS.copy()

    def get_confidence_level(
        self, confidence: float
    ) -> Literal["very_high", "high", "medium", "low"]:
        """Bucket a 0.0-1.0 confidence score into a descriptive level."""
        if confidence >= CONFIDENCE_THRESHOLD_HIGH:
            return "very_high"
        if confidence >= CONFIDENCE_THRESHOLD_MEDIUM:
            return "high"
        if confidence >= CONFIDENCE_THRESHOLD_LOW:
            return "medium"
        return "low"

    def select_indicators(
        self,
        classification: Classification,
        count: int = 2,
    ) -> list[str]:
        """Pick *count* random indicator phrases for the classification.

        Randomness is intentional: it varies the wording across responses.
        """
        pool = (
            self.ai_indicators
            if classification == Classification.AI_GENERATED
            else self.human_indicators
        )
        return random.sample(pool, min(count, len(pool)))

    def format_explanation(
        self,
        classification: Classification,
        confidence: float,
        indicators: list[str] | None = None,
    ) -> str:
        """Assemble "<descriptor> <indicators> <detected|observed>".

        Args:
            classification: Classification result
            confidence: Confidence score
            indicators: Optional indicator list (generated when omitted)

        Returns:
            Formatted explanation string (truncated to fit limits)
        """
        descriptor = self.confidence_descriptors.get(
            self.get_confidence_level(confidence), "Indicators of"
        )

        if indicators is None:
            indicators = self.select_indicators(classification, count=2)

        # Natural-language join of the indicator list.
        if len(indicators) == 1:
            joined = indicators[0]
        elif len(indicators) == 2:
            joined = f"{indicators[0]} and {indicators[1]}"
        else:
            joined = ", ".join(indicators[:-1]) + f", and {indicators[-1]}"

        suffix = "detected" if classification == Classification.AI_GENERATED else "observed"
        explanation = f"{descriptor} {joined} {suffix}"

        # Hard cap so the response field limit is never exceeded.
        if len(explanation) > 195:
            explanation = explanation[:192] + "..."

        return explanation

    def generate_explanation(
        self,
        classification: Classification,
        confidence: float,
        audio_metadata: dict | None = None,
    ) -> str:
        """Produce the final explanation string for a detection result.

        Higher confidence yields more indicators (3 / 2 / 1).
        """
        logger.debug(
            "Generating explanation",
            classification=classification.value,
            confidence=confidence,
        )

        level = self.get_confidence_level(confidence)
        num_indicators = {"very_high": 3, "high": 3, "medium": 2}.get(level, 1)

        indicators = self.select_indicators(classification, count=num_indicators)
        explanation = self.format_explanation(classification, confidence, indicators)

        logger.debug(
            "Generated explanation",
            explanation=explanation,
            num_indicators=len(indicators),
        )

        return explanation

    def generate_detailed_explanation(
        self,
        classification: Classification,
        confidence: float,
        audio_metadata: dict,
    ) -> dict:
        """Build a structured explanation payload with supporting metrics."""
        summary = self.generate_explanation(
            classification=classification,
            confidence=confidence,
            audio_metadata=audio_metadata,
        )

        return {
            "summary": summary,
            "confidence_level": self.get_confidence_level(confidence),
            "indicators": self.select_indicators(classification, count=3),
            "audio_metrics": {
                "duration": audio_metadata.get("duration_seconds"),
                "energy": audio_metadata.get("rms_energy"),
            },
        }
"""
Federated Learning Architecture for VoiceAuth API.

Provides FL-ready endpoints and architecture for privacy-preserving
collaborative model improvement.

NOTE: This is an architectural framework. Full FL implementation
would require PySyft or TensorFlow Federated for production use.
"""

import hashlib
import time
from datetime import datetime
from datetime import timezone
from typing import Optional

from pydantic import BaseModel
from pydantic import Field

from app.utils.logger import get_logger

logger = get_logger(__name__)


def _utc_now_iso() -> str:
    """Naive-UTC ISO-8601 timestamp.

    Matches the output format of the deprecated ``datetime.utcnow()``
    (no "+00:00" suffix) while using the supported timezone-aware API.
    """
    return datetime.now(timezone.utc).replace(tzinfo=None).isoformat()


class FederatedClientInfo(BaseModel):
    """Information about a federated learning client."""

    client_id: str = Field(..., description="Unique client identifier")
    organization: Optional[str] = Field(None, description="Organization name")
    registered_at: str = Field(default_factory=_utc_now_iso)
    last_contribution: Optional[str] = None
    total_samples: int = 0
    local_accuracy: float = 0.0


class FederatedContribution(BaseModel):
    """A contribution from a federated client."""

    client_id: str
    gradient_hash: str = Field(..., description="Hash of encrypted gradients")
    samples_trained: int = Field(..., ge=1)
    local_accuracy: float = Field(..., ge=0.0, le=1.0)
    timestamp: str = Field(default_factory=_utc_now_iso)


class FederatedLearningManager:
    """
    Manager for federated learning operations.

    This provides:
    1. Client registration and tracking
    2. Contribution logging
    3. Model versioning
    4. Privacy-preserving aggregation (simulated)

    Production implementation would use:
    - Differential privacy
    - Secure aggregation
    - Encrypted gradient transmission

    NOTE(review): state lives in plain in-memory structures with no
    locking — assumes single-threaded access and is lost on restart.
    """

    def __init__(self):
        """Initialize FL manager with empty in-memory state."""
        self.clients: dict[str, FederatedClientInfo] = {}
        self.contributions: list[FederatedContribution] = []
        self.model_version = "v1.0.0"
        self.last_aggregation: Optional[str] = None
        self.total_contributions = 0

    def register_client(
        self,
        client_id: str,
        organization: Optional[str] = None,
    ) -> FederatedClientInfo:
        """
        Register a new federated client.

        Registration is idempotent: re-registering an existing client_id
        returns the existing record unchanged.

        Args:
            client_id: Unique identifier for the client
            organization: Optional organization name

        Returns:
            Client info object
        """
        if client_id in self.clients:
            return self.clients[client_id]

        client = FederatedClientInfo(
            client_id=client_id,
            organization=organization,
        )
        self.clients[client_id] = client

        logger.info(
            "Federated client registered",
            client_id=client_id,
            organization=organization,
        )

        return client

    def submit_contribution(
        self,
        client_id: str,
        gradient_hash: str,
        samples_trained: int,
        local_accuracy: float,
    ) -> dict:
        """
        Submit a training contribution from a client.

        Unknown clients are auto-registered before the contribution is
        recorded.

        Args:
            client_id: Client identifier
            gradient_hash: Hash of the encrypted gradients
            samples_trained: Number of samples used for training
            local_accuracy: Local model accuracy

        Returns:
            Contribution receipt (status, id, timestamp, model version)
        """
        # Verify client is registered
        if client_id not in self.clients:
            self.register_client(client_id)

        # Create contribution record
        contribution = FederatedContribution(
            client_id=client_id,
            gradient_hash=gradient_hash,
            samples_trained=samples_trained,
            local_accuracy=local_accuracy,
        )

        self.contributions.append(contribution)
        self.total_contributions += 1

        # Update client info
        client = self.clients[client_id]
        client.last_contribution = contribution.timestamp
        client.total_samples += samples_trained
        client.local_accuracy = local_accuracy

        logger.info(
            "Contribution received",
            client_id=client_id,
            samples=samples_trained,
            accuracy=local_accuracy,
        )

        return {
            "status": "accepted",
            "contribution_id": self._generate_contribution_id(contribution),
            "timestamp": contribution.timestamp,
            "model_version": self.model_version,
        }

    def get_federation_status(self) -> dict:
        """
        Get current federation status.

        Returns:
            Status dictionary with federation info
        """
        active_clients = len([c for c in self.clients.values() if c.total_samples > 0])

        # Calculate average accuracy across clients
        accuracies = [c.local_accuracy for c in self.clients.values() if c.local_accuracy > 0]
        avg_accuracy = sum(accuracies) / len(accuracies) if accuracies else 0.0

        return {
            "federation_enabled": True,
            "model_version": self.model_version,
            "registered_clients": len(self.clients),
            "active_clients": active_clients,
            "total_contributions": self.total_contributions,
            "total_samples_trained": sum(c.total_samples for c in self.clients.values()),
            "average_accuracy": round(avg_accuracy, 4),
            "last_aggregation": self.last_aggregation,
            "privacy_mechanism": "differential_privacy",
            "aggregation_method": "federated_averaging",
        }
+ + Returns: + Status dictionary with federation info + """ + active_clients = len([c for c in self.clients.values() if c.total_samples > 0]) + + # Calculate average accuracy across clients + accuracies = [c.local_accuracy for c in self.clients.values() if c.local_accuracy > 0] + avg_accuracy = sum(accuracies) / len(accuracies) if accuracies else 0.0 + + return { + "federation_enabled": True, + "model_version": self.model_version, + "registered_clients": len(self.clients), + "active_clients": active_clients, + "total_contributions": self.total_contributions, + "total_samples_trained": sum(c.total_samples for c in self.clients.values()), + "average_accuracy": round(avg_accuracy, 4), + "last_aggregation": self.last_aggregation, + "privacy_mechanism": "differential_privacy", + "aggregation_method": "federated_averaging", + } + + def get_client_response_data(self) -> dict: + """ + Get FL data to include in API response. + + Returns: + Dictionary for response inclusion + """ + return { + "enabled": True, + "modelVersion": self.model_version, + "participatingClients": len([c for c in self.clients.values() if c.total_samples > 0]), + "privacyPreserving": True, + "localInference": True, + } + + def _generate_contribution_id(self, contribution: FederatedContribution) -> str: + """Generate unique ID for a contribution.""" + data = f"{contribution.client_id}{contribution.timestamp}{contribution.gradient_hash}" + return hashlib.sha256(data.encode()).hexdigest()[:16] + + def simulate_aggregation(self) -> dict: + """ + Simulate federated aggregation (for demo purposes). + + In production, this would: + 1. Collect encrypted gradients + 2. Apply differential privacy + 3. Securely aggregate + 4. 
Update global model + + Returns: + Aggregation result + """ + if len(self.contributions) < 2: + return { + "status": "insufficient_contributions", + "message": "Need at least 2 contributions to aggregate", + } + + # Simulate aggregation + self.last_aggregation = datetime.utcnow().isoformat() + + # Increment model version + major, minor, patch = self.model_version[1:].split(".") + new_patch = int(patch) + 1 + self.model_version = f"v{major}.{minor}.{new_patch}" + + # Clear contributions after aggregation + num_contributions = len(self.contributions) + self.contributions = [] + + logger.info( + "Federated aggregation complete", + contributions=num_contributions, + new_version=self.model_version, + ) + + return { + "status": "success", + "contributions_aggregated": num_contributions, + "new_model_version": self.model_version, + "timestamp": self.last_aggregation, + } + + +# Global FL manager instance +fl_manager = FederatedLearningManager() diff --git a/app/services/score_calculators.py b/app/services/score_calculators.py new file mode 100644 index 0000000000000000000000000000000000000000..485edb7fb03c50028db67d3cb68da3295b8458dd --- /dev/null +++ b/app/services/score_calculators.py @@ -0,0 +1,229 @@ +""" +Risk and Quality Score calculators. + +Provides business-friendly metrics: +- Deepfake Risk Score (0-100) +- Audio Quality Score (0-100) +""" + +from app.models.enums import Classification +from app.utils.logger import get_logger + +logger = get_logger(__name__) + + +class RiskScoreCalculator: + """ + Calculate deepfake risk score from detection results. + + Converts technical metrics to business-friendly 0-100 score. + """ + + def calculate( + self, + classification: Classification, + confidence: float, + forensics: dict, + temporal: dict, + ) -> dict: + """ + Calculate comprehensive risk score. 
"""
Risk and Quality Score calculators.

Provides business-friendly metrics:
- Deepfake Risk Score (0-100)
- Audio Quality Score (0-100)
"""

from app.models.enums import Classification
from app.utils.logger import get_logger

logger = get_logger(__name__)


class RiskScoreCalculator:
    """
    Translate raw detection output into a 0-100 deepfake risk score.

    Combines the ML confidence with forensic and temporal evidence so
    downstream consumers get one business-friendly number.
    """

    def calculate(
        self,
        classification: Classification,
        confidence: float,
        forensics: dict,
        temporal: dict,
    ) -> dict:
        """
        Compute the combined risk score.

        Args:
            classification: AI_GENERATED or HUMAN
            confidence: ML model confidence (0-1)
            forensics: Forensics analysis results
            temporal: Temporal analysis results

        Returns:
            Dictionary with risk score and details
        """
        try:
            # ML contribution: probability the clip is AI-made, on a 0-100 scale.
            if classification == Classification.AI_GENERATED:
                ml_component = confidence * 100
            else:
                ml_component = (1 - confidence) * 100

            # Forensic evidence shifts the score by at most ±10 points.
            likelihood = forensics.get("ai_indicators", {}).get("combined_ai_likelihood", 0.5)
            forensics_delta = (likelihood - 0.5) * 20

            # Temporal evidence gets the same ±10 point budget.
            anomaly = temporal.get("anomalyScore", 0.5)
            temporal_delta = (anomaly - 0.5) * 20

            # Clamp the fused score into [0, 100].
            combined = min(100, max(0, ml_component + forensics_delta + temporal_delta))

            level = self._get_risk_level(combined)

            return {
                "score": round(combined),
                "level": level,
                "recommendation": self._get_recommendation(level, classification),
                "breakdown": {
                    "mlScore": round(ml_component),
                    "forensicsAdjustment": round(forensics_delta),
                    "temporalAdjustment": round(temporal_delta),
                },
            }

        except Exception as e:
            logger.warning(f"Risk score calculation failed: {e}")
            return self._default_result(classification, confidence)

    def _get_risk_level(self, score: float) -> str:
        """Map a numeric score onto a discrete risk band."""
        for floor, label in ((80, "CRITICAL"), (60, "HIGH"), (40, "MEDIUM"), (20, "LOW")):
            if score >= floor:
                return label
        return "MINIMAL"

    def _get_recommendation(self, risk_level: str, classification: Classification) -> str:
        """Look up the suggested action for a risk band."""
        actions = {
            "CRITICAL": "Block/Reject - High deepfake probability",
            "HIGH": "Manual review required before approval",
            "MEDIUM": "Flag for review - possible manipulation",
            "LOW": "Likely authentic - standard processing",
            "MINIMAL": "Authentic voice - safe to proceed",
        }
        return actions.get(risk_level, "Review recommended")

    def _default_result(self, classification: Classification, confidence: float) -> dict:
        """Fallback payload used when scoring raises."""
        if classification == Classification.AI_GENERATED:
            fallback_score = int(confidence * 100)
        else:
            fallback_score = int((1 - confidence) * 100)
        return {
            "score": fallback_score,
            "level": "UNKNOWN",
            "recommendation": "Manual review recommended",
            "breakdown": {"mlScore": fallback_score},
        }


class AudioQualityScorer:
    """
    Rate the quality of the input audio.

    A 0-100 score that tells users how much to trust the detection
    result given the recording's condition.
    """

    def calculate(
        self,
        audio_metadata: dict,
        forensics: dict,
    ) -> dict:
        """
        Calculate audio quality score.

        Args:
            audio_metadata: Audio metadata (duration, energy, etc.)
            forensics: Forensics analysis results

        Returns:
            Dictionary with quality score and details
        """
        try:
            issues: list = []
            score = 100

            def penalize(points: int, note: str) -> None:
                # Deduct points and record the reason in one step.
                nonlocal score
                score -= points
                issues.append(note)

            # Duration: too short hurts reliability, very long may be cut off.
            duration = audio_metadata.get("duration_seconds", 0)
            if duration < 1.0:
                penalize(30, "Very short duration (< 1s)")
            elif duration < 2.0:
                penalize(15, "Short duration (< 2s)")
            elif duration > 25:
                penalize(5, "Long audio may be truncated")

            # Loudness: quiet recordings are harder to analyze.
            rms = audio_metadata.get("rms_energy", 0)
            if rms < 0.005:
                penalize(25, "Very low audio level")
            elif rms < 0.01:
                penalize(10, "Low audio level")

            # Clipping: peaks at (or near) full scale.
            if audio_metadata.get("peak_amplitude", 0) > 0.99:
                penalize(15, "Audio clipping detected")

            # Noise: high spectral flatness reads as broadband noise.
            if forensics.get("spectral", {}).get("flatness", 0.5) > 0.8:
                penalize(10, "High noise level")

            # Clamp into [0, 100] before banding.
            score = min(100, max(0, score))

            return {
                "score": round(score),
                "rating": self._get_rating(score),
                "reliability": self._get_reliability(score),
                "issues": issues or ["Good audio quality"],
            }

        except Exception as e:
            logger.warning(f"Quality score calculation failed: {e}")
            return self._default_result()

    def _get_rating(self, score: float) -> str:
        """Map a numeric quality score onto a rating band."""
        for floor, label in (
            (80, "EXCELLENT"),
            (60, "GOOD"),
            (40, "FAIR"),
            (20, "POOR"),
        ):
            if score >= floor:
                return label
        return "VERY_POOR"

    def _get_reliability(self, score: float) -> str:
        """Describe how far the quality level lets us trust detection output."""
        if score >= 70:
            return "High confidence in detection results"
        if score >= 50:
            return "Moderate confidence - results may vary"
        return "Low confidence - audio quality affects accuracy"

    def _default_result(self) -> dict:
        """Fallback payload used when quality scoring raises."""
        return {
            "score": 50,
            "rating": "UNKNOWN",
            "reliability": "Unable to assess",
            "issues": ["Quality assessment failed"],
        }


"""
Temporal Anomaly Detection for AI voice detection.

Analyzes timing patterns that distinguish AI from human speech:
- Breathing patterns
- Pause consistency
- Micro-timing variations
"""

import numpy as np
from scipy import signal

from app.utils.logger import get_logger

logger = get_logger(__name__)
class TemporalAnomalyDetector:
    """
    Flags AI-generated speech via timing cues.

    Looks at three timing signals — breathing events, pause-length
    variability, and rhythm regularity — and fuses them into a single
    anomaly score (higher = more machine-like).
    """

    def __init__(self, sample_rate: int = 16000):
        """Initialize detector with its tuning thresholds."""
        self.sample_rate = sample_rate
        # Fraction of the peak envelope below which audio counts as silence.
        self.silence_threshold = 0.02
        # Plausible breath-sound duration window, in seconds.
        self.breathing_min_duration = 0.2
        self.breathing_max_duration = 0.8

    def analyze(self, audio_array: np.ndarray) -> dict:
        """
        Run the full temporal analysis.

        Args:
            audio_array: Normalized audio samples (16kHz, mono)

        Returns:
            Dictionary with temporal analysis results
        """
        try:
            breathing_info = self._detect_breathing(audio_array)
            pause_info = self._analyze_pause_patterns(audio_array)
            rhythm_info = self._analyze_rhythm(audio_array)

            overall = self._calculate_anomaly_score(breathing_info, pause_info, rhythm_info)

            return {
                "breathing": breathing_info,
                "pauses": pause_info,
                "rhythm": rhythm_info,
                "anomalyScore": overall,
                "verdict": self._get_verdict(overall),
            }
        except Exception as e:
            logger.warning(f"Temporal analysis failed: {e}")
            return self._default_result()

    def _quiet_segments(self, audio: np.ndarray, window_seconds: float, level_fraction: float):
        """
        Locate below-threshold runs in the smoothed amplitude envelope.

        Returns the raw (starts, ends) sample indices of transitions into
        and out of low-energy regions; callers pair and filter them.
        """
        envelope = np.abs(audio)

        # Moving-average smoothing of the envelope.
        span = int(window_seconds * self.sample_rate)
        if span > 1:
            envelope = np.convolve(envelope, np.ones(span) / span, mode='same')

        # Threshold is relative to the loudest point in this clip.
        quiet = envelope < np.max(envelope) * level_fraction

        transitions = np.diff(quiet.astype(int))
        return np.where(transitions == 1)[0], np.where(transitions == -1)[0]

    def _detect_breathing(self, audio: np.ndarray) -> dict:
        """
        Count breath-like events: brief low-energy stretches that still
        carry a little sound energy.
        """
        starts, ends = self._quiet_segments(audio, 0.05, self.silence_threshold)

        if len(starts) == 0 or len(ends) == 0:
            return {
                "detected": False,
                "count": 0,
                "naturalness": 0.0,
            }

        # Drop an end that precedes the first start, then trim to pairs.
        if ends[0] < starts[0]:
            ends = ends[1:]
        if len(starts) > len(ends):
            starts = starts[:len(ends)]

        breath_count = 0
        for lo, hi in zip(starts, ends):
            span_s = (hi - lo) / self.sample_rate
            if not (self.breathing_min_duration <= span_s <= self.breathing_max_duration):
                continue
            segment = audio[lo:hi]
            # A breath is quiet but not dead silence.
            if len(segment) > 0 and np.sqrt(np.mean(segment ** 2)) > 0.001:
                breath_count += 1

        # Humans breathe roughly every 4 seconds; score against that rate.
        total_seconds = len(audio) / self.sample_rate
        expected = max(1, int(total_seconds / 4))

        return {
            "detected": breath_count > 0,
            "count": breath_count,
            "naturalness": round(min(1.0, breath_count / expected), 4),
        }

    def _analyze_pause_patterns(self, audio: np.ndarray) -> dict:
        """
        Measure how uniform the pause lengths are; near-identical pauses
        read as mechanical (AI-like).
        """
        starts, ends = self._quiet_segments(audio, 0.02, 0.1)

        if len(starts) < 2 or len(ends) < 2:
            return {
                "count": 0,
                "variance": 0.5,
                "mechanicalScore": 0.5,
            }

        # Same start/end pairing fix as for breathing detection.
        if ends[0] < starts[0]:
            ends = ends[1:]
        if len(starts) > len(ends):
            starts = starts[:len(ends)]

        # Keep only plausible speech pauses (50 ms .. 2 s).
        durations = [
            (hi - lo) / self.sample_rate
            for lo, hi in zip(starts, ends)
            if 0.05 < (hi - lo) / self.sample_rate < 2.0
        ]

        if len(durations) < 2:
            return {
                "count": len(durations),
                "variance": 0.5,
                "mechanicalScore": 0.5,
            }

        durations = np.array(durations)
        mean_len = np.mean(durations)
        rel_var = np.var(durations) / (mean_len + 1e-10)

        # Tight pause spacing (low variance) reads as machine-generated;
        # normalize so 1 = very mechanical.
        mechanical = max(0, 1 - rel_var * 5)

        return {
            "count": len(durations),
            "variance": round(float(rel_var), 4),
            "mechanicalScore": round(mechanical, 4),
        }

    def _analyze_rhythm(self, audio: np.ndarray) -> dict:
        """
        Gauge rhythm regularity from frame-energy autocorrelation; overly
        steady energy is an AI tell.
        """
        frame_len = int(0.025 * self.sample_rate)   # 25 ms analysis frames
        step = int(0.010 * self.sample_rate)        # 10 ms hop

        frame_energies = [
            np.sum(audio[pos:pos + frame_len] ** 2)
            for pos in range(0, len(audio) - frame_len, step)
        ]

        if len(frame_energies) < 10:
            return {
                "consistencyScore": 0.5,
                "unnaturalPatterns": False,
            }

        frame_energies = np.array(frame_energies)

        # Normalized autocorrelation (positive lags only) exposes periodicity.
        acf = np.correlate(frame_energies, frame_energies, mode='full')
        acf = acf[len(acf) // 2:]
        acf = acf / (acf[0] + 1e-10)

        peaks, _ = signal.find_peaks(acf, height=0.3, distance=5)

        if len(peaks) > 2:
            heights = acf[peaks]
            peak_consistency = 1 - np.std(heights) / (np.mean(heights) + 1e-10)
        else:
            peak_consistency = 0.5
        # NOTE(review): peak_consistency is computed but never used below —
        # only the energy variation drives the score. Preserved as-is;
        # confirm whether it was meant to feed the result.
        _ = peak_consistency

        # Coefficient of variation of frame energy: low spread = mechanical.
        spread = np.std(frame_energies) / (np.mean(frame_energies) + 1e-10)
        steadiness = max(0, min(1, 1 - spread))

        return {
            "consistencyScore": round(steadiness, 4),
            "unnaturalPatterns": steadiness > 0.7,
        }

    def _calculate_anomaly_score(
        self,
        breathing: dict,
        pauses: dict,
        rhythm: dict,
    ) -> float:
        """Fuse the three sub-scores into one 0-1 anomaly score (high = AI-like)."""
        components = [
            # Missing breathing is strongly suspicious; otherwise invert naturalness.
            0.8 if not breathing["detected"] else 1 - breathing["naturalness"],
            # Mechanical pauses are suspicious.
            pauses.get("mechanicalScore", 0.5),
            # Overly consistent rhythm is suspicious.
            rhythm.get("consistencyScore", 0.5),
        ]
        return round(float(np.mean(components)), 4)

    def _get_verdict(self, anomaly_score: float) -> str:
        """Translate the numeric anomaly score into a label."""
        for floor, label in (
            (0.7, "HIGH_ANOMALY"),
            (0.5, "MODERATE_ANOMALY"),
            (0.3, "LOW_ANOMALY"),
        ):
            if anomaly_score >= floor:
                return label
        return "NATURAL"

    def _default_result(self) -> dict:
        """Neutral result returned when analysis fails."""
        return {
            "breathing": {"detected": False, "count": 0, "naturalness": 0.5},
            "pauses": {"count": 0, "variance": 0.5, "mechanicalScore": 0.5},
            "rhythm": {"consistencyScore": 0.5, "unnaturalPatterns": False},
            "anomalyScore": 0.5,
            "verdict": "UNKNOWN",
        }

    def get_explanation_factors(self, analysis: dict, classification: str = None) -> list[str]:
        """Produce human-readable factors supporting the given classification."""
        factors = []

        if classification == "AI_GENERATED":
            # Only surface evidence consistent with an AI verdict.
            if not analysis["breathing"]["detected"]:
                factors.append("absence of natural breathing sounds")
            if analysis["pauses"]["mechanicalScore"] > 0.4:
                factors.append("mechanically consistent pause patterns")
            if analysis["rhythm"]["unnaturalPatterns"]:
                factors.append("unnaturally consistent speech rhythm")
            # If no strong indicators but still AI, don't add contradicting factors.
        else:
            # HUMAN verdict: surface naturalness evidence instead.
            if analysis["breathing"]["detected"]:
                factors.append("natural breathing patterns")
            if analysis["pauses"]["variance"] > 0.1:
                factors.append("natural pause variations")

        return factors

Orchestrates the complete detection pipeline:
- Audio signal processing
- Forensic and temporal analysis
- ML inference
- Risk scoring and explanation generation
"""

import time

from app.config import get_settings
from app.ml.inference import InferenceEngine
from app.ml.model_loader import ModelLoader
from app.ml.preprocessing import AudioPreprocessor
from app.models.enums import Classification
from app.models.enums import SupportedLanguage
from app.models.response import VoiceDetectionResponse
from app.services.audio_forensics import AudioForensicsAnalyzer
from app.services.audio_processor import AudioProcessor
from app.services.explainability import ExplainabilityService
from app.services.score_calculators import AudioQualityScorer
from app.services.score_calculators import RiskScoreCalculator
from app.services.temporal_detector import TemporalAnomalyDetector
from app.utils.exceptions import ModelNotLoadedError
from app.utils.logger import get_logger

logger = get_logger(__name__)


class VoiceDetector:
    """
    Main voice detection service.

    Coordinates the multi-stage pipeline including signal processing,
    ML inference, and heuristic analysis.
    """

    def __init__(self, model_loader: ModelLoader | None = None) -> None:
        """
        Initialize VoiceDetector.

        Args:
            model_loader: Optional ModelLoader instance (singleton used if not provided)
        """
        self.settings = get_settings()
        self.model_loader = model_loader or ModelLoader()
        self.audio_processor = AudioProcessor()
        self.explainability = ExplainabilityService()

        # Analysis components
        self.forensics = AudioForensicsAnalyzer()
        self.temporal = TemporalAnomalyDetector()

        # Scoring components
        self.risk_scorer = RiskScoreCalculator()
        self.quality_scorer = AudioQualityScorer()

        # Lazy-initialized components (need a loaded model; see
        # _ensure_model_loaded)
        self._preprocessor: AudioPreprocessor | None = None
        self._inference_engine: InferenceEngine | None = None

    def _ensure_model_loaded(self) -> None:
        """Ensure model is loaded and components are initialized.

        Raises:
            ModelNotLoadedError: if the shared ModelLoader has no model.
        """
        if not self.model_loader.is_loaded:
            raise ModelNotLoadedError(
                "Voice detection model not loaded",
                details={"model": self.settings.model_identifier},
            )

        # Initialize preprocessor and inference engine if needed
        if self._preprocessor is None or self._inference_engine is None:
            model, processor = self.model_loader.get_model()
            device = self.model_loader.device

            self._preprocessor = AudioPreprocessor(processor, device)
            self._inference_engine = InferenceEngine(model, processor, device)

    def _calibrate_confidence(
        self,
        raw_confidence: float,
        audio_metadata: dict,
        forensics_data: dict,
        temporal_data: dict,
    ) -> float:
        """
        Calibrate raw model confidence using audio metadata and forensic analysis.

        NOTE(review): this helper is currently never invoked — detect()
        applies its own inline duration-only adjustment instead, with
        different factors (0.85/0.92 there vs 0.8/0.9 here). Confirm which
        calibration is intended.
        """
        calibrated = raw_confidence

        # Reduce confidence for very short audio
        duration = audio_metadata.get("duration_seconds", 5)
        if duration < 1.0:
            calibrated *= 0.8
        elif duration < 2.0:
            calibrated *= 0.9

        # Reduce confidence for low energy audio
        rms = audio_metadata.get("rms_energy", 0.1)
        if rms < 0.01:
            calibrated *= 0.85

        # Boost if forensics and temporal agree with classification
        ai_likelihood = forensics_data.get("ai_indicators", {}).get("combined_ai_likelihood", 0.5)
        temporal_anomaly = temporal_data.get("anomalyScore", 0.5)

        if abs(ai_likelihood - temporal_anomaly) < 0.2:
            calibrated *= 1.05

        # Clamp to (0.01, 1.0] so the response never reports 0 or >1.
        return min(1.0, max(0.01, calibrated))

    async def detect(
        self,
        audio_base64: str,
        language: SupportedLanguage,
    ) -> VoiceDetectionResponse:
        """
        Detect whether a voice sample is AI-generated or human.

        Args:
            audio_base64: Base64-encoded MP3 audio
            language: Language of the audio content

        Returns:
            VoiceDetectionResponse with classification, explanation, and all analyses

        Raises:
            AudioDecodeError: If Base64 decoding fails
            AudioFormatError: If audio format is invalid
            AudioDurationError: If audio duration is out of bounds
            ModelNotLoadedError: If model is not loaded
            InferenceError: If inference fails
        """
        start_time = time.perf_counter()

        logger.info("Starting voice detection", language=language.value)

        # Ensure model is ready
        self._ensure_model_loaded()

        # Audio processing pipeline
        audio_array, audio_metadata = self.audio_processor.process_audio(audio_base64)
        audio_processing_time = (time.perf_counter() - start_time) * 1000

        # Run analysis components in parallel (conceptually)
        # NOTE(review): both analyses actually run sequentially here.
        forensics_start = time.perf_counter()
        forensics_data = self.forensics.analyze(audio_array)
        forensics_time = (time.perf_counter() - forensics_start) * 1000

        temporal_start = time.perf_counter()
        temporal_data = self.temporal.analyze(audio_array)
        temporal_time = (time.perf_counter() - temporal_start) * 1000

        # Classification inference
        input_tensors = self._preprocessor.preprocess(audio_array)  # type: ignore

        inference_start = time.perf_counter()
        ml_classification, raw_confidence = self._inference_engine.predict(input_tensors)  # type: ignore
        inference_time = (time.perf_counter() - inference_start) * 1000

        # Classification logic (ML-based)
        explanation = ""  # NOTE(review): dead assignment — rebuilt below before use
        classification = ml_classification
        confidence = raw_confidence


        # Calibrate confidence based on metadata
        # NOTE(review): duplicates part of _calibrate_confidence (unused)
        # with different factors — confirm which behaviour should win.
        duration = audio_metadata.get("duration_seconds", 5)
        if duration < 1.0:
            confidence *= 0.85
        elif duration < 2.0:
            confidence *= 0.92

        # Calculate auxiliary scores
        risk_score = self.risk_scorer.calculate(
            classification, confidence, forensics_data, temporal_data
        )
        quality_score = self.quality_scorer.calculate(audio_metadata, forensics_data)

        # Generate final explanation
        forensics_factors = self.forensics.get_explanation_factors(
            forensics_data, classification.value
        )
        temporal_factors = self.temporal.get_explanation_factors(
            temporal_data, classification.value
        )
        all_factors = forensics_factors + temporal_factors

        explanation = self._generate_enhanced_explanation(
            classification=classification,
            confidence=confidence,
            factors=all_factors,
        )

        # Calculate total processing time
        total_time_ms = (time.perf_counter() - start_time) * 1000

        logger.info(
            "Voice detection complete",
            language=language.value,
            classification=classification.value,
            confidence=round(confidence, 4),
            risk_score=risk_score["score"],
            quality_score=quality_score["score"],
            total_time_ms=round(total_time_ms, 2),
        )

        # Build comprehensive response
        return VoiceDetectionResponse(
            status="success",
            language=language.value,
            classification=classification,
            confidenceScore=round(confidence, 4),
            explanation=explanation,
            # Risk Score (NEW)
            deepfakeRiskScore=risk_score,
            # Audio Quality (NEW)
            audioQuality=quality_score,
            # Audio Forensics
            audioForensics={
                "spectralCentroid": forensics_data["spectral"]["centroid_hz"],
                "pitchStability": forensics_data["pitch"]["pitch_stability"],
                "jitter": forensics_data["pitch"]["jitter"],
                "energyConsistency": forensics_data["energy"]["peak_consistency"],
                "silenceRatio": forensics_data["temporal"]["silence_ratio"],
                "aiLikelihood": forensics_data["ai_indicators"]["combined_ai_likelihood"],
            },
            # Temporal Analysis (NEW)
            temporalAnalysis={
                "breathingDetected": temporal_data["breathing"]["detected"],
                "breathingNaturalness": temporal_data["breathing"]["naturalness"],
                "pauseMechanicalScore": temporal_data["pauses"]["mechanicalScore"],
                "rhythmConsistency": temporal_data["rhythm"]["consistencyScore"],
                "anomalyScore": temporal_data["anomalyScore"],
                "verdict": temporal_data["verdict"],
            },
            # Performance Metrics
            performanceMetrics={
                "audioProcessingMs": round(audio_processing_time, 2),
                "forensicsAnalysisMs": round(forensics_time, 2),
                "temporalAnalysisMs": round(temporal_time, 2),
                "modelInferenceMs": round(inference_time, 2),
                "totalProcessingMs": round(total_time_ms, 2),
            },
        )

    def _generate_enhanced_explanation(
        self,
        classification: Classification,
        confidence: float,
        factors: list[str],
    ) -> str:
        """Generate explanation using forensics and temporal analysis.

        Builds "<descriptor> <target>: <up to 3 factors> detected", capped
        at 245 characters.
        """
        # Confidence descriptor
        if confidence >= 0.85:
            descriptor = "Strong evidence of"
        elif confidence >= 0.70:
            descriptor = "Clear indicators of"
        elif confidence >= 0.55:
            descriptor = "Likely signs of"
        else:
            descriptor = "Possible characteristics of"

        # Classification target
        if classification == Classification.AI_GENERATED:
            target = "AI-generated speech"
        else:
            target = "human speech"

        # Use combined factors for explanation
        if factors:
            # Remove duplicates while preserving order
            unique_factors = list(dict.fromkeys(factors))[:3]  # Max 3 factors

            if len(unique_factors) == 1:
                factors_text = unique_factors[0]
            elif len(unique_factors) == 2:
                factors_text = f"{unique_factors[0]} and {unique_factors[1]}"
            else:
                factors_text = f"{', '.join(unique_factors[:-1])}, and {unique_factors[-1]}"

            explanation = f"{descriptor} {target}: {factors_text} detected"
        else:
            explanation = f"{descriptor} {target} based on comprehensive analysis"

        # Truncate if too long
        if len(explanation) > 245:
            explanation = explanation[:242] + "..."

        return explanation

    def detect_sync(
        self,
        audio_base64: str,
        language: SupportedLanguage,
    ) -> VoiceDetectionResponse:
        """
        Synchronous version of detect for testing.

        Spins up a private event loop; do not call from inside a running
        loop.

        Args:
            audio_base64: Base64-encoded MP3 audio
            language: Language of the audio content

        Returns:
            VoiceDetectionResponse
        """
        import asyncio

        loop = asyncio.new_event_loop()
        try:
            return loop.run_until_complete(self.detect(audio_base64, language))
        finally:
            loop.close()

    def health_check(self) -> dict:
        """
        Get health status of the voice detector.

        Returns:
            Dictionary with health information
        """
        model_health = self.model_loader.health_check()

        return {
            "status": "healthy" if model_health["model_loaded"] else "unhealthy",
            "version": self.settings.APP_VERSION,
            "model_loaded": model_health["model_loaded"],
            "model_name": self.settings.model_identifier,
            "device": model_health["device"],
            "supported_languages": SupportedLanguage.values(),
            "features": [
                "audio_forensics",
                "temporal_anomaly_detection",
                "deepfake_risk_score",
                "audio_quality_score",
                "confidence_calibration",
                "performance_metrics",
            ],
        }

    async def warmup(self) -> None:
        """Run warmup inference to initialize CUDA kernels."""
        # No-op when the model is not loaded; callers need not pre-check.
        if self.model_loader.is_loaded:
            self.model_loader.warmup()
package.""" + +from app.utils.exceptions import AudioDecodeError +from app.utils.exceptions import AudioDurationError +from app.utils.exceptions import AudioFormatError +from app.utils.exceptions import AudioProcessingError +from app.utils.exceptions import InferenceError +from app.utils.exceptions import ModelNotLoadedError +from app.utils.exceptions import VoiceAuthError +from app.utils.logger import get_logger +from app.utils.logger import setup_logging + +__all__ = [ + "VoiceAuthError", + "AudioDecodeError", + "AudioFormatError", + "AudioDurationError", + "AudioProcessingError", + "ModelNotLoadedError", + "InferenceError", + "get_logger", + "setup_logging", +] diff --git a/app/utils/__pycache__/__init__.cpython-312.pyc b/app/utils/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c6fec9b2d09a87958ba8086382b51f0ba530d890 Binary files /dev/null and b/app/utils/__pycache__/__init__.cpython-312.pyc differ diff --git a/app/utils/__pycache__/constants.cpython-312.pyc b/app/utils/__pycache__/constants.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..34a66f2a85cf68c17f578a2753b008be77388187 Binary files /dev/null and b/app/utils/__pycache__/constants.cpython-312.pyc differ diff --git a/app/utils/__pycache__/exceptions.cpython-312.pyc b/app/utils/__pycache__/exceptions.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3564502eb8b42d1ebdc0f0322c872bae2696822f Binary files /dev/null and b/app/utils/__pycache__/exceptions.cpython-312.pyc differ diff --git a/app/utils/__pycache__/logger.cpython-312.pyc b/app/utils/__pycache__/logger.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..19fb213cd3638410104daad78c68647a8bfd06d9 Binary files /dev/null and b/app/utils/__pycache__/logger.cpython-312.pyc differ diff --git a/app/utils/constants.py b/app/utils/constants.py new file mode 100644 index 
"""
Application constants for VoiceAuth API.

Defines constant values used throughout the application.
"""

# =============================================================================
# Audio Processing Constants
# =============================================================================

# Target sample rate for Wav2Vec2 model (16kHz)
TARGET_SAMPLE_RATE: int = 16000

# Audio channel configuration (mono)
AUDIO_CHANNELS: int = 1

# Audio bit depth (bits per sample)
AUDIO_BIT_DEPTH: int = 16

# Supported audio MIME types (accepted values for upload validation)
SUPPORTED_AUDIO_MIME_TYPES: set[str] = {
    "audio/mpeg",
    "audio/mp3",
    "audio/x-mpeg",
}

# MP3 magic bytes (ID3 or frame sync).
# Used to sniff file headers; a valid MP3 payload starts with one of these.
MP3_MAGIC_BYTES: tuple[bytes, ...] = (
    b"ID3",  # ID3v2 tag
    b"\xff\xfb",  # MPEG Audio Layer 3, no CRC
    b"\xff\xfa",  # MPEG Audio Layer 3, CRC
    b"\xff\xf3",  # MPEG Audio Layer 3, no CRC (MPEG 2.5)
    b"\xff\xf2",  # MPEG Audio Layer 3, CRC (MPEG 2.5)
)

# =============================================================================
# Model Constants
# =============================================================================

# Model label mappings.
# NOTE: ID_TO_LABEL must remain the exact inverse of LABEL_TO_ID.
LABEL_TO_ID: dict[str, int] = {
    "HUMAN": 0,
    "AI_GENERATED": 1,
}

ID_TO_LABEL: dict[int, str] = {
    0: "HUMAN",
    1: "AI_GENERATED",
}

# Classification thresholds
# (presumably consumed by the detector when bucketing confidence --
# confirm against the inference code)
CONFIDENCE_THRESHOLD_HIGH: float = 0.85
CONFIDENCE_THRESHOLD_MEDIUM: float = 0.65
CONFIDENCE_THRESHOLD_LOW: float = 0.50

# =============================================================================
# Explainability Constants
# =============================================================================

# AI-generated voice indicators (human-readable factors surfaced in
# explanation strings)
AI_INDICATORS: list[str] = [
    "unnatural pitch consistency",
    "robotic speech patterns",
    "synthetic formant transitions",
    "irregular breathing patterns",
    "mechanical prosody",
    "uniform spectral distribution",
    "absence of micro-variations",
    "artificial voice modulation",
    "synthetic vibrato patterns",
    "digital compression artifacts",
]

# Human voice indicators (counterpart factors for the HUMAN label)
HUMAN_INDICATORS: list[str] = [
    "natural speech variations",
    "authentic prosody",
    "organic voice characteristics",
    "natural breathing patterns",
    "dynamic pitch modulation",
    "genuine emotional inflections",
    "natural micro-pauses",
    "authentic formant patterns",
    "organic vibrato characteristics",
    "natural voice timbre",
]

# Confidence level descriptors (sentence openers keyed by confidence tier)
CONFIDENCE_DESCRIPTORS: dict[str, str] = {
    "very_high": "Strong evidence of",
    "high": "Clear indicators of",
    "medium": "Likely signs of",
    "low": "Possible characteristics of",
}

# =============================================================================
# API Constants
# =============================================================================

# Request ID header
REQUEST_ID_HEADER: str = "X-Request-ID"

# Rate limit headers
RATE_LIMIT_LIMIT_HEADER: str = "X-RateLimit-Limit"
RATE_LIMIT_REMAINING_HEADER: str = "X-RateLimit-Remaining"
RATE_LIMIT_RESET_HEADER: str = "X-RateLimit-Reset"

# API key prefix
API_KEY_PREFIX: str = "sk_"
class AudioDurationError(VoiceAuthError):
    """
    Raised when audio duration is out of allowed bounds.

    This typically occurs when:
    - Audio is shorter than minimum duration
    - Audio is longer than maximum duration
    """

    def __init__(
        self,
        message: str,
        duration: float | None = None,
        min_duration: float | None = None,
        max_duration: float | None = None,
    ) -> None:
        """
        Initialize AudioDurationError.

        Args:
            message: Human-readable error message
            duration: Actual duration of the audio
            min_duration: Minimum allowed duration
            max_duration: Maximum allowed duration
        """
        # Keep only the bounds that were actually supplied so the
        # structured details payload never carries null entries.
        candidates = (
            ("duration", duration),
            ("min_duration", min_duration),
            ("max_duration", max_duration),
        )
        details = {key: value for key, value in candidates if value is not None}
        super().__init__(message, details)
def setup_logging() -> None:
    """
    Configure structured logging based on application settings.

    Sets up structlog processors for either JSON or console output
    based on the LOG_FORMAT setting.
    """
    settings = get_settings()

    # Map the configured level name onto a stdlib constant, falling
    # back to INFO for unrecognised names.
    level = getattr(logging, settings.LOG_LEVEL.upper(), logging.INFO)

    # Processor chain shared by both renderers.
    pipeline: list[Processor] = [
        structlog.contextvars.merge_contextvars,
        structlog.stdlib.add_log_level,
        structlog.stdlib.add_logger_name,
        structlog.stdlib.PositionalArgumentsFormatter(),
        structlog.processors.TimeStamper(fmt="iso"),
        structlog.processors.StackInfoRenderer(),
        structlog.processors.UnicodeDecoder(),
    ]

    if settings.LOG_FORMAT == "json":
        # Machine-readable output for production.
        pipeline.append(structlog.processors.format_exc_info)
        pipeline.append(structlog.processors.JSONRenderer())
    else:
        # Colourised, human-friendly output for development.
        pipeline.append(structlog.dev.ConsoleRenderer(colors=True))

    structlog.configure(
        processors=pipeline,
        wrapper_class=structlog.stdlib.BoundLogger,
        context_class=dict,
        logger_factory=structlog.stdlib.LoggerFactory(),
        cache_logger_on_first_use=True,
    )

    # Route stdlib logging through stdout at the configured level.
    logging.basicConfig(
        format="%(message)s",
        stream=sys.stdout,
        level=level,
    )

    # Quieten chatty third-party loggers.
    for noisy in ("uvicorn", "httpx", "httpcore"):
        logging.getLogger(noisy).setLevel(logging.WARNING)
+ """ + return structlog.get_logger(name) + + +class LoggerMixin: + """Mixin class that provides a logger property.""" + + @property + def logger(self) -> structlog.stdlib.BoundLogger: + """Get a logger bound to the class name.""" + return get_logger(self.__class__.__name__) + + +def log_request( + method: str, + path: str, + status_code: int, + duration_ms: float, + **kwargs: Any, +) -> None: + """ + Log an HTTP request with structured data. + + Args: + method: HTTP method (GET, POST, etc.) + path: Request path + status_code: Response status code + duration_ms: Request duration in milliseconds + **kwargs: Additional fields to log + """ + logger = get_logger("http") + logger.info( + "http_request", + method=method, + path=path, + status_code=status_code, + duration_ms=round(duration_ms, 2), + **kwargs, + ) diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..14fea829bea35c22a6a881aed34cdcb99cef486b --- /dev/null +++ b/requirements.txt @@ -0,0 +1,47 @@ +# ============================================================================= +# VoiceAuth API - Production Dependencies +# ============================================================================= + +# ----------------------------------------------------------------------------- +# Web Framework +# ----------------------------------------------------------------------------- +fastapi>=0.109.0 +uvicorn[standard]>=0.27.0 +pydantic>=2.5.0 +pydantic-settings>=2.1.0 +python-multipart>=0.0.6 +httpx>=0.26.0 + +# ----------------------------------------------------------------------------- +# Machine Learning +# ----------------------------------------------------------------------------- +torch>=2.1.0 +torchaudio>=2.1.0 +transformers>=4.36.0 +librosa>=0.10.1 +soundfile>=0.12.1 +numpy>=1.24.0,<2.0.0 +scipy>=1.11.0 + +# ----------------------------------------------------------------------------- +# Audio Processing +# 
def download_model(
    model_name: str = "facebook/wav2vec2-base",
    output_dir: str | None = None,
    force: bool = False,
) -> None:
    """
    Download and cache the Wav2Vec2 model.

    Fetches the processor and classification model from HuggingFace
    (or a local path), optionally saves a local copy, and prints
    progress to stdout.

    Args:
        model_name: HuggingFace model name or path
        output_dir: Optional local directory to save model
        force: Force re-download even if cached

    Raises:
        SystemExit: With status 1 if any step of the download fails.
    """
    print("\n" + "=" * 60)
    print("VoiceAuth - Model Download")
    print("=" * 60 + "\n")
    print(f"Model: {model_name}")

    if output_dir:
        print(f"Output: {output_dir}")

    print("\nDownloading model components...")
    print("-" * 40)

    try:
        # Import here to avoid slow imports if just checking args
        from transformers import Wav2Vec2ForSequenceClassification
        from transformers import Wav2Vec2Processor

        # Download processor
        print("\n[1/2] Downloading Wav2Vec2Processor...")
        processor = Wav2Vec2Processor.from_pretrained(
            model_name,
            force_download=force,
        )
        print("  [OK] Processor downloaded")

        # Download model.
        # num_labels=2 with the HUMAN/AI_GENERATED mapping configures a
        # binary classification head -- NOTE(review): for a base
        # checkpoint this head is presumably freshly initialized by
        # transformers; confirm fine-tuned weights exist before serving.
        print("\n[2/2] Downloading Wav2Vec2ForSequenceClassification...")
        model = Wav2Vec2ForSequenceClassification.from_pretrained(
            model_name,
            num_labels=2,
            label2id={"HUMAN": 0, "AI_GENERATED": 1},
            id2label={0: "HUMAN", 1: "AI_GENERATED"},
            force_download=force,
        )
        print("  [OK] Model downloaded")

        # Save to local directory if specified
        if output_dir:
            output_path = Path(output_dir)
            output_path.mkdir(parents=True, exist_ok=True)

            print(f"\nSaving to {output_path}...")
            processor.save_pretrained(output_path)
            model.save_pretrained(output_path)
            print("[OK] Model saved locally")

        print("\n" + "=" * 60)
        print("Download Complete!")
        print("=" * 60)

        # Show cache location (HF_HOME overrides the default HF cache dir)
        cache_dir = os.environ.get(
            "HF_HOME",
            os.path.expanduser("~/.cache/huggingface"),
        )
        print(f"\nCache location: {cache_dir}")

        if output_dir:
            print(f"Local copy: {output_dir}")

        print("\nYou can now start the API with:")
        print("  uvicorn app.main:app --reload")
        print()

    except Exception as e:
        # Broad catch is deliberate for a CLI script: report and exit
        # non-zero rather than dumping a traceback at the user.
        print(f"\n[ERROR] Error downloading model: {e}")
        sys.exit(1)
def generate_api_key(prefix: str = "sk_", length: int = 48) -> str:
    """
    Generate a secure API key.

    Args:
        prefix: Prefix for the API key (default: "sk_")
        length: Length of the random portion in characters (default: 48)

    Returns:
        Generated API key string of exactly ``len(prefix) + length``
        characters (the random portion is lowercase hex).

    Raises:
        ValueError: If ``length`` is negative.
    """
    if length < 0:
        raise ValueError("length must be non-negative")
    # token_hex(n) yields 2*n hex characters, so round the byte count
    # up and trim: the old `token_hex(length // 2)` silently produced
    # length - 1 characters whenever `length` was odd.
    random_part = secrets.token_hex((length + 1) // 2)[:length]
    return f"{prefix}{random_part}"
Use in requests:") + print(f' curl -H "x-api-key: {keys[0]}" ...') + print("\n" + "=" * 60 + "\n") + + # Save to .env if requested + if args.save: + env_path = Path(__file__).parent.parent / ".env" + + # Check if .env exists + if env_path.exists(): + # Read existing content + with open(env_path, "r") as f: + content = f.read() + + # Check if API_KEYS already exists + if "API_KEYS=" in content: + print(f"[WARNING] API_KEYS already exists in {env_path}") + print(" Please update it manually or remove the existing entry.") + sys.exit(1) + + # Append new keys + with open(env_path, "a") as f: + f.write(f"\nAPI_KEYS={','.join(keys)}\n") + + print(f"[OK] API keys appended to {env_path}") + else: + # Create new .env from template + template_path = Path(__file__).parent.parent / ".env.example" + + if template_path.exists(): + with open(template_path, "r") as f: + content = f.read() + + # Replace placeholder + content = content.replace( + "API_KEYS=sk_test_your_api_key_here", + f"API_KEYS={','.join(keys)}", + ) + + with open(env_path, "w") as f: + f.write(content) + + print(f"[OK] Created {env_path} with generated API keys") + else: + # Just create minimal .env + with open(env_path, "w") as f: + f.write(f"API_KEYS={','.join(keys)}\n") + + print(f"[OK] Created {env_path} with API keys") + + +if __name__ == "__main__": + main()