MacBook pro committed on
Commit d876213 · 1 parent: 9f8524f

feat(docker): switch to Docker Space GPU runtime; production WebRTC (aiortc) flow; remove legacy WebSocket endpoints; token auth; p50/p95 latency instrumentation; harden requirements

.gitignore CHANGED
@@ -19,7 +19,19 @@ Thumbs.db
 pip-wheel-metadata/
 .cache/
 coverage/
-models/
-!models/.gitkeep
+# Models: keep directory structure but ignore large weight/binary artifacts
+# (allow .gitkeep and README files for documentation).
+# Note: git does not brace-expand {pt,pth,...}, so each extension gets its own pattern.
+models/**/*.pt
+models/**/*.pth
+models/**/*.bin
+models/**/*.onnx
+models/**/*.safetensors
+models/**/*.npz
+models/**/*.ckpt
+!models/**/.gitkeep
+!models/**/README.md
+!models/**/README.txt
+!models/**/README
 checkpoints/
 !checkpoints/.gitkeep
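A quick way to confirm the expanded ignore rules behave as intended is `git check-ignore -v`, wrapped here in a short sketch (run from the repo root; the sample paths are illustrative, not files that exist in the repo):

```python
# Sanity-check the new .gitignore rules against representative paths.
import subprocess

samples = [
    "models/rvc/model.pth",            # should be ignored (weight artifact)
    "models/liveportrait/README.md",   # should be kept (documentation)
    "models/hubert/.gitkeep",          # should be kept (directory placeholder)
]
for path in samples:
    # `git check-ignore -v` prints the matching rule and exits 0 if ignored,
    # or exits 1 (with no output) if the path is not ignored.
    result = subprocess.run(["git", "check-ignore", "-v", path],
                            capture_output=True, text=True)
    status = "ignored" if result.returncode == 0 else "tracked"
    print(f"{path}: {status} {result.stdout.strip()}")
```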
Dockerfile CHANGED
@@ -1,20 +1,54 @@
-FROM pytorch/pytorch:2.3.1-cuda12.1-cudnn8-runtime
+## Docker runtime for Hugging Face GPU Space (A10G) in Docker mode
+## Single-stage image on Ubuntu 22.04 (Python 3.10) with CUDA 12.1 + cuDNN 8
+FROM nvidia/cuda:12.1.1-cudnn8-runtime-ubuntu22.04
 
-# Ensure consistent locale & no bytecode caching overhead
-ENV PYTHONUNBUFFERED=1 \
-    PYTHONDONTWRITEBYTECODE=1
+ENV DEBIAN_FRONTEND=noninteractive \
+    PYTHONUNBUFFERED=1 \
+    PYTHONDONTWRITEBYTECODE=1 \
+    CUDA_CACHE_PATH=/tmp/cuda_cache \
+    TORCH_CUDA_ARCH_LIST="8.6" \
+    CUDA_LAUNCH_BLOCKING=0 \
+    CUDA_VISIBLE_DEVICES=0
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    python3 \
+    python3-pip \
+    python3-dev \
+    build-essential \
+    git \
+    curl \
+    ffmpeg \
+    libsm6 \
+    libxext6 \
+    libgl1 \
+    libglib2.0-0 \
+    && rm -rf /var/lib/apt/lists/*
 
 WORKDIR /app
 
-# Install only Python dependencies (already includes CUDA libs via base image)
+# Install PyTorch with CUDA 12.1 first so the requirements resolver cannot override it
+RUN pip3 install --no-cache-dir --upgrade pip wheel setuptools \
+    && pip3 install --no-cache-dir \
+       torch==2.3.1+cu121 \
+       torchaudio==2.3.1+cu121 \
+       --index-url https://download.pytorch.org/whl/cu121
+
+# Copy requirements and install remaining Python dependencies
 COPY requirements.txt ./
-RUN pip install --no-cache-dir -r requirements.txt \
-    && pip cache purge || true
+RUN pip3 install --no-cache-dir -r requirements.txt
 
 # Copy application source
 COPY . /app
 
+# Create directories for models and checkpoints (if not already present)
+RUN mkdir -p /app/models/liveportrait /app/models/rvc /app/models/hubert /app/models/rmvpe /app/checkpoints /tmp/cuda_cache
+
+# Expose HTTP port
 EXPOSE 7860
 
-# Default command (can be overridden by Space / docker run)
-CMD ["uvicorn","app:app","--host","0.0.0.0","--port","7860"]
+# Health check
+HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
+    CMD curl -fsS http://localhost:7860/health || exit 1
+
+# Run FastAPI app with uvicorn (WebRTC endpoints + static UI)
+CMD ["uvicorn", "original_fastapi_app:app", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -3,7 +3,7 @@ title: Mirage Real-time AI Avatar
 emoji: 🎭
 colorFrom: blue
 colorTo: purple
-sdk: gradio
+sdk: docker
 sdk_version: 4.44.0
 app_file: app.py
 pinned: false
@@ -49,7 +49,7 @@ Transform yourself into an AI avatar in real-time with sub-250ms latency! Perfec
 - **Face Animation**: LivePortrait (KwaiVGI)
 - **Voice Conversion**: RVC (Retrieval-based Voice Conversion)
 - **Face Detection**: SCRFD with optimized inference
-- **Backend**: FastAPI with WebSocket streaming
+- **Backend**: FastAPI with WebRTC (aiortc)
 - **Frontend**: WebRTC-enabled real-time client
 - **GPU**: NVIDIA A10G with CUDA optimization
 
@@ -130,10 +130,10 @@ The system automatically adapts quality based on performance:
 ## 🛠️ Development
 
 Built with modern technologies:
-- FastAPI for high-performance backend
+- FastAPI for high-performance backend (Docker entrypoint: uvicorn original_fastapi_app:app)
 - PyTorch with CUDA acceleration
 - OpenCV for image processing
-- WebSocket for real-time communication
+- WebRTC (aiortc) for real-time media transport
 - Docker for consistent deployment
 
 ## 📄 License
avatar_pipeline.py CHANGED
@@ -434,12 +434,21 @@ class RealTimeAvatarPipeline:
         opt_stats = self.optimizer.get_comprehensive_stats()
 
         # Basic pipeline stats
+        def _percentile(arr, p):
+            if not arr:
+                return 0
+            return float(np.percentile(np.array(arr), p))
+
         pipeline_stats = {
             "video_fps": len(video_times) / max(sum(video_times) / 1000, 0.001) if video_times else 0,
-            "avg_video_latency_ms": np.mean(video_times) if video_times else 0,
-            "avg_audio_latency_ms": np.mean(audio_times) if audio_times else 0,
-            "max_video_latency_ms": np.max(video_times) if video_times else 0,
-            "max_audio_latency_ms": np.max(audio_times) if audio_times else 0,
+            "avg_video_latency_ms": float(np.mean(video_times)) if video_times else 0,
+            "p50_video_latency_ms": _percentile(video_times, 50),
+            "p95_video_latency_ms": _percentile(video_times, 95),
+            "avg_audio_latency_ms": float(np.mean(audio_times)) if audio_times else 0,
+            "p50_audio_latency_ms": _percentile(audio_times, 50),
+            "p95_audio_latency_ms": _percentile(audio_times, 95),
+            "max_video_latency_ms": float(np.max(video_times)) if video_times else 0,
+            "max_audio_latency_ms": float(np.max(audio_times)) if audio_times else 0,
             "models_loaded": self.loaded,
             "gpu_available": torch.cuda.is_available(),
             "gpu_memory_used": torch.cuda.memory_allocated() / 1024**3 if torch.cuda.is_available() else 0,
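For reference, this is what the new p50/p95 fields report on synthetic data (the sample latencies are illustrative, not real pipeline measurements); p95 surfaces tail frames that the mean alone would mask:

```python
import numpy as np

video_times = [42.0, 45.5, 41.2, 90.3, 44.8, 43.1]  # per-frame latency, ms

def _percentile(arr, p):
    if not arr:
        return 0
    return float(np.percentile(np.array(arr), p))

print({
    "avg": float(np.mean(video_times)),   # pulled up by the one 90 ms frame
    "p50": _percentile(video_times, 50),  # typical frame
    "p95": _percentile(video_times, 95),  # tail latency the commit now tracks
})
```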
deploy.sh ADDED
@@ -0,0 +1,130 @@
+#!/bin/bash
+# Deployment script for Mirage Real-time AI Avatar System
+
+set -e
+
+echo "🎭 Mirage Real-time AI Avatar - Deployment Script"
+echo "=================================================="
+
+# Check if we're deploying to HuggingFace Spaces
+if [[ "${SPACE_ID}" ]]; then
+    echo "📡 Deploying to HuggingFace Spaces: ${SPACE_ID}"
+    DEPLOYMENT_TARGET="huggingface"
+else
+    echo "🐳 Local Docker deployment"
+    DEPLOYMENT_TARGET="local"
+fi
+
+# Set environment variables for optimal A10G performance
+export CUDA_VISIBLE_DEVICES=0
+export TORCH_CUDA_ARCH_LIST="8.6"  # A10G architecture
+export CUDA_LAUNCH_BLOCKING=0
+export MIRAGE_VOICE_ENABLE=1
+export MIRAGE_CHUNK_MS=160
+export MIRAGE_VIDEO_MAX_FPS=20
+
+echo "🔧 Environment configured for A10G GPU"
+
+# Download required models
+echo "📥 Downloading AI models..."
+
+# Create model directories
+mkdir -p models/{liveportrait,rvc,hubert,rmvpe}
+mkdir -p checkpoints
+
+# Function to download from HuggingFace with retry
+download_hf_model() {
+    local repo=$1
+    local filename=$2
+    local output_dir=$3
+    local max_retries=3
+    local retry_count=0
+
+    while [ $retry_count -lt $max_retries ]; do
+        if python3 -c "
+from huggingface_hub import hf_hub_download
+import os
+try:
+    hf_hub_download('$repo', '$filename', local_dir='$output_dir', local_dir_use_symlinks=False)
+    print('✅ Downloaded $filename')
+except Exception as e:
+    print(f'❌ Failed to download $filename: {e}')
+    exit(1)
+"; then
+            break
+        fi
+
+        retry_count=$((retry_count + 1))
+        echo "⏳ Retry $retry_count/$max_retries for $filename"
+        sleep 2
+    done
+
+    if [ $retry_count -eq $max_retries ]; then
+        echo "❌ Failed to download $filename after $max_retries retries"
+        return 1
+    fi
+}
+
+# Download LivePortrait models (if available)
+if python3 -c "from huggingface_hub import HfApi; api = HfApi(); print('✅ HuggingFace available')" 2>/dev/null; then
+    echo "🎨 Attempting to download LivePortrait models..."
+    # Note: These would be the actual model files when available
+    # download_hf_model "KwaiVGI/LivePortrait" "appearance_feature_extractor.pth" "models/liveportrait"
+    # download_hf_model "KwaiVGI/LivePortrait" "motion_extractor.pth" "models/liveportrait"
+    # download_hf_model "KwaiVGI/LivePortrait" "warping_module.pth" "models/liveportrait"
+    # download_hf_model "KwaiVGI/LivePortrait" "spade_generator.pth" "models/liveportrait"
+    echo "ℹ️ LivePortrait models will be downloaded on first use"
+else
+    echo "⚠️ HuggingFace Hub not available, models will be downloaded at runtime"
+fi
+
+# Verify GPU availability
+echo "🔍 Checking GPU configuration..."
+python3 -c "
+import torch
+print(f'PyTorch version: {torch.__version__}')
+print(f'CUDA available: {torch.cuda.is_available()}')
+if torch.cuda.is_available():
+    print(f'GPU: {torch.cuda.get_device_name(0)}')
+    print(f'CUDA version: {torch.version.cuda}')
+    print(f'GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB')
+else:
+    print('⚠️ GPU not available - running in CPU mode')
+"
+
+# Setup virtual camera (Linux only)
+if [[ "$OSTYPE" == "linux-gnu"* ]]; then
+    echo "📹 Setting up virtual camera (v4l2loopback)..."
+
+    # Check if v4l2loopback is available
+    if modprobe v4l2loopback devices=1 video_nr=10 card_label="Mirage Virtual Camera" 2>/dev/null; then
+        echo "✅ Virtual camera device created: /dev/video10"
+    else
+        echo "⚠️ Could not create virtual camera device (requires sudo)"
+        echo "💡 Run: sudo modprobe v4l2loopback devices=1 video_nr=10 card_label='Mirage Virtual Camera'"
+    fi
+fi
+
+# Start the application
+echo "🚀 Starting Mirage AI Avatar System..."
+
+if [[ "${DEPLOYMENT_TARGET}" == "huggingface" ]]; then
+    # HuggingFace Spaces deployment
+    echo "🤗 Running on HuggingFace Spaces with A10G GPU"
+    exec python3 -u app.py
+else
+    # Local deployment
+    echo "💻 Running locally"
+
+    # Check if port 7860 is available
+    if lsof -Pi :7860 -sTCP:LISTEN -t >/dev/null; then
+        echo "⚠️ Port 7860 is already in use"
+        PORT=7861
+    else
+        PORT=7860
+    fi
+
+    echo "🌐 Server will be available at: http://localhost:${PORT}"
+    export PORT=${PORT}
+    exec python3 -u app.py
+fi
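The retry loop in `download_hf_model` shells out to Python; an equivalent pure-Python sketch using the same `hf_hub_download` call (the repo/filename in the usage comment are the commented-out examples from the script above):

```python
import time
from huggingface_hub import hf_hub_download

def download_with_retry(repo: str, filename: str, output_dir: str,
                        max_retries: int = 3) -> str:
    """Download one file from the Hub, retrying transient failures."""
    last_exc = None
    for attempt in range(1, max_retries + 1):
        try:
            return hf_hub_download(repo, filename, local_dir=output_dir)
        except Exception as exc:  # network errors, missing files, auth failures
            last_exc = exc
            print(f"⏳ Retry {attempt}/{max_retries} for {filename}: {exc}")
            time.sleep(2)
    raise RuntimeError(f"Failed to download {filename} after {max_retries} retries") from last_exc

# e.g. download_with_retry("KwaiVGI/LivePortrait", "motion_extractor.pth", "models/liveportrait")
```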
fastapi_app.py CHANGED
@@ -1,368 +1,211 @@
-from fastapi import FastAPI, WebSocket, WebSocketDisconnect, HTTPException, File, UploadFile
-from fastapi.responses import HTMLResponse, JSONResponse
-from fastapi.staticfiles import StaticFiles
-from pathlib import Path
-import traceback
-import time
-import array
-import subprocess
-import json
-import os
-import asyncio
-import numpy as np
-import cv2
-from typing import Any, Dict, List
-from metrics import metrics as _metrics_singleton, Metrics
-from config import config
-from voice_processor import voice_processor
-from avatar_pipeline import get_pipeline
-
-app = FastAPI(title="Mirage Real-time AI Avatar System")
-
-# Initialize AI pipeline
-pipeline = get_pipeline()
-pipeline_initialized = False
-
-# Potentially reconfigure metrics based on config
-if config.metrics_fps_window != 30:  # default in metrics module
-    metrics = Metrics(fps_window=config.metrics_fps_window)
-else:
-    metrics = _metrics_singleton
-
-# Mount the static directory
-static_dir = Path(__file__).parent / "static"
-app.mount("/static", StaticFiles(directory=str(static_dir)), name="static")
-
-
-@app.get("/", response_class=HTMLResponse)
-async def root():
-    """Serve the static/index.html file contents as HTML."""
-    index_path = static_dir / "index.html"
-    try:
-        content = index_path.read_text(encoding="utf-8")
-    except FileNotFoundError:
-        # Minimal fallback to satisfy route even if file not yet present.
-        content = "<html><body><h1>Mirage AI Avatar System</h1><p>Real-time AI avatar with face animation and voice conversion.</p></body></html>"
-    return HTMLResponse(content)
-
-
-@app.get("/health")
-async def health():
-    return {
-        "status": "ok",
-        "system": "real-time-ai-avatar",
-        "pipeline_loaded": pipeline_initialized,
-        "gpu_available": pipeline.config.device == "cuda"
-    }
-
-
-@app.post("/initialize")
-async def initialize_pipeline():
-    """Initialize the AI pipeline"""
-    global pipeline_initialized
-
-    if pipeline_initialized:
-        return {"status": "already_initialized", "message": "Pipeline already loaded"}
-
-    try:
-        success = await pipeline.initialize()
-        if success:
-            pipeline_initialized = True
-            return {"status": "success", "message": "Pipeline initialized successfully"}
-        else:
-            return {"status": "error", "message": "Failed to initialize pipeline"}
-    except Exception as e:
-        return {"status": "error", "message": f"Initialization error: {str(e)}"}
-
-
-@app.post("/set_reference")
-async def set_reference_image(file: UploadFile = File(...)):
-    """Set reference image for avatar"""
-    global pipeline_initialized
-
-    if not pipeline_initialized:
-        raise HTTPException(status_code=400, detail="Pipeline not initialized")
-
-    try:
-        # Read uploaded image
-        contents = await file.read()
-        nparr = np.frombuffer(contents, np.uint8)
-        frame = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
-
-        if frame is None:
-            raise HTTPException(status_code=400, detail="Invalid image format")
-
-        # Set as reference frame
-        success = pipeline.set_reference_frame(frame)
-
-        if success:
-            return {"status": "success", "message": "Reference image set successfully"}
-        else:
-            return {"status": "error", "message": "No suitable face found in image"}
-
-    except Exception as e:
-        return {"status": "error", "message": f"Error setting reference: {str(e)}"}
-
-
-# Frame counter for processing
-frame_counter = 0
-
-async def _process_websocket(websocket: WebSocket, kind: str):
-    """Enhanced WebSocket handler with AI processing"""
-    global frame_counter, pipeline_initialized
-
-    await websocket.accept()
-    last_ts = time.time() * 1000.0 if kind == "audio" else None
-
-    while True:
-        try:
-            data = await websocket.receive_bytes()
-            size = len(data)
-
-            if kind == "audio":
-                now = time.time() * 1000.0
-                interval = None
-                if last_ts is not None:
-                    interval = now - last_ts
-
-                infer_ms = None
-                # Convert raw bytes -> int16 array for processing path
-                pcm_int16 = array.array('h')
-                pcm_int16.frombytes(data)
-
-                if config.voice_enable and pipeline_initialized:
-                    # AI voice conversion
-                    audio_np = np.array(pcm_int16, dtype=np.int16)
-                    processed_audio = pipeline.process_audio_chunk(audio_np)
-                    data = processed_audio.astype(np.int16).tobytes()
-                    infer_ms = 50  # Placeholder timing
-                elif config.voice_enable:
-                    # Fallback to voice processor
-                    processed_view, infer_ms = voice_processor.process_pcm_int16(pcm_int16.tobytes(), sample_rate=16000)
-                    data = processed_view.tobytes()
-                else:
-                    # Pass-through
-                    data = pcm_int16.tobytes()
-
-                metrics.record_audio_chunk(size_bytes=size, loop_interval_ms=interval, infer_time_ms=infer_ms)
-                last_ts = now
-
-            elif kind == "video":
-                if pipeline_initialized:
-                    try:
-                        # Decode JPEG frame
-                        nparr = np.frombuffer(data, np.uint8)
-                        frame = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
-
-                        if frame is not None:
-                            # AI face animation
-                            processed_frame = pipeline.process_video_frame(frame, frame_counter)
-                            frame_counter += 1
-
-                            # Encode back to JPEG
-                            _, encoded = cv2.imencode('.jpg', processed_frame, [cv2.IMWRITE_JPEG_QUALITY, 65])
-                            data = encoded.tobytes()
-                    except Exception as e:
-                        print(f"Video processing error: {e}")
-                        # Fallback to original data
-                        pass
-
-                metrics.record_video_frame(size_bytes=size)
-
-            # Send processed data back
-            await websocket.send_bytes(data)
-
-        except WebSocketDisconnect:
-            break
-        except Exception:
-            print(f"[{kind} ws] Unexpected error:")
-            traceback.print_exc()
-            break
-
-
-@app.websocket("/audio")
-async def audio_ws(websocket: WebSocket):
-    await _process_websocket(websocket, "audio")
-
-
-@app.websocket("/video")
-async def video_ws(websocket: WebSocket):
-    await _process_websocket(websocket, "video")
-
-
-@app.get("/metrics")
-async def get_metrics():
-    base_metrics = metrics.snapshot()
-
-    # Add AI pipeline metrics if available
-    if pipeline_initialized:
-        pipeline_stats = pipeline.get_performance_stats()
-        base_metrics.update({
-            "ai_pipeline": pipeline_stats
-        })
-
-    return base_metrics
-
-
-@app.get("/pipeline_status")
-async def get_pipeline_status():
-    """Get detailed pipeline status"""
-    if not pipeline_initialized:
-        return {
-            "initialized": False,
-            "message": "Pipeline not initialized"
-        }
-
-    try:
-        stats = pipeline.get_performance_stats()
-        return {
-            "initialized": True,
-            "stats": stats,
-            "reference_set": pipeline.reference_frame is not None
-        }
-    except Exception as e:
-        return {
-            "initialized": False,
-            "error": str(e)
-        }
-
-
-@app.get("/gpu")
-async def gpu_info():
-    """Return basic GPU availability and memory statistics."""
-    # (Implementation moved unchanged to original_fastapi_app.py; see that file below.)
-    ...
-
-
-@app.on_event("startup")
-async def log_config():
-    # (Startup logging moved unchanged to original_fastapi_app.py; see that file below.)
-    ...
-
-
-# Note: The Dockerfile / README launch with: uvicorn app:app --port 7860
-if __name__ == "__main__":  # Optional direct run helper
-    import uvicorn  # type: ignore
-
-    uvicorn.run("app:app", host="0.0.0.0", port=7860, reload=False)
+#!/usr/bin/env python3
+"""
+Gradio interface for Mirage AI Avatar System
+Wraps the existing FastAPI application for HuggingFace Spaces deployment
+"""
+import gradio as gr
+import asyncio
+import threading
+import uvicorn
+import time
+import requests
+import os
+import sys
+from pathlib import Path
+
+# Add current directory to path for imports
+sys.path.append(str(Path(__file__).parent))
+
+# Import the existing FastAPI app (now living in original_fastapi_app.py;
+# importing from fastapi_app here would be a self-import)
+from original_fastapi_app import app as fastapi_app
+
+class MirageInterface:
+    def __init__(self):
+        self.server_port = 7860  # Gradio default port
+        self.fastapi_port = 8000
+        self.server_thread = None
+        self.server_running = False
+
+    def start_fastapi_server(self):
+        """Start the FastAPI server in background"""
+        try:
+            uvicorn.run(
+                fastapi_app,
+                host="0.0.0.0",
+                port=self.fastapi_port,
+                log_level="info"
+            )
+        except Exception as e:
+            print(f"FastAPI server error: {e}")
+
+    def initialize_system(self):
+        """Initialize the AI pipeline"""
+        try:
+            response = requests.post(f"http://localhost:{self.fastapi_port}/initialize")
+            if response.status_code == 200:
+                return "✅ AI Pipeline initialized successfully!"
+            else:
+                return f"❌ Initialization failed: {response.text}"
+        except Exception as e:
+            return f"❌ Connection error: {str(e)}"
+
+    def upload_reference_image(self, image):
+        """Upload reference image for avatar"""
+        if image is None:
+            return "❌ Please upload an image first"
+
+        try:
+            # Save uploaded image temporarily
+            image_path = "/tmp/reference_image.jpg"
+            image.save(image_path)
+
+            with open(image_path, "rb") as f:
+                files = {"file": f}
+                response = requests.post(
+                    f"http://localhost:{self.fastapi_port}/set_reference",
+                    files=files
+                )
+
+            if response.status_code == 200:
+                return "✅ Reference image uploaded successfully!"
+            else:
+                return f"❌ Upload failed: {response.text}"
+        except Exception as e:
+            return f"❌ Upload error: {str(e)}"
+
+    def get_system_status(self):
+        """Get current system status"""
+        try:
+            response = requests.get(f"http://localhost:{self.fastapi_port}/health")
+            if response.status_code == 200:
+                data = response.json()
+                return f"🟢 System Status: {data.get('status', 'Unknown')}"
+            else:
+                return "🔴 System offline"
+        except Exception:
+            return "🔴 Cannot connect to system"
+
+def create_interface():
+    """Create the Gradio interface"""
+    mirage = MirageInterface()
+
+    # Start FastAPI server in background thread
+    server_thread = threading.Thread(target=mirage.start_fastapi_server, daemon=True)
+    server_thread.start()
+
+    # Wait a moment for server to start
+    time.sleep(2)
+
+    with gr.Blocks(
+        title="Mirage AI Avatar System",
+        theme=gr.themes.Soft(),
+        css="""
+        .gradio-container {
+            font-family: 'Arial', sans-serif;
+        }
+        .main-header {
+            text-align: center;
+            background: linear-gradient(45deg, #667eea 0%, #764ba2 100%);
+            -webkit-background-clip: text;
+            -webkit-text-fill-color: transparent;
+            font-size: 2.5em;
+            font-weight: bold;
+            margin-bottom: 20px;
+        }
+        """
+    ) as interface:
+
+        gr.HTML('<h1 class="main-header">🎭 Mirage AI Avatar System</h1>')
+        gr.Markdown("""
+        **Real-time AI Avatar with Face Animation & Voice Conversion**
+
+        Transform your appearance and voice in real-time for video calls. Built with LivePortrait and RVC.
+        """)
+
+        with gr.Row():
+            with gr.Column(scale=1):
+                gr.Markdown("## 📋 System Setup")
+
+                init_btn = gr.Button("🚀 Initialize AI Pipeline", variant="primary")
+                init_status = gr.Textbox(label="Initialization Status", interactive=False)
+
+                gr.Markdown("## 🖼️ Reference Image")
+                reference_image = gr.Image(
+                    label="Upload your reference photo",
+                    type="pil",
+                    height=300
+                )
+                upload_btn = gr.Button("📤 Set Reference Image", variant="secondary")
+                upload_status = gr.Textbox(label="Upload Status", interactive=False)
+
+            with gr.Column(scale=2):
+                gr.Markdown("## 🎥 Live Avatar Interface")
+
+                gr.HTML(f"""
+                <iframe
+                    src="http://localhost:{mirage.fastapi_port}/"
+                    width="100%"
+                    height="600px"
+                    frameborder="0"
+                    style="border-radius: 10px; border: 2px solid #ddd;">
+                </iframe>
+                """)
+
+                status_btn = gr.Button("🔍 Check System Status")
+                system_status = gr.Textbox(label="System Status", interactive=False)
+
+        gr.Markdown("""
+        ## 🎯 How to Use
+
+        1. **Initialize**: Click "Initialize AI Pipeline" and wait for confirmation
+        2. **Reference**: Upload a clear photo of the person you want to become
+        3. **Setup**: Click "Set Reference Image" to configure your avatar
+        4. **Go Live**: Use the interface above to start your camera and see your AI avatar!
+
+        ## 🚀 Features
+
+        - **Real-time Processing**: <250ms latency for smooth interaction
+        - **Face Animation**: Powered by LivePortrait technology
+        - **Voice Conversion**: RVC-based voice transformation
+        - **GPU Accelerated**: Optimized for NVIDIA A10G hardware
+        - **Virtual Camera**: Ready for Zoom, Teams, Discord integration
+
+        ## ⚙️ Technical Details
+
+        - **Backend**: FastAPI with WebRTC (aiortc)
+        - **Models**: InsightFace + LivePortrait + RVC
+        - **Hardware**: NVIDIA A10G GPU with CUDA 12.1
+        - **Performance**: 20 FPS video, 160ms audio chunks
+        """)
+
+        # Event handlers
+        init_btn.click(
+            fn=mirage.initialize_system,
+            outputs=init_status
+        )
+
+        upload_btn.click(
+            fn=mirage.upload_reference_image,
+            inputs=reference_image,
+            outputs=upload_status
+        )
+
+        status_btn.click(
+            fn=mirage.get_system_status,
+            outputs=system_status
+        )
+
+    return interface
+
+if __name__ == "__main__":
+    # Create and launch the interface
+    interface = create_interface()
+
+    # Launch; HuggingFace Spaces handles external routing, so no share link is needed
+    interface.launch(
+        server_name="0.0.0.0",
+        server_port=7860,
+        share=False,  # HF Spaces handles sharing
+        show_error=True,
+        quiet=False
+    )
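An alternative to the background-thread-plus-iframe pattern in this wrapper: Gradio 4.x can mount a Blocks UI directly onto an existing FastAPI app, so a single uvicorn process on one port serves both. A minimal sketch (the module name `mounted_app` and the placeholder Blocks are illustrative, not part of this commit):

```python
import gradio as gr
from original_fastapi_app import app  # FastAPI app with /health, /webrtc, static UI

with gr.Blocks() as demo:
    gr.Markdown("🎭 Mirage control panel placeholder")

# Single-port deployment:  uvicorn mounted_app:app --host 0.0.0.0 --port 7860
app = gr.mount_gradio_app(app, demo, path="/ui")
```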
models/hubert/.gitkeep ADDED
File without changes
models/rmvpe/.gitkeep ADDED
File without changes
models/rvc/.gitkeep ADDED
File without changes
original_fastapi_app.py ADDED
@@ -0,0 +1,290 @@
+from fastapi import FastAPI, HTTPException, File, UploadFile
+from fastapi.responses import HTMLResponse, JSONResponse
+from fastapi.staticfiles import StaticFiles
+from pathlib import Path
+import traceback
+import time
+import subprocess
+import json
+import os
+import asyncio
+import numpy as np
+import cv2
+from typing import Any, Dict, List
+from metrics import metrics as _metrics_singleton, Metrics
+from config import config
+from voice_processor import voice_processor
+from avatar_pipeline import get_pipeline
+
+app = FastAPI(title="Mirage Real-time AI Avatar System")
+
+# Initialize AI pipeline
+pipeline = get_pipeline()
+pipeline_initialized = False
+
+# Potentially reconfigure metrics based on config
+if config.metrics_fps_window != 30:  # default in metrics module
+    metrics = Metrics(fps_window=config.metrics_fps_window)
+else:
+    metrics = _metrics_singleton
+
+# Mount the static directory
+static_dir = Path(__file__).parent / "static"
+app.mount("/static", StaticFiles(directory=str(static_dir)), name="static")
+
+# Mount WebRTC router (aiortc based)
+try:
+    from webrtc_server import router as webrtc_router  # type: ignore
+    app.include_router(webrtc_router)
+except Exception as e:  # pragma: no cover
+    print(f"[WARN] WebRTC router not loaded: {e}")
+
+@app.get("/", response_class=HTMLResponse)
+async def root():
+    """Serve the static/index.html file contents as HTML."""
+    index_path = static_dir / "index.html"
+    try:
+        content = index_path.read_text(encoding="utf-8")
+    except FileNotFoundError:
+        # Minimal fallback to satisfy route even if file not yet present.
+        content = "<html><body><h1>Mirage AI Avatar System</h1><p>Real-time AI avatar with face animation and voice conversion.</p></body></html>"
+    return HTMLResponse(content)
+
+
+@app.get("/health")
+async def health():
+    return {
+        "status": "ok",
+        "system": "real-time-ai-avatar",
+        "pipeline_loaded": pipeline_initialized,
+        "gpu_available": pipeline.config.device == "cuda"
+    }
+
+
+@app.post("/initialize")
+async def initialize_pipeline():
+    """Initialize the AI pipeline"""
+    global pipeline_initialized
+
+    if pipeline_initialized:
+        return {"status": "already_initialized", "message": "Pipeline already loaded"}
+
+    try:
+        success = await pipeline.initialize()
+        if success:
+            pipeline_initialized = True
+            return {"status": "success", "message": "Pipeline initialized successfully"}
+        else:
+            return {"status": "error", "message": "Failed to initialize pipeline"}
+    except Exception as e:
+        return {"status": "error", "message": f"Initialization error: {str(e)}"}
+
+
+@app.post("/set_reference")
+async def set_reference_image(file: UploadFile = File(...)):
+    """Set reference image for avatar"""
+    global pipeline_initialized
+
+    if not pipeline_initialized:
+        raise HTTPException(status_code=400, detail="Pipeline not initialized")
+
+    try:
+        # Read uploaded image
+        contents = await file.read()
+        nparr = np.frombuffer(contents, np.uint8)
+        frame = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
+
+        if frame is None:
+            raise HTTPException(status_code=400, detail="Invalid image format")
+
+        # Set as reference frame
+        success = pipeline.set_reference_frame(frame)
+
+        if success:
+            return {"status": "success", "message": "Reference image set successfully"}
+        else:
+            return {"status": "error", "message": "No suitable face found in image"}
+
+    except Exception as e:
+        return {"status": "error", "message": f"Error setting reference: {str(e)}"}
+
+
+# Note: Legacy WebSocket streaming endpoints removed in production.
+
+
+@app.get("/metrics")
+async def get_metrics():
+    base_metrics = metrics.snapshot()
+
+    # Add AI pipeline metrics if available
+    if pipeline_initialized:
+        pipeline_stats = pipeline.get_performance_stats()
+        base_metrics.update({
+            "ai_pipeline": pipeline_stats
+        })
+
+    return base_metrics
+
+
+@app.get("/pipeline_status")
+async def get_pipeline_status():
+    """Get detailed pipeline status"""
+    if not pipeline_initialized:
+        return {
+            "initialized": False,
+            "message": "Pipeline not initialized"
+        }
+
+    try:
+        stats = pipeline.get_performance_stats()
+        return {
+            "initialized": True,
+            "stats": stats,
+            "reference_set": pipeline.reference_frame is not None
+        }
+    except Exception as e:
+        return {
+            "initialized": False,
+            "error": str(e)
+        }
+
+
+@app.get("/gpu")
+async def gpu_info():
+    """Return basic GPU availability and memory statistics.
+
+    Priority order:
+    1. torch (if installed and CUDA available) for detailed stats per device.
+    2. nvidia-smi (if executable present) for name/total/used.
+    3. Fallback: available false.
+    """
+    # Response scaffold
+    resp: Dict[str, Any] = {
+        "available": False,
+        "provider": None,
+        "device_count": 0,
+        "devices": [],  # type: ignore[list-item]
+    }
+
+    # Try torch first (lazy import)
+    try:
+        import torch  # type: ignore
+
+        if torch.cuda.is_available():
+            resp["available"] = True
+            resp["provider"] = "torch"
+            count = torch.cuda.device_count()
+            resp["device_count"] = count
+            devices: List[Dict[str, Any]] = []
+            for idx in range(count):
+                name = torch.cuda.get_device_name(idx)
+                try:
+                    free_bytes, total_bytes = torch.cuda.mem_get_info(idx)  # type: ignore[arg-type]
+                except TypeError:
+                    # Older PyTorch versions take no index
+                    free_bytes, total_bytes = torch.cuda.mem_get_info()
+                allocated = torch.cuda.memory_allocated(idx)
+                reserved = torch.cuda.memory_reserved(idx)
+                # Estimate free including unallocated reserved as reclaimable
+                est_free = free_bytes + max(reserved - allocated, 0)
+                to_mb = lambda b: round(b / (1024 * 1024), 2)
+                devices.append({
+                    "index": idx,
+                    "name": name,
+                    "total_mb": to_mb(total_bytes),
+                    "allocated_mb": to_mb(allocated),
+                    "reserved_mb": to_mb(reserved),
+                    "free_mem_get_info_mb": to_mb(free_bytes),
+                    "free_estimate_mb": to_mb(est_free),
+                })
+            resp["devices"] = devices
+            return resp
+    except Exception:  # noqa: BLE001
+        # Torch not installed or failed; fall through to nvidia-smi
+        pass
+
+    # Try nvidia-smi fallback
+    try:
+        cmd = [
+            "nvidia-smi",
+            "--query-gpu=name,memory.total,memory.used",
+            "--format=csv,noheader,nounits",
+        ]
+        out = subprocess.check_output(cmd, stderr=subprocess.STDOUT, timeout=2).decode("utf-8").strip()
+        lines = [l for l in out.splitlines() if l.strip()]
+        if lines:
+            resp["available"] = True
+            resp["provider"] = "nvidia-smi"
+            resp["device_count"] = len(lines)
+            devices: List[Dict[str, Any]] = []
+            for idx, line in enumerate(lines):
+                # Expect: name, total, used
+                parts = [p.strip() for p in line.split(',')]
+                if len(parts) >= 3:
+                    name, total_str, used_str = parts[:3]
+                    try:
+                        total = float(total_str)
+                        used = float(used_str)
+                        free = max(total - used, 0)
+                    except ValueError:
+                        total = used = free = 0.0
+                    devices.append({
+                        "index": idx,
+                        "name": name,
+                        "total_mb": total,
+                        "allocated_mb": used,  # approximate
+                        "reserved_mb": None,
+                        "free_estimate_mb": free,
+                    })
+            resp["devices"] = devices
+            return resp
+    except Exception:  # noqa: BLE001
+        pass
+
+    return resp
+
+
+@app.on_event("startup")
+async def log_config():
+    # Enhanced startup logging: core config + GPU availability summary.
+    cfg = config.as_dict()
+    # GPU probe (reuse gpu_info logic minimally without full device list to keep log concise)
+    gpu_available = False
+    gpu_name = None
+    try:
+        import torch  # type: ignore
+        if torch.cuda.is_available():
+            gpu_available = True
+            gpu_name = torch.cuda.get_device_name(0)
+        else:
+            # Fallback quick nvidia-smi single line
+            try:
+                out = subprocess.check_output([
+                    "nvidia-smi", "--query-gpu=name", "--format=csv,noheader,nounits"
+                ], stderr=subprocess.STDOUT, timeout=1).decode("utf-8").strip().splitlines()
+                if out:
+                    gpu_available = True
+                    gpu_name = out[0].strip()
+            except Exception:  # noqa: BLE001
+                pass
+    except Exception:  # noqa: BLE001
+        pass
+    # Honor dynamic PORT if provided (HF Spaces usually fixed at 7860 for docker, but logging helps debugging)
+    listen_port = int(os.getenv("PORT", "7860"))
+    startup_line = {
+        "chunk_ms": cfg.get("chunk_ms"),
+        "voice_enabled": cfg.get("voice_enable"),
+        "metrics_fps_window": cfg.get("metrics_fps_window"),
+        "video_fps_limit": cfg.get("video_max_fps"),
+        "port": listen_port,
+        "gpu_available": gpu_available,
+        "gpu_name": gpu_name,
+    }
+    print("[startup]", startup_line)
+
+
+# Note: The Dockerfile launches this module with: uvicorn original_fastapi_app:app --port 7860
+if __name__ == "__main__":  # Optional direct run helper
+    import uvicorn  # type: ignore
+
+    uvicorn.run("original_fastapi_app:app", host="0.0.0.0", port=7860, reload=False)
requirements.txt CHANGED
@@ -1,25 +1,15 @@
-# Core Dependencies
-gradio==4.44.0
-torch==2.3.1
-opencv-python-headless==4.9.0.80
-pillow==10.3.0
-
-# Optional - loaded on demand
 fastapi==0.111.0
 uvicorn[standard]==0.30.1
-transformers==4.44.2
+numpy==1.26.4
+opencv-python-headless==4.9.0.80
+pillow==10.3.0
+psutil==5.9.8
 insightface==0.7.3
+transformers==4.44.2
 librosa==0.10.2
-
-# ONNX & GPU Acceleration
 onnx==1.16.1
 onnxruntime-gpu==1.18.1
-
-# System & Utils
-# NOTE: Keep a single modern numpy pin to avoid resolver conflicts (removed older 1.24.0 above)
-numpy==1.26.4
-psutil==5.9.8
-
-# Optional GPU Optimization (may not be available on HF Spaces)
-# tensorrt==10.3.0
-# pycuda==2024.1.2
+huggingface-hub==0.24.5
+python-multipart==0.0.9
+aiortc==1.7.0
+av==11.0.0
requirements_old.txt ADDED
@@ -0,0 +1,38 @@
+fastapi==0.111.0
+uvicorn[standard]==0.30.1
+websockets==12.0
+jinja2==3.1.4
+numpy==1.26.4
+psutil==5.9.8
+pillow==10.3.0
+torch==2.3.1
+torchaudio==2.3.1
+opencv-python==4.9.0.80
+insightface==0.7.3
+onnx==1.16.1
+onnxruntime-gpu==1.18.1
+huggingface-hub==0.24.5
+transformers==4.44.2
+accelerate==0.33.0
+diffusers==0.30.0
+python-multipart==0.0.9
+librosa==0.10.2
+scipy==1.13.1
+scikit-image==0.24.0
+opencv-python==4.8.1.78
+transformers==4.42.4
+diffusers==0.29.2
+accelerate==0.33.0
+xformers==0.0.27.post2
+librosa==0.10.2
+scipy==1.11.4
+scikit-image==0.22.0
+omegaconf==2.3.0
+insightface==0.7.3
+onnxruntime-gpu==1.18.1
+huggingface-hub==0.24.5
+safetensors==0.4.4
+einops==0.8.0
+av==12.3.0
+tensorrt==10.3.0
+pycuda==2024.1.2
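The legacy file above pins several packages twice at conflicting versions (opencv-python, transformers, diffusers, scipy, scikit-image), which is presumably why it was retired. A small checker like this sketch catches such duplicates before pip's resolver does:

```python
from collections import defaultdict

def find_duplicate_pins(path: str) -> dict:
    """Return {package: [versions]} for packages pinned more than once."""
    pins = defaultdict(set)
    with open(path) as f:
        for line in f:
            line = line.split("#")[0].strip()  # drop comments
            if "==" in line:
                name, version = line.split("==", 1)
                pins[name.strip().lower()].add(version.strip())
    return {name: sorted(vs) for name, vs in pins.items() if len(vs) > 1}

print(find_duplicate_pins("requirements_old.txt"))
# e.g. {'opencv-python': ['4.8.1.78', '4.9.0.80'], 'transformers': ['4.42.4', '4.44.2'], ...}
```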
static/app.js CHANGED
@@ -1,4 +1,7 @@
-/* Mirage Real-time AI Avatar Client */
+/* DEPRECATED (dev WebSocket client). Removed for production. Use webrtc_prod.js */
+// This file intentionally contains no executable code in production deployments.
+// It remains only to avoid broken references from older pages; index.html does not load it.
+export {};
 
 // Globals
 let audioWs = null;
static/index.html CHANGED
@@ -107,66 +107,31 @@
 <body>
     <div class="container">
         <div class="header">
-            <h1>🎭 Mirage Real-time AI Avatar</h1>
-            <p>Live face animation and voice conversion with &lt;250ms latency</p>
+            <h1>Mirage Realtime Avatar</h1>
+            <p class="subtitle">Production Preview</p>
         </div>
 
-        <div class="controls">
-            <button id="initBtn">Initialize AI Pipeline</button>
-            <button id="startBtn" disabled>Start Capture</button>
-            <button id="stopBtn" disabled>Stop</button>
-            <input type="file" id="referenceInput" accept="image/*" disabled>
-            <button id="virtualCamBtn" disabled>Enable Virtual Camera</button>
-        </div>
-
-        <div id="statusDiv"></div>
-
-        <div class="metrics" id="metrics">
-            <div class="metric-card">
-                <div class="metric-value" id="fpsValue">0</div>
-                <div class="metric-label">Video FPS</div>
-            </div>
-            <div class="metric-card">
-                <div class="metric-value" id="latencyValue">0ms</div>
-                <div class="metric-label">Avg Latency</div>
-            </div>
-            <div class="metric-card">
-                <div class="metric-value" id="gpuValue">N/A</div>
-                <div class="metric-label">GPU Memory</div>
-            </div>
-            <div class="metric-card">
-                <div class="metric-value" id="statusValue">Idle</div>
-                <div class="metric-label">Pipeline Status</div>
-            </div>
+        <div class="controls" id="controls">
+            <input type="file" id="referenceInput" accept="image/*" title="Reference Image" />
+            <button id="connectBtn">Connect</button>
+            <button id="disconnectBtn" disabled>Disconnect</button>
+            <span id="statusText" style="margin-left:auto;font-size:12px;color:#888;">Idle</span>
         </div>
 
         <div class="video-container">
             <div class="video-box">
-                <h3>📹 Local Camera</h3>
-                <video id="localVid" autoplay muted playsinline></video>
+                <h3>Local</h3>
+                <video id="localVideo" autoplay muted playsinline></video>
             </div>
             <div class="video-box">
-                <h3>🤖 AI Avatar Output</h3>
-                <img id="remoteVid" alt="AI avatar output" />
-                <canvas id="virtualCanvas" style="display: none;"></canvas>
+                <h3>Avatar</h3>
+                <video id="remoteVideo" autoplay playsinline></video>
             </div>
         </div>
 
-        <div class="virtual-camera-info">
-            <h3>📺 Virtual Camera Integration</h3>
-            <p>The AI avatar output can be used as a virtual camera in:</p>
-            <ul>
-                <li>🎥 Zoom, Google Meet, Microsoft Teams</li>
-                <li>💬 Discord, Slack, WhatsApp Desktop</li>
-                <li>📱 OBS Studio, Streamlabs</li>
-            </ul>
-            <p><strong>Setup:</strong> Enable virtual camera, then select "Mirage Virtual Camera" in your video app settings.</p>
-        </div>
-
-        <audio id="remoteAudio" autoplay></audio>
-        <div id="log"></div>
+        <div id="perfBar" style="font-size:12px;color:#bbb;margin-top:10px;">Latency: -- ms · FPS: -- · GPU: --</div>
 
-        <script src="/static/app.js"></script>
+        <script src="/static/webrtc_prod.js"></script>
     </div>
 </body>
 </html>
static/webrtc_client.js ADDED
@@ -0,0 +1,4 @@
+/* Legacy dev WebRTC bootstrap (no-op in production). */
+(function(){
+    // intentionally empty
+})();
static/webrtc_prod.js ADDED
@@ -0,0 +1,120 @@
+/* Production-focused WebRTC client (replaces dev UI). */
+(function(){
+  const state = {
+    pc: null,
+    control: null,
+    localStream: null,
+    metricsTimer: null,
+    referenceImage: null,
+    connected: false
+  };
+  const els = {
+    ref: document.getElementById('referenceInput'),
+    connect: document.getElementById('connectBtn'),
+    disconnect: document.getElementById('disconnectBtn'),
+    localVideo: document.getElementById('localVideo'),
+    remoteVideo: document.getElementById('remoteVideo'),
+    status: document.getElementById('statusText'),
+    perf: document.getElementById('perfBar')
+  };
+  function setStatus(txt){ els.status.textContent = txt; }
+  function log(...a){ console.log('[PROD]', ...a); }
+
+  async function handleReference(e){
+    const file = e.target.files && e.target.files[0];
+    if(!file) return;
+    const buf = await file.arrayBuffer();
+    // Base64-encode in chunks: spreading a large Uint8Array into fromCharCode overflows the call stack.
+    const bytes = new Uint8Array(buf);
+    let bin = '';
+    for(let i = 0; i < bytes.length; i += 0x8000){ bin += String.fromCharCode.apply(null, bytes.subarray(i, i + 0x8000)); }
+    state.referenceImage = btoa(bin); // send after control channel open
+  }
+
+  async function connect(){
+    if(state.connected) return;
+    try {
+      setStatus('Requesting media');
+      els.connect.disabled = true;
+      // Fetch short-lived auth token (if server requires)
+      let authToken = null;
+      try {
+        const t = await fetch('/webrtc/token');
+        if (t.ok) {
+          const j = await t.json();
+          authToken = j.token;
+        }
+      } catch(_){}
+      state.localStream = await navigator.mediaDevices.getUserMedia({video:true, audio:true});
+      els.localVideo.srcObject = state.localStream;
+      setStatus('Creating peer');
+      state.pc = new RTCPeerConnection({iceServers:[{urls:['stun:stun.l.google.com:19302']}]});
+      state.pc.onconnectionstatechange = ()=>{ log('pc state', state.pc.connectionState); if(['failed','disconnected','closed'].includes(state.pc.connectionState)){ disconnect(); } };
+      state.pc.ontrack = ev => {
+        if(ev.streams && ev.streams[0]){
+          els.remoteVideo.srcObject = ev.streams[0];
+        }
+      };
+      state.control = state.pc.createDataChannel('control');
+      state.control.onopen = ()=>{
+        setStatus('Connected');
+        state.connected = true;
+        els.disconnect.disabled = false;
+        if(state.referenceImage){
+          try { state.control.send(JSON.stringify({type:'set_reference', image_jpeg_base64: state.referenceImage})); } catch(e) {}
+        }
+        // Metrics polling
+        state.metricsTimer = setInterval(()=>{
+          try { state.control.send(JSON.stringify({type:'metrics_request'})); } catch(_){}
+        }, 4000);
+      };
+      state.control.onmessage = (e)=>{
+        try { const data = JSON.parse(e.data); if(data.type==='metrics' && data.payload){ updatePerf(data.payload); } } catch(_){}
+      };
+      state.localStream.getTracks().forEach(t=> state.pc.addTrack(t, state.localStream));
+      const offer = await state.pc.createOffer();
+      await state.pc.setLocalDescription(offer);
+      setStatus('Negotiating');
+      const headers = {'Content-Type':'application/json'};
+      if (authToken) headers['X-Auth-Token'] = authToken;
+      const r = await fetch('/webrtc/offer',{method:'POST', headers, body: JSON.stringify({sdp:offer.sdp, type:offer.type})});
+      if(!r.ok){
+        if(r.status===401 || r.status===403){
+          setStatus('Unauthorized (check API key/token)');
+        } else {
+          setStatus('Offer failed '+r.status);
+        }
+        els.connect.disabled = false; return;
+      }
+      const answer = await r.json();
+      await state.pc.setRemoteDescription(answer);
+      setStatus('Finalizing');
+    } catch(e){
+      log('connect error', e);
+      setStatus('Error');
+      els.connect.disabled = false;
+    }
+  }
+
+  function updatePerf(p){
+    try {
+      const fps = (p.video_fps || 0).toFixed(1);
+      const lat = Math.round(p.avg_video_latency_ms || 0);
+      const gpu = (p.gpu_memory_used !== undefined) ? (p.gpu_memory_used.toFixed(2)+'GB') : '--';
+      els.perf.textContent = `Latency: ${lat} ms · FPS: ${fps} · GPU: ${gpu}`;
+    } catch(_){}
+  }
+
+  async function disconnect(){
+    if(state.metricsTimer){ clearInterval(state.metricsTimer); state.metricsTimer = null; }
+    if(state.control){ try { state.control.close(); } catch(_){} }
+    if(state.pc){ try { state.pc.close(); } catch(_){} }
+    if(state.localStream){ state.localStream.getTracks().forEach(t=>t.stop()); }
+    state.pc = null; state.control = null; state.localStream = null; state.connected = false;
+    els.connect.disabled = false; els.disconnect.disabled = true; setStatus('Idle');
+  }
+
+  els.ref.addEventListener('change', handleReference);
+  els.connect.addEventListener('click', connect);
+  els.disconnect.addEventListener('click', disconnect);
+})();
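`webrtc_server.py` itself is not part of this diff, so the shape of the router the client above talks to has to be inferred. A hypothetical minimal sketch using real aiortc and FastAPI APIs, mirroring the client's contract (`GET /webrtc/token`, `POST /webrtc/offer` with `{sdp, type}`, `X-Auth-Token` header):

```python
import secrets
import time
from aiortc import RTCPeerConnection, RTCSessionDescription
from fastapi import APIRouter, Header, HTTPException
from pydantic import BaseModel

router = APIRouter(prefix="/webrtc")
_tokens: dict[str, float] = {}  # token -> expiry (epoch seconds)

class Offer(BaseModel):
    sdp: str
    type: str

@router.get("/token")
async def token():
    t = secrets.token_urlsafe(16)
    _tokens[t] = time.time() + 60  # short-lived
    return {"token": t}

@router.post("/offer")
async def offer(offer: Offer, x_auth_token: str | None = Header(default=None)):
    # Single-use token check; pop() invalidates the token on first use
    if _tokens.pop(x_auth_token, 0) < time.time():
        raise HTTPException(status_code=401, detail="invalid or expired token")
    pc = RTCPeerConnection()
    # A real server would attach processed audio/video tracks and the
    # 'control' data-channel handler here before answering.
    await pc.setRemoteDescription(RTCSessionDescription(sdp=offer.sdp, type=offer.type))
    answer = await pc.createAnswer()
    await pc.setLocalDescription(answer)
    return {"sdp": pc.localDescription.sdp, "type": pc.localDescription.type}
```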
test_system.py ADDED
@@ -0,0 +1,380 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Testing and Validation Suite for Mirage AI Avatar System
3
+ Tests end-to-end functionality, latency, and performance
4
+ """
5
+ import asyncio
6
+ import time
7
+ import aiohttp
8
+ import json
9
+ import numpy as np
10
+ import cv2
11
+ import logging
12
+ from pathlib import Path
13
+ import subprocess
14
+ import psutil
15
+ from typing import Dict, Any, List
16
+
17
+ logging.basicConfig(level=logging.INFO)
18
+ logger = logging.getLogger(__name__)
19
+
20
+ class MirageSystemTester:
21
+ """Comprehensive testing suite for the AI avatar system"""
22
+
23
+ def __init__(self, base_url: str = "http://localhost:7860"):
24
+ self.base_url = base_url
25
+ self.session = None
26
+ self.test_results = {}
27
+
28
+ async def __aenter__(self):
29
+ self.session = aiohttp.ClientSession()
30
+ return self
31
+
32
+ async def __aexit__(self, exc_type, exc_val, exc_tb):
33
+ if self.session:
34
+ await self.session.close()
35
+
36
+ async def test_health_endpoint(self) -> bool:
37
+ """Test basic health endpoint"""
38
+ try:
39
+ async with self.session.get(f"{self.base_url}/health") as response:
40
+ data = await response.json()
41
+
42
+ success = (
43
+ response.status == 200 and
44
+ data.get("status") == "ok" and
45
+ data.get("system") == "real-time-ai-avatar"
46
+ )
47
+
48
+ self.test_results["health"] = {
49
+ "success": success,
50
+ "status": response.status,
51
+ "data": data
52
+ }
53
+
54
+ logger.info(f"Health check: {'✅ PASS' if success else '❌ FAIL'}")
55
+ return success
56
+
57
+ except Exception as e:
58
+ logger.error(f"Health check failed: {e}")
59
+ self.test_results["health"] = {"success": False, "error": str(e)}
60
+ return False
61
+
62
+ async def test_pipeline_initialization(self) -> bool:
63
+ """Test AI pipeline initialization"""
64
+ try:
65
+ start_time = time.time()
66
+ async with self.session.post(f"{self.base_url}/initialize") as response:
67
+ data = await response.json()
68
+ init_time = time.time() - start_time
69
+
70
+ success = (
71
+ response.status == 200 and
72
+ data.get("status") in ["success", "already_initialized"]
73
+ )
74
+
75
+ self.test_results["initialization"] = {
76
+ "success": success,
77
+ "status": response.status,
78
+ "data": data,
79
+ "init_time_seconds": init_time
80
+ }
81
+
82
+ logger.info(f"Pipeline init: {'✅ PASS' if success else '❌ FAIL'} ({init_time:.1f}s)")
83
+ return success
84
+
85
+ except Exception as e:
86
+ logger.error(f"Pipeline initialization failed: {e}")
87
+ self.test_results["initialization"] = {"success": False, "error": str(e)}
88
+ return False
89
+
90
+ async def test_reference_image_upload(self) -> bool:
91
+ """Test reference image upload functionality"""
92
+ try:
93
+ # Create a test image
94
+ test_image = np.zeros((512, 512, 3), dtype=np.uint8)
95
+ cv2.circle(test_image, (256, 200), 50, (255, 255, 255), -1) # Face-like circle
96
+ cv2.circle(test_image, (230, 180), 10, (0, 0, 0), -1) # Eye
97
+ cv2.circle(test_image, (280, 180), 10, (0, 0, 0), -1) # Eye
98
+ cv2.ellipse(test_image, (256, 220), (20, 10), 0, 0, 180, (0, 0, 0), 2) # Mouth
99
+
100
+ # Encode as JPEG
101
+ _, encoded = cv2.imencode('.jpg', test_image)
102
+ image_data = encoded.tobytes()
103
+
104
+ # Upload test image
105
+ form_data = aiohttp.FormData()
106
+ form_data.add_field('file', image_data, filename='test_face.jpg', content_type='image/jpeg')
107
+
108
+ async with self.session.post(f"{self.base_url}/set_reference", data=form_data) as response:
109
+ data = await response.json()
110
+
111
+ success = (
112
+ response.status == 200 and
113
+ data.get("status") == "success"
114
+ )
115
+
116
+ self.test_results["reference_upload"] = {
117
+ "success": success,
118
+ "status": response.status,
119
+ "data": data
120
+ }
121
+
122
+ logger.info(f"Reference upload: {'✅ PASS' if success else '❌ FAIL'}")
123
+ return success
124
+
125
+ except Exception as e:
126
+ logger.error(f"Reference image upload failed: {e}")
127
+ self.test_results["reference_upload"] = {"success": False, "error": str(e)}
128
+ return False
129
+
130
+ async def test_websocket_connections(self) -> bool:
131
+ """Test WebSocket connections for audio and video"""
132
+ try:
133
+ import websockets
134
+
135
+ # Test audio WebSocket
136
+ audio_success = await self._test_websocket_endpoint("/audio")
137
+
138
+ # Test video WebSocket
139
+ video_success = await self._test_websocket_endpoint("/video")
140
+
141
+ success = audio_success and video_success
142
+
143
+ self.test_results["websockets"] = {
144
+ "success": success,
145
+ "audio_success": audio_success,
146
+ "video_success": video_success
147
+ }
148
+
149
+ logger.info(f"WebSocket connections: {'✅ PASS' if success else '❌ FAIL'}")
150
+ return success
151
+
152
+ except Exception as e:
153
+ logger.error(f"WebSocket test failed: {e}")
154
+ self.test_results["websockets"] = {"success": False, "error": str(e)}
155
+ return False
156
+
157
+ async def _test_websocket_endpoint(self, endpoint: str) -> bool:
158
+ """Test a specific WebSocket endpoint"""
159
+ try:
160
+ import websockets
161
+
162
+ ws_url = self.base_url.replace("https://", "wss://").replace("http://", "ws://") + endpoint
163
+
164
+ async with websockets.connect(ws_url) as websocket:
165
+ # Send test data
166
+ if endpoint == "/audio":
167
+ # Send 160ms of silence (16kHz, 16-bit)
168
+ test_audio = np.zeros(int(16000 * 0.160), dtype=np.int16)
169
+ await websocket.send(test_audio.tobytes())
170
+ else: # video
171
+ # Send a small test JPEG
172
+ test_frame = np.zeros((256, 256, 3), dtype=np.uint8)
173
+ _, encoded = cv2.imencode('.jpg', test_frame, [cv2.IMWRITE_JPEG_QUALITY, 50])
174
+ await websocket.send(encoded.tobytes())
175
+
176
+ # Wait for response
177
+ response = await asyncio.wait_for(websocket.recv(), timeout=5.0)
178
+ return len(response) > 0
179
+
180
+ except Exception as e:
181
+ logger.error(f"WebSocket {endpoint} test failed: {e}")
182
+ return False
183
+
184
+ async def test_performance_metrics(self) -> bool:
185
+ """Test performance metrics endpoint"""
186
+ try:
187
+ async with self.session.get(f"{self.base_url}/pipeline_status") as response:
188
+ data = await response.json()
189
+
190
+ success = response.status == 200 and data.get("initialized", False)
191
+
192
+ self.test_results["performance_metrics"] = {
193
+ "success": success,
194
+ "status": response.status,
195
+ "data": data
196
+ }
197
+
198
+ if success:
199
+ stats = data.get("stats", {})
200
+ logger.info(f"Performance metrics: ✅ PASS")
201
+ logger.info(f" GPU Memory: {stats.get('gpu_memory_used', 0):.1f} GB")
202
+ logger.info(f" Video FPS: {stats.get('video_fps', 0):.1f}")
203
+ logger.info(f" Avg Latency: {stats.get('avg_video_latency_ms', 0):.1f} ms")
204
+ else:
205
+ logger.info("Performance metrics: ❌ FAIL")
206
+
207
+ return success
208
+
209
+ except Exception as e:
210
+ logger.error(f"Performance metrics test failed: {e}")
211
+ self.test_results["performance_metrics"] = {"success": False, "error": str(e)}
212
+ return False
213
+
214
+ async def test_latency_benchmark(self) -> Dict[str, float]:
215
+ """Benchmark system latency"""
216
+ latencies = []
217
+
218
+ try:
219
+ # Warm up (timings collected here are discarded before the measured run)
220
+ for _ in range(5):
221
+ start_time = time.time()
222
+ async with self.session.get(f"{self.base_url}/health") as response:
223
+ await response.json()
224
+ latencies.append((time.time() - start_time) * 1000)
225
+
226
+ # Actual benchmark
227
+ latencies = []
228
+ for _ in range(20):
229
+ start_time = time.time()
230
+ async with self.session.get(f"{self.base_url}/pipeline_status") as response:
231
+ await response.json()
232
+ latencies.append((time.time() - start_time) * 1000)
233
+
234
+ results = {
235
+ "avg_latency_ms": np.mean(latencies),
236
+ "min_latency_ms": np.min(latencies),
237
+ "max_latency_ms": np.max(latencies),
238
+ "p95_latency_ms": np.percentile(latencies, 95),
239
+ "p99_latency_ms": np.percentile(latencies, 99)
240
+ }
241
+
242
+ self.test_results["latency_benchmark"] = results
243
+
244
+ logger.info("Latency benchmark results:")
245
+ logger.info(f" Average: {results['avg_latency_ms']:.1f} ms")
246
+ logger.info(f" P95: {results['p95_latency_ms']:.1f} ms")
247
+ logger.info(f" P99: {results['p99_latency_ms']:.1f} ms")
248
+
249
+ return results
250
+
251
+ except Exception as e:
252
+ logger.error(f"Latency benchmark failed: {e}")
253
+ return {}
254
+
255
+ def test_system_requirements(self) -> Dict[str, Any]:
256
+ """Test system requirements and capabilities"""
257
+ results = {}
258
+
259
+ try:
260
+ # Check GPU availability
261
+ try:
262
+ import torch
263
+ results["gpu_available"] = torch.cuda.is_available()
264
+ if torch.cuda.is_available():
265
+ results["gpu_name"] = torch.cuda.get_device_name(0)
266
+ results["gpu_memory_gb"] = torch.cuda.get_device_properties(0).total_memory / 1024**3
267
+ results["cuda_version"] = torch.version.cuda
268
+ except ImportError:
269
+ results["gpu_available"] = False
270
+
271
+ # Check system resources
272
+ memory = psutil.virtual_memory()
273
+ results["system_memory_gb"] = memory.total / 1024**3
274
+ results["cpu_count"] = psutil.cpu_count()
275
+
276
+ # Check disk space
277
+ disk = psutil.disk_usage('/')
278
+ results["disk_free_gb"] = disk.free / 1024**3
279
+
280
+ # Check required packages
281
+ required_packages = {  # pip name -> import name (e.g. opencv-python imports as cv2)
282
+ "torch": "torch", "torchvision": "torchvision", "torchaudio": "torchaudio",
283
+ "opencv-python": "cv2", "numpy": "numpy", "fastapi": "fastapi", "websockets": "websockets",
284
+ }
285
+
286
+ missing_packages = []
287
+ for package, import_name in required_packages.items():
288
+ try:
289
+ __import__(import_name)
290
+ except ImportError:
291
+ missing_packages.append(package)
292
+
293
+ results["missing_packages"] = missing_packages
294
+ results["requirements_met"] = len(missing_packages) == 0
295
+
296
+ self.test_results["system_requirements"] = results
297
+
298
+ logger.info("System requirements:")
299
+ logger.info(f" GPU: {'✅' if results['gpu_available'] else '❌'}")
300
+ logger.info(f" Memory: {results['system_memory_gb']:.1f} GB")
301
+ logger.info(f" CPU: {results['cpu_count']} cores")
302
+ logger.info(f" Packages: {'✅' if results['requirements_met'] else '❌'}")
303
+
304
+ return results
305
+
306
+ except Exception as e:
307
+ logger.error(f"System requirements check failed: {e}")
308
+ return {"error": str(e)}
309
+
310
+ async def run_comprehensive_test(self) -> Dict[str, Any]:
311
+ """Run all tests and return comprehensive results"""
312
+ logger.info("🧪 Starting comprehensive system test...")
313
+
314
+ # System requirements (runs first, no server needed)
315
+ self.test_system_requirements()
316
+
317
+ # Server-dependent tests
318
+ tests = [
319
+ ("Health Check", self.test_health_endpoint()),
320
+ ("Pipeline Initialization", self.test_pipeline_initialization()),
321
+ ("Reference Image Upload", self.test_reference_image_upload()),
322
+ ("WebSocket Connections", self.test_websocket_connections()),
323
+ ("Performance Metrics", self.test_performance_metrics()),
324
+ ]
325
+
326
+ # Run tests sequentially
327
+ for test_name, test_coro in tests:
328
+ logger.info(f"Running: {test_name}...")
329
+ try:
330
+ result = await test_coro
331
+ if not result:
332
+ logger.warning(f"{test_name} failed - may affect subsequent tests")
333
+ except Exception as e:
334
+ logger.error(f"{test_name} threw exception: {e}")
335
+
336
+ # Latency benchmark (runs last)
337
+ logger.info("Running latency benchmark...")
338
+ await self.test_latency_benchmark()
339
+
340
+ # Calculate overall success rate
341
+ successful_tests = sum(1 for result in self.test_results.values()
342
+ if isinstance(result, dict) and result.get("success", False))
343
+ total_tests = len([r for r in self.test_results.values() if isinstance(r, dict) and "success" in r])
344
+
345
+ overall_success = successful_tests / max(total_tests, 1) >= 0.8 # 80% success rate
346
+
347
+ summary = {
348
+ "overall_success": overall_success,
349
+ "successful_tests": successful_tests,
350
+ "total_tests": total_tests,
351
+ "success_rate": successful_tests / max(total_tests, 1),
352
+ "detailed_results": self.test_results
353
+ }
354
+
355
+ logger.info(f"🏁 Test completed: {successful_tests}/{total_tests} tests passed")
356
+ logger.info(f"Overall result: {'✅ PASS' if overall_success else '❌ FAIL'}")
357
+
358
+ return summary
359
+
360
+ async def main():
361
+ """Main test runner"""
362
+ import sys
363
+
364
+ base_url = sys.argv[1] if len(sys.argv) > 1 else "http://localhost:7860"
365
+
366
+ async with MirageSystemTester(base_url) as tester:
367
+ results = await tester.run_comprehensive_test()
368
+
369
+ # Save results to file
370
+ results_file = Path("test_results.json")
371
+ with open(results_file, "w") as f:
372
+ json.dump(results, f, indent=2, default=str)
373
+
374
+ logger.info(f"📊 Detailed results saved to: {results_file}")
375
+
376
+ # Exit with appropriate code
377
+ sys.exit(0 if results["overall_success"] else 1)
378
+
379
+ if __name__ == "__main__":
380
+ asyncio.run(main())
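
A minimal usage sketch for the suite above (assumptions: the file is saved as test_system.py and a server is listening on localhost:7860):

    # Run the whole suite from the shell: python test_system.py http://localhost:7860
    # Or drive a single check programmatically:
    import asyncio
    from test_system import MirageSystemTester  # module name assumed

    async def smoke():
        async with MirageSystemTester("http://localhost:7860") as tester:
            print("health ok:", await tester.test_health_endpoint())

    asyncio.run(smoke())
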
webrtc_server.py ADDED
@@ -0,0 +1,444 @@
1
+ """WebRTC integration using aiortc for low-latency bi-directional media.
2
+
3
+ This module exposes:
4
+ - POST /webrtc/offer : Accepts an SDP offer from browser, returns SDP answer.
5
+ - GET /webrtc/ice : (Optional) polling ICE candidates (simplified; trickle or full offer/answer)
6
+
7
+ Media Flow (Phase 1):
8
+ Browser camera/mic -> WebRTC -> aiortc PeerConnection ->
9
+ Video track -> frame hook -> pipeline.process_video_frame -> return video track to client
10
+ Audio track -> chunk hook -> pipeline.process_audio_chunk -> return audio track to client
11
+
12
+ Control/Data channel: "control" used for lightweight JSON messages:
13
+ {"type":"metrics_request"} -> server replies {"type":"metrics","payload":...}
14
+ {"type":"set_reference","image_jpeg_base64":...}
15
+
16
+ Fallback: if aiortc is unavailable in the environment or fails to import, the offer endpoint returns 503.
17
+
18
+ Security: (basic) optional shared secret via the X-API-Key header (env MIRAGE_API_KEY), or a short-lived signed token via X-Auth-Token minted at GET /webrtc/token; enforcement is gated by MIRAGE_REQUIRE_API_KEY.
19
+
20
+ NOTE: This is a minimal, production-oriented skeleton focused on structure, error handling,
21
+ resource cleanup and integration points. Actual model inference remains in avatar_pipeline.
22
+ """
23
+ from __future__ import annotations
24
+
25
+ import asyncio
26
+ import base64
27
+ import json
28
+ import logging
29
+ import os
30
+ import time
31
+ from dataclasses import dataclass
32
+ import hashlib
33
+ import hmac
34
+ import secrets as pysecrets
35
+ import base64 as pybase64
36
+ from typing import Optional, Dict, Any
37
+
38
+ from fastapi import APIRouter, HTTPException, Header
39
+
40
+ try:
41
+ from aiortc import RTCPeerConnection, RTCSessionDescription, MediaStreamTrack, RTCConfiguration, RTCIceServer
42
+ from aiortc.contrib.media import MediaBlackhole
43
+ import av # noqa: F401 (required by aiortc for codecs)
44
+ AIORTC_AVAILABLE = True
45
+ except Exception as e: # pragma: no cover
46
+ AIORTC_IMPORT_ERROR = str(e)
47
+ AIORTC_AVAILABLE = False
48
+
49
+ import numpy as np
50
+ import cv2
51
+
52
+ from avatar_pipeline import get_pipeline
53
+
54
+ logger = logging.getLogger(__name__)
55
+ router = APIRouter(prefix="/webrtc", tags=["webrtc"])
56
+
57
+ API_KEY = os.getenv("MIRAGE_API_KEY")
58
+ REQUIRE_API_KEY = os.getenv("MIRAGE_REQUIRE_API_KEY", "0").strip().lower() in {"1","true","yes","on"}
59
+ TOKEN_TTL_SECONDS = int(os.getenv("MIRAGE_TOKEN_TTL", "300")) # 5 minutes default
60
+ STUN_URLS = os.getenv("MIRAGE_STUN_URLS", "stun:stun.l.google.com:19302")
61
+ TURN_URL = os.getenv("MIRAGE_TURN_URL")
62
+ TURN_USER = os.getenv("MIRAGE_TURN_USER")
63
+ TURN_PASS = os.getenv("MIRAGE_TURN_PASS")
64
+
65
+
66
+ def _b64u(data: bytes) -> str:
67
+ return pybase64.urlsafe_b64encode(data).decode('ascii').rstrip('=')
68
+
69
+
70
+ def _b64u_decode(data: str) -> bytes:
71
+ pad = '=' * (-len(data) % 4)
72
+ return pybase64.urlsafe_b64decode(data + pad)
73
+
74
+
75
+ def _mint_token() -> str:
76
+ """Stateless signed token: base64url(ts:nonce:mac)."""
77
+ ts = str(int(time.time()))
78
+ nonce = _b64u(pysecrets.token_bytes(12))
79
+ msg = f"{ts}:{nonce}".encode('utf-8')
80
+ mac = hmac.new(API_KEY.encode('utf-8'), msg, hashlib.sha256).digest()
81
+ return _b64u(msg) + '.' + _b64u(mac)
82
+
83
+
84
+ def _verify_token(token: str) -> bool:
85
+ try:
86
+ parts = token.split('.')
87
+ if len(parts) != 2:
88
+ return False
89
+ msg_b64, mac_b64 = parts
90
+ msg = _b64u_decode(msg_b64)
91
+ mac = _b64u_decode(mac_b64)
92
+ ts_str, nonce = msg.decode('utf-8').split(':', 1)
93
+ ts = int(ts_str)
94
+ if time.time() - ts > TOKEN_TTL_SECONDS:
95
+ return False
96
+ expected = hmac.new(API_KEY.encode('utf-8'), msg, hashlib.sha256).digest()
97
+ return hmac.compare_digest(expected, mac)
98
+ except Exception:
99
+ return False
100
+
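
A quick round-trip of the token scheme (assumes MIRAGE_API_KEY was set before this module was imported):

    token = _mint_token()
    assert _verify_token(token)            # fresh token verifies
    assert not _verify_token(token + "x")  # tampered MAC is rejected
    # tokens older than TOKEN_TTL_SECONDS (default 300 s) also fail verification
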
101
+
102
+ def _check_api_key(header_val: Optional[str], token_val: Optional[str] = None):
103
+ # If no API key configured, allow
104
+ if not API_KEY:
105
+ return
106
+ # If enforcement disabled, allow
107
+ if not REQUIRE_API_KEY:
108
+ return
109
+ # Accept raw key or signed token
110
+ if header_val and header_val == API_KEY:
111
+ return
112
+ if token_val and _verify_token(token_val):
113
+ return
114
+ raise HTTPException(status_code=401, detail="Unauthorized")
115
+
116
+
117
+ def _ice_configuration() -> RTCConfiguration:
118
+ servers = []
119
+ # STUN servers (comma-separated)
120
+ for url in [u.strip() for u in STUN_URLS.split(',') if u.strip()]:
121
+ servers.append(RTCIceServer(urls=[url]))
122
+ # Optional TURN
123
+ if TURN_URL and TURN_USER and TURN_PASS:
124
+ servers.append(RTCIceServer(urls=[TURN_URL], username=TURN_USER, credential=TURN_PASS))
125
+ return RTCConfiguration(iceServers=servers)
126
+
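
Example environment for the ICE configuration above (hosts and credentials are illustrative; the TURN entry is only added when all three TURN variables are set):

    # MIRAGE_STUN_URLS=stun:stun.l.google.com:19302,stun:stun1.l.google.com:19302
    # MIRAGE_TURN_URL=turn:turn.example.com:3478
    # MIRAGE_TURN_USER=mirage
    # MIRAGE_TURN_PASS=change-me
    # With the above set, _ice_configuration() yields two STUN servers plus one TURN server.
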
127
+
128
+ def _prefer_codec(sdp: str, kind: str, codec: str) -> str:
129
+ """Move payload types for the given codec to the front of the m-line.
130
+ Minimal SDP munging for preferring codecs (e.g., H264 or VP8).
131
+ """
132
+ try:
133
+ lines = sdp.splitlines()
134
+ # Map pt -> codec
135
+ pt_to_codec = {}
136
+ for ln in lines:
137
+ if ln.startswith('a=rtpmap:'):
138
+ try:
139
+ rest = ln[len('a=rtpmap:'):]
140
+ pt, enc = rest.split(' ', 1)
141
+ codec_name = enc.split('/')[0].upper()
142
+ pt_to_codec[pt] = codec_name
143
+ except Exception:
144
+ pass
145
+ # Find m-line for kind
146
+ for i, ln in enumerate(lines):
147
+ if ln.startswith('m=') and kind in ln:
148
+ parts = ln.split(' ')
149
+ header = parts[:3]
150
+ pts = parts[3:]
151
+ preferred = [pt for pt in pts if pt_to_codec.get(pt, '') == codec.upper()]
152
+ others = [pt for pt in pts if pt not in preferred]
153
+ lines[i] = ' '.join(header + preferred + others)
154
+ break
155
+ return '\r\n'.join(lines) + '\r\n'
156
+ except Exception:
157
+ return sdp
158
+
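
A worked example of the munging (payload-type numbers are illustrative):

    sdp = ("m=video 9 UDP/TLS/RTP/SAVPF 96 102\r\n"
           "a=rtpmap:96 VP8/90000\r\n"
           "a=rtpmap:102 H264/90000\r\n")
    print(_prefer_codec(sdp, "video", "H264").splitlines()[0])
    # -> m=video 9 UDP/TLS/RTP/SAVPF 102 96  (H264's payload type now leads)
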
159
+
160
+ async def _ensure_pipeline_initialized():
161
+ """Initialize the pipeline if not already loaded."""
162
+ pipeline = get_pipeline()
163
+ try:
164
+ if not getattr(pipeline, "loaded", False):
165
+ init = getattr(pipeline, "initialize", None)
166
+ if callable(init):
167
+ result = init()
168
+ if asyncio.iscoroutine(result):
169
+ await result
170
+ except Exception as e:
171
+ logger.error(f"Pipeline init failed: {e}")
172
+
173
+
174
+ @dataclass
175
+ class PeerState:
176
+ pc: RTCPeerConnection
177
+ created: float
178
+ control_channel_ready: bool = False
179
+
180
+
181
+ # In-memory single peer (extend to dict for multi-user)
182
+ _peer_state: Optional[PeerState] = None
183
+ _peer_lock = asyncio.Lock()
184
+
185
+
186
+ class IncomingVideoTrack(MediaStreamTrack):
187
+ kind = "video"
188
+
189
+ def __init__(self, track: MediaStreamTrack):
190
+ super().__init__() # base init
191
+ self.track = track
192
+ self.pipeline = get_pipeline()
193
+ self.frame_id = 0
194
+ self._last_processed: Optional[np.ndarray] = None
195
+ self._processing_task: Optional[asyncio.Task] = None
196
+ self._lock = asyncio.Lock()
197
+
198
+ async def recv(self): # type: ignore[override]
199
+ frame = await self.track.recv()
200
+ self.frame_id += 1
201
+ # Convert to numpy BGR for pipeline
202
+ img = frame.to_ndarray(format="bgr24")
203
+ h, w, _ = img.shape
204
+ proc_input = img
205
+ # Optionally downscale for processing to cap latency
206
+ try:
207
+ if max(h, w) > 512:
208
+ scale = 512 / max(h, w)  # keep aspect ratio; longest side becomes 512
209
+ scale_w = max(1, int(w * scale))
210
+ scale_h = max(1, int(h * scale))
211
+
212
+ proc_input = cv2.resize(img, (scale_w, scale_h))
213
+ except Exception as e:
214
+ logger.debug(f"Video downscale skip: {e}")
215
+ # Schedule background processing to avoid blocking recv()
216
+ async def _process_async(inp: np.ndarray, expected_size: tuple[int, int], fid: int):
217
+ try:
218
+ out_small = self.pipeline.process_video_frame(inp, fid)
219
+ if (out_small.shape[1], out_small.shape[0]) != expected_size:
220
+ out = cv2.resize(out_small, expected_size)
221
+ else:
222
+ out = out_small
223
+ async with self._lock:
224
+ self._last_processed = out
225
+ except Exception as ex:
226
+ logger.error(f"Video processing error(bg): {ex}")
227
+ finally:
228
+ self._processing_task = None
229
+
230
+ expected = (w, h)
231
+ if self._processing_task is None:
232
+ # Only one processing task at a time; frames that arrive mid-processing reuse the last output
233
+ self._processing_task = asyncio.create_task(_process_async(proc_input, expected, self.frame_id))
234
+
235
+ # Use last processed if available, else pass-through
236
+ async with self._lock:
237
+ processed = self._last_processed if self._last_processed is not None else img
238
+ # Convert back to VideoFrame
239
+ new_frame = frame.from_ndarray(processed, format="bgr24")
240
+ new_frame.pts = frame.pts
241
+ new_frame.time_base = frame.time_base
242
+ return new_frame
243
+
244
+
245
+ class IncomingAudioTrack(MediaStreamTrack):
246
+ kind = "audio"
247
+
248
+ def __init__(self, track: MediaStreamTrack):
249
+ super().__init__()
250
+ self.track = track
251
+ self.pipeline = get_pipeline()
252
+ self._resample_to_16k = None
253
+ self._resample_from_16k = None
254
+
255
+ async def recv(self): # type: ignore[override]
256
+ frame = await self.track.recv()
257
+ # frame is an AudioFrame (PCM)
258
+ try:
259
+ import av
260
+ from av.audio.resampler import AudioResampler
261
+ # Initialize resamplers once using input characteristics
262
+ if self._resample_to_16k is None:
263
+ self._resample_to_16k = AudioResampler(format='s16', layout='mono', rate=16000)
264
+ if self._resample_from_16k is None:
265
+ # Back to original sample rate and layout; keep s16 for low overhead
266
+ target_layout = frame.layout.name if frame.layout else 'mono'
267
+ target_rate = frame.sample_rate or 48000
268
+ self._resample_from_16k = AudioResampler(format='s16', layout=target_layout, rate=target_rate)
269
+
270
+ # 1) To mono s16 @16k for pipeline
271
+ f_16k_list = self._resample_to_16k.resample(frame)
272
+ if isinstance(f_16k_list, list):
273
+ f_16k = f_16k_list[0]
274
+ else:
275
+ f_16k = f_16k_list
276
+ pcm16k = f_16k.to_ndarray() # (channels, samples), dtype=int16
277
+ if pcm16k.ndim == 2:
278
+ # convert to mono if needed
279
+ if pcm16k.shape[0] > 1:
280
+ pcm16k = np.mean(pcm16k, axis=0, keepdims=True).astype(np.int16)
281
+ # drop channel dim -> (samples,)
282
+ pcm16k = pcm16k.reshape(-1)
283
+
284
+ # 2) Pipeline processing (mono 16k int16 ndarray)
285
+ processed_arr = self.pipeline.process_audio_chunk(pcm16k)
286
+ if isinstance(processed_arr, bytes):
287
+ processed_bytes = processed_arr
288
+ else:
289
+ processed_bytes = np.asarray(processed_arr, dtype=np.int16).tobytes()
290
+
291
+ # 3) Wrap processed back into an av frame @16k mono s16
292
+ samples = len(processed_bytes) // 2
293
+ f_proc_16k = av.AudioFrame(format='s16', layout='mono', samples=samples)
294
+ f_proc_16k.sample_rate = 16000
295
+ f_proc_16k.planes[0].update(processed_bytes)
296
+
297
+ # 4) Resample back to original sample rate/layout
298
+ f_out_list = self._resample_from_16k.resample(f_proc_16k)
299
+ if isinstance(f_out_list, list) and len(f_out_list) > 0:
300
+ f_out = f_out_list[0]
301
+ else:
302
+ f_out = f_proc_16k # fallback
303
+
304
+ # Preserve timing as best-effort
305
+ f_out.pts = frame.pts
306
+ f_out.time_base = frame.time_base
307
+ return f_out
308
+ except Exception as e:
309
+ logger.error(f"Audio processing error: {e}")
310
+ return frame
311
+
312
+
313
+ @router.post("/offer")
314
+ async def webrtc_offer(offer: Dict[str, Any], x_api_key: Optional[str] = Header(default=None), x_auth_token: Optional[str] = Header(default=None)):
315
+ """Accept SDP offer and return SDP answer."""
316
+ # If enforcement is enabled, browsers must present a valid short-lived token (the raw API key is never required client-side)
317
+ if REQUIRE_API_KEY:
318
+ if not (x_auth_token and _verify_token(x_auth_token)):
319
+ raise HTTPException(status_code=401, detail="Unauthorized")
320
+ if not AIORTC_AVAILABLE:
321
+ raise HTTPException(status_code=503, detail=f"aiortc not available: {AIORTC_IMPORT_ERROR}")
322
+
323
+ async with _peer_lock:
324
+ global _peer_state
325
+ # Ensure pipeline is ready before wiring tracks
326
+ await _ensure_pipeline_initialized()
327
+ # Cleanup existing peer if present
328
+ if _peer_state is not None:
329
+ try:
330
+ await _peer_state.pc.close()
331
+ except Exception:
332
+ pass
333
+ _peer_state = None
334
+
335
+ pc = RTCPeerConnection(configuration=_ice_configuration())
336
+ blackhole = MediaBlackhole()  # optional sink (currently unused; kept for future track disposal)
337
+
338
+ @pc.on("datachannel")
339
+ def on_datachannel(channel):
340
+ logger.info("Data channel received: %s", channel.label)
341
+ if channel.label == "control":
342
+ def send_metrics():
343
+ pipeline = get_pipeline()
344
+ stats = pipeline.get_performance_stats() if pipeline.loaded else {}
345
+ payload = json.dumps({"type": "metrics", "payload": stats})
346
+ try:
347
+ channel.send(payload)
348
+ except Exception:
349
+ logger.debug("Failed sending metrics")
350
+
351
+ @channel.on("message")
352
+ def on_message(message):
353
+ try:
354
+ if isinstance(message, bytes):
355
+ return
356
+ data = json.loads(message)
357
+ mtype = data.get("type")
358
+ if mtype == "ping":
359
+ channel.send(json.dumps({"type": "pong", "t": time.time()}))
360
+ elif mtype == "metrics_request":
361
+ send_metrics()
362
+ elif mtype == "set_reference":
363
+ b64 = data.get("image_jpeg_base64")
364
+ if b64:
365
+ try:
366
+ # Guard size (<= 2MB when base64)
367
+ if len(b64) > 2_800_000:
368
+ channel.send(json.dumps({"type": "error", "message": "reference too large"}))
369
+ return
370
+ raw = base64.b64decode(b64)
371
+ arr = np.frombuffer(raw, np.uint8)
372
+ img = cv2.imdecode(arr, cv2.IMREAD_COLOR)
373
+ if img is not None:
374
+ pipeline = get_pipeline()
375
+ pipeline.set_reference_frame(img)
376
+ channel.send(json.dumps({"type": "reference_ack"}))
377
+ except Exception as e:
378
+ channel.send(json.dumps({"type": "error", "message": str(e)}))
379
+ except Exception as e:
380
+ logger.error(f"Data channel message error: {e}")
381
+
382
+ @pc.on("connectionstatechange")
383
+ async def on_state_change():
384
+ logger.info("Peer connection state: %s", pc.connectionState)
385
+ if pc.connectionState in ("failed", "closed", "disconnected"):
386
+ try:
387
+ await pc.close()
388
+ except Exception:
389
+ pass
390
+
391
+ # Set remote description
392
+ try:
393
+ desc = RTCSessionDescription(sdp=offer["sdp"], type=offer["type"])
394
+ await pc.setRemoteDescription(desc)
395
+ except Exception as e:
396
+ raise HTTPException(status_code=400, detail=f"Invalid SDP offer: {e}")
397
+
398
+ # Attach incoming tracks and re-add outbound processed tracks
399
+ @pc.on("track")
400
+ def on_track(track):
401
+ logger.info("Track received: %s", track.kind)
402
+ if track.kind == "video":
403
+ local = IncomingVideoTrack(track)
404
+ pc.addTrack(local)
405
+ elif track.kind == "audio":
406
+ local_a = IncomingAudioTrack(track)
407
+ pc.addTrack(local_a)
408
+
409
+ # Create answer
410
+ answer = await pc.createAnswer()
411
+ # Prefer H264 for broader compatibility (fallback to as-is if munging fails)
412
+ patched_sdp = _prefer_codec(answer.sdp, 'video', os.getenv('MIRAGE_PREFERRED_VIDEO_CODEC', 'H264'))
413
+ answer = RTCSessionDescription(sdp=patched_sdp, type=answer.type)
414
+ await pc.setLocalDescription(answer)
415
+
416
+ _peer_state = PeerState(pc=pc, created=time.time())
417
+
418
+ logger.info("WebRTC answer created")
419
+ return {"sdp": pc.localDescription.sdp, "type": pc.localDescription.type}
420
+
421
+
422
+ @router.get("/token")
423
+ async def mint_token():
424
+ """Return a short-lived signed token that can be used as X-Auth-Token.
425
+ Public endpoint; the token is signed with the server-held API key (400 if none is configured).
426
+ """
427
+ if not API_KEY:
428
+ raise HTTPException(status_code=400, detail="API key not configured")
429
+ return {"token": _mint_token(), "ttl": TOKEN_TTL_SECONDS}
430
+
431
+
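
Client-side sketch for pairing the token with the offer endpoint (assumes enforcement is on and aiohttp on the client):

    import aiohttp

    async def post_offer_with_token(base: str, offer_payload: dict) -> dict:
        async with aiohttp.ClientSession() as s:
            async with s.get(f"{base}/webrtc/token") as r:
                token = (await r.json())["token"]
            async with s.post(f"{base}/webrtc/offer", json=offer_payload,
                              headers={"X-Auth-Token": token}) as r:
                return await r.json()
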
432
+ @router.post("/cleanup")
433
+ async def cleanup_peer(x_api_key: Optional[str] = Header(default=None)):
434
+ _check_api_key(x_api_key)
435
+ async with _peer_lock:
436
+ global _peer_state
437
+ if _peer_state is None:
438
+ return {"status": "no_peer"}
439
+ try:
440
+ await _peer_state.pc.close()
441
+ except Exception:
442
+ pass
443
+ _peer_state = None
444
+ return {"status": "closed"}