AIBRUH commited on
Commit
ac8abe2
·
verified ·
1 Parent(s): 12ed199

Upload folder using huggingface_hub

Browse files
Files changed (4) hide show
  1. router/__init__.py +0 -0
  2. router/agents.py +344 -0
  3. router/main.py +504 -0
  4. router/metrics.py +150 -0
router/__init__.py ADDED
File without changes
router/agents.py ADDED
@@ -0,0 +1,344 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """EDEN OS V2 — Grok-Powered Intelligent Pipeline Agents.
2
+
3
+ Four specialized agents + AgentManager coordinator.
4
+ Primary: xAI Grok API for fast intelligent pipeline switching.
5
+ Fallback: Static SIZE_ORDER routing with zero API calls.
6
+
7
+ All agents are optional — if XAI_API_KEY not set, falls back to static routing.
8
+ Uses Grok-4-fast (non-reasoning) for sub-second decisions with 2s timeout, 10s caching.
9
+ """
10
+
11
+ import asyncio
12
+ import json
13
+ import logging
14
+ import os
15
+ import time
16
+
17
+ from openai import AsyncOpenAI
18
+
19
+ from .metrics import MetricsStore, SIZE_ORDER, PIPELINE_NAMES, PIPELINE_SIZES_GB
20
+
21
+ logger = logging.getLogger("eden.agents")
22
+
23
+ _xai_key = os.environ.get("XAI_API_KEY", "")
24
+ AGENTS_ENABLED = bool(_xai_key)
25
+
26
+
27
class BaseAgent:
    """Base class for all Grok-powered agents.

    Provides a lazily-created AsyncOpenAI client pointed at the xAI API,
    a small per-agent TTL cache, and helpers to query Grok and parse its
    JSON responses.  When XAI_API_KEY is unset, ``enabled`` is False and
    every helper degrades gracefully so subclasses fall back to static logic.
    """

    def __init__(self, name: str):
        self.name = name
        self.enabled = AGENTS_ENABLED
        self._client: AsyncOpenAI | None = None
        # key -> (timestamp, value); entries expire after _cache_ttl seconds.
        # Bug fix: the annotation previously used the builtin function `any`
        # as a type — `object` is the correct "anything" annotation here.
        self._cache: dict[str, tuple[float, object]] = {}
        self._cache_ttl = 10.0  # seconds

    def _get_client(self) -> AsyncOpenAI | None:
        """Lazily construct the xAI client (None when agents are disabled)."""
        if self._client is None and self.enabled:
            self._client = AsyncOpenAI(
                api_key=_xai_key,
                base_url="https://api.x.ai/v1",
            )
        return self._client

    def _get_cached(self, key: str):
        """Return the cached value for *key*, or None if absent or expired."""
        if key in self._cache:
            ts, val = self._cache[key]
            if time.time() - ts < self._cache_ttl:
                return val
        return None

    def _set_cached(self, key: str, val):
        """Store *val* under *key* with the current timestamp."""
        self._cache[key] = (time.time(), val)

    async def _ask_grok(self, system: str, prompt: str, max_tokens: int = 200) -> str:
        """Send a message to Grok-4-fast with 2s timeout. Returns empty string on failure."""
        client = self._get_client()
        if not client:
            return ""
        try:
            resp = await asyncio.wait_for(
                client.chat.completions.create(
                    model="grok-4-fast-non-reasoning",
                    messages=[
                        {"role": "system", "content": system},
                        {"role": "user", "content": prompt},
                    ],
                    max_tokens=max_tokens,
                    temperature=0.0,
                ),
                timeout=2.0,
            )
            # Bug fix: message.content may be None on an empty completion;
            # coalesce so the declared `-> str` contract always holds.
            return resp.choices[0].message.content or ""
        except asyncio.TimeoutError:
            logger.warning(f"Agent {self.name}: Grok timeout (2s)")
            return ""
        except Exception as e:
            logger.warning(f"Agent {self.name}: Grok error: {e}")
            return ""

    def _parse_json(self, text: str) -> dict | list | None:
        """Safely parse JSON from Grok response, handling markdown fences."""
        if not text:
            return None
        # Strip markdown code fences if present
        cleaned = text.strip()
        if cleaned.startswith("```"):
            lines = cleaned.split("\n")
            lines = [l for l in lines if not l.strip().startswith("```")]
            cleaned = "\n".join(lines).strip()
        try:
            return json.loads(cleaned)
        except (json.JSONDecodeError, ValueError):
            return None
95
+
96
+
97
class PipelineSpeedAgent(BaseAgent):
    """Monitors response times, dynamically reranks pipelines for fastest switching."""

    def __init__(self):
        super().__init__("speed")

    async def evaluate(self, metrics_store: MetricsStore) -> list[int]:
        """Return recommended pipeline order based on speed + success rate.

        Falls back to the static SIZE_ORDER when agents are disabled, when
        fewer than 5 total calls have been recorded, or when Grok returns an
        invalid order.  A valid Grok answer is cached for the base-class TTL.
        """
        cached = self._get_cached("order")
        if cached is not None:
            return cached

        if not self.enabled:
            return list(SIZE_ORDER)

        all_metrics = metrics_store.get_all_metrics()

        # Don't call Grok if we have insufficient data
        total_calls = sum(all_metrics[pid].total_calls for pid in range(5))
        if total_calls < 5:
            return list(SIZE_ORDER)

        summary_lines = []
        for pid in SIZE_ORDER:
            m = all_metrics[pid]
            summary_lines.append(
                f"P{pid} ({m.name}): size={m.model_size_gb}GB, "
                f"avg_response={m.avg_response_time_ms:.0f}ms, "
                f"success_rate={m.success_rate:.1%}, "
                f"avg_motion={m.avg_motion_score:.3f}, "
                f"calls={m.total_calls}"
            )

        result = await self._ask_grok(
            system="You are an AI pipeline optimizer. Return ONLY a JSON array of integer pipeline IDs. No explanation.",
            prompt=(
                "Given these pipeline performance metrics:\n"
                + "\n".join(summary_lines)
                + "\n\nReturn the optimal pipeline order as a JSON array. "
                "Prioritize: 1) fastest response time, 2) highest success rate, "
                "3) smallest model size as tiebreaker."
            ),
        )

        parsed = self._parse_json(result)
        # Bug fix: accept only a true permutation of the known pipeline IDs.
        # The previous length/type check would accept duplicates such as
        # [0, 0, 0, 0, 0], silently dropping pipelines from the failover order.
        if (
            isinstance(parsed, list)
            and all(isinstance(x, int) for x in parsed)
            and sorted(parsed) == sorted(SIZE_ORDER)
        ):
            self._set_cached("order", parsed)
            logger.info(f"Speed agent recommended order: {parsed}")
            return parsed

        return list(SIZE_ORDER)
148
+
149
+
150
class QualityIntelligenceAgent(BaseAgent):
    """Analyzes motion score trends, predicts best pipeline for current conditions."""

    def __init__(self):
        super().__init__("quality")

    async def evaluate(self, metrics_store: MetricsStore) -> dict:
        """Return best pipeline recommendation.

        Result shape: {"best_pipeline": int, "confidence": float, "reason": str}.
        A successful Grok answer is cached for the base-class TTL (10s).
        """
        cached = self._get_cached("best")
        if cached is not None:
            return cached

        all_metrics = metrics_store.get_all_metrics()

        # Static fallback: pick the pipeline with the best observed motion
        # (pipelines with zero calls score 0.0 so they are never preferred).
        if not self.enabled:
            best_pid = max(
                SIZE_ORDER,
                key=lambda pid: all_metrics[pid].avg_motion_score if all_metrics[pid].total_calls > 0 else 0.0,
            )
            return {"best_pipeline": best_pid, "confidence": 0.5, "reason": "static_fallback"}

        # Too little data for Grok to reason about — default to the smallest pipeline.
        total_calls = sum(all_metrics[pid].total_calls for pid in range(5))
        if total_calls < 5:
            return {"best_pipeline": SIZE_ORDER[0], "confidence": 0.3, "reason": "insufficient_data"}

        summary_lines = []
        for pid in range(5):
            m = all_metrics[pid]
            summary_lines.append(
                f"P{pid} ({m.name}): avg_motion={m.avg_motion_score:.3f}, "
                f"trend={m.trend}, calls={m.total_calls}, "
                f"success_rate={m.success_rate:.1%}"
            )

        result = await self._ask_grok(
            system="You are an animation quality analyst. Return ONLY JSON, no explanation.",
            prompt=(
                "Pipeline motion quality metrics:\n"
                + "\n".join(summary_lines)
                + '\n\nWhich pipeline produces the best animation? '
                'Return: {"best_pipeline": <int>, "confidence": <0-1>, "reason": "<brief>"}'
            ),
        )

        parsed = self._parse_json(result)
        if isinstance(parsed, dict) and "best_pipeline" in parsed:
            self._set_cached("best", parsed)
            logger.info(f"Quality agent: best=P{parsed['best_pipeline']}, reason={parsed.get('reason')}")
            return parsed

        # Grok failed or returned malformed JSON — conservative default.
        return {"best_pipeline": SIZE_ORDER[0], "confidence": 0.3, "reason": "agent_error"}
201
+
202
+
203
class FailoverDecisionAgent(BaseAgent):
    """Makes intelligent failover decisions instead of hard thresholds."""

    def __init__(self):
        super().__init__("failover")

    async def evaluate(
        self,
        motion_score: float,
        pipeline_id: int,
        consecutive_bad: int,
        metrics_store: MetricsStore,
    ) -> dict:
        """Decide whether to failover — smarter than a dumb threshold.

        Returns {"should_failover": bool, "reason": str}.  Falls back to the
        plain motion_score < 0.05 threshold whenever Grok is unavailable or
        answers with malformed JSON.  Results are deliberately not cached —
        each check should use live data.
        """
        # Hard failover: always trigger if completely static for 2+ checks
        if motion_score == 0.0 and consecutive_bad >= 2:
            return {"should_failover": True, "reason": "completely_static"}

        if not self.enabled:
            return {"should_failover": motion_score < 0.05, "reason": "threshold_fallback"}

        m = metrics_store.get_metrics(pipeline_id)
        if not m:
            return {"should_failover": motion_score < 0.05, "reason": "no_metrics"}

        result = await self._ask_grok(
            system="You are a real-time systems reliability engineer. Minimize unnecessary failovers. Return ONLY JSON.",
            prompt=(
                f"Pipeline P{pipeline_id} ({PIPELINE_NAMES.get(pipeline_id, '?')}) status:\n"
                f"- Current motion score: {motion_score:.3f} (threshold: 0.05)\n"
                f"- Consecutive bad checks: {consecutive_bad}\n"
                f"- Average motion score: {m.avg_motion_score:.3f}\n"
                f"- Trend: {m.trend}\n"
                f"- Success rate: {m.success_rate:.1%}\n"
                f"- Total calls: {m.total_calls}\n\n"
                "Should we failover? Consider: is it warming up? Is the trend improving? "
                "Is unnecessary switching costly?\n"
                'Return: {"should_failover": true/false, "reason": "<brief>"}'
            ),
        )

        parsed = self._parse_json(result)
        if isinstance(parsed, dict) and "should_failover" in parsed:
            logger.info(
                f"Failover agent: P{pipeline_id} motion={motion_score:.3f} → "
                f"failover={parsed['should_failover']}, reason={parsed.get('reason')}"
            )
            return parsed

        # Fallback to threshold
        return {"should_failover": motion_score < 0.05, "reason": "agent_fallback"}
254
+
255
+
256
class PreemptiveWarmupAgent(BaseAgent):
    """Predicts which pipeline will be needed next and recommends pre-warming."""

    def __init__(self):
        super().__init__("warmup")

    async def evaluate(self, current_pipeline_id: int, metrics_store: MetricsStore) -> dict:
        """Recommend which pipeline to pre-warm.

        Returns {"warmup_pipeline": int | None, "reason": str}.  The static
        fallback is the next pipeline in SIZE_ORDER (None at the end of it).
        """
        # Static default: the next-largest pipeline after the current one.
        idx = SIZE_ORDER.index(current_pipeline_id) if current_pipeline_id in SIZE_ORDER else 0
        next_pid = SIZE_ORDER[idx + 1] if idx + 1 < len(SIZE_ORDER) else None

        if not self.enabled:
            return {"warmup_pipeline": next_pid, "reason": "static_next_in_order"}

        m = metrics_store.get_metrics(current_pipeline_id)
        if not m:
            return {"warmup_pipeline": next_pid, "reason": "no_metrics"}

        result = await self._ask_grok(
            system="You are a predictive infrastructure manager. Return ONLY JSON.",
            prompt=(
                f"Current pipeline: P{current_pipeline_id} ({PIPELINE_NAMES.get(current_pipeline_id)})\n"
                f"- Trend: {m.trend}, Success rate: {m.success_rate:.1%}, Avg motion: {m.avg_motion_score:.3f}\n"
                f"Available (by size): {[f'P{pid} ({PIPELINE_SIZES_GB[pid]}GB)' for pid in SIZE_ORDER]}\n"
                'Which to pre-warm? Return: {"warmup_pipeline": <int or null>, "reason": "<brief>"}'
            ),
        )

        parsed = self._parse_json(result)
        if isinstance(parsed, dict) and "warmup_pipeline" in parsed:
            logger.info(f"Warmup agent: recommend P{parsed['warmup_pipeline']}, reason={parsed.get('reason')}")
            return parsed

        return {"warmup_pipeline": next_pid, "reason": "agent_fallback"}
290
+
291
+
292
class AgentManager:
    """Facade coordinating the four Grok-backed pipeline agents.

    With an xAI key configured, decisions come from Grok-4-fast (fast and
    cheap); without one, every agent degrades to static SIZE_ORDER behavior
    with zero API calls.
    """

    def __init__(self, metrics_store: MetricsStore):
        self.metrics_store = metrics_store
        self.speed_agent = PipelineSpeedAgent()
        self.quality_agent = QualityIntelligenceAgent()
        self.failover_agent = FailoverDecisionAgent()
        self.warmup_agent = PreemptiveWarmupAgent()
        self.enabled = AGENTS_ENABLED
        logger.info(f"AgentManager initialized. Grok agents enabled: {self.enabled}")

    async def get_routing_order(self, force_strong: bool = False) -> list[int]:
        """Pipeline order to try; reversed SIZE_ORDER when forcing strongest."""
        if not force_strong:
            return await self.speed_agent.evaluate(self.metrics_store)
        return list(reversed(SIZE_ORDER))

    async def should_failover(self, motion_score: float, pipeline_id: int, consecutive_bad: int = 0) -> bool:
        """True when the failover agent (or the 0.05 threshold) says to switch."""
        decision = await self.failover_agent.evaluate(
            motion_score=motion_score,
            pipeline_id=pipeline_id,
            consecutive_bad=consecutive_bad,
            metrics_store=self.metrics_store,
        )
        threshold_default = motion_score < 0.05
        return decision.get("should_failover", threshold_default)

    async def recommend_warmup(self, current_pipeline_id: int) -> int | None:
        """Pipeline id to pre-warm next (None when nothing is recommended)."""
        recommendation = await self.warmup_agent.evaluate(current_pipeline_id, self.metrics_store)
        return recommendation.get("warmup_pipeline")

    async def get_quality_recommendation(self) -> dict:
        """Quality agent's best-pipeline verdict."""
        return await self.quality_agent.evaluate(self.metrics_store)

    def status(self) -> dict:
        """Introspection payload for the router's status endpoints."""
        per_agent = {
            "speed": self.speed_agent.enabled,
            "quality": self.quality_agent.enabled,
            "failover": self.failover_agent.enabled,
            "warmup": self.warmup_agent.enabled,
        }
        return {
            "agents_enabled": self.enabled,
            "engine": "grok-4-fast" if self.enabled else "static",
            "xai_key_set": bool(_xai_key),
            "agents": per_agent,
        }
router/main.py ADDED
@@ -0,0 +1,504 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """EDEN OS V2 — Intelligent Pipeline Router.
2
+
3
+ Dual-Track Architecture:
4
+ Track 1 (Main Pipeline): Smallest/fastest pipeline first (P4 → P0 → P2)
5
+ Track 2 (Backup Router): Escalates through remaining (P3 → P1)
6
+
7
+ 5 Auto-Routing Features (preserved):
8
+ Feature 1: Pre-Warm Greeting Sequence
9
+ Feature 2: Dedicated Eve-Greeting Sub-Pipeline (force strongest)
10
+ Feature 3: Auto-Retry with Immediate Failover (up to 3 retries)
11
+ Feature 4: Heartbeat Pre-Check (quality gate before client delivery)
12
+ Feature 5: Client-Side Animation Fallback flag + server push
13
+
14
+ Intelligence Layer:
15
+ - 4 Grok-powered agents for speed, quality, failover, and warmup decisions
16
+ - Graceful fallback to static SIZE_ORDER if agents unavailable
17
+
18
+ RunPod Cost Management:
19
+ - Idle sleep timer (auto-stop pod after inactivity; default 10 minutes, configurable)
20
+ """
21
+
22
+ import asyncio
23
+ import base64
24
+ import logging
25
+ import os
26
+ import time
27
+ from enum import Enum
28
+
29
+ import httpx
30
+ import numpy as np
31
+ from fastapi import FastAPI
32
+ from pydantic import BaseModel
33
+ from pydantic_settings import BaseSettings
34
+
35
+ from .metrics import MetricsStore, SIZE_ORDER, PIPELINE_SIZES_GB, PIPELINE_NAMES
36
+ from .agents import AgentManager
37
+
38
+ logger = logging.getLogger("eden.router")
39
+
40
+
41
class RouterSettings(BaseSettings):
    """Router configuration, loaded from the environment / .env file."""

    # Per-pipeline service URLs (compose-style service hostnames by default).
    p0_url: str = "http://pipeline0:8010"
    p1_url: str = "http://pipeline1:8011"
    p2_url: str = "http://pipeline2:8012"
    p3_url: str = "http://pipeline3:8013"
    p4_url: str = "http://pipeline4:8014"
    models_dir: str = "/models"
    # xAI key; agents.py reads XAI_API_KEY from the environment directly.
    xai_api_key: str = ""
    runpod_idle_timeout_s: float = 600.0  # 10 minutes
    # RunPod credentials for the auto-stop cost saver (both must be set).
    runpod_api_key: str = ""
    runpod_pod_id: str = ""
    model_config = {"env_file": ".env", "extra": "ignore"}
53
+
54
+
55
+ cfg = RouterSettings()
56
+
57
+ app = FastAPI(title="EDEN Router", version="2.1.0")
58
+
59
+
60
+ # ── Pipeline Status ──────────────────────────────────────────────────────────
61
class PipelineStatus(str, Enum):
    """Lifecycle states for a pipeline service (str-valued for JSON output)."""

    COLD = "cold"        # not yet warmed up
    WARMING = "warming"  # warmup request in flight
    READY = "ready"      # last call/warmup succeeded
    BUSY = "busy"        # NOTE(review): never assigned in this module — confirm use
    FAILED = "failed"    # last call raised, or quality-gated out
67
+
68
+
69
class PipelineInfo:
    """Mutable runtime record for one animation pipeline service."""

    def __init__(self, pid: int, name: str, url: str, size_gb: float):
        self.pid = pid
        self.name = name
        self.url = url
        self.size_gb = size_gb
        self.status = PipelineStatus.COLD
        # Most recent motion score observed for this pipeline (0.0–1.0).
        self.last_motion_score: float = 0.0
        # Consecutive-ish failure counter; reset on success or stale-reset.
        self.fail_count: int = 0

    def to_dict(self):
        """JSON-friendly snapshot used by /health, /status and /failover."""
        return {
            "id": self.pid,
            "name": self.name,
            "status": self.status.value,
            "size_gb": self.size_gb,
            "last_motion_score": self.last_motion_score,
            "fail_count": self.fail_count,
        }
88
+
89
+
90
# ── Pipeline Registry (ordered by SIZE: smallest → largest) ──────────────────
# Pipeline id -> service URL, taken from settings.
_pipeline_urls = {
    0: cfg.p0_url,
    1: cfg.p1_url,
    2: cfg.p2_url,
    3: cfg.p3_url,
    4: cfg.p4_url,
}

# One PipelineInfo per pipeline id (0-4), keyed by id for direct lookup.
pipelines_by_id: dict[int, PipelineInfo] = {
    pid: PipelineInfo(
        pid=pid,
        name=PIPELINE_NAMES[pid],
        url=_pipeline_urls[pid],
        size_gb=PIPELINE_SIZES_GB[pid],
    )
    for pid in range(5)
}

# Canonical order: smallest → largest (for iteration and failover)
pipelines: list[PipelineInfo] = [pipelines_by_id[pid] for pid in SIZE_ORDER]
111
+
112
+
113
+ # ── Intelligence Layer ───────────────────────────────────────────────────────
114
+ metrics_store = MetricsStore()
115
+ agent_manager = AgentManager(metrics_store)
116
+
117
+
118
+ # ── RunPod Idle Sleep Timer ──────────────────────────────────────────────────
119
# Wall-clock timestamp of the most recent inbound request.
_last_activity: float = time.time()
# Handle to the background idle-monitor task (started in startup()).
_sleep_task: asyncio.Task | None = None


def _touch_activity():
    """Reset the idle timer on any request."""
    global _last_activity
    _last_activity = time.time()
127
+
128
+
129
async def _idle_sleep_loop():
    """Monitor for inactivity and stop the RunPod pod once idle too long.

    Polls every 30s and triggers after cfg.runpod_idle_timeout_s of
    inactivity (default 600s).  The loop exits after the first stop
    request, so it will not fire again without a router restart.
    """
    while True:
        await asyncio.sleep(30)  # check every 30s
        idle_seconds = time.time() - _last_activity
        if idle_seconds >= cfg.runpod_idle_timeout_s:
            logger.warning(
                f"RunPod idle for {idle_seconds:.0f}s (limit={cfg.runpod_idle_timeout_s:.0f}s). "
                "Requesting pod stop..."
            )
            await _stop_runpod_pod()
            break  # stop the loop after requesting shutdown
141
+
142
+
143
async def _stop_runpod_pod():
    """Stop the RunPod pod via API to save costs.

    Best-effort: missing credentials or HTTP/transport errors are logged
    and otherwise ignored.
    """
    if not cfg.runpod_api_key or not cfg.runpod_pod_id:
        logger.info("RunPod auto-sleep: no API key or pod ID configured, skipping")
        return
    try:
        async with httpx.AsyncClient(timeout=10.0) as client:
            resp = await client.post(
                f"https://api.runpod.io/v2/{cfg.runpod_pod_id}/stop",
                headers={"Authorization": f"Bearer {cfg.runpod_api_key}"},
            )
            logger.info(f"RunPod pod stop requested: {resp.status_code}")
    except Exception as e:
        logger.error(f"Failed to stop RunPod pod: {e}")
157
+
158
+
159
+ # ── Request Models ───────────────────────────────────────────────────────────
160
class AnimateRequest(BaseModel):
    """Inbound animation request."""

    audio_b64: str                            # base64-encoded driving audio
    reference_image: str = "eve-NATURAL.png"  # reference face image filename
    force_strong: bool = False                # Feature 2: route to strongest pipeline first
    request_id: str = ""                      # caller-supplied correlation id


class EvaluateFailoverRequest(BaseModel):
    """Watchdog payload for an agent-enhanced failover decision."""

    pipeline_id: int
    motion_score: float
    consecutive_bad: int = 0  # consecutive below-threshold checks
    frame_count: int = 0      # informational; not forwarded to the decision
172
+
173
+
174
+ # ── Motion Scoring ───────────────────────────────────────────────────────────
175
def compute_motion_score(frames_b64: list[str]) -> float:
    """Compute motion score from base64-encoded frames. Returns 0.0–1.0.

    Decodes up to the first 15 frames, measures the mean absolute byte-level
    difference between consecutive frames, and normalizes by 25.0 into [0, 1].

    Returns 0.0 for fewer than two frames, and a neutral 0.5 if decoding or
    scoring raises (so a scoring bug never hard-fails the request).
    """
    if len(frames_b64) < 2:
        return 0.0
    try:
        # Decode only a bounded prefix to keep scoring cheap on long clips.
        decoded = []
        for f in frames_b64[:15]:
            raw = base64.b64decode(f)
            arr = np.frombuffer(raw, dtype=np.uint8)
            decoded.append(arr)

        diffs = []
        for i in range(1, len(decoded)):
            min_len = min(len(decoded[i - 1]), len(decoded[i]))
            if min_len == 0:
                # Bug fix: an empty frame used to feed empty arrays into
                # np.mean, yielding NaN (and a RuntimeWarning).  An empty
                # frame carries no motion information — skip the pair.
                continue
            diff = np.mean(np.abs(decoded[i][:min_len].astype(float) - decoded[i - 1][:min_len].astype(float)))
            diffs.append(diff)

        avg_diff = np.mean(diffs) if diffs else 0.0
        return float(min(1.0, avg_diff / 25.0))
    except Exception as e:
        logger.warning(f"Motion score computation failed: {e}")
        return 0.5
197
+
198
+
199
# ── Dual-Track Architecture ─────────────────────────────────────────────────
def _reset_stale_failures():
    """Reset pipelines that were marked failed during startup or transiently.
    This allows recovery when a pipeline comes online after the router starts.

    Pipelines with fail_count >= 10 are treated as persistently broken and
    stay FAILED.
    """
    for p in pipelines:
        if p.status == PipelineStatus.FAILED and p.fail_count < 10:
            p.status = PipelineStatus.COLD
            p.fail_count = 0
            logger.info(f"Reset stale failure for {p.name}")
208
+
209
+
210
class MainPipeline:
    """Track 1: Always tries the smallest/fastest available pipeline."""

    @staticmethod
    def get_primary(agent_order: list[int] | None = None) -> PipelineInfo | None:
        """Return the first non-FAILED pipeline in *agent_order* (or SIZE_ORDER).

        If every pipeline is FAILED, stale failures are reset once and the
        first pipeline in order is returned as a recovery attempt; None only
        when the registry itself is empty for the given order.
        """
        order = agent_order or SIZE_ORDER
        for pid in order:
            p = pipelines_by_id.get(pid)
            if p and p.status != PipelineStatus.FAILED:
                return p
        # If all failed, reset and try again (recovery from startup failures)
        _reset_stale_failures()
        for pid in order:
            p = pipelines_by_id.get(pid)
            if p:
                return p
        return None
227
+
228
+
229
class BackupRouter:
    """Track 2: Escalates through remaining pipelines on failure."""

    @staticmethod
    def get_escalation_order(exclude_pid: int, agent_order: list[int] | None = None) -> list[PipelineInfo]:
        """Return non-FAILED pipelines in order, excluding *exclude_pid*.

        If no healthy backup exists, stale failures are reset once and all
        remaining pipelines (still excluding *exclude_pid*) are returned.
        """
        order = agent_order or SIZE_ORDER
        result = []
        for pid in order:
            if pid == exclude_pid:
                continue
            p = pipelines_by_id.get(pid)
            if p and p.status != PipelineStatus.FAILED:
                result.append(p)
        # If empty, reset failures and try again
        if not result:
            _reset_stale_failures()
            for pid in order:
                if pid == exclude_pid:
                    continue
                p = pipelines_by_id.get(pid)
                if p:
                    result.append(p)
        return result
252
+
253
+
254
+ # ── Pipeline Communication ───────────────────────────────────────────────────
255
async def _call_pipeline(pipeline: PipelineInfo, request: AnimateRequest) -> dict | None:
    """Call a single pipeline and return result or None on failure.

    Side effects: records response time / motion score in the metrics store
    and updates the pipeline's status and fail_count.  A non-200 response
    counts as a failure but does NOT mark the pipeline FAILED — only a
    transport-level exception (timeout, connection error) does.
    """
    t0 = time.time()
    try:
        # 90s covers the heavier models' generation time.
        async with httpx.AsyncClient(timeout=90.0) as client:
            resp = await client.post(
                f"{pipeline.url}/animate",
                json={
                    "audio_b64": request.audio_b64,
                    "reference_image": request.reference_image,
                    "request_id": request.request_id,
                },
            )
            elapsed_ms = (time.time() - t0) * 1000
            if resp.status_code == 200:
                result = resp.json()
                pipeline.status = PipelineStatus.READY
                pipeline.fail_count = 0
                # Record success metrics
                motion = compute_motion_score(result.get("frames", []))
                metrics_store.record_call(pipeline.pid, elapsed_ms, motion, success=True)
                return result
            else:
                pipeline.fail_count += 1
                metrics_store.record_call(pipeline.pid, elapsed_ms, 0.0, success=False)
                logger.warning(f"Pipeline {pipeline.name} returned {resp.status_code}")
                return None
    except Exception as e:
        elapsed_ms = (time.time() - t0) * 1000
        pipeline.fail_count += 1
        pipeline.status = PipelineStatus.FAILED
        metrics_store.record_call(pipeline.pid, elapsed_ms, 0.0, success=False)
        logger.error(f"Pipeline {pipeline.name} call failed: {e}")
        return None
289
+
290
+
291
# ── Feature 1: Pre-Warm ─────────────────────────────────────────────────────
# pipeline id -> motion score from the most recent warmup run
prewarm_scores: dict[int, float] = {}


async def _prewarm_pipeline(pipeline: PipelineInfo):
    """Run a silent test animation to warm up the pipeline.

    On success the pipeline becomes READY and its load time is recorded.
    On any failure it returns to COLD (not FAILED), so routing will still
    try it later.
    """
    t0 = time.time()
    try:
        pipeline.status = PipelineStatus.WARMING
        async with httpx.AsyncClient(timeout=30.0) as client:
            resp = await client.post(
                f"{pipeline.url}/warmup",
                json={"reference_image": "eve-NATURAL.png"},
            )
            load_ms = (time.time() - t0) * 1000
            if resp.status_code == 200:
                data = resp.json()
                score = data.get("motion_score", 0.0)
                prewarm_scores[pipeline.pid] = score
                pipeline.status = PipelineStatus.READY
                pipeline.last_motion_score = score
                metrics_store.record_load_time(pipeline.pid, load_ms)
                logger.info(f"Pre-warm {pipeline.name} ({pipeline.size_gb}GB): score={score:.2f}, load={load_ms:.0f}ms")
            else:
                pipeline.status = PipelineStatus.COLD
    except Exception as e:
        logger.warning(f"Pre-warm {pipeline.name} failed: {e}")
        pipeline.status = PipelineStatus.COLD
319
+
320
+
321
+ # ── Main Animation Endpoint ─────────────────────────────────────────────────
322
+ @app.post("/animate")
323
+ async def animate(request: AnimateRequest):
324
+ """Route animation through dual-track architecture with agent intelligence."""
325
+ _touch_activity()
326
+ t0 = time.time()
327
+
328
+ # Get agent-recommended pipeline order
329
+ agent_order = await agent_manager.get_routing_order(force_strong=request.force_strong)
330
+ logger.info(f"Routing order: {[PIPELINE_NAMES[pid] for pid in agent_order]} (force_strong={request.force_strong})")
331
+
332
+ # Track 1: Try primary pipeline (smallest/fastest)
333
+ primary = MainPipeline.get_primary(agent_order)
334
+ if not primary:
335
+ return {"frames": [], "pipeline_used": "none", "error": "No pipelines available"}
336
+
337
+ logger.info(f"Track 1 — Primary: {primary.name} ({primary.size_gb}GB)")
338
+ result = await _call_pipeline(primary, request)
339
+
340
+ if result:
341
+ frames = result.get("frames", [])
342
+ if frames:
343
+ motion_score = compute_motion_score(frames)
344
+ primary.last_motion_score = motion_score
345
+
346
+ # Feature 4: Heartbeat Pre-Check
347
+ should_fail = await agent_manager.should_failover(motion_score, primary.pid)
348
+ if not should_fail:
349
+ elapsed = time.time() - t0
350
+ logger.info(f"Track 1 SUCCESS: {primary.name}, motion={motion_score:.3f}")
351
+
352
+ # Fire-and-forget: agent recommends next warmup
353
+ asyncio.create_task(_agent_warmup(primary.pid))
354
+
355
+ return {
356
+ "frames": frames,
357
+ "pipeline_used": primary.name,
358
+ "pipeline_id": primary.pid,
359
+ "motion_score": round(motion_score, 3),
360
+ "attempt": 1,
361
+ "elapsed_s": round(elapsed, 2),
362
+ "track": "main",
363
+ "force_strong_pipeline": request.force_strong,
364
+ }
365
+ else:
366
+ logger.warning(f"Track 1 QUALITY FAIL: {primary.name} motion={motion_score:.3f}, escalating to Track 2")
367
+ primary.status = PipelineStatus.FAILED
368
+
369
+ # Track 2: Backup Router — escalate through remaining pipelines
370
+ backups = BackupRouter.get_escalation_order(primary.pid, agent_order)
371
+ logger.info(f"Track 2 — Backups: {[p.name for p in backups]}")
372
+
373
+ for attempt, backup in enumerate(backups[:3]): # Feature 3: up to 3 retries
374
+ logger.info(f"Track 2 attempt {attempt + 1}: trying {backup.name} ({backup.size_gb}GB)")
375
+
376
+ result = await _call_pipeline(backup, request)
377
+ if result is None:
378
+ continue
379
+
380
+ frames = result.get("frames", [])
381
+ if not frames:
382
+ continue
383
+
384
+ motion_score = compute_motion_score(frames)
385
+ backup.last_motion_score = motion_score
386
+
387
+ should_fail = await agent_manager.should_failover(motion_score, backup.pid)
388
+ if should_fail:
389
+ logger.warning(f"Track 2: {backup.name} motion={motion_score:.3f}, trying next...")
390
+ backup.status = PipelineStatus.FAILED
391
+ continue
392
+
393
+ elapsed = time.time() - t0
394
+ logger.info(f"Track 2 SUCCESS: {backup.name}, motion={motion_score:.3f}")
395
+ return {
396
+ "frames": frames,
397
+ "pipeline_used": backup.name,
398
+ "pipeline_id": backup.pid,
399
+ "motion_score": round(motion_score, 3),
400
+ "attempt": attempt + 2,
401
+ "elapsed_s": round(elapsed, 2),
402
+ "track": "backup",
403
+ "force_strong_pipeline": request.force_strong,
404
+ }
405
+
406
+ # All failed
407
+ elapsed = time.time() - t0
408
+ return {
409
+ "frames": [],
410
+ "pipeline_used": "none",
411
+ "error": "All pipelines failed or produced static frames",
412
+ "elapsed_s": round(elapsed, 2),
413
+ "force_strong_pipeline": True, # Feature 5: CSS fallback
414
+ }
415
+
416
+
417
async def _agent_warmup(current_pid: int):
    """Fire-and-forget: ask agent which pipeline to pre-warm next.

    Only pre-warms a pipeline that is currently COLD; all errors are logged
    and swallowed since this runs as a background task.
    """
    try:
        warmup_pid = await agent_manager.recommend_warmup(current_pid)
        if warmup_pid is not None and warmup_pid in pipelines_by_id:
            p = pipelines_by_id[warmup_pid]
            if p.status == PipelineStatus.COLD:
                logger.info(f"Agent warmup: pre-warming {p.name}")
                await _prewarm_pipeline(p)
    except Exception as e:
        logger.warning(f"Agent warmup failed: {e}")
428
+
429
+
430
+ # ── Failover Endpoints ──────────────────────────────────────────────────────
431
+ @app.post("/failover")
432
+ async def failover(pipeline_id: int):
433
+ """Watchdog-triggered failover: mark pipeline as failed."""
434
+ _touch_activity()
435
+ p = pipelines_by_id.get(pipeline_id)
436
+ if p:
437
+ p.status = PipelineStatus.FAILED
438
+ p.fail_count += 1
439
+ logger.warning(f"Failover: {p.name} marked FAILED (count={p.fail_count})")
440
+ return {"status": "ok", "pipelines": [p.to_dict() for p in pipelines]}
441
+
442
+
443
+ @app.post("/evaluate-failover")
444
+ async def evaluate_failover(request: EvaluateFailoverRequest):
445
+ """Agent-enhanced failover decision (called by watchdog)."""
446
+ _touch_activity()
447
+ should = await agent_manager.should_failover(
448
+ motion_score=request.motion_score,
449
+ pipeline_id=request.pipeline_id,
450
+ consecutive_bad=request.consecutive_bad,
451
+ )
452
+ return {"should_failover": should}
453
+
454
+
455
+ # ── Status & Metrics Endpoints ───────────────────────────────────────────────
456
+ @app.get("/health")
457
+ async def health():
458
+ return {
459
+ "status": "ok",
460
+ "version": "2.1.0",
461
+ "pipelines": [p.to_dict() for p in pipelines],
462
+ "pipeline_order": [PIPELINE_NAMES[pid] for pid in SIZE_ORDER],
463
+ "prewarm_scores": prewarm_scores,
464
+ "agents": agent_manager.status(),
465
+ "idle_timeout_s": cfg.runpod_idle_timeout_s,
466
+ }
467
+
468
+
469
+ @app.get("/status")
470
+ async def status():
471
+ return {
472
+ "pipelines": [p.to_dict() for p in pipelines],
473
+ "pipeline_order": SIZE_ORDER,
474
+ "agents": agent_manager.status(),
475
+ }
476
+
477
+
478
+ @app.get("/metrics")
479
+ async def metrics():
480
+ """Pipeline performance metrics."""
481
+ return metrics_store.get_summary()
482
+
483
+
484
+ # ── Startup ─────────────────────────────────────────────────────────────────
485
+ @app.on_event("startup")
486
+ async def startup():
487
+ global _sleep_task
488
+ logger.info("=" * 60)
489
+ logger.info("EDEN Router v2.1.0 starting...")
490
+ logger.info(f" Pipeline order (size): {[f'{PIPELINE_NAMES[pid]} ({PIPELINE_SIZES_GB[pid]}GB)' for pid in SIZE_ORDER]}")
491
+ logger.info(f" Agents enabled: {agent_manager.enabled}")
492
+ logger.info(f" RunPod idle timeout: {cfg.runpod_idle_timeout_s}s")
493
+ logger.info("=" * 60)
494
+
495
+ # Feature 1: Pre-Warm in SIZE_ORDER (smallest first = fastest to warm)
496
+ logger.info("Pre-warming pipelines (smallest first)...")
497
+ for pid in SIZE_ORDER:
498
+ p = pipelines_by_id[pid]
499
+ await _prewarm_pipeline(p)
500
+ logger.info(f"Pre-warm complete. Scores: {prewarm_scores}")
501
+
502
+ # Start RunPod idle sleep timer
503
+ _sleep_task = asyncio.create_task(_idle_sleep_loop())
504
+ logger.info("RunPod idle sleep timer started (5 min)")
router/metrics.py ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""EDEN OS V2 — Pipeline Performance Metrics.

Tracks per-pipeline response times, motion scores, success rates,
and load times in a thread-safe sliding window.
"""

import threading
import time
from collections import deque
from dataclasses import dataclass, field


# Pipeline sizes in GB — determines failover order (smallest = fastest to load)
PIPELINE_SIZES_GB: dict[int, float] = {
    4: 1.26,   # LiveAvatar FP8
    0: 6.37,   # MuseTalk
    2: 6.45,   # Ditto
    3: 18.49,  # StableAvatar
    1: 85.02,  # InfiniteTalk
}

# Canonical failover order: smallest → largest.
# Must be kept in sync with PIPELINE_SIZES_GB (ascending by size).
SIZE_ORDER: list[int] = [4, 0, 2, 3, 1]

# Human-readable name for each pipeline id.
PIPELINE_NAMES: dict[int, str] = {
    0: "musetalk",
    1: "infinitetalk",
    2: "ditto",
    3: "stableavatar",
    4: "liveavatar",
}
34
@dataclass
class PipelineMetrics:
    """Sliding-window performance statistics for a single pipeline.

    Response times and motion scores are kept in bounded deques (last 50
    samples), so all averages and trends reflect recent behavior only.
    """

    pipeline_id: int
    name: str = ""
    model_size_gb: float = 0.0
    response_times_ms: deque = field(default_factory=lambda: deque(maxlen=50))
    motion_scores: deque = field(default_factory=lambda: deque(maxlen=50))
    success_count: int = 0
    failure_count: int = 0
    total_calls: int = 0
    last_load_time_ms: float = 0.0
    estimated_memory_mb: float = 0.0
    last_updated: float = 0.0

    @property
    def avg_response_time_ms(self) -> float:
        """Mean of the windowed response times; 0.0 when no samples exist."""
        samples = self.response_times_ms
        return sum(samples) / len(samples) if samples else 0.0

    @property
    def success_rate(self) -> float:
        """Fraction of recorded calls that succeeded; 0.0 before any call."""
        return self.success_count / self.total_calls if self.total_calls else 0.0

    @property
    def avg_motion_score(self) -> float:
        """Mean of the windowed motion scores; 0.0 when no samples exist."""
        scores = self.motion_scores
        return sum(scores) / len(scores) if scores else 0.0

    @property
    def trend(self) -> str:
        """Classify recent quality: compare last 10 motion scores to the prior 10.

        Returns one of "improving", "degrading", "stable", or
        "insufficient_data" (fewer than 10 samples). With 10–19 samples the
        baseline window overlaps the recent one, which biases toward "stable".
        """
        history = list(self.motion_scores)
        if len(history) < 10:
            return "insufficient_data"
        window = history[-10:]
        baseline = history[-20:-10] if len(history) >= 20 else history[:10]
        delta = sum(window) / len(window) - sum(baseline) / len(baseline)
        if delta > 0.05:
            return "improving"
        if delta < -0.05:
            return "degrading"
        return "stable"

    def to_dict(self) -> dict:
        """JSON-serializable snapshot of this pipeline's metrics."""
        snapshot = {
            "pipeline_id": self.pipeline_id,
            "name": self.name,
            "model_size_gb": self.model_size_gb,
        }
        snapshot["avg_response_time_ms"] = round(self.avg_response_time_ms, 1)
        snapshot["avg_motion_score"] = round(self.avg_motion_score, 3)
        snapshot["success_rate"] = round(self.success_rate, 3)
        snapshot["success_count"] = self.success_count
        snapshot["failure_count"] = self.failure_count
        snapshot["total_calls"] = self.total_calls
        snapshot["last_load_time_ms"] = round(self.last_load_time_ms, 1)
        snapshot["trend"] = self.trend
        snapshot["last_updated"] = self.last_updated
        return snapshot
100
+
101
+
102
class MetricsStore:
    """Thread-safe metrics storage for all pipelines.

    A single lock guards the id → PipelineMetrics map. Note that
    ``get_metrics``/``get_all_metrics`` hand back live PipelineMetrics
    objects; callers should treat them as read-only snapshots.
    """

    def __init__(self):
        self._lock = threading.Lock()
        # One entry per known pipeline id (0–4), pre-populated so recording
        # never has to create entries on the fly.
        self._metrics: dict[int, PipelineMetrics] = {
            pid: PipelineMetrics(
                pipeline_id=pid,
                name=PIPELINE_NAMES.get(pid, f"pipeline{pid}"),
                model_size_gb=PIPELINE_SIZES_GB.get(pid, 0.0),
            )
            for pid in range(5)
        }

    def record_call(self, pid: int, response_time_ms: float, motion_score: float, success: bool):
        """Fold one call's outcome into pipeline *pid*'s sliding windows.

        Unknown pipeline ids are silently ignored.
        """
        with self._lock:
            entry = self._metrics.get(pid)
            if not entry:
                return
            entry.response_times_ms.append(response_time_ms)
            entry.motion_scores.append(motion_score)
            entry.total_calls += 1
            if success:
                entry.success_count += 1
            else:
                entry.failure_count += 1
            entry.last_updated = time.time()

    def record_load_time(self, pid: int, ms: float):
        """Record the latest model load time and refresh the memory estimate."""
        with self._lock:
            entry = self._metrics.get(pid)
            if entry:
                entry.last_load_time_ms = ms
                # Heuristic: assume ~60% of the on-disk model size ends up
                # resident in memory — TODO confirm against real usage.
                entry.estimated_memory_mb = PIPELINE_SIZES_GB.get(pid, 0) * 1024 * 0.6

    def get_metrics(self, pid: int) -> PipelineMetrics | None:
        """Return the metrics object for *pid*, or None for unknown ids."""
        with self._lock:
            return self._metrics.get(pid)

    def get_all_metrics(self) -> dict[int, PipelineMetrics]:
        """Return a shallow copy of the id → metrics mapping."""
        with self._lock:
            return dict(self._metrics)

    def get_summary(self) -> dict:
        """Aggregate view across all pipelines: per-pipeline dicts plus totals."""
        with self._lock:
            per_pipeline = {pid: m.to_dict() for pid, m in self._metrics.items()}
            grand_total = sum(m.total_calls for m in self._metrics.values())
            return {
                "pipelines": per_pipeline,
                "size_order": SIZE_ORDER,
                "total_calls": grand_total,
            }