Spaces:

Shrot102
/

llmopt-server

Running

App Files Files Community

Shrot101 committed 8 days ago

Commit

bd238e9

1 Parent(s): 2c126c1

feat: initialize core LLMOpt framework including model routing, optimization engines, and frontend dashboard infrastructure.

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.dockerignore +9 -0
.env.example +38 -8
.gitignore +8 -0
Dockerfile +0 -2
data/model_registry.json +20 -20
data/model_registry_v2.json +507 -0
docs/design.md +361 -0
frontend/.gitignore +24 -0
frontend/README.md +50 -0
frontend/eslint.config.js +28 -0
frontend/index.html +16 -0
frontend/package-lock.json +0 -0
frontend/package.json +38 -0
frontend/public/vite.svg +1 -0
frontend/src/App.css +42 -0
frontend/src/App.tsx +217 -0
frontend/src/api.ts +153 -0
frontend/src/assets/react.svg +1 -0
frontend/src/index.css +68 -0
frontend/src/main.tsx +10 -0
frontend/src/pages/Analytics.tsx +385 -0
frontend/src/pages/Login.tsx +209 -0
frontend/src/pages/ModelRegistry.tsx +354 -0
frontend/src/pages/Playground.tsx +606 -0
frontend/src/pages/Settings.tsx +349 -0
frontend/src/store.ts +88 -0
frontend/src/theme.css +1982 -0
frontend/src/types.ts +170 -0
frontend/src/vite-env.d.ts +1 -0
frontend/tsconfig.app.json +26 -0
frontend/tsconfig.json +7 -0
frontend/tsconfig.node.json +24 -0
frontend/vite.config.ts +40 -0
llmopt/analyzer/query_analyzer.py +15 -12
llmopt/api/app.py +580 -13
llmopt/api/crud.py +59 -0
llmopt/api/security.py +186 -0
llmopt/cache/redis_client.py +41 -0
llmopt/core.py +208 -37
llmopt/db/models.py +47 -0
llmopt/db/session.py +34 -0
llmopt/engine/__init__.py +15 -0
llmopt/engine/llmopt_engine.py +275 -0
llmopt/engine/optimization_engine.py +9 -2
llmopt/engine/utility_engine.py +665 -0
llmopt/registry/__init__.py +4 -0
llmopt/registry/hybrid_updater.py +267 -0
llmopt/router/model_router.py +37 -19
llmopt/updater/__init__.py +4 -0
llmopt/updater/adaptive_updater.py +268 -0

.dockerignore ADDED Viewed

	@@ -0,0 +1,9 @@

+.git
+.github
+__pycache__/
+*.py[cod]
+.pytest_cache/
+llmopt.db
+config/.env
+tests/
+.env

.env.example CHANGED Viewed

@@ -1,12 +1,42 @@
 # LLMOpt Environment Variables
-# OpenAI
-OPENAI_API_KEY=your_openai_api_key_here
-# Anthropic
-ANTHROPIC_API_KEY=your_anthropic_api_key_here
-# Redis Semantic Cache (V2)
-# Option 1: Local Docker -> redis://localhost:6379
-# Option 2: Redis Cloud -> redis://default:password@endpoint.redis-cloud.com:12345
-REDIS_URL=redis://localhost:6379

 # LLMOpt Environment Variables
+# ==========================================
+# 1. Database & Redis Session Cache (Production)
+# ==========================================
+# PostgreSQL Database URL (e.g. Neon, Supabase, etc.)
+# If not set, LLMOpt defaults to local SQLite.
+DATABASE_URL=postgresql://user:password@ep-cool-fog-12345.aws.neon.tech/neondb?sslmode=require
+# Upstash Redis or Redis Cloud connection string (Mandatory for sessions)
+REDIS_URL=redis://default:password@endpoint.upstash.io:30000
+# URL-safe base64-encoded 32-byte Fernet key used to encrypt user API keys in transit/at rest.
+# Generate with: python -c "from cryptography.fernet import Fernet; print(Fernet.generate_key().decode())"
+SESSION_SECRET_KEY=generate_your_own_32_byte_base64_key_here
+# Session Time-to-Live (TTL) in seconds (default: 7200 seconds / 2 hours)
+SESSION_TTL=7200
+# ==========================================
+# 2. Third-Party OAuth Sign-In (Optional)
+# ==========================================
+# Google OAuth
+GOOGLE_CLIENT_ID=your_google_client_id.apps.googleusercontent.com
+GOOGLE_CLIENT_SECRET=GOCSPX-your_google_client_secret_here
+# GitHub OAuth
+GITHUB_CLIENT_ID=your_github_client_id_here
+GITHUB_CLIENT_SECRET=your_github_client_secret_here
+# The base URL of the frontend for OAuth redirect callbacks (e.g. your Vercel URL)
+REDIRECT_URI_HOST=https://your-frontend.vercel.app
+# ==========================================
+# 3. Direct LLM Provider Keys (Fallback / Local run only)
+# ==========================================
+# In Bring Your Own Key (BYOK) mode, these are not stored on the server.
+# Provide them here only if running locally or using server-wide default keys.
+OPENAI_API_KEY=your_openai_api_key_here
+ANTHROPIC_API_KEY=your_anthropic_api_key_here
+GEMINI_API_KEY=your_gemini_api_key_here
+OLLAMA_API_BASE=http://localhost:11434

.gitignore CHANGED Viewed

@@ -51,3 +51,11 @@ coverage.xml
 .idea/
 *.swp
 *.swo

 .idea/
 *.swp
 *.swo
+# Runtime/Database files
+llmopt.db
+data/runtime_stats.json
+# Local Environment secrets
+/config/.env

Dockerfile CHANGED Viewed

@@ -2,10 +2,8 @@ FROM python:3.10-slim
 # Install system dependencies
 # build-essential is needed for some ML package wheels
-# redis-server is needed for the local Semantic Caching layer
 RUN apt-get update && apt-get install -y \
     build-essential \
-    redis-server \
     && rm -rf /var/lib/apt/lists/*
 # Set up a new user named "user" with user ID 1000 (Mandatory for Hugging Face Spaces)

 # Install system dependencies
 # build-essential is needed for some ML package wheels
 RUN apt-get update && apt-get install -y \
     build-essential \
     && rm -rf /var/lib/apt/lists/*
 # Set up a new user named "user" with user ID 1000 (Mandatory for Hugging Face Spaces)

data/model_registry.json CHANGED Viewed

@@ -84,32 +84,32 @@
     "notes": "Cheapest Anthropic model. Good for classification, summarization."
   },
   {
-    "model_name": "gemini-1.5-flash",
     "provider": "google",
-    "input_cost_per_1k": 0.000075,
-    "output_cost_per_1k": 0.000300,
-    "context_window": 1000000,
-    "reasoning_score": 0.74,
-    "coding_score": 0.74,
-    "math_score": 0.70,
-    "instruction_following_score": 0.78,
-    "latency_score": 0.88,
-    "max_complexity": 0.72,
-    "notes": "Extremely cheap and fast. Long context support."
   },
   {
-    "model_name": "gemini-1.5-pro",
     "provider": "google",
     "input_cost_per_1k": 0.00125,
-    "output_cost_per_1k": 0.005,
-    "context_window": 2000000,
-    "reasoning_score": 0.88,
-    "coding_score": 0.87,
-    "math_score": 0.85,
-    "instruction_following_score": 0.90,
-    "latency_score": 0.72,
     "max_complexity": 0.95,
-    "notes": "Massive context window. Great for long-doc analysis."
   },
   {
     "model_name": "mistral-small-latest",

     "notes": "Cheapest Anthropic model. Good for classification, summarization."
   },
   {
+    "model_name": "gemini-2.5-flash",
     "provider": "google",
+    "input_cost_per_1k": 0.00015,
+    "output_cost_per_1k": 0.0006,
+    "context_window": 1048576,
+    "reasoning_score": 0.83,
+    "coding_score": 0.82,
+    "math_score": 0.84,
+    "instruction_following_score": 0.85,
+    "latency_score": 0.90,
+    "max_complexity": 0.83,
+    "notes": "Very cheap and fast Gemini 2.5 model."
   },
   {
+    "model_name": "gemini-2.5-pro",
     "provider": "google",
     "input_cost_per_1k": 0.00125,
+    "output_cost_per_1k": 0.010,
+    "context_window": 1048576,
+    "reasoning_score": 0.94,
+    "coding_score": 0.92,
+    "math_score": 0.93,
+    "instruction_following_score": 0.92,
+    "latency_score": 0.75,
     "max_complexity": 0.95,
+    "notes": "Powerful Gemini 2.5 model with massive context window."
   },
   {
     "model_name": "mistral-small-latest",

data/model_registry_v2.json ADDED Viewed

	@@ -0,0 +1,507 @@

+{
+  "_meta": {
+    "version": "2.0.0",
+    "description": "LLMOpt Utility-Based Model Registry. Scores sourced from LMSYS Arena, Artificial Analysis, HumanEval, MMLU-Pro, MATH, IFEval benchmarks. Pricing from provider docs + OpenRouter. Updated via hybrid fetcher.",
+    "last_updated": "2025-01-01T00:00:00Z",
+    "score_range": "All capability scores normalized 0.0–1.0",
+    "pricing_unit": "USD per 1000 tokens"
+  },
+  "models": {
+    "gpt-4o": {
+      "provider": "openai",
+      "model_family": "gpt-4o",
+      "context_window": 128000,
+      "max_output_tokens": 16384,
+      "input_cost_per_1k": 0.0025,
+      "output_cost_per_1k": 0.010,
+      "avg_latency_ms": 1800,
+      "tokens_per_second": 80,
+      "capabilities": {
+        "reasoning": 0.92,
+        "coding": 0.91,
+        "math": 0.87,
+        "creativity": 0.88,
+        "factuality": 0.89,
+        "instruction_following": 0.94,
+        "long_context": 0.85,
+        "multilingual": 0.84,
+        "tool_use": 0.93,
+        "summarization": 0.90,
+        "conversation": 0.91
+      },
+      "features": {
+        "tool_calling": true,
+        "json_mode": true,
+        "streaming": true,
+        "image_input": true,
+        "function_calling": true
+      },
+      "benchmark_sources": ["lmsys_arena", "mmlu_pro", "humaneval", "math_benchmark", "ifeval"],
+      "pricing_last_updated": "2025-01-01T00:00:00Z",
+      "live_patch": {}
+    },
+    "gpt-4o-mini": {
+      "provider": "openai",
+      "model_family": "gpt-4o",
+      "context_window": 128000,
+      "max_output_tokens": 16384,
+      "input_cost_per_1k": 0.00015,
+      "output_cost_per_1k": 0.0006,
+      "avg_latency_ms": 900,
+      "tokens_per_second": 120,
+      "capabilities": {
+        "reasoning": 0.78,
+        "coding": 0.76,
+        "math": 0.72,
+        "creativity": 0.74,
+        "factuality": 0.75,
+        "instruction_following": 0.82,
+        "long_context": 0.76,
+        "multilingual": 0.72,
+        "tool_use": 0.80,
+        "summarization": 0.78,
+        "conversation": 0.82
+      },
+      "features": {
+        "tool_calling": true,
+        "json_mode": true,
+        "streaming": true,
+        "image_input": true,
+        "function_calling": true
+      },
+      "benchmark_sources": ["lmsys_arena", "mmlu_pro", "humaneval"],
+      "pricing_last_updated": "2025-01-01T00:00:00Z",
+      "live_patch": {}
+    },
+    "gpt-4.1": {
+      "provider": "openai",
+      "model_family": "gpt-4.1",
+      "context_window": 1047576,
+      "max_output_tokens": 32768,
+      "input_cost_per_1k": 0.002,
+      "output_cost_per_1k": 0.008,
+      "avg_latency_ms": 1600,
+      "tokens_per_second": 85,
+      "capabilities": {
+        "reasoning": 0.93,
+        "coding": 0.95,
+        "math": 0.88,
+        "creativity": 0.87,
+        "factuality": 0.90,
+        "instruction_following": 0.95,
+        "long_context": 0.97,
+        "multilingual": 0.85,
+        "tool_use": 0.95,
+        "summarization": 0.92,
+        "conversation": 0.90
+      },
+      "features": {
+        "tool_calling": true,
+        "json_mode": true,
+        "streaming": true,
+        "image_input": true,
+        "function_calling": true
+      },
+      "benchmark_sources": ["openai_evals", "swe_bench", "humaneval"],
+      "pricing_last_updated": "2025-01-01T00:00:00Z",
+      "live_patch": {}
+    },
+    "gpt-4.1-mini": {
+      "provider": "openai",
+      "model_family": "gpt-4.1",
+      "context_window": 1047576,
+      "max_output_tokens": 32768,
+      "input_cost_per_1k": 0.0004,
+      "output_cost_per_1k": 0.0016,
+      "avg_latency_ms": 750,
+      "tokens_per_second": 140,
+      "capabilities": {
+        "reasoning": 0.80,
+        "coding": 0.82,
+        "math": 0.75,
+        "creativity": 0.76,
+        "factuality": 0.78,
+        "instruction_following": 0.85,
+        "long_context": 0.92,
+        "multilingual": 0.74,
+        "tool_use": 0.83,
+        "summarization": 0.80,
+        "conversation": 0.83
+      },
+      "features": {
+        "tool_calling": true,
+        "json_mode": true,
+        "streaming": true,
+        "image_input": true,
+        "function_calling": true
+      },
+      "benchmark_sources": ["openai_evals"],
+      "pricing_last_updated": "2025-01-01T00:00:00Z",
+      "live_patch": {}
+    },
+    "claude-opus-4-5": {
+      "provider": "anthropic",
+      "model_family": "claude-4",
+      "context_window": 200000,
+      "max_output_tokens": 32000,
+      "input_cost_per_1k": 0.015,
+      "output_cost_per_1k": 0.075,
+      "avg_latency_ms": 2500,
+      "tokens_per_second": 65,
+      "capabilities": {
+        "reasoning": 0.96,
+        "coding": 0.95,
+        "math": 0.91,
+        "creativity": 0.95,
+        "factuality": 0.93,
+        "instruction_following": 0.96,
+        "long_context": 0.94,
+        "multilingual": 0.87,
+        "tool_use": 0.94,
+        "summarization": 0.95,
+        "conversation": 0.96
+      },
+      "features": {
+        "tool_calling": true,
+        "json_mode": true,
+        "streaming": true,
+        "image_input": true,
+        "function_calling": true
+      },
+      "benchmark_sources": ["lmsys_arena", "mmlu_pro", "swe_bench", "humaneval", "math_benchmark"],
+      "pricing_last_updated": "2025-01-01T00:00:00Z",
+      "live_patch": {}
+    },
+    "claude-sonnet-4-5": {
+      "provider": "anthropic",
+      "model_family": "claude-4",
+      "context_window": 200000,
+      "max_output_tokens": 16000,
+      "input_cost_per_1k": 0.003,
+      "output_cost_per_1k": 0.015,
+      "avg_latency_ms": 1400,
+      "tokens_per_second": 90,
+      "capabilities": {
+        "reasoning": 0.91,
+        "coding": 0.93,
+        "math": 0.86,
+        "creativity": 0.90,
+        "factuality": 0.90,
+        "instruction_following": 0.93,
+        "long_context": 0.91,
+        "multilingual": 0.84,
+        "tool_use": 0.92,
+        "summarization": 0.91,
+        "conversation": 0.92
+      },
+      "features": {
+        "tool_calling": true,
+        "json_mode": true,
+        "streaming": true,
+        "image_input": true,
+        "function_calling": true
+      },
+      "benchmark_sources": ["lmsys_arena", "mmlu_pro", "swe_bench", "humaneval"],
+      "pricing_last_updated": "2025-01-01T00:00:00Z",
+      "live_patch": {}
+    },
+    "claude-haiku-3-5": {
+      "provider": "anthropic",
+      "model_family": "claude-3.5",
+      "context_window": 200000,
+      "max_output_tokens": 8192,
+      "input_cost_per_1k": 0.0008,
+      "output_cost_per_1k": 0.004,
+      "avg_latency_ms": 700,
+      "tokens_per_second": 150,
+      "capabilities": {
+        "reasoning": 0.74,
+        "coding": 0.77,
+        "math": 0.68,
+        "creativity": 0.72,
+        "factuality": 0.73,
+        "instruction_following": 0.80,
+        "long_context": 0.78,
+        "multilingual": 0.72,
+        "tool_use": 0.78,
+        "summarization": 0.76,
+        "conversation": 0.80
+      },
+      "features": {
+        "tool_calling": true,
+        "json_mode": true,
+        "streaming": true,
+        "image_input": true,
+        "function_calling": true
+      },
+      "benchmark_sources": ["lmsys_arena", "humaneval"],
+      "pricing_last_updated": "2025-01-01T00:00:00Z",
+      "live_patch": {}
+    },
+    "gemini-2.5-pro": {
+      "provider": "google",
+      "model_family": "gemini-2.5",
+      "context_window": 1048576,
+      "max_output_tokens": 65536,
+      "input_cost_per_1k": 0.00125,
+      "output_cost_per_1k": 0.010,
+      "avg_latency_ms": 2000,
+      "tokens_per_second": 75,
+      "capabilities": {
+        "reasoning": 0.94,
+        "coding": 0.92,
+        "math": 0.93,
+        "creativity": 0.88,
+        "factuality": 0.91,
+        "instruction_following": 0.92,
+        "long_context": 0.98,
+        "multilingual": 0.90,
+        "tool_use": 0.89,
+        "summarization": 0.92,
+        "conversation": 0.89
+      },
+      "features": {
+        "tool_calling": true,
+        "json_mode": true,
+        "streaming": true,
+        "image_input": true,
+        "function_calling": true
+      },
+      "benchmark_sources": ["lmsys_arena", "mmlu_pro", "math_benchmark", "humaneval"],
+      "pricing_last_updated": "2025-01-01T00:00:00Z",
+      "live_patch": {}
+    },
+    "gemini-2.5-flash": {
+      "provider": "google",
+      "model_family": "gemini-2.5",
+      "context_window": 1048576,
+      "max_output_tokens": 65536,
+      "input_cost_per_1k": 0.00015,
+      "output_cost_per_1k": 0.0006,
+      "avg_latency_ms": 800,
+      "tokens_per_second": 130,
+      "capabilities": {
+        "reasoning": 0.83,
+        "coding": 0.82,
+        "math": 0.84,
+        "creativity": 0.80,
+        "factuality": 0.82,
+        "instruction_following": 0.85,
+        "long_context": 0.95,
+        "multilingual": 0.84,
+        "tool_use": 0.82,
+        "summarization": 0.83,
+        "conversation": 0.84
+      },
+      "features": {
+        "tool_calling": true,
+        "json_mode": true,
+        "streaming": true,
+        "image_input": true,
+        "function_calling": true
+      },
+      "benchmark_sources": ["lmsys_arena", "mmlu_pro", "humaneval"],
+      "pricing_last_updated": "2025-01-01T00:00:00Z",
+      "live_patch": {}
+    },
+    "mistral-large-latest": {
+      "provider": "mistral",
+      "model_family": "mistral-large",
+      "context_window": 128000,
+      "max_output_tokens": 8192,
+      "input_cost_per_1k": 0.003,
+      "output_cost_per_1k": 0.009,
+      "avg_latency_ms": 1600,
+      "tokens_per_second": 75,
+      "capabilities": {
+        "reasoning": 0.82,
+        "coding": 0.82,
+        "math": 0.78,
+        "creativity": 0.78,
+        "factuality": 0.80,
+        "instruction_following": 0.84,
+        "long_context": 0.78,
+        "multilingual": 0.88,
+        "tool_use": 0.82,
+        "summarization": 0.82,
+        "conversation": 0.82
+      },
+      "features": {
+        "tool_calling": true,
+        "json_mode": true,
+        "streaming": true,
+        "image_input": false,
+        "function_calling": true
+      },
+      "benchmark_sources": ["lmsys_arena", "mmlu_pro", "humaneval"],
+      "pricing_last_updated": "2025-01-01T00:00:00Z",
+      "live_patch": {}
+    },
+    "mistral-small-latest": {
+      "provider": "mistral",
+      "model_family": "mistral-small",
+      "context_window": 32000,
+      "max_output_tokens": 8192,
+      "input_cost_per_1k": 0.0001,
+      "output_cost_per_1k": 0.0003,
+      "avg_latency_ms": 700,
+      "tokens_per_second": 140,
+      "capabilities": {
+        "reasoning": 0.68,
+        "coding": 0.68,
+        "math": 0.62,
+        "creativity": 0.66,
+        "factuality": 0.65,
+        "instruction_following": 0.72,
+        "long_context": 0.60,
+        "multilingual": 0.80,
+        "tool_use": 0.68,
+        "summarization": 0.70,
+        "conversation": 0.72
+      },
+      "features": {
+        "tool_calling": true,
+        "json_mode": true,
+        "streaming": true,
+        "image_input": false,
+        "function_calling": true
+      },
+      "benchmark_sources": ["lmsys_arena"],
+      "pricing_last_updated": "2025-01-01T00:00:00Z",
+      "live_patch": {}
+    },
+    "deepseek-chat": {
+      "provider": "deepseek",
+      "model_family": "deepseek-v3",
+      "context_window": 64000,
+      "max_output_tokens": 8192,
+      "input_cost_per_1k": 0.00014,
+      "output_cost_per_1k": 0.00028,
+      "avg_latency_ms": 1200,
+      "tokens_per_second": 95,
+      "capabilities": {
+        "reasoning": 0.87,
+        "coding": 0.90,
+        "math": 0.91,
+        "creativity": 0.78,
+        "factuality": 0.82,
+        "instruction_following": 0.85,
+        "long_context": 0.72,
+        "multilingual": 0.75,
+        "tool_use": 0.82,
+        "summarization": 0.82,
+        "conversation": 0.82
+      },
+      "features": {
+        "tool_calling": true,
+        "json_mode": true,
+        "streaming": true,
+        "image_input": false,
+        "function_calling": true
+      },
+      "benchmark_sources": ["lmsys_arena", "humaneval", "math_benchmark", "mmlu_pro"],
+      "pricing_last_updated": "2025-01-01T00:00:00Z",
+      "live_patch": {}
+    },
+    "deepseek-reasoner": {
+      "provider": "deepseek",
+      "model_family": "deepseek-r1",
+      "context_window": 64000,
+      "max_output_tokens": 8192,
+      "input_cost_per_1k": 0.00055,
+      "output_cost_per_1k": 0.00219,
+      "avg_latency_ms": 3500,
+      "tokens_per_second": 40,
+      "capabilities": {
+        "reasoning": 0.95,
+        "coding": 0.91,
+        "math": 0.96,
+        "creativity": 0.72,
+        "factuality": 0.88,
+        "instruction_following": 0.83,
+        "long_context": 0.70,
+        "multilingual": 0.72,
+        "tool_use": 0.75,
+        "summarization": 0.78,
+        "conversation": 0.72
+      },
+      "features": {
+        "tool_calling": false,
+        "json_mode": true,
+        "streaming": true,
+        "image_input": false,
+        "function_calling": false
+      },
+      "benchmark_sources": ["aime", "math_benchmark", "humaneval", "mmlu_pro"],
+      "pricing_last_updated": "2025-01-01T00:00:00Z",
+      "live_patch": {}
+    },
+    "llama3.3-70b": {
+      "provider": "ollama",
+      "model_family": "llama3",
+      "context_window": 128000,
+      "max_output_tokens": 8192,
+      "input_cost_per_1k": 0.0,
+      "output_cost_per_1k": 0.0,
+      "avg_latency_ms": 2000,
+      "tokens_per_second": 50,
+      "capabilities": {
+        "reasoning": 0.80,
+        "coding": 0.79,
+        "math": 0.74,
+        "creativity": 0.78,
+        "factuality": 0.76,
+        "instruction_following": 0.82,
+        "long_context": 0.76,
+        "multilingual": 0.72,
+        "tool_use": 0.76,
+        "summarization": 0.80,
+        "conversation": 0.82
+      },
+      "features": {
+        "tool_calling": true,
+        "json_mode": true,
+        "streaming": true,
+        "image_input": false,
+        "function_calling": true
+      },
+      "benchmark_sources": ["lmsys_arena", "mmlu_pro", "humaneval"],
+      "pricing_last_updated": "2025-01-01T00:00:00Z",
+      "live_patch": {}
+    },
+    "llama3.2-vision": {
+      "provider": "ollama",
+      "model_family": "llama3",
+      "context_window": 128000,
+      "max_output_tokens": 8192,
+      "input_cost_per_1k": 0.0,
+      "output_cost_per_1k": 0.0,
+      "avg_latency_ms": 2500,
+      "tokens_per_second": 40,
+      "capabilities": {
+        "reasoning": 0.70,
+        "coding": 0.66,
+        "math": 0.62,
+        "creativity": 0.70,
+        "factuality": 0.68,
+        "instruction_following": 0.74,
+        "long_context": 0.70,
+        "multilingual": 0.65,
+        "tool_use": 0.65,
+        "summarization": 0.72,
+        "conversation": 0.75
+      },
+      "features": {
+        "tool_calling": false,
+        "json_mode": true,
+        "streaming": true,
+        "image_input": true,
+        "function_calling": false
+      },
+      "benchmark_sources": ["lmsys_arena"],
+      "pricing_last_updated": "2025-01-01T00:00:00Z",
+      "live_patch": {}
+    }
+  }
+}

docs/design.md ADDED Viewed

	@@ -0,0 +1,361 @@

+# LLMOpt UI Design Specification
+## Project Overview
+**LLMOpt** is an enterprise-grade LLM middleware that intelligently routes queries to the most cost-effective model. The UI must communicate: intelligence, efficiency, cost savings, and observability — all in real time.
+---
+## Aesthetic Direction: "Dark Industrial Dashboard"
+**Concept**: Think Bloomberg Terminal meets cyberpunk command center. Utilitarian precision with electric accents. Every pixel earns its place. Data-dense but crystal clear.
+**Mood**: Authoritative. Efficient. Technical. Like a cockpit for LLM operations.
+**One unforgettable thing**: A real-time animated pipeline that lights up as a query flows through each stage — users *watch* the optimization happen.
+---
+## Color Palette
+```
+--bg-base:        #0A0B0E   /* Near-black base */
+--bg-surface:     #111318   /* Card/panel surface */
+--bg-elevated:    #1A1D26   /* Elevated panels */
+--bg-border:      #252A38   /* Borders */
+--accent-cyan:    #00E5FF   /* Primary accent — pipeline glow */
+--accent-green:   #00FF94   /* Success, savings, cache hits */
+--accent-amber:   #FFB300   /* Warnings, "balanced" tier */
+--accent-red:     #FF3D57   /* Errors, expensive routes */
+--accent-purple:  #7C4DFF   /* ML / AI stage indicators */
+--text-primary:   #E8ECF4   /* Main text */
+--text-secondary: #7A8299   /* Labels, metadata */
+--text-muted:     #3D4357   /* Disabled / placeholder */
+--gradient-glow:  linear-gradient(135deg, #00E5FF22, #7C4DFF11)
+```
+---
+## Typography
+```
+Display / Headers : "JetBrains Mono" (monospace — fits the terminal DNA)
+Body / UI Labels  : "DM Sans" (clean, readable, modern)
+Data / Numbers    : "JetBrains Mono" (monospace alignment for metrics)
+Code Blocks       : "Fira Code" with ligatures
+Sizes:
+  --text-xs:   11px
+  --text-sm:   13px
+  --text-base: 15px
+  --text-lg:   18px
+  --text-xl:   24px
+  --text-2xl:  32px
+  --text-3xl:  48px
+```
+---
+## Layout Structure
+```
+┌─────────────────────────────────────────────────────┐
+│  TOPBAR: Logo | Nav Tabs | Status Indicators         │
+├──────────────┬──────────────────────────────────────┤
+│              │                                       │
+│  LEFT PANEL  │         MAIN CONTENT AREA             │
+│  (280px)     │                                       │
+│              │  [Query Input + Pipeline Visualizer]  │
+│  • Config    │  [Response Output]                    │
+│  • Budget    │  [Explainability Card]                │
+│  • Providers │                                       │
+│  • History   │                                       │
+│              ├──────────────────────────────────────┤
+│              │         METRICS STRIP (bottom)        │
+│              │  Cost | Tokens | Latency | Savings    │
+└──────────────┴──────────────────────────────────────┘
+```
+---
+## Page / View Breakdown
+### 1. `/` — Playground (Main View)
+The core query interface. This is what users interact with daily.
+**Components:**
+#### Query Input Box
+- Large dark textarea with subtle cyan border-glow on focus
+- Font: JetBrains Mono
+- Placeholder: `// Enter your query...`
+- Right side: Budget Mode selector (3 pills: `CHEAP` / `BALANCED` / `QUALITY`)
+- Bottom bar inside textarea: token count estimate on the left, `[RUN]` button (cyan) aligned to the far right
+#### Pipeline Visualizer (HERO COMPONENT)
+A horizontal animated flow diagram that activates on query submission:
+```
+[CACHE] ──► [NLI ANALYZE] ──► [GBR ESTIMATE] ──► [BAYESIAN OPT] ──► [COMPRESS] ──► [ROUTE] ──► [LLM]
+```
+- Each stage is a pill/node with icon + label
+- Inactive: `--bg-elevated` fill, `--text-muted` text
+- Active (processing): Cyan pulsing border + glow, animated spinner inside
+- Complete: Green fill, checkmark icon, latency badge underneath (e.g., `12ms`)
+- Skipped (cache hit): Amber fill with "CACHED" label — flow skips to end
+- Connecting lines animate left-to-right as each stage completes
+**Stage Icons:**
+| Stage | Icon |
+|-------|------|
+| Cache | ⚡ (lightning) |
+| NLI Analyze | 🔍 |
+| GBR Estimate | 📊 |
+| Bayesian Opt | ⚙️ |
+| Compress | 🗜️ |
+| Route | 🔀 |
+| LLM | 🤖 |
+#### Response Panel
+- Appears below pipeline after completion
+- Markdown rendering with syntax highlighting (dark theme)
+- Header strip: `Model: claude-3-5-haiku` | `Provider: Anthropic` | copy button
+- Subtle fade-in animation on arrival
+#### Explainability Card (collapsible)
+- Monospace font block styled like a terminal output
+- Cyan `>` prefix on each line
+- Shows: complexity score, domain, selected model, scoring rationale, cost saved
+- Toggle with `[EXPLAIN]` button next to Run
+---
+### 2. `/analytics` — Observability Dashboard
+**Components:**
+#### KPI Row (top 4 cards)
+```
+┌──────────────┐ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐
+│ Total Saved  │ │ Avg Latency  │ │ Cache Hit %  │ │ Total Queries│
+│  $12.48      │ │   840ms      │ │   34%        │ │   1,204      │
+│ ↑ 18% today  │ │ ↓ 12%        │ │ ↑ 5%         │ │              │
+└──────────────┘ └──────────────┘ └──────────────┘ └──────────────┘
+```
+- Micro sparklines inside each card (7-day trend)
+- Green arrows = good, Red = bad
+#### Model Usage Breakdown
+- Horizontal stacked bar chart
+- Each provider has a distinct color segment
+- Hover shows: model name, % of queries, avg cost
+#### Cost Over Time
+- Area chart, cyan fill with glow
+- X-axis: time (last 7 days / 30 days toggle)
+- Y-axis: USD
+- Dotted line showing "cost if all GPT-4o" — dramatic visual of savings
+#### Query Log Table
+```
+Timestamp | Query Preview | Complexity | Model Used | Cost | Latency | Score
+```
+- Zebra striping with `--bg-surface` / `--bg-elevated`
+- Complexity shown as colored bar (green → amber → red)
+- Clickable rows expand to show full explainability output
+---
+### 3. `/models` — Model Registry
+**Components:**
+#### Model Cards Grid (2-col)
+Each card:
+- Model name (large, monospace)
+- Provider badge (colored pill)
+- Capability score as radial gauge (0–1)
+- Pricing: Input / Output per 1k tokens
+- "Best For" tag
+- Toggle: Enable / Disable this model
+#### Comparison Table
+- Sortable columns: Capability, Input Cost, Output Cost, Best For
+- Highlight the "Best Value" row with cyan left border
+---
+### 4. `/settings` — Configuration
+**Components:**
+- API Key inputs per provider (masked, with test button)
+- Redis URL config
+- Budget weight sliders (α Cost / β Tokens / γ Quality) with live formula display
+- Compression toggle + threshold slider
+- Evaluation (LLM-as-Judge) toggle
+---
+## Component Design Details
+### Sidebar Navigation
+```
+┌─────────────────┐
+│  ⚡ LLMOpt      │  ← Logo: monospace, cyan accent
+├─────────────────┤
+│  ▸ Playground   │  ← Active: cyan left border + bg highlight
+│  ▸ Analytics    │
+│  ▸ Models       │
+│  ▸ Settings     │
+├─────────────────┤
+│  SYSTEM STATUS  │
+│  ● Redis    OK  │  ← Green dot
+│  ● ML Deps  OK  │
+│  ● Cache   34%  │
+└─────────────────┘
+```
+### Budget Mode Pills
+```
+[ CHEAP ]  [ BALANCED ]  [ QUALITY ]
+```
+- Inactive: `--bg-elevated` + `--text-secondary`
+- Active CHEAP: Green fill
+- Active BALANCED: Amber fill
+- Active QUALITY: Cyan fill
+### Metric Cards
+```css
+.metric-card {
+  background: var(--bg-surface);
+  border: 1px solid var(--bg-border);
+  border-radius: 8px;
+  padding: 20px 24px;
+  position: relative;
+  overflow: hidden;
+}
+.metric-card::before {
+  content: '';
+  position: absolute;
+  top: 0; left: 0; right: 0;
+  height: 2px;
+  background: var(--accent-cyan);  /* or green/amber/purple per card */
+}
+```
+### Status Dots
+```css
+.dot-live {
+  width: 8px; height: 8px;
+  border-radius: 50%;
+  background: var(--accent-green);
+  box-shadow: 0 0 8px var(--accent-green);
+  animation: pulse 2s infinite;
+}
+```
+---
+## Animation Spec
+### Pipeline Stage Activation
+```
+Trigger: query submitted
+Sequence:
+  t=0ms    → CACHE node: border glows cyan, spinner starts
+  t=~200ms → CACHE completes (hit/miss), NLI node activates
+  t=~400ms → NLI completes, GBR node activates
+  ...and so on through each remaining stage (BAYESIAN OPT, COMPRESS, ROUTE, LLM)
+  Final    → Response panel fades in (opacity 0→1, translateY 8px→0, 300ms ease)
+```
+### Page Load
+- Sidebar slides in from left (translateX -100% → 0, 400ms ease-out)
+- KPI cards stagger in with 80ms delay each (opacity 0→1, translateY 16px→0)
+- Chart areas draw from left (width 0→100%, 600ms ease-in-out)
+### Hover States
+- Cards: `border-color` transitions to `--accent-cyan` at 30% opacity
+- Buttons: subtle scale(1.02) + glow intensification
+- Table rows: `--bg-elevated` background fill
+---
+## Responsive Breakpoints
+```
+Desktop (≥1280px) : Full 2-panel layout as described
+Tablet  (≥768px)  : Sidebar collapses to icon rail (48px)
+Mobile  (<768px)  : Full-screen single column, bottom tab nav
+```
+---
+## Tech Stack Recommendation
+```
+Framework    : React 18 + TypeScript
+Styling      : Tailwind CSS + CSS custom properties for theming
+Charts       : Recharts (area, bar, sparklines)
+Animation    : Framer Motion (pipeline, page transitions)
+Markdown     : react-markdown + react-syntax-highlighter
+Icons        : Lucide React
+API Client   : axios / fetch with React Query for caching
+State        : Zustand (lightweight global state)
+```
+---
+## Key UX Principles
+1. **Show, don't tell** — the pipeline animation IS the explainability
+2. **Every number has context** — cost shown alongside "vs GPT-4o baseline"
+3. **Progressive disclosure** — simple by default, deep data on demand
+4. **Zero loading skeletons** — use optimistic UI and instant local feedback
+5. **Error states are designed** — not afterthoughts. Red glow on failed stages, clear recovery path.
+---
+## Sample Data / Placeholders
+Use these for mockups:
+```json
+{
+  "query": "Write a recursive Fibonacci function in Rust",
+  "model_used": "claude-3-5-haiku-20241022",
+  "provider": "anthropic",
+  "complexity_score": 0.62,
+  "complexity_tier": "hard",
+  "estimated_cost": 0.001452,
+  "tokens_saved": 28,
+  "compression_ratio": 0.21,
+  "latency_ms": 1140,
+  "evaluation": {
+    "overall": 9.5,
+    "accuracy": 10.0,
+    "feedback": "The code is idiomatic and correctly implements recursion."
+  }
+}
+```
+---
+## Deliverables Checklist for Agent
+- [ ] `App.tsx` — root layout with sidebar + router
+- [ ] `Playground.tsx` — main query interface
+- [ ] `PipelineVisualizer.tsx` — animated stage flow
+- [ ] `ResponsePanel.tsx` — markdown response display
+- [ ] `ExplainCard.tsx` — monospace terminal-style explanation
+- [ ] `Analytics.tsx` — dashboard with charts
+- [ ] `ModelRegistry.tsx` — model cards + table
+- [ ] `Settings.tsx` — config form
+- [ ] `theme.css` — all CSS variables
+- [ ] `components/MetricCard.tsx`
+- [ ] `components/BudgetPills.tsx`
+- [ ] `components/StatusDot.tsx`

frontend/.gitignore ADDED Viewed

	@@ -0,0 +1,24 @@

+# Logs
+logs
+*.log
+npm-debug.log*
+yarn-debug.log*
+yarn-error.log*
+pnpm-debug.log*
+lerna-debug.log*
+node_modules
+dist
+dist-ssr
+*.local
+# Editor directories and files
+.vscode/*
+!.vscode/extensions.json
+.idea
+.DS_Store
+*.suo
+*.ntvs*
+*.njsproj
+*.sln
+*.sw?

frontend/README.md ADDED Viewed

	@@ -0,0 +1,50 @@

+# React + TypeScript + Vite
+This template provides a minimal setup to get React working in Vite with HMR and some ESLint rules.
+Currently, two official plugins are available:
+- [@vitejs/plugin-react](https://github.com/vitejs/vite-plugin-react/blob/main/packages/plugin-react/README.md) uses [Babel](https://babeljs.io/) for Fast Refresh
+- [@vitejs/plugin-react-swc](https://github.com/vitejs/vite-plugin-react-swc) uses [SWC](https://swc.rs/) for Fast Refresh
+## Expanding the ESLint configuration
+If you are developing a production application, we recommend updating the configuration to enable type aware lint rules:
+- Configure the top-level `parserOptions` property like this:
+```js
+export default tseslint.config({
+  languageOptions: {
+    // other options...
+    parserOptions: {
+      project: ['./tsconfig.node.json', './tsconfig.app.json'],
+      tsconfigRootDir: import.meta.dirname,
+    },
+  },
+})
+```
+- Replace `tseslint.configs.recommended` with `tseslint.configs.recommendedTypeChecked` or `tseslint.configs.strictTypeChecked`
+- Optionally add `...tseslint.configs.stylisticTypeChecked`
+- Install [eslint-plugin-react](https://github.com/jsx-eslint/eslint-plugin-react) and update the config:
+```js
+// eslint.config.js
+import react from 'eslint-plugin-react'
+export default tseslint.config({
+  // Set the react version
+  settings: { react: { version: '18.3' } },
+  plugins: {
+    // Add the react plugin
+    react,
+  },
+  rules: {
+    // other rules...
+    // Enable its recommended rules
+    ...react.configs.recommended.rules,
+    ...react.configs['jsx-runtime'].rules,
+  },
+})
+```

frontend/eslint.config.js ADDED Viewed

	@@ -0,0 +1,28 @@

+import js from '@eslint/js'
+import globals from 'globals'
+import reactHooks from 'eslint-plugin-react-hooks'
+import reactRefresh from 'eslint-plugin-react-refresh'
+import tseslint from 'typescript-eslint'
+export default tseslint.config(
+  { ignores: ['dist'] },
+  {
+    extends: [js.configs.recommended, ...tseslint.configs.recommended],
+    files: ['**/*.{ts,tsx}'],
+    languageOptions: {
+      ecmaVersion: 2020,
+      globals: globals.browser,
+    },
+    plugins: {
+      'react-hooks': reactHooks,
+      'react-refresh': reactRefresh,
+    },
+    rules: {
+      ...reactHooks.configs.recommended.rules,
+      'react-refresh/only-export-components': [
+        'warn',
+        { allowConstantExport: true },
+      ],
+    },
+  },
+)

frontend/index.html ADDED Viewed

	@@ -0,0 +1,16 @@

+<!doctype html>
+<html lang="en">
+  <head>
+    <meta charset="UTF-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+    <title>LLMOpt — Adaptive LLM Inference Optimization</title>
+    <meta name="description" content="LLMOpt is an enterprise-grade LLM gateway that intelligently routes queries to the most cost-effective model, saving costs while maintaining quality." />
+    <link rel="preconnect" href="https://fonts.googleapis.com" />
+    <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin />
+    <link href="https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@400;500;600;700&family=DM+Sans:wght@300;400;500;600;700&family=Fira+Code:wght@400;500&display=swap" rel="stylesheet" />
+  </head>
+  <body>
+    <div id="root"></div>
+    <script type="module" src="/src/main.tsx"></script>
+  </body>
+</html>

frontend/package-lock.json ADDED Viewed

The diff for this file is too large to render. See raw diff

frontend/package.json ADDED Viewed

	@@ -0,0 +1,38 @@

+{
+  "name": "frontend",
+  "private": true,
+  "version": "0.0.0",
+  "type": "module",
+  "scripts": {
+    "dev": "vite",
+    "build": "tsc -b && vite build",
+    "lint": "eslint .",
+    "preview": "vite preview"
+  },
+  "dependencies": {
+    "@types/react-syntax-highlighter": "^15.5.13",
+    "framer-motion": "^12.40.0",
+    "lucide-react": "^1.16.0",
+    "react": "^18.3.1",
+    "react-dom": "^18.3.1",
+    "react-markdown": "^10.1.0",
+    "react-router-dom": "^7.15.1",
+    "react-syntax-highlighter": "^16.1.1",
+    "recharts": "^3.8.1",
+    "zustand": "^5.0.13"
+  },
+  "devDependencies": {
+    "@eslint/js": "^9.13.0",
+    "@types/node": "^25.9.1",
+    "@types/react": "^18.3.12",
+    "@types/react-dom": "^18.3.1",
+    "@vitejs/plugin-react": "^4.3.3",
+    "eslint": "^9.13.0",
+    "eslint-plugin-react-hooks": "^5.0.0",
+    "eslint-plugin-react-refresh": "^0.4.14",
+    "globals": "^15.11.0",
+    "typescript": "~5.6.2",
+    "typescript-eslint": "^8.11.0",
+    "vite": "^5.4.10"
+  }
+}

frontend/public/vite.svg ADDED Viewed

frontend/src/App.css ADDED Viewed

	@@ -0,0 +1,42 @@

+#root {
+  max-width: 1280px;
+  margin: 0 auto;
+  padding: 2rem;
+  text-align: center;
+}
+.logo {
+  height: 6em;
+  padding: 1.5em;
+  will-change: filter;
+  transition: filter 300ms;
+}
+.logo:hover {
+  filter: drop-shadow(0 0 2em #646cffaa);
+}
+.logo.react:hover {
+  filter: drop-shadow(0 0 2em #61dafbaa);
+}
+@keyframes logo-spin {
+  from {
+    transform: rotate(0deg);
+  }
+  to {
+    transform: rotate(360deg);
+  }
+}
+@media (prefers-reduced-motion: no-preference) {
+  a:nth-of-type(2) .logo {
+    animation: logo-spin infinite 20s linear;
+  }
+}
+.card {
+  padding: 2em;
+}
+.read-the-docs {
+  color: #888;
+}

frontend/src/App.tsx ADDED Viewed

	@@ -0,0 +1,217 @@

+import { BrowserRouter, Routes, Route, NavLink, useLocation } from 'react-router-dom';
+import { motion, AnimatePresence } from 'framer-motion';
+import { useEffect } from 'react';
+import { Zap, LayoutDashboard, BarChart3, Database, Settings, LogOut, ChevronLeft, ChevronRight } from 'lucide-react';
+import { useAppStore } from './store';
+import { api, getStoredSession, setStoredSession } from './api';
+import Playground from './pages/Playground';
+import Analytics from './pages/Analytics';
+import ModelRegistry from './pages/ModelRegistry';
+import SettingsPage from './pages/Settings';
+import LoginPage from './pages/Login';
+/**
+ * Collapsible navigation rail: logo with collapse toggle, route links, a
+ * sign-out button (only while logged in) and a system-status panel fed by
+ * `health` from the app store.
+ *
+ * @param collapsed    When true the rail animates to 56px wide and text labels are hidden.
+ * @param setCollapsed Receives the next collapsed state when the toggle is clicked.
+ */
+function Sidebar({ collapsed, setCollapsed }: { collapsed: boolean; setCollapsed: (v: boolean) => void }) {
+  const { health, auth, setAuth } = useAppStore();
+  const location = useLocation();
+  const navItems = [
+    { to: '/', icon: <LayoutDashboard size={18} />, label: 'Playground', exact: true },
+    { to: '/analytics', icon: <BarChart3 size={18} />, label: 'Analytics' },
+    { to: '/models', icon: <Database size={18} />, label: 'Models' },
+    { to: '/settings', icon: <Settings size={18} />, label: 'Settings' },
+  ];
+  // Logout is best-effort: local session state is cleared even if the server call fails.
+  const handleLogout = async () => {
+    try {
+      await api.logout();
+    } catch { /* ignore */ }
+    setStoredSession(null);
+    setAuth({ isLoggedIn: false, sessionId: null });
+  };
+  // Maps a health string to the status-dot CSS classes ('ok' / 'error' / anything else).
+  const dotClass = (s: string) =>
+    s === 'ok' ? 'dot dot-live' : s === 'error' ? 'dot dot-error' : 'dot dot-muted';
+  return (
+    <motion.aside
+      className={`sidebar${collapsed ? ' collapsed' : ''}`}
+      initial={false}
+      animate={{ width: collapsed ? 56 : 280 }}
+      transition={{ duration: 0.25, ease: 'easeInOut' }}
+    >
+      {/* Logo */}
+      <div className="sidebar-logo">
+        <div className="sidebar-logo-icon"><Zap size={22} fill="currentColor" /></div>
+        {!collapsed && (
+          <div className="sidebar-logo-text">LLM<span>Opt</span></div>
+        )}
+        {/* Icon-only control: needs an explicit accessible name and state */}
+        <button
+          type="button"
+          aria-label={collapsed ? 'Expand sidebar' : 'Collapse sidebar'}
+          aria-expanded={!collapsed}
+          onClick={() => setCollapsed(!collapsed)}
+          style={{
+            marginLeft: 'auto',
+            background: 'transparent',
+            border: 'none',
+            color: 'var(--text-muted)',
+            cursor: 'pointer',
+            display: 'flex',
+            alignItems: 'center',
+            padding: '4px',
+            borderRadius: '4px',
+            flexShrink: 0,
+          }}
+        >
+          {collapsed ? <ChevronRight size={16} /> : <ChevronLeft size={16} />}
+        </button>
+      </div>
+      {/* Nav */}
+      <nav className="sidebar-nav">
+        {!collapsed && <div className="sidebar-section-label">Navigation</div>}
+        {navItems.map((item) => {
+          // '/' must match exactly; other entries match by path prefix.
+          const isActive = item.exact
+            ? location.pathname === item.to
+            : location.pathname.startsWith(item.to) && item.to !== '/';
+          return (
+            <NavLink
+              key={item.to}
+              to={item.to}
+              className={`sidebar-nav-item${isActive ? ' active' : ''}`}
+              data-tooltip={collapsed ? item.label : undefined}
+              aria-label={collapsed ? item.label : undefined}
+            >
+              <span className="sidebar-nav-icon">{item.icon}</span>
+              {!collapsed && <span>{item.label}</span>}
+            </NavLink>
+          );
+        })}
+        <div style={{ flex: 1 }} />
+        {auth.isLoggedIn && (
+          <button
+            type="button"
+            className="sidebar-nav-item"
+            onClick={handleLogout}
+            data-tooltip={collapsed ? 'Sign Out' : undefined}
+            aria-label={collapsed ? 'Sign Out' : undefined}
+          >
+            <span className="sidebar-nav-icon"><LogOut size={18} /></span>
+            {!collapsed && <span>Sign Out</span>}
+          </button>
+        )}
+      </nav>
+      {/* Status (hidden while collapsed) */}
+      {!collapsed && (
+        <div className="sidebar-status">
+          <div className="sidebar-status-title">System Status</div>
+          <div className="sidebar-status-item">
+            <span className={dotClass(health.api)} />
+            <span>API</span>
+            <span style={{ marginLeft: 'auto', color: health.api === 'ok' ? 'var(--accent-green)' : 'var(--accent-red)' }}>
+              {health.api.toUpperCase()}
+            </span>
+          </div>
+          <div className="sidebar-status-item">
+            <span className={dotClass(health.redis)} />
+            <span>Redis</span>
+            <span style={{ marginLeft: 'auto', color: health.redis === 'ok' ? 'var(--accent-green)' : 'var(--text-muted)' }}>
+              {health.redis.toUpperCase()}
+            </span>
+          </div>
+          <div className="sidebar-status-item">
+            <span className={dotClass(health.ml_deps)} />
+            <span>ML Deps</span>
+            <span style={{ marginLeft: 'auto', color: health.ml_deps === 'ok' ? 'var(--accent-green)' : 'var(--text-muted)' }}>
+              {health.ml_deps.toUpperCase()}
+            </span>
+          </div>
+        </div>
+      )}
+    </motion.aside>
+  );
+}
+/**
+ * Authenticated application frame. Restores a stored session on mount, pings
+ * the health endpoint every 30s, loads the connected providers after login,
+ * and renders either the login page or the sidebar + routed page content.
+ */
+function AppShell() {
+  const { auth, setAuth, sidebarCollapsed, setSidebarCollapsed, setConnectedProviders } = useAppStore();
+  const location = useLocation();
+  // Restore session from localStorage on startup
+  useEffect(() => {
+    const stored = getStoredSession();
+    if (stored) {
+      setAuth({ isLoggedIn: true, sessionId: stored });
+    }
+  }, [setAuth]);
+  // Poll health
+  // NOTE(review): the response of api.health() is discarded here, so nothing in
+  // this component updates the `health` values the sidebar displays — confirm
+  // whether the store is expected to be updated from this poll.
+  useEffect(() => {
+    const check = async () => {
+      try {
+        await api.health();
+      } catch { /* ignore */ }
+    };
+    check();
+    const t = setInterval(check, 30000);
+    return () => clearInterval(t);
+  }, []);
+  // Fetch connected providers once per login (not a recurring poll)
+  useEffect(() => {
+    if (!auth.isLoggedIn) return;
+    // Guards against writing to the store after logout/unmount while the request is in flight.
+    let cancelled = false;
+    const check = async () => {
+      try {
+        const data = await api.getKeys();
+        if (!cancelled) setConnectedProviders(data.connected_providers);
+      } catch { /* ignore: provider list is non-critical */ }
+    };
+    check();
+    return () => {
+      cancelled = true;
+    };
+  }, [auth.isLoggedIn, setConnectedProviders]);
+  if (!auth.isLoggedIn) {
+    return (
+      <AnimatePresence mode="wait">
+        <motion.div
+          key="login"
+          initial={{ opacity: 0 }}
+          animate={{ opacity: 1 }}
+          exit={{ opacity: 0 }}
+          transition={{ duration: 0.3 }}
+        >
+          <LoginPage />
+        </motion.div>
+      </AnimatePresence>
+    );
+  }
+  return (
+    <div className="app-layout">
+      <Sidebar collapsed={sidebarCollapsed} setCollapsed={setSidebarCollapsed} />
+      <main className={`main-content${sidebarCollapsed ? ' sidebar-collapsed' : ''}`}>
+        <AnimatePresence mode="wait">
+          <motion.div
+            key={location.pathname}
+            initial={{ opacity: 0, y: 8 }}
+            animate={{ opacity: 1, y: 0 }}
+            exit={{ opacity: 0, y: -8 }}
+            transition={{ duration: 0.2 }}
+            style={{ flex: 1, overflow: 'hidden', display: 'flex', flexDirection: 'column', minHeight: 0 }}
+          >
+            {/* Pin the location so the exiting wrapper keeps rendering the page it
+                was created for instead of switching to the new route mid-exit. */}
+            <Routes location={location}>
+              <Route path="/" element={<Playground />} />
+              <Route path="/analytics" element={<Analytics />} />
+              <Route path="/models" element={<ModelRegistry />} />
+              <Route path="/settings" element={<SettingsPage />} />
+            </Routes>
+          </motion.div>
+        </AnimatePresence>
+      </main>
+    </div>
+  );
+}
+/** Root component: mounts the application shell under the `/ui` router basename. */
+export default function App() {
+  const shell = <AppShell />;
+  return <BrowserRouter basename="/ui">{shell}</BrowserRouter>;
+}

frontend/src/api.ts ADDED Viewed

	@@ -0,0 +1,153 @@

+// API client for LLMOpt backend
+// Session ID is stored in localStorage and sent as Authorization: Bearer <token>
+// This avoids httponly cookie issues on localhost
+import type {
+  GenerateRequest,
+  GenerateResponse,
+  ExplainResponse,
+  HistoryItem,
+  DashboardStats,
+  ModelSpec,
+} from './types';
+const BASE = '';  // same-origin (served by FastAPI or proxied by Vite)
+// localStorage key under which the session token is persisted
+const SESSION_STORAGE_KEY = 'llmopt_session';
+/** Returns the persisted session token, or null when none is stored. */
+export function getStoredSession(): string | null {
+  return localStorage.getItem(SESSION_STORAGE_KEY);
+}
+/** Persists the session token; any falsy value (null or empty string) removes it. */
+export function setStoredSession(id: string | null) {
+  if (!id) {
+    localStorage.removeItem(SESSION_STORAGE_KEY);
+    return;
+  }
+  localStorage.setItem(SESSION_STORAGE_KEY, id);
+}
+/**
+ * Performs a JSON request against the backend and returns the parsed body.
+ *
+ * @param path         URL path appended to BASE.
+ * @param options      Extra fetch options; `options.headers` are merged over the defaults.
+ * @param requiresAuth When true (default) the stored session token, if any, is sent
+ *                     as `Authorization: Bearer <token>`.
+ * @returns The parsed JSON body, or `undefined` for 204/205 responses.
+ * @throws Error carrying a numeric `status` property when the response is not ok.
+ */
+async function request<T>(
+  path: string,
+  options: RequestInit = {},
+  requiresAuth = true,
+): Promise<T> {
+  const headers: Record<string, string> = {
+    'Content-Type': 'application/json',
+    ...((options.headers as Record<string, string>) || {}),
+  };
+  // Inject session token as Bearer header
+  if (requiresAuth) {
+    const session = getStoredSession();
+    if (session) {
+      headers['Authorization'] = `Bearer ${session}`;
+    }
+  }
+  const res = await fetch(`${BASE}${path}`, {
+    credentials: 'include',
+    ...options,
+    headers,
+  });
+  if (!res.ok) {
+    // The error body may be missing, non-JSON, or literally `null`.
+    const body = await res.json().catch(() => null);
+    const detail = body?.detail;
+    // `detail` is not always a string (validation errors can be arrays/objects);
+    // serialise those instead of letting Error() produce "[object Object]".
+    let message: string;
+    if (typeof detail === 'string' && detail) {
+      message = detail;
+    } else if (detail !== undefined && detail !== null && detail !== '') {
+      message = JSON.stringify(detail);
+    } else {
+      message = res.statusText || `HTTP ${res.status}`;
+    }
+    const err = new Error(message) as Error & { status?: number };
+    err.status = res.status;
+    throw err;
+  }
+  // 204/205 responses carry no body, so res.json() would reject.
+  if (res.status === 204 || res.status === 205) {
+    return undefined as T;
+  }
+  return res.json();
+}
+/**
+ * Typed client for the backend endpoints. Every method except `stream`
+ * delegates to `request`; methods passing `false` as the third argument are
+ * sent without the Authorization header.
+ */
+export const api = {
+  health: () => request<{ status: string; version: string }>('/health', {}, false),
+  generate: (req: GenerateRequest) =>
+    request<GenerateResponse>('/generate', {
+      method: 'POST',
+      body: JSON.stringify(req),
+    }),
+  explain: (
+    query: string,
+    budget_mode: string,
+    params?: {
+      alpha?: number;
+      beta?: number;
+      gamma?: number;
+      compression_enabled?: boolean;
+      exclude_providers?: string[];
+      only_providers?: string[];
+    }
+  ) =>
+    request<ExplainResponse>('/explain', {
+      method: 'POST',
+      body: JSON.stringify({ query, budget_mode, ...params }),
+    }),  // explain requires auth to access session keys
+  models: () =>
+    request<{ models: ModelSpec[] }>('/models', {}, false),
+  // Auth
+  register: (email: string, password: string) =>
+    request<{ message: string }>('/auth/register', {
+      method: 'POST',
+      body: JSON.stringify({ email, password }),
+    }, false),
+  login: (email: string, password: string) =>
+    request<{ message: string; session_id: string }>('/auth/login', {
+      method: 'POST',
+      body: JSON.stringify({ email, password }),
+    }, false),
+  logout: () =>
+    request<{ message: string }>('/auth/logout', { method: 'POST' }),
+  getKeys: () =>
+    request<{ connected_providers: string[] }>('/auth/keys'),
+  updateKeys: (api_keys: Record<string, string>) =>
+    request<{ message: string }>('/auth/keys', {
+      method: 'POST',
+      body: JSON.stringify({ api_keys }),
+    }),
+  deleteKey: (provider: string) =>
+    // Encode the path segment so characters such as '/' or '?' cannot alter the URL.
+    request<{ message: string }>(`/auth/keys/${encodeURIComponent(provider)}`, {
+      method: 'DELETE',
+    }),
+  getDashboardStats: () =>
+    request<DashboardStats>('/auth/dashboard-stats'),
+  getHistory: () =>
+    request<HistoryItem[]>('/auth/history'),
+  // Streaming
+  /**
+   * POSTs to /stream and forwards each decoded text chunk to `onChunk` as it
+   * arrives. Resolves when the body ends (or immediately if there is no body).
+   * Throws an Error with a `status` property when the response is not ok.
+   */
+  stream: async (req: GenerateRequest, onChunk: (chunk: string) => void) => {
+    const session = getStoredSession();
+    const headers: Record<string, string> = { 'Content-Type': 'application/json' };
+    if (session) headers['Authorization'] = `Bearer ${session}`;
+    // Use BASE like every other call so a non-empty base URL also applies here.
+    const res = await fetch(`${BASE}/stream`, {
+      method: 'POST',
+      credentials: 'include',
+      headers,
+      body: JSON.stringify(req),
+    });
+    if (!res.ok) {
+      const body = await res.json().catch(() => ({ detail: res.statusText }));
+      const err = new Error(body?.detail || `HTTP ${res.status}`) as Error & { status?: number };
+      err.status = res.status;
+      throw err;
+    }
+    const reader = res.body?.getReader();
+    const decoder = new TextDecoder();
+    if (!reader) return;
+    while (true) {
+      const { done, value } = await reader.read();
+      if (done) break;
+      // stream: true keeps a multi-byte character split across chunks buffered.
+      onChunk(decoder.decode(value, { stream: true }));
+    }
+    // Flush whatever the decoder is still buffering at end of stream.
+    const tail = decoder.decode();
+    if (tail) onChunk(tail);
+  },
+};

frontend/src/assets/react.svg ADDED Viewed

frontend/src/index.css ADDED Viewed

	@@ -0,0 +1,68 @@

+:root {
+  font-family: Inter, system-ui, Avenir, Helvetica, Arial, sans-serif;
+  line-height: 1.5;
+  font-weight: 400;
+  color-scheme: light dark;
+  color: rgba(255, 255, 255, 0.87);
+  background-color: #242424;
+  font-synthesis: none;
+  text-rendering: optimizeLegibility;
+  -webkit-font-smoothing: antialiased;
+  -moz-osx-font-smoothing: grayscale;
+}
+a {
+  font-weight: 500;
+  color: #646cff;
+  text-decoration: inherit;
+}
+a:hover {
+  color: #535bf2;
+}
+body {
+  margin: 0;
+  display: flex;
+  place-items: center;
+  min-width: 320px;
+  min-height: 100vh;
+}
+h1 {
+  font-size: 3.2em;
+  line-height: 1.1;
+}
+button {
+  border-radius: 8px;
+  border: 1px solid transparent;
+  padding: 0.6em 1.2em;
+  font-size: 1em;
+  font-weight: 500;
+  font-family: inherit;
+  background-color: #1a1a1a;
+  cursor: pointer;
+  transition: border-color 0.25s;
+}
+button:hover {
+  border-color: #646cff;
+}
+button:focus,
+button:focus-visible {
+  outline: 4px auto -webkit-focus-ring-color;
+}
+@media (prefers-color-scheme: light) {
+  :root {
+    color: #213547;
+    background-color: #ffffff;
+  }
+  a:hover {
+    color: #747bff;
+  }
+  button {
+    background-color: #f9f9f9;
+  }
+}

frontend/src/main.tsx ADDED Viewed

	@@ -0,0 +1,10 @@

+import { StrictMode } from 'react'
+import { createRoot } from 'react-dom/client'
+import './theme.css'
+import App from './App.tsx'
+createRoot(document.getElementById('root')!).render(
+  <StrictMode>
+    <App />
+  </StrictMode>,
+)

frontend/src/pages/Analytics.tsx ADDED Viewed

	@@ -0,0 +1,385 @@

+import { useEffect, useState } from 'react';
+import { motion } from 'framer-motion';
+import { BarChart3, TrendingUp, TrendingDown } from 'lucide-react';
+import {
+  AreaChart,
+  Area,
+  BarChart,
+  Bar,
+  XAxis,
+  YAxis,
+  CartesianGrid,
+  Tooltip,
+  ResponsiveContainer,
+  Cell,
+  Legend,
+} from 'recharts';
+import { api } from '../api';
+import type { DashboardStats, HistoryItem } from '../types';
+// ─── Mock time-series data (since backend doesn't expose it yet) ──────────────
+/**
+ * Builds one placeholder data point per day, oldest first, ending today.
+ *
+ * @param days     Number of points to produce.
+ * @param base     Centre value for `actual`; `baseline` is fixed at `base * 8`.
+ * @param variance Width of the random spread applied to `actual` (±variance/2).
+ * @returns Objects of the form `{ date, actual, baseline }`; `actual` is never negative.
+ */
+function generateDailyData(days: number, base: number, variance: number) {
+  const today = new Date();
+  const points: { date: string; actual: number; baseline: number }[] = [];
+  // Count the day offset down so the oldest day is emitted first.
+  for (let daysAgo = days - 1; daysAgo >= 0; daysAgo -= 1) {
+    const day = new Date(today);
+    day.setDate(day.getDate() - daysAgo);
+    const jitter = (Math.random() - 0.5) * variance;
+    points.push({
+      date: day.toLocaleDateString('en-US', { month: 'short', day: 'numeric' }),
+      actual: Math.max(0, base + jitter),
+      baseline: base * 8,
+    });
+  }
+  return points;
+}
+const PROVIDER_COLORS: Record<string, string> = {
+  openai:    '#00E5FF',
+  anthropic: '#7C4DFF',
+  google:    '#00FF94',
+  ollama:    '#FFB300',
+  cohere:    '#FF3D57',
+  other:     '#7A8299',
+};
+// ─── Custom Tooltip ───────────────────────────────────────────────────────────
+/**
+ * Tooltip body for the charts: shows the hovered label followed by one
+ * dollar-prefixed line per series. Renders nothing unless the tooltip is
+ * active and has at least one payload entry.
+ */
+const CustomTooltip = ({ active, payload, label }: any) => {
+  const hasEntries = Boolean(active) && Boolean(payload?.length);
+  if (!hasEntries) return null;
+  // Numbers are shown with four decimals; anything else is passed through untouched.
+  const formatValue = (v: any) => (typeof v === 'number' ? v.toFixed(4) : v);
+  const boxStyle = {
+    background: 'var(--bg-elevated)',
+    border: '1px solid var(--bg-border)',
+    borderRadius: '8px',
+    padding: '12px 16px',
+    fontSize: 'var(--text-xs)',
+    fontFamily: 'JetBrains Mono, monospace',
+  };
+  return (
+    <div style={boxStyle}>
+      <div style={{ color: 'var(--text-secondary)', marginBottom: 8 }}>{label}</div>
+      {payload.map((entry: any) => (
+        <div key={entry.name} style={{ color: entry.color, marginBottom: 4 }}>
+          {entry.name}: ${formatValue(entry.value)}
+        </div>
+      ))}
+    </div>
+  );
+};
+// ─── KPI Card ────────────────────────────────────────────────────────────────
+/**
+ * Animated KPI tile: a label, a headline value and an optional delta line
+ * with an up/down trend icon.
+ *
+ * @param label         Caption shown above the value.
+ * @param value         Pre-formatted headline value.
+ * @param delta         Optional secondary text; the delta row is omitted when falsy.
+ * @param deltaPositive Selects the up icon/class when true, the down variant otherwise.
+ * @param color         Extra class name appended to `metric-card`.
+ * @param delay         Entrance animation delay passed to the motion transition.
+ */
+function KPICard(props: {
+  label: string;
+  value: string;
+  delta?: string;
+  deltaPositive?: boolean;
+  color: string;
+  delay: number;
+}) {
+  const { label, value, delta, deltaPositive, color, delay } = props;
+  const TrendIcon = deltaPositive ? TrendingUp : TrendingDown;
+  const deltaRow = delta && (
+    <div className={`metric-card-delta ${deltaPositive ? 'delta-up' : 'delta-down'}`}>
+      <TrendIcon size={12} />
+      {delta}
+    </div>
+  );
+  return (
+    <motion.div
+      className={`metric-card ${color}`}
+      initial={{ opacity: 0, y: 16 }}
+      animate={{ opacity: 1, y: 0 }}
+      transition={{ delay, duration: 0.4 }}
+    >
+      <div className="metric-card-label">{label}</div>
+      <div className="metric-card-value">{value}</div>
+      {deltaRow}
+    </motion.div>
+  );
+}
+// ─── Query Log Table ──────────────────────────────────────────────────────────
+/**
+ * Horizontal bar plus numeric readout for a complexity score.
+ * Colour bands: below 0.4 green, below 0.7 amber, otherwise red.
+ *
+ * @param score Complexity score; rendered as `score * 100` for both the
+ *              fill width (percent) and the rounded label.
+ */
+function ComplexityBar({ score }: { score: number }) {
+  let color = 'var(--accent-red)';
+  if (score < 0.4) {
+    color = 'var(--accent-green)';
+  } else if (score < 0.7) {
+    color = 'var(--accent-amber)';
+  }
+  const percent = score * 100;
+  const labelStyle = { fontFamily: 'JetBrains Mono, monospace', fontSize: 'var(--text-xs)', color };
+  return (
+    <div className="complexity-bar">
+      <div className="complexity-bar-track">
+        <div
+          className="complexity-bar-fill"
+          style={{ width: `${percent}%`, background: color }}
+        />
+      </div>
+      <span style={labelStyle}>{percent.toFixed(0)}</span>
+    </div>
+  );
+}
+/**
+ * Table of past queries. Clicking a row toggles an inline detail row showing
+ * the full query, the first 400 characters of the response and token/cost
+ * figures; only one row is expanded at a time. Shows an empty state when
+ * `items` is empty.
+ *
+ * @param items History entries to list, rendered in the given order.
+ */
+function QueryLogTable({ items }: { items: HistoryItem[] }) {
+  // id of the currently expanded entry, or null when all rows are closed
+  const [expanded, setExpanded] = useState<number | null>(null);
+  if (items.length === 0) {
+    return (
+      <div className="empty-state">
+        <div className="empty-state-icon">📋</div>
+        <div className="empty-state-title">No Query History</div>
+        <div className="empty-state-desc">Run queries in the Playground to see them here.</div>
+      </div>
+    );
+  }
+  return (
+    <div style={{ overflowX: 'auto' }}>
+      <table className="data-table">
+        <thead>
+          <tr>
+            <th>Time</th>
+            <th>Query</th>
+            <th>Complexity</th>
+            <th>Model</th>
+            <th>Cost</th>
+            <th>Latency</th>
+            <th>Tier</th>
+          </tr>
+        </thead>
+        <tbody>
+          {/* Return keyed rows directly: a key on a <tr> nested inside a keyless
+              fragment is ignored by the list reconciler and triggers a warning. */}
+          {items.flatMap((item) => {
+            const isOpen = expanded === item.id;
+            const summaryRow = (
+              <tr key={item.id} onClick={() => setExpanded(isOpen ? null : item.id)}>
+                <td>
+                  <span style={{ fontFamily: 'JetBrains Mono, monospace', fontSize: 'var(--text-xs)', color: 'var(--text-muted)' }}>
+                    {item.time_ago}
+                  </span>
+                </td>
+                <td>
+                  <div className="truncate" style={{ maxWidth: 240, fontSize: 'var(--text-sm)' }}>
+                    {item.query}
+                  </div>
+                </td>
+                <td><ComplexityBar score={item.complexity_score || 0} /></td>
+                <td>
+                  <div style={{ fontFamily: 'JetBrains Mono, monospace', fontSize: 'var(--text-xs)', color: 'var(--accent-cyan)' }}>
+                    {/* Show only the last two dash-separated segments of the model id */}
+                    {item.model_used?.split('-').slice(-2).join('-') || '—'}
+                  </div>
+                </td>
+                <td>
+                  <span style={{ fontFamily: 'JetBrains Mono, monospace', fontSize: 'var(--text-xs)', color: 'var(--accent-green)' }}>
+                    ${(item.estimated_cost || 0).toFixed(6)}
+                  </span>
+                </td>
+                <td>
+                  <span style={{ fontFamily: 'JetBrains Mono, monospace', fontSize: 'var(--text-xs)' }}>
+                    {/* Only append the unit when a latency value exists */}
+                    {item.latency_ms != null ? `${item.latency_ms.toFixed(0)}ms` : '—'}
+                  </span>
+                </td>
+                <td>
+                  <span className={`badge badge-${item.complexity_tier === 'easy' ? 'green' : item.complexity_tier === 'hard' ? 'red' : 'amber'}`}>
+                    {item.complexity_tier || 'std'}
+                  </span>
+                </td>
+              </tr>
+            );
+            if (!isOpen) return [summaryRow];
+            const responseText = item.response || '';
+            const detailRow = (
+              <tr key={`${item.id}-exp`}>
+                <td colSpan={7} style={{ padding: 0 }}>
+                  <div style={{
+                    padding: 'var(--sp-4) var(--sp-5)',
+                    background: 'var(--bg-base)',
+                    borderTop: '1px solid var(--bg-border)',
+                    fontFamily: 'JetBrains Mono, monospace',
+                    fontSize: 'var(--text-xs)',
+                    lineHeight: 1.8,
+                  }}>
+                    <div style={{ color: 'var(--accent-cyan)' }}>{'>'} Full query:</div>
+                    <div style={{ color: 'var(--text-secondary)', margin: '4px 0 12px', whiteSpace: 'pre-wrap' }}>{item.query}</div>
+                    <div style={{ color: 'var(--accent-cyan)' }}>{'>'} Response snippet:</div>
+                    <div style={{ color: 'var(--text-secondary)', margin: '4px 0', whiteSpace: 'pre-wrap' }}>
+                      {responseText.slice(0, 400)}{responseText.length > 400 ? '...' : ''}
+                    </div>
+                    <div style={{ display: 'flex', gap: 'var(--sp-6)', marginTop: 12, color: 'var(--text-muted)' }}>
+                      <span>Tokens in: {item.input_tokens}</span>
+                      <span>Tokens out: {item.output_tokens}</span>
+                      <span>Saved: {item.tokens_saved}</span>
+                      <span>Cost saved: ${(item.cost_saved || 0).toFixed(6)}</span>
+                    </div>
+                  </div>
+                </td>
+              </tr>
+            );
+            return [summaryRow, detailRow];
+          })}
+        </tbody>
+      </table>
+    </div>
+  );
+}
+// ─── Analytics Page ───────────────────────────────────────────────────────────
+export default function Analytics() {
+  const [stats, setStats] = useState<DashboardStats | null>(null);
+  const [history, setHistory] = useState<HistoryItem[]>([]);
+  const [loading, setLoading] = useState(true);
+  const [error, setError] = useState('');
+  const [chartRange, setChartRange] = useState<7 | 30>(7);
+  const costData = generateDailyData(chartRange, 0.05, 0.08);
+  const modelData = stats
+    ? Object.entries(stats.distribution).map(([name, pct]) => ({
+        name: name.charAt(0).toUpperCase() + name.slice(1),
+        value: pct,
+        fill: PROVIDER_COLORS[name] || PROVIDER_COLORS.other,
+      }))
+    : [];
+  useEffect(() => {
+    const load = async () => {
+      try {
+        const [s, h] = await Promise.all([
+          api.getDashboardStats(),
+          api.getHistory(),
+        ]);
+        setStats(s);
+        setHistory(h);
+      } catch (e: any) {
+        setError(e.message || 'Failed to load analytics');
+      } finally {
+        setLoading(false);
+      }
+    };
+    load();
+  }, []);
+  if (loading) {
+    return (
+      <div style={{ display: 'flex', alignItems: 'center', justifyContent: 'center', height: '100%' }}>
+        <span className="spinner" style={{ width: 32, height: 32, borderWidth: 3 }} />
+      </div>
+    );
+  }
+  return (
+    <div style={{ display: 'flex', flexDirection: 'column', flex: 1, minHeight: 0, overflow: 'hidden' }}>
+      <div className="topbar">
+        <div className="topbar-breadcrumb">
+          <BarChart3 size={14} style={{ color: 'var(--accent-cyan)' }} />
+          <strong>Analytics</strong>
+          <span style={{ color: 'var(--text-muted)' }}>/ Observability Dashboard</span>
+        </div>
+      </div>
+      <div className="page-content" style={{ display: 'flex', flexDirection: 'column', gap: 'var(--sp-5)', flex: 1, overflowY: 'auto' }}>
+        {error && <div className="auth-error">⚠ {error}</div>}
+        {/* KPI Row */}
+        <div className="grid-4">
+          <KPICard label="Total Saved"    value={stats?.routing_savings || '$0.00'} delta="from routing"     deltaPositive color="green"  delay={0} />
+          <KPICard label="Queries Run"    value={String(stats?.prompts_improved || 0)} color="cyan"   delay={0.08} />
+          <KPICard label="Avg Quality Boost" value={stats?.avg_boost || '0%'}      delta="complexity-adjusted" deltaPositive color="purple" delay={0.16} />
+          <KPICard label="Tokens Saved"   value={stats?.tokens_saved || '0'}       delta="vs uncompressed"  deltaPositive color="amber"  delay={0.24} />
+        </div>
+        {/* Cost Over Time + Model Distribution */}
+        <div className="grid-2">
+          <div className="card">
+            <div className="card-header">
+              <div>
+                <div className="card-title">Cost Over Time</div>
+                <div className="card-subtitle">Actual vs GPT-4o baseline (USD)</div>
+              </div>
+              <div style={{ display: 'flex', gap: 'var(--sp-2)' }}>
+                {([7, 30] as const).map((d) => (
+                  <button
+                    key={d}
+                    className={`btn btn-ghost btn-sm${chartRange === d ? ' active' : ''}`}
+                    style={chartRange === d ? { borderColor: 'var(--accent-cyan)', color: 'var(--accent-cyan)' } : {}}
+                    onClick={() => setChartRange(d)}
+                  >
+                    {d}d
+                  </button>
+                ))}
+              </div>
+            </div>
+            <div className="card-body">
+              <ResponsiveContainer width="100%" height={200}>
+                <AreaChart data={costData} margin={{ top: 5, right: 5, bottom: 5, left: 5 }}>
+                  <defs>
+                    <linearGradient id="costGrad" x1="0" y1="0" x2="0" y2="1">
+                      <stop offset="5%" stopColor="#00E5FF" stopOpacity={0.3} />
+                      <stop offset="95%" stopColor="#00E5FF" stopOpacity={0} />
+                    </linearGradient>
+                    <linearGradient id="baseGrad" x1="0" y1="0" x2="0" y2="1">
+                      <stop offset="5%" stopColor="#FF3D57" stopOpacity={0.1} />
+                      <stop offset="95%" stopColor="#FF3D57" stopOpacity={0} />
+                    </linearGradient>
+                  </defs>
+                  <CartesianGrid strokeDasharray="3 3" stroke="var(--bg-border)" />
+                  <XAxis dataKey="date" tick={{ fill: 'var(--text-muted)', fontSize: 11 }} />
+                  <YAxis tick={{ fill: 'var(--text-muted)', fontSize: 11 }} tickFormatter={(v) => `$${v.toFixed(2)}`} />
+                  <Tooltip content={<CustomTooltip />} />
+                  <Legend wrapperStyle={{ fontSize: 12, color: 'var(--text-secondary)' }} />
+                  <Area type="monotone" dataKey="baseline" name="GPT-4o Baseline" stroke="#FF3D57" strokeDasharray="5 5" fill="url(#baseGrad)" strokeWidth={2} />
+                  <Area type="monotone" dataKey="actual"   name="LLMOpt Actual"  stroke="#00E5FF" fill="url(#costGrad)" strokeWidth={2} dot={{ fill: '#00E5FF', r: 3 }} />
+                </AreaChart>
+              </ResponsiveContainer>
+            </div>
+          </div>
+          <div className="card">
+            <div className="card-header">
+              <div>
+                <div className="card-title">Model Usage</div>
+                <div className="card-subtitle">Distribution by provider (%)</div>
+              </div>
+            </div>
+            <div className="card-body">
+              {modelData.length > 0 ? (
+                <ResponsiveContainer width="100%" height={200}>
+                  <BarChart data={modelData} layout="vertical" margin={{ top: 5, right: 20, bottom: 5, left: 60 }}>
+                    <CartesianGrid strokeDasharray="3 3" stroke="var(--bg-border)" horizontal={false} />
+                    <XAxis type="number" tick={{ fill: 'var(--text-muted)', fontSize: 11 }} tickFormatter={(v) => `${v}%`} />
+                    <YAxis type="category" dataKey="name" tick={{ fill: 'var(--text-secondary)', fontSize: 12, fontFamily: 'JetBrains Mono' }} />
+                    <Tooltip
+                      formatter={(v: any) => [`${v}%`, 'Share']}
+                      contentStyle={{ background: 'var(--bg-elevated)', border: '1px solid var(--bg-border)', borderRadius: 8, fontSize: 12 }}
+                    />
+                    <Bar dataKey="value" radius={[0, 4, 4, 0]}>
+                      {modelData.map((entry, i) => (
+                        <Cell key={i} fill={entry.fill} />
+                      ))}
+                    </Bar>
+                  </BarChart>
+                </ResponsiveContainer>
+              ) : (
+                <div className="empty-state" style={{ padding: 'var(--sp-8)' }}>
+                  <div className="empty-state-title">No data yet</div>
+                  <div className="empty-state-desc">Run queries to see model distribution.</div>
+                </div>
+              )}
+            </div>
+          </div>
+        </div>
+        {/* Query Log */}
+        <div className="card">
+          <div className="card-header">
+            <div>
+              <div className="card-title">Query Log</div>
+              <div className="card-subtitle">Last 20 requests — click to expand</div>
+            </div>
+          </div>
+          <QueryLogTable items={history} />
+        </div>
+      </div>
+    </div>
+  );
+}

frontend/src/pages/Login.tsx ADDED Viewed

	@@ -0,0 +1,209 @@

+import { useState } from 'react';
+import { motion, AnimatePresence } from 'framer-motion';
+import { Zap } from 'lucide-react';
+import { api, setStoredSession } from '../api';
+import { useAppStore } from '../store';
+// Which variant of the auth card is shown: the sign-in form or the account-creation form.
+type Mode = 'login' | 'register';
+/**
+ * Authentication screen offering email/password sign-in, registration and OAuth entry points.
+ *
+ * On a successful login the session id is persisted through `setStoredSession` and pushed
+ * into the app store. The list of connected providers is then fetched on a best-effort
+ * basis: a failure there is logged but never undoes the login.
+ */
+export default function LoginPage() {
+  const { setAuth, setConnectedProviders } = useAppStore();
+  const [mode, setMode] = useState<Mode>('login');
+  const [email, setEmail] = useState('');
+  const [password, setPassword] = useState('');
+  const [loading, setLoading] = useState(false);
+  const [error, setError] = useState('');
+  // Switch between the two forms and drop any error banner left over from the other one.
+  const switchMode = (next: Mode) => {
+    setMode(next);
+    setError('');
+  };
+  // The mode links are <span>s, so Enter/Space must be wired up by hand for keyboard users.
+  const activateOnKey = (next: Mode) => (ev: React.KeyboardEvent) => {
+    if (ev.key === 'Enter' || ev.key === ' ') {
+      ev.preventDefault();
+      switchMode(next);
+    }
+  };
+  const handleSubmit = async (e: React.FormEvent) => {
+    e.preventDefault();
+    // Ignore blank credentials and re-entrant submits while a request is still in flight.
+    if (loading || !email.trim() || !password.trim()) return;
+    setLoading(true);
+    setError('');
+    try {
+      if (mode === 'register') {
+        // Create the account first; the login call below then signs the new user in.
+        await api.register(email, password);
+      }
+      const data = await api.login(email, password);
+      setStoredSession(data.session_id);
+      setAuth({ isLoggedIn: true, sessionId: data.session_id });
+      // Provider keys are not required for being logged in: report a failure, keep the session.
+      try {
+        const keys = await api.getKeys();
+        setConnectedProviders(keys.connected_providers);
+      } catch (keysErr) {
+        console.warn('Failed to load connected providers after login', keysErr);
+      }
+    } catch (err: any) {
+      // `err?.` also covers a rejected promise carrying null/undefined.
+      setError(err?.message || 'Authentication failed');
+    } finally {
+      setLoading(false);
+    }
+  };
+  // OAuth flows are full-page redirects to the backend.
+  const handleGoogleLogin = () => {
+    window.location.href = '/auth/login/google';
+  };
+  const handleGithubLogin = () => {
+    window.location.href = '/auth/login/github';
+  };
+  return (
+    <div className="auth-page">
+      {/* Background */}
+      <div className="auth-bg-grid" />
+      <div className="auth-bg-glow" />
+      <div className="auth-bg-glow-2" />
+      {/* Floating particles: size, position and timing are all derived from the index */}
+      {[...Array(6)].map((_, i) => (
+        <motion.div
+          key={i}
+          style={{
+            position: 'absolute',
+            width: `${4 + i * 2}px`,
+            height: `${4 + i * 2}px`,
+            borderRadius: '50%',
+            background: i % 2 === 0 ? 'var(--accent-cyan)' : 'var(--accent-purple)',
+            opacity: 0.3,
+            left: `${15 + i * 14}%`,
+            top: `${20 + (i % 3) * 25}%`,
+          }}
+          animate={{
+            y: [0, -20, 0],
+            opacity: [0.3, 0.6, 0.3],
+          }}
+          transition={{
+            duration: 3 + i * 0.5,
+            repeat: Infinity,
+            ease: 'easeInOut',
+            delay: i * 0.4,
+          }}
+        />
+      ))}
+      <AnimatePresence mode="wait">
+        {/* Keyed by mode so switching forms replays the enter/exit animation */}
+        <motion.div
+          key={mode}
+          className="auth-card"
+          initial={{ opacity: 0, y: 24, scale: 0.97 }}
+          animate={{ opacity: 1, y: 0, scale: 1 }}
+          exit={{ opacity: 0, y: -16, scale: 0.97 }}
+          transition={{ duration: 0.3 }}
+        >
+          {/* Logo */}
+          <div className="auth-logo">
+            <div className="auth-logo-icon"><Zap size={28} fill="currentColor" /></div>
+            <div className="auth-logo-text">LLM<span>Opt</span></div>
+          </div>
+          <div className="auth-title">
+            {mode === 'login' ? 'Welcome back' : 'Create account'}
+          </div>
+          <div className="auth-subtitle">
+            {mode === 'login'
+              ? 'Sign in to your LLMOpt workspace'
+              : 'Start optimizing your LLM costs today'}
+          </div>
+          {/* OAuth */}
+          <div style={{ display: 'flex', gap: 'var(--sp-3)', marginBottom: 'var(--sp-4)' }}>
+            <button type="button" className="oauth-btn" onClick={handleGoogleLogin}>
+              <svg width="18" height="18" viewBox="0 0 24 24" aria-hidden="true">
+                <path d="M22.56 12.25c0-.78-.07-1.53-.2-2.25H12v4.26h5.92c-.26 1.37-1.04 2.53-2.21 3.31v2.77h3.57c2.08-1.92 3.28-4.74 3.28-8.09z" fill="#4285F4"/>
+                <path d="M12 23c2.97 0 5.46-.98 7.28-2.66l-3.57-2.77c-.98.66-2.23 1.06-3.71 1.06-2.86 0-5.29-1.93-6.16-4.53H2.18v2.84C3.99 20.53 7.7 23 12 23z" fill="#34A853"/>
+                <path d="M5.84 14.09c-.22-.66-.35-1.36-.35-2.09s.13-1.43.35-2.09V7.07H2.18C1.43 8.55 1 10.22 1 12s.43 3.45 1.18 4.93l2.85-2.22.81-.62z" fill="#FBBC05"/>
+                <path d="M12 5.38c1.62 0 3.06.56 4.21 1.64l3.15-3.15C17.45 2.09 14.97 1 12 1 7.7 1 3.99 3.47 2.18 7.07l3.66 2.84c.87-2.6 3.3-4.53 6.16-4.53z" fill="#EA4335"/>
+              </svg>
+              Continue with Google
+            </button>
+            <button type="button" className="oauth-btn" onClick={handleGithubLogin}>
+              <svg width="18" height="18" viewBox="0 0 24 24" fill="currentColor" aria-hidden="true"><path d="M12 2C6.477 2 2 6.477 2 12c0 4.42 2.865 8.167 6.839 9.49.5.092.682-.217.682-.482 0-.237-.008-.866-.013-1.7-2.782.603-3.369-1.342-3.369-1.342-.454-1.155-1.11-1.462-1.11-1.462-.908-.62.069-.608.069-.608 1.003.07 1.531 1.03 1.531 1.03.892 1.529 2.341 1.087 2.91.832.092-.647.35-1.088.636-1.338-2.22-.253-4.555-1.11-4.555-4.943 0-1.091.39-1.984 1.029-2.683-.103-.253-.446-1.27.098-2.647 0 0 .84-.268 2.75 1.026A9.578 9.578 0 0112 6.836c.85.004 1.705.115 2.504.337 1.909-1.294 2.747-1.026 2.747-1.026.546 1.377.202 2.394.1 2.647.64.699 1.028 1.592 1.028 2.683 0 3.842-2.339 4.687-4.566 4.935.359.309.678.919.678 1.852 0 1.336-.012 2.415-.012 2.743 0 .267.18.578.688.48C19.138 20.163 22 16.418 22 12c0-5.523-4.477-10-10-10z"/></svg>
+              GitHub
+            </button>
+          </div>
+          <div className="auth-divider">
+            <div className="auth-divider-line" />
+            <div className="auth-divider-text">or</div>
+            <div className="auth-divider-line" />
+          </div>
+          {/* Form */}
+          <form className="auth-form" onSubmit={handleSubmit}>
+            <div className="input-group">
+              <label className="input-label" htmlFor="auth-email">Email</label>
+              <input
+                id="auth-email"
+                type="email"
+                value={email}
+                onChange={(e) => setEmail(e.target.value)}
+                placeholder="you@company.com"
+                autoComplete="email"
+                required
+              />
+            </div>
+            <div className="input-group">
+              <label className="input-label" htmlFor="auth-password">Password</label>
+              <input
+                id="auth-password"
+                type="password"
+                value={password}
+                onChange={(e) => setPassword(e.target.value)}
+                placeholder="••••••••"
+                autoComplete={mode === 'login' ? 'current-password' : 'new-password'}
+                required
+              />
+            </div>
+            {error && (
+              <motion.div
+                className="auth-error"
+                role="alert"
+                initial={{ opacity: 0 }}
+                animate={{ opacity: 1 }}
+              >
+                {error}
+              </motion.div>
+            )}
+            <button
+              id="auth-submit-btn"
+              type="submit"
+              className="btn btn-primary btn-lg"
+              disabled={loading}
+              style={{ width: '100%', marginTop: 'var(--sp-2)' }}
+            >
+              {loading ? (
+                <>
+                  <span className="spinner" />
+                  {mode === 'login' ? 'Signing in...' : 'Creating account...'}
+                </>
+              ) : (
+                mode === 'login' ? 'Sign In' : 'Create Account'
+              )}
+            </button>
+          </form>
+          <div className="auth-footer">
+            {mode === 'login' ? (
+              <>
+                Don&apos;t have an account?{' '}
+                <span
+                  className="auth-link"
+                  role="button"
+                  tabIndex={0}
+                  onClick={() => switchMode('register')}
+                  onKeyDown={activateOnKey('register')}
+                >
+                  Sign up
+                </span>
+              </>
+            ) : (
+              <>
+                Already have an account?{' '}
+                <span
+                  className="auth-link"
+                  role="button"
+                  tabIndex={0}
+                  onClick={() => switchMode('login')}
+                  onKeyDown={activateOnKey('login')}
+                >
+                  Sign in
+                </span>
+              </>
+            )}
+          </div>
+        </motion.div>
+      </AnimatePresence>
+    </div>
+  );
+}

frontend/src/pages/ModelRegistry.tsx ADDED Viewed

	@@ -0,0 +1,354 @@

+import { useEffect, useState } from 'react';
+import { motion } from 'framer-motion';
+import { Database, Star, ArrowUpDown } from 'lucide-react';
+import { api } from '../api';
+import type { ModelSpec } from '../types';
+// Maps a provider keyword to the badge CSS class used for that provider's label.
+// providerBadge() walks these keys in insertion order and matches them as substrings
+// of the lower-cased provider name, so the first matching key wins.
+const PROVIDER_COLORS: Record<string, string> = {
+  openai:    'badge-cyan',
+  anthropic: 'badge-purple',
+  google:    'badge-green',
+  ollama:    'badge-amber',
+  mistral:   'badge-muted',
+  deepseek:  'badge-red',
+  cohere:    'badge-muted',
+};
+/**
+ * Pick the badge CSS class for a provider name.
+ *
+ * The name is lower-cased and compared against the PROVIDER_COLORS keys by substring;
+ * the first key contained in the name decides the class. Unknown (or missing) providers
+ * fall back to 'badge-muted'.
+ */
+function providerBadge(provider: string) {
+  const needle = provider?.toLowerCase();
+  const matchedKey = Object.keys(PROVIDER_COLORS).find((key) => needle?.includes(key));
+  return matchedKey !== undefined ? PROVIDER_COLORS[matchedKey] : 'badge-muted';
+}
+/**
+ * Circular gauge that draws a capability score as a partially filled ring with the
+ * percentage (0-100) printed in the centre.
+ *
+ * @param score Capability score, expected in [0, 1]. Out-of-range or non-finite values
+ *              are clamped so the arc never receives a negative or NaN dash length.
+ */
+function CapabilityGauge({ score }: { score: number }) {
+  const safeScore = Number.isFinite(score) ? Math.min(1, Math.max(0, score)) : 0;
+  const r = 24;
+  const circ = 2 * Math.PI * r;
+  // Length of the coloured arc; the remainder of the circumference is left as a gap.
+  const dash = circ * safeScore;
+  const color = safeScore >= 0.8 ? 'var(--accent-cyan)' : safeScore >= 0.6 ? 'var(--accent-amber)' : 'var(--accent-red)';
+  const percentLabel = (safeScore * 100).toFixed(0);
+  return (
+    <svg width={60} height={60} role="img" aria-label={`Capability score ${percentLabel} out of 100`}>
+      <circle cx={30} cy={30} r={r} fill="none" stroke="var(--bg-border)" strokeWidth={4} />
+      <circle
+        cx={30} cy={30} r={r}
+        fill="none"
+        stroke={color}
+        strokeWidth={4}
+        strokeDasharray={`${dash} ${circ - dash}`}
+        strokeLinecap="round"
+        // Rotate so the arc starts at 12 o'clock instead of 3 o'clock.
+        transform="rotate(-90 30 30)"
+        style={{ transition: 'stroke-dasharray 0.6s ease' }}
+      />
+      <text
+        x={30} y={35}
+        textAnchor="middle"
+        fill="var(--text-primary)"
+        style={{
+          fontSize: '11px',
+          fontFamily: 'JetBrains Mono, monospace',
+          fontWeight: 700,
+        }}
+      >
+        {percentLabel}
+      </text>
+    </svg>
+  );
+}
+/**
+ * Grid-view card for one registry entry: name, provider badge, capability gauge,
+ * per-1k pricing, context window, per-skill score bars and optional notes.
+ *
+ * @param model Registry entry to display.
+ * @param index Position in the list; only used to stagger the entrance animation.
+ */
+function ModelCard({ model, index }: { model: ModelSpec; index: number }) {
+  // "Best value": capability_score above 0.7 while input_cost_per_1k stays below 0.002.
+  const cheapAndCapable = model.capability_score > 0.7 && model.input_cost_per_1k < 0.002;
+  // Models served by ollama are shown as free instead of with their listed prices.
+  const runsLocally = model.provider === 'ollama';
+  const cardStyle = cheapAndCapable
+    ? { borderColor: 'var(--accent-cyan)', boxShadow: '0 0 20px rgba(0,229,255,0.08)' }
+    : {};
+  const renderPrice = (cost: number) =>
+    runsLocally ? <span style={{ color: 'var(--accent-green)' }}>FREE</span> : `$${cost.toFixed(5)}`;
+  // Millions are shown as "NM", anything smaller as "Nk".
+  const contextLabel =
+    model.context_window >= 1000000
+      ? `${(model.context_window / 1000000).toFixed(0)}M`
+      : `${(model.context_window / 1000).toFixed(0)}k`;
+  const skillBars = [
+    { label: 'Reasoning', value: model.reasoning_score, color: 'var(--accent-cyan)' },
+    { label: 'Coding',    value: model.coding_score,    color: 'var(--accent-purple)' },
+    { label: 'Math',      value: model.math_score,      color: 'var(--accent-amber)' },
+  ];
+  return (
+    <motion.div
+      className="model-card"
+      style={cardStyle}
+      initial={{ opacity: 0, y: 16 }}
+      animate={{ opacity: 1, y: 0 }}
+      transition={{ delay: index * 0.04, duration: 0.35 }}
+    >
+      {cheapAndCapable && (
+        <div style={{ display: 'flex', alignItems: 'center', gap: 6, marginBottom: 'var(--sp-2)' }}>
+          <Star size={12} fill="var(--accent-cyan)" color="var(--accent-cyan)" />
+          <span style={{ fontSize: 'var(--text-xs)', color: 'var(--accent-cyan)', fontFamily: 'JetBrains Mono, monospace', fontWeight: 700 }}>
+            BEST VALUE
+          </span>
+        </div>
+      )}
+      <div style={{ display: 'flex', alignItems: 'flex-start', justifyContent: 'space-between', gap: 'var(--sp-3)' }}>
+        <div style={{ flex: 1 }}>
+          <div className="model-card-name">{model.model_name}</div>
+          <div style={{ marginTop: 'var(--sp-2)', display: 'flex', gap: 'var(--sp-2)', flexWrap: 'wrap' }}>
+            <span className={`badge ${providerBadge(model.provider)}`}>{model.provider}</span>
+            {runsLocally && <span className="badge badge-green">FREE LOCAL</span>}
+          </div>
+        </div>
+        <CapabilityGauge score={model.capability_score} />
+      </div>
+      <div className="model-card-pricing">
+        <div className="model-card-price-item">
+          <div className="model-card-price-label">Input /1k</div>
+          <div className="model-card-price-value">
+            {renderPrice(model.input_cost_per_1k)}
+          </div>
+        </div>
+        <div className="model-card-price-item">
+          <div className="model-card-price-label">Output /1k</div>
+          <div className="model-card-price-value">
+            {renderPrice(model.output_cost_per_1k)}
+          </div>
+        </div>
+        <div className="model-card-price-item">
+          <div className="model-card-price-label">Context</div>
+          <div className="model-card-price-value" style={{ color: 'var(--accent-purple)' }}>
+            {contextLabel}
+          </div>
+        </div>
+      </div>
+      {/* One horizontal bar per skill score; the value is rendered as a 0-100 number */}
+      <div style={{ display: 'flex', flexDirection: 'column', gap: 6 }}>
+        {skillBars.map((bar) => (
+          <div key={bar.label} style={{ display: 'flex', alignItems: 'center', gap: 8 }}>
+            <span style={{ fontSize: 'var(--text-xs)', color: 'var(--text-muted)', width: 62, flexShrink: 0 }}>{bar.label}</span>
+            <div style={{ flex: 1, height: 4, background: 'var(--bg-border)', borderRadius: 2, overflow: 'hidden' }}>
+              <div style={{ width: `${bar.value * 100}%`, height: '100%', background: bar.color, borderRadius: 2 }} />
+            </div>
+            <span style={{ fontSize: 'var(--text-xs)', color: bar.color, fontFamily: 'JetBrains Mono', width: 28, textAlign: 'right' }}>
+              {(bar.value * 100).toFixed(0)}
+            </span>
+          </div>
+        ))}
+      </div>
+      {model.notes && (
+        <div style={{ fontSize: 'var(--text-xs)', color: 'var(--text-secondary)', borderTop: '1px solid var(--bg-border)', paddingTop: 'var(--sp-3)', lineHeight: 1.5 }}>
+          {model.notes}
+        </div>
+      )}
+    </motion.div>
+  );
+}
+// ModelSpec fields the table view can be sorted by; the sort comparator subtracts them numerically.
+type SortKey = 'capability_score' | 'input_cost_per_1k' | 'output_cost_per_1k' | 'max_complexity';
+// Table columns that can be sorted by clicking their header, in display order.
+const SORTABLE_COLUMNS: { key: SortKey; label: string }[] = [
+  { key: 'capability_score',   label: 'Capability' },
+  { key: 'input_cost_per_1k',  label: 'Input /1k' },
+  { key: 'output_cost_per_1k', label: 'Output /1k' },
+  { key: 'max_complexity',     label: 'Max Complexity' },
+];
+// Compact context-window label: millions as "NM", everything smaller as "Nk".
+function formatContextWindow(tokens: number) {
+  return tokens >= 1000000
+    ? `${(tokens / 1000000).toFixed(0)}M`
+    : `${(tokens / 1000).toFixed(0)}k`;
+}
+/**
+ * Model registry page: loads the model list once on mount and shows it either as a
+ * grid of cards or as a sortable table, with a free-text filter on name and provider.
+ */
+export default function ModelRegistry() {
+  const [models, setModels] = useState<ModelSpec[]>([]);
+  const [loading, setLoading] = useState(true);
+  const [error, setError] = useState('');
+  const [sortKey, setSortKey] = useState<SortKey>('capability_score');
+  const [sortAsc, setSortAsc] = useState(false);
+  const [view, setView] = useState<'grid' | 'table'>('grid');
+  const [filter, setFilter] = useState('');
+  useEffect(() => {
+    // Set by the cleanup so a response arriving after unmount does not touch state.
+    let cancelled = false;
+    const load = async () => {
+      try {
+        const data = await api.models();
+        if (!cancelled) setModels(data.models || []);
+      } catch (e: any) {
+        if (!cancelled) setError(e?.message || 'Failed to load models');
+      } finally {
+        if (!cancelled) setLoading(false);
+      }
+    };
+    load();
+    return () => {
+      cancelled = true;
+    };
+  }, []);
+  // Normalise the query once; entries with a missing name/provider simply do not match on that field.
+  const query = filter.trim().toLowerCase();
+  const filtered = models.filter(
+    (m) =>
+      (m.model_name ?? '').toLowerCase().includes(query) ||
+      (m.provider ?? '').toLowerCase().includes(query)
+  );
+  const sorted = [...filtered].sort((a, b) => {
+    // Missing numeric fields count as 0 so the comparator never returns NaN.
+    const d = (a[sortKey] ?? 0) - (b[sortKey] ?? 0);
+    return sortAsc ? d : -d;
+  });
+  // Clicking the active column flips direction; a new column starts descending.
+  const toggleSort = (k: SortKey) => {
+    if (sortKey === k) setSortAsc((v) => !v);
+    else { setSortKey(k); setSortAsc(false); }
+  };
+  const activeToggleStyle = { borderColor: 'var(--accent-cyan)', color: 'var(--accent-cyan)' };
+  if (loading) {
+    return (
+      <div style={{ display: 'flex', alignItems: 'center', justifyContent: 'center', height: '100%' }}>
+        <span className="spinner" style={{ width: 32, height: 32, borderWidth: 3 }} />
+      </div>
+    );
+  }
+  return (
+    <div style={{ display: 'flex', flexDirection: 'column', flex: 1, minHeight: 0, overflow: 'hidden' }}>
+      <div className="topbar">
+        <div className="topbar-breadcrumb">
+          <Database size={14} style={{ color: 'var(--accent-cyan)' }} />
+          <strong>Model Registry</strong>
+          <span style={{ color: 'var(--text-muted)' }}>/ {models.length} models registered</span>
+        </div>
+        <div className="topbar-actions">
+          <input
+            type="text"
+            value={filter}
+            onChange={(e) => setFilter(e.target.value)}
+            placeholder="Filter models..."
+            aria-label="Filter models"
+            style={{ width: 180, padding: '6px 12px', fontSize: 'var(--text-xs)' }}
+          />
+          <button
+            type="button"
+            className="btn btn-ghost btn-sm"
+            aria-pressed={view === 'grid'}
+            style={view === 'grid' ? activeToggleStyle : {}}
+            onClick={() => setView('grid')}
+          >
+            Grid
+          </button>
+          <button
+            type="button"
+            className="btn btn-ghost btn-sm"
+            aria-pressed={view === 'table'}
+            style={view === 'table' ? activeToggleStyle : {}}
+            onClick={() => setView('table')}
+          >
+            Table
+          </button>
+        </div>
+      </div>
+      <div className="page-content">
+        {error && <div className="auth-error mb-4">⚠ {error}</div>}
+        {sorted.length === 0 && !error ? (
+          <div className="empty-state">
+            <div className="empty-state-icon"><Database size={48} /></div>
+            <div className="empty-state-title">No Models Found</div>
+            <div className="empty-state-desc">
+              {filter ? `No models match "${filter}"` : 'The model registry is empty.'}
+            </div>
+          </div>
+        ) : view === 'grid' ? (
+          <div className="grid-2">
+            {/* Provider is part of the key because the same model name may exist under several providers */}
+            {sorted.map((m, i) => <ModelCard key={`${m.provider}/${m.model_name}`} model={m} index={i} />)}
+          </div>
+        ) : (
+          <div className="card">
+            <div style={{ overflowX: 'auto' }}>
+              <table className="data-table">
+                <thead>
+                  <tr>
+                    <th>Model</th>
+                    <th>Provider</th>
+                    {SORTABLE_COLUMNS.map(({ key, label }) => (
+                      <th
+                        key={key}
+                        style={{ cursor: 'pointer', userSelect: 'none' }}
+                        aria-sort={sortKey === key ? (sortAsc ? 'ascending' : 'descending') : 'none'}
+                        onClick={() => toggleSort(key)}
+                      >
+                        <div style={{ display: 'flex', alignItems: 'center', gap: 4 }}>
+                          {label} <ArrowUpDown size={12} />
+                        </div>
+                      </th>
+                    ))}
+                    <th>Context</th>
+                    <th>Notes</th>
+                  </tr>
+                </thead>
+                <tbody>
+                  {sorted.map((m) => {
+                    // Same "best value" rule as the grid cards: capable (> 0.7) and cheap input (< 0.002).
+                    const isBest = m.capability_score > 0.7 && m.input_cost_per_1k < 0.002;
+                    return (
+                      <tr
+                        key={`${m.provider}/${m.model_name}`}
+                        style={isBest ? { borderLeft: '3px solid var(--accent-cyan)' } : {}}
+                      >
+                        <td>
+                          <div style={{ fontFamily: 'JetBrains Mono, monospace', fontSize: 'var(--text-sm)', color: 'var(--text-primary)', display: 'flex', alignItems: 'center', gap: 8 }}>
+                            {isBest && <Star size={11} fill="var(--accent-cyan)" color="var(--accent-cyan)" />}
+                            {m.model_name}
+                          </div>
+                        </td>
+                        <td>
+                          <span className={`badge ${providerBadge(m.provider)}`}>{m.provider}</span>
+                        </td>
+                        <td>
+                          <div style={{ display: 'flex', alignItems: 'center', gap: 8 }}>
+                            <div style={{ width: 60, height: 4, background: 'var(--bg-border)', borderRadius: 2, overflow: 'hidden' }}>
+                              <div style={{ width: `${m.capability_score * 100}%`, height: '100%', background: 'var(--accent-cyan)', borderRadius: 2 }} />
+                            </div>
+                            <span style={{ fontFamily: 'JetBrains Mono, monospace', fontSize: 'var(--text-xs)', color: 'var(--accent-cyan)' }}>
+                              {(m.capability_score * 100).toFixed(0)}
+                            </span>
+                          </div>
+                        </td>
+                        <td>
+                          <span style={{ fontFamily: 'JetBrains Mono, monospace', fontSize: 'var(--text-xs)', color: m.input_cost_per_1k === 0 ? 'var(--accent-green)' : 'var(--text-primary)' }}>
+                            {m.input_cost_per_1k === 0 ? 'FREE' : `$${m.input_cost_per_1k.toFixed(5)}`}
+                          </span>
+                        </td>
+                        <td>
+                          <span style={{ fontFamily: 'JetBrains Mono, monospace', fontSize: 'var(--text-xs)', color: m.output_cost_per_1k === 0 ? 'var(--accent-green)' : 'var(--text-primary)' }}>
+                            {m.output_cost_per_1k === 0 ? 'FREE' : `$${m.output_cost_per_1k.toFixed(5)}`}
+                          </span>
+                        </td>
+                        <td>
+                          <div style={{ display: 'flex', alignItems: 'center', gap: 6 }}>
+                            <div style={{ width: 50, height: 4, background: 'var(--bg-border)', borderRadius: 2, overflow: 'hidden' }}>
+                              <div style={{ width: `${m.max_complexity * 100}%`, height: '100%', background: 'var(--accent-purple)', borderRadius: 2 }} />
+                            </div>
+                            <span style={{ fontFamily: 'JetBrains Mono, monospace', fontSize: 'var(--text-xs)', color: 'var(--accent-purple)' }}>
+                              {(m.max_complexity * 100).toFixed(0)}
+                            </span>
+                          </div>
+                        </td>
+                        <td>
+                          <span style={{ fontFamily: 'JetBrains Mono, monospace', fontSize: 'var(--text-xs)', color: 'var(--accent-purple)' }}>
+                            {formatContextWindow(m.context_window)}
+                          </span>
+                        </td>
+                        <td style={{ maxWidth: 200 }}>
+                          {/* Full text stays available through the title tooltip when truncated */}
+                          <span style={{ fontSize: 'var(--text-xs)', color: 'var(--text-secondary)' }} title={m.notes}>
+                            {m.notes && m.notes.length > 50 ? m.notes.slice(0, 50) + '…' : m.notes}
+                          </span>
+                        </td>
+                      </tr>
+                    );
+                  })}
+                </tbody>
+              </table>
+            </div>
+          </div>
+        )}
+      </div>
+    </div>
+  );
+}

frontend/src/pages/Playground.tsx ADDED Viewed

	@@ -0,0 +1,606 @@

+import { useState, useCallback } from 'react';
+import { motion, AnimatePresence } from 'framer-motion';
+import { Play, Zap, Eye, EyeOff, Copy, Check } from 'lucide-react';
+import { useAppStore } from '../store';
+import { api } from '../api';
+import type { GenerateResponse, ExplainResponse, PipelineStage, PipelineStageStatus, ComplexityTier } from '../types';
+import ReactMarkdown from 'react-markdown';
+// ─── Tier Badge ────────────────────────────────────────────────────────────────
+// Hex colour per complexity tier. TierBadge uses it for the text and border colour
+// and appends a two-digit hex alpha suffix to build the background tint.
+const TIER_COLORS: Record<ComplexityTier, string> = {
+  trivial: '#00ff94',
+  easy:    '#00e5ff',
+  medium:  '#ffc700',
+  hard:    '#ff6b35',
+  expert:  '#e040fb',
+};
+/**
+ * Pill-shaped label showing a complexity tier in that tier's colour.
+ *
+ * @param tier Tier to display. A value without an entry in TIER_COLORS is rendered
+ *             in the muted text colour with no background tint.
+ */
+function TierBadge({ tier }: { tier: ComplexityTier }) {
+  const tierHex = TIER_COLORS[tier];
+  const color = tierHex || 'var(--text-muted)';
+  // The "15" alpha suffix only forms valid CSS on a 6-digit hex colour; appending it to
+  // the `var(...)` fallback would produce an invalid value, so the fallback gets no tint.
+  const background = tierHex ? `${tierHex}15` : 'transparent';
+  return (
+    <span style={{
+      display: 'inline-block',
+      padding: '2px 10px',
+      borderRadius: 12,
+      border: `1px solid ${color}`,
+      color,
+      fontSize: 'var(--text-xs)',
+      fontFamily: 'JetBrains Mono, monospace',
+      fontWeight: 700,
+      letterSpacing: 1,
+      textTransform: 'uppercase',
+      background,
+    }}>
+      {tier}
+    </span>
+  );
+}
+// ─── Pipeline Visualizer ─────────────────────────────────────────────────────
+// Static definition of the pipeline steps, in display order.
+// `id` is matched against the runtime stage list to look up each step's status;
+// `icon` is shown while a step has no special status; `desc` is the placeholder
+// detail text used until explain data is available.
+const PIPELINE_STAGES: { id: string; label: string; icon: string; desc: string }[] = [
+  { id: 'analyze',    label: 'Query Analyzer',    icon: '🔍', desc: 'Detecting domain, complexity signals, token count' },
+  { id: 'estimate',  label: 'Complexity Estimator', icon: '🧠', desc: 'ML model scoring query complexity C(q) ∈ [0,1]' },
+  { id: 'optimize',  label: 'Optimization Engine', icon: '⚡', desc: 'Selecting optimal model via budget constraints' },
+  { id: 'compress',  label: 'Prompt Optimizer',    icon: '📦', desc: 'Compressing tokens and injecting system prompt' },
+  { id: 'route',     label: 'Model Router',         icon: '🚀', desc: 'Routing request to provider API' },
+];
+/**
+ * Vertical list of the pipeline steps with a status icon, label and detail line each.
+ *
+ * @param stages      Runtime status per step; steps without an entry are shown as 'idle'.
+ * @param explainData When present, replaces each step's placeholder description with
+ *                    values taken from the explain response.
+ */
+function PipelineVisualizer({
+  stages,
+  explainData,
+}: {
+  stages: PipelineStage[];
+  explainData?: ExplainResponse | null;
+}) {
+  // Detail line for one step: real numbers when explain data exists, otherwise the static text.
+  const detailFor = (def: (typeof PIPELINE_STAGES)[number]): string => {
+    if (!explainData) return def.desc;
+    switch (def.id) {
+      case 'analyze':
+        return `Domain: ${explainData.features.primary_domain} | Tokens: ${explainData.features.token_count} | Output: ${explainData.features.estimated_output_length}`;
+      case 'estimate':
+        return `Score: ${explainData.complexity.score.toFixed(3)} | Tier: ${explainData.complexity.tier} | Reasoning req: ${explainData.complexity.required_reasoning.toFixed(2)}`;
+      case 'optimize': {
+        const chosen = explainData.optimization;
+        return `Selected: ${chosen.selected_model} | Budget: ${chosen.budget_mode} | Compression: ${chosen.compression_enabled ? 'yes' : 'no'}`;
+      }
+      case 'compress': {
+        const prompt = explainData.optimized_prompt;
+        const tokensSaved = prompt.tokens_saved || 0;
+        return `Tokens before: ${prompt.tokens_before} → after: ${prompt.tokens_after} | Saved: ${tokensSaved} tokens`;
+      }
+      case 'route': {
+        const chosen = explainData.optimization;
+        return `Provider: ${chosen.provider} | Max tokens: ${chosen.estimated_output_tokens} | Style: ${chosen.system_prompt_style}`;
+      }
+      default:
+        return def.desc;
+    }
+  };
+  // Status glyph for one step; any status without a dedicated glyph shows the step's own icon dimmed.
+  const iconFor = (status: PipelineStageStatus, stageIcon: string) => {
+    switch (status) {
+      case 'active':
+        return <span className="spinner" style={{ width: 16, height: 16, borderWidth: 2 }} />;
+      case 'complete':
+        return <span style={{ color: 'var(--accent-green)', fontSize: 18 }}>✓</span>;
+      case 'skipped':
+        return <span style={{ color: 'var(--text-muted)', fontSize: 14 }}>⊘</span>;
+      case 'error':
+        return <span style={{ color: 'var(--accent-red)', fontSize: 18 }}>✕</span>;
+      default:
+        return <span style={{ fontSize: 14, opacity: 0.5 }}>{stageIcon}</span>;
+    }
+  };
+  const lastIndex = PIPELINE_STAGES.length - 1;
+  return (
+    <div className="pipeline-wrapper">
+      {PIPELINE_STAGES.map((def, i) => {
+        const status: PipelineStageStatus = stages.find((s) => s.id === def.id)?.status || 'idle';
+        // The connector below a finished step fades from the accent colour into the border colour.
+        const connectorBackground =
+          status === 'complete'
+            ? 'linear-gradient(180deg, var(--accent-cyan) 0%, var(--bg-border) 100%)'
+            : 'var(--bg-border)';
+        return (
+          <div key={def.id} className={`pipeline-stage ${status}`}>
+            <div className="pipeline-stage-icon">
+              {iconFor(status, def.icon)}
+            </div>
+            <div className="pipeline-stage-body">
+              <div className="pipeline-stage-label">{def.label}</div>
+              <div className="pipeline-stage-detail">{detailFor(def)}</div>
+            </div>
+            {i < lastIndex && (
+              <div
+                className="pipeline-connector"
+                style={{ background: connectorBackground }}
+              />
+            )}
+          </div>
+        );
+      })}
+    </div>
+  );
+}
+// ─── Rationale Card ──────────────────────────────────────────────────────────
+/**
+ * Card explaining why the optimizer routed a query the way it did.
+ *
+ * Sections, all read from `explain`:
+ *   - complexity tier, score and the reasoning/coding/math requirement bars
+ *   - badges for the boolean feature flags that are set
+ *   - the routing decision (model, provider, budget mode, ...)
+ *   - prompt compression numbers (only when tokens were saved)
+ *   - the optimizer's rationale bullets (only when present)
+ */
+function RationaleCard({ explain }: { explain: ExplainResponse }) {
+  const { complexity, optimization, optimized_prompt, features } = explain;
+  // Feature flags that can be shown as badges, with their display labels.
+  const featureFlags = [
+    { key: 'domain_code',         label: 'Code' },
+    { key: 'domain_math',         label: 'Math' },
+    { key: 'domain_science',      label: 'Science' },
+    { key: 'domain_reasoning',    label: 'Reasoning' },
+    { key: 'domain_creative',     label: 'Creative' },
+    { key: 'multi_step',          label: 'Multi-step' },
+    { key: 'requires_comparison', label: 'Comparison' },
+    { key: 'requires_analysis',   label: 'Analysis' },
+    { key: 'has_math_notation',   label: 'Math notation' },
+    { key: 'has_code_block',      label: 'Code block' },
+  ];
+  // Compute the active badges once so the badges and the "nothing detected"
+  // fallback can never disagree. The previous fallback used a key-prefix test
+  // that did not cover `multi_step`, so a badge and the fallback could both
+  // render, and a true flag outside the list could leave the section empty.
+  const activeFlags = featureFlags.filter(
+    (f) => features[f.key as keyof typeof features] === true,
+  );
+  return (
+    <div className="rationale-card">
+      <div className="rationale-card-title">
+        <Zap size={14} />
+        LLMOpt Decision Rationale
+      </div>
+      {/* Complexity breakdown */}
+      <div className="rationale-section">
+        <div className="rationale-label">Complexity Analysis</div>
+        <div style={{ display: 'flex', alignItems: 'center', gap: 12, marginBottom: 12 }}>
+          <TierBadge tier={complexity.tier} />
+          <span style={{ fontFamily: 'JetBrains Mono', fontSize: 'var(--text-sm)', color: 'var(--text-primary)' }}>
+            Score: <strong style={{ color: 'var(--accent-cyan)' }}>{complexity.score.toFixed(3)}</strong>
+          </span>
+        </div>
+        <div style={{ display: 'flex', flexDirection: 'column', gap: 8 }}>
+          {[
+            { label: 'Reasoning req.', value: complexity.required_reasoning, color: 'var(--accent-cyan)' },
+            { label: 'Coding req.',    value: complexity.required_coding,    color: 'var(--accent-purple)' },
+            { label: 'Math req.',      value: complexity.required_math,      color: 'var(--accent-amber)' },
+          ].map(({ label, value, color }) => {
+            // Clamp only the bar width so an out-of-range value cannot overflow
+            // the track; the numeric readout still shows the raw value.
+            const barPercent = Math.min(Math.max(value, 0), 1) * 100;
+            return (
+              <div key={label} style={{ display: 'flex', alignItems: 'center', gap: 8 }}>
+                <span style={{ fontSize: 'var(--text-xs)', color: 'var(--text-muted)', width: 100, flexShrink: 0 }}>{label}</span>
+                <div style={{ flex: 1, height: 4, background: 'var(--bg-border)', borderRadius: 2 }}>
+                  <div style={{ width: `${barPercent}%`, height: '100%', background: color, borderRadius: 2, transition: 'width 0.6s ease' }} />
+                </div>
+                <span style={{ fontSize: 'var(--text-xs)', color, fontFamily: 'JetBrains Mono', width: 32, textAlign: 'right' }}>
+                  {(value * 100).toFixed(0)}
+                </span>
+              </div>
+            );
+          })}
+        </div>
+      </div>
+      {/* Feature flags */}
+      <div className="rationale-section">
+        <div className="rationale-label">Detected Features</div>
+        <div style={{ display: 'flex', flexWrap: 'wrap', gap: 6 }}>
+          {activeFlags.map((f) => (
+            <span key={f.key} className="badge badge-cyan" style={{ fontSize: '10px' }}>
+              {f.label}
+            </span>
+          ))}
+          {activeFlags.length === 0 && (
+            <span style={{ fontSize: 'var(--text-xs)', color: 'var(--text-muted)' }}>No special features detected</span>
+          )}
+        </div>
+      </div>
+      {/* Routing decision */}
+      <div className="rationale-section">
+        <div className="rationale-label">Routing Decision</div>
+        <div style={{ display: 'grid', gridTemplateColumns: '1fr 1fr', gap: 8 }}>
+          {[
+            { label: 'Selected Model',    value: optimization.selected_model,    mono: true },
+            { label: 'Provider',          value: optimization.provider,          mono: true },
+            { label: 'Budget Mode',       value: optimization.budget_mode,       mono: false },
+            { label: 'System Prompt',     value: optimization.system_prompt_style, mono: false },
+            { label: 'Compression',       value: optimization.compression_enabled ? 'enabled' : 'disabled', mono: false },
+            { label: 'Fallback',          value: optimization.fallback_model || 'N/A', mono: true },
+          ].map(({ label, value, mono }) => (
+            <div key={label}>
+              <div style={{ fontSize: '10px', color: 'var(--text-muted)', marginBottom: 2 }}>{label}</div>
+              {/* Identifiers (mono) are shown in the accent color and a monospace face */}
+              <div style={{
+                fontFamily: mono ? 'JetBrains Mono, monospace' : 'inherit',
+                fontSize: 'var(--text-xs)',
+                color: mono ? 'var(--accent-cyan)' : 'var(--text-primary)',
+              }}>
+                {value}
+              </div>
+            </div>
+          ))}
+        </div>
+      </div>
+      {/* Compression: only shown when the optimizer actually saved tokens */}
+      {optimized_prompt.tokens_saved > 0 && (
+        <div className="rationale-section">
+          <div className="rationale-label">Prompt Compression</div>
+          <div style={{ display: 'flex', alignItems: 'center', gap: 8 }}>
+            <span style={{ fontFamily: 'JetBrains Mono', fontSize: 'var(--text-xs)', color: 'var(--text-muted)' }}>
+              {optimized_prompt.tokens_before}
+            </span>
+            <span style={{ color: 'var(--accent-green)', fontSize: 'var(--text-xs)' }}>→</span>
+            <span style={{ fontFamily: 'JetBrains Mono', fontSize: 'var(--text-xs)', color: 'var(--accent-green)' }}>
+              {optimized_prompt.tokens_after} tokens
+            </span>
+            <span style={{ marginLeft: 'auto', color: 'var(--accent-amber)', fontFamily: 'JetBrains Mono', fontSize: 'var(--text-xs)' }}>
+              -{optimized_prompt.tokens_saved} saved ({(optimized_prompt.compression_ratio * 100).toFixed(1)}%)
+            </span>
+          </div>
+        </div>
+      )}
+      {/* Rationale bullets */}
+      {optimization.rationale?.length > 0 && (
+        <div className="rationale-section">
+          <div className="rationale-label">Optimizer Rationale</div>
+          <ul style={{ margin: 0, padding: '0 0 0 16px', display: 'flex', flexDirection: 'column', gap: 4 }}>
+            {optimization.rationale.map((r, i) => (
+              <li key={i} style={{ fontSize: 'var(--text-xs)', color: 'var(--text-secondary)' }}>{r}</li>
+            ))}
+          </ul>
+        </div>
+      )}
+    </div>
+  );
+}
+// ─── Metrics Bar ─────────────────────────────────────────────────────────────
+/**
+ * Horizontal summary strip for one generation result: model, token count,
+ * estimated cost, cost saved, latency and complexity tier. A trailing
+ * "Tokens Compressed" entry is appended only when `tokens_saved` is positive.
+ */
+function MetricsBar({ result }: { result: GenerateResponse }) {
+  const {
+    model_used,
+    total_tokens,
+    estimated_cost,
+    cost_saved,
+    latency_ms,
+    complexity_tier,
+    tokens_saved,
+  } = result;
+  const divider = <div className="metric-divider" />;
+  return (
+    <div className="metrics-bar">
+      <div className="metric-item">
+        <div className="metric-label">Model</div>
+        <div className="metric-value" style={{ fontFamily: 'JetBrains Mono', fontSize: 'var(--text-xs)' }}>
+          {model_used}
+        </div>
+      </div>
+      {divider}
+      <div className="metric-item">
+        <div className="metric-label">Tokens</div>
+        <div className="metric-value">{total_tokens.toLocaleString()}</div>
+      </div>
+      {divider}
+      <div className="metric-item">
+        <div className="metric-label">Cost</div>
+        {/* Dollar amounts are shown with six decimals */}
+        <div className="metric-value" style={{ color: 'var(--accent-green)' }}>${estimated_cost.toFixed(6)}</div>
+      </div>
+      {divider}
+      <div className="metric-item">
+        <div className="metric-label">Saved</div>
+        <div className="metric-value" style={{ color: 'var(--accent-amber)' }}>${cost_saved.toFixed(6)}</div>
+      </div>
+      {divider}
+      <div className="metric-item">
+        <div className="metric-label">Latency</div>
+        <div className="metric-value">{latency_ms.toFixed(0)}ms</div>
+      </div>
+      {divider}
+      <div className="metric-item">
+        <div className="metric-label">Complexity</div>
+        <div className="metric-value">
+          <TierBadge tier={complexity_tier} />
+        </div>
+      </div>
+      {tokens_saved > 0 && (
+        <>
+          {divider}
+          <div className="metric-item">
+            <div className="metric-label">Tokens Compressed</div>
+            <div className="metric-value" style={{ color: 'var(--accent-purple)' }}>-{tokens_saved}</div>
+          </div>
+        </>
+      )}
+    </div>
+  );
+}
+// ─── Main Playground Page ────────────────────────────────────────────────────
+/**
+ * Playground page: takes a query, runs it through `api.explain` and
+ * `api.generate` with the optimizer settings from the app store, and shows the
+ * response, its metrics, the decision rationale and an animated pipeline view.
+ */
+export default function Playground() {
+  const {
+    budgetMode,
+    alphaWeight,
+    betaWeight,
+    gammaWeight,
+    compressionEnabled,
+    evaluationEnabled,
+    connectedProviders,
+  } = useAppStore();
+  const [query, setQuery] = useState('');
+  // Empty selection means "no provider constraint" (all connected providers).
+  const [selectedProviders, setSelectedProviders] = useState<string[]>([]);
+  const [loading, setLoading] = useState(false);
+  const [error, setError] = useState('');
+  const [result, setResult] = useState<GenerateResponse | null>(null);
+  const [explain, setExplain] = useState<ExplainResponse | null>(null);
+  const [showExplain, setShowExplain] = useState(false);
+  const [copied, setCopied] = useState(false);
+  const [stages, setStages] = useState<PipelineStage[]>([]);
+  // Update one stage's status (and optionally its detail); appends the stage
+  // if it is not in the list yet.
+  const setStageStatus = useCallback((id: string, status: PipelineStageStatus, detail?: string) => {
+    setStages((prev) => {
+      const existing = prev.find((s) => s.id === id);
+      if (existing) {
+        return prev.map((s) => s.id === id ? { ...s, status, detail: detail ?? s.detail } : s);
+      }
+      return [...prev, { id, label: id, icon: '', status, detail }];
+    });
+  }, []);
+  // Put every pipeline stage back to 'idle' before a new run.
+  const resetStages = useCallback(() => {
+    setStages(PIPELINE_STAGES.map((s) => ({ id: s.id, label: s.label, icon: s.icon, status: 'idle' as PipelineStageStatus })));
+  }, []);
+  // Step through the first four stages on fixed timers, then leave 'route'
+  // active; the caller completes it once the real API call returns.
+  const runPipelineAnimation = useCallback(async () => {
+    const timings = [300, 600, 400, 300, 0]; // ms per stage (route waits for real API)
+    const ids = ['analyze', 'estimate', 'optimize', 'compress', 'route'];
+    for (let i = 0; i < ids.length - 1; i++) {
+      setStageStatus(ids[i], 'active');
+      await new Promise((r) => setTimeout(r, timings[i]));
+      setStageStatus(ids[i], 'complete');
+    }
+    setStageStatus('route', 'active');
+  }, [setStageStatus]);
+  // Run the query: explain (for routing data) in parallel with the animation,
+  // then generate. Errors are mapped to a user-facing message by HTTP status.
+  const handleSubmit = async () => {
+    if (!query.trim() || loading) return;
+    setLoading(true);
+    setError('');
+    setResult(null);
+    setExplain(null);
+    resetStages();
+    const providerConstraints = selectedProviders.length > 0 ? selectedProviders : undefined;
+    try {
+      // 1. Start explain now so it runs while the animation plays
+      const explainPromise = api.explain(query, budgetMode, {
+        alpha: alphaWeight,
+        beta: betaWeight,
+        gamma: gammaWeight,
+        compression_enabled: compressionEnabled,
+        only_providers: providerConstraints,
+      });
+      // The promise is not awaited until the animation ends; register a no-op
+      // handler so an early rejection is not reported as unhandled. The
+      // `await` below still rethrows the error into the catch block.
+      explainPromise.catch(() => {});
+      // 2. Play the animation; it ends with the 'route' stage active
+      await runPipelineAnimation();
+      // 3. Get explain data (ready by now or wait a bit more)
+      const explainData = await explainPromise;
+      setExplain(explainData);
+      setShowExplain(false);
+      // 4. Generate with real API
+      const genResult = await api.generate({
+        query,
+        budget_mode: budgetMode,
+        alpha: alphaWeight,
+        beta: betaWeight,
+        gamma: gammaWeight,
+        compression_enabled: compressionEnabled,
+        evaluate: evaluationEnabled,
+        only_providers: providerConstraints,
+      });
+      setStageStatus('route', 'complete');
+      setResult(genResult);
+    } catch (e: any) {
+      // `e` may be any thrown value, so read its fields defensively.
+      const status = e?.status;
+      if (status === 401) {
+        setError('Please add your API keys in Settings to generate responses.');
+      } else if (status === 503) {
+        setError('Redis is unavailable — sessions require Redis. Try adding ?session= to the request or run Redis locally.');
+      } else {
+        setError(e?.message || 'Generation failed');
+      }
+      setStageStatus('route', 'error');
+    } finally {
+      setLoading(false);
+    }
+  };
+  // Copy the response text. "Copied!" is shown only after the write succeeds;
+  // a failure (permission denied, or no Clipboard API in an insecure context)
+  // is surfaced in the error banner instead of being thrown or ignored.
+  const handleCopy = async () => {
+    if (!result?.response) return;
+    try {
+      await navigator.clipboard.writeText(result.response);
+      setCopied(true);
+      setTimeout(() => setCopied(false), 2000);
+    } catch {
+      setError('Could not copy the response to the clipboard.');
+    }
+  };
+  const examplePrompts = [
+    'Explain quicksort with Python code and time complexity analysis',
+    'What is the derivative of x²·sin(x)?',
+    'Write a haiku about machine learning',
+    'Design a distributed rate limiter for 1M RPS',
+    'Summarize the French Revolution in 3 bullet points',
+  ];
+  return (
+    <div style={{ display: 'flex', flexDirection: 'column', flex: 1, minHeight: 0, overflow: 'hidden' }}>
+      <div className="topbar">
+        <div className="topbar-breadcrumb">
+          <Zap size={14} style={{ color: 'var(--accent-cyan)' }} />
+          <strong>Playground</strong>
+          <span style={{ color: 'var(--text-muted)' }}>/ Query Optimizer</span>
+        </div>
+        <div className="topbar-actions">
+          {connectedProviders.length > 0 && (
+            <div style={{ display: 'flex', alignItems: 'center', gap: '6px', marginRight: '16px', borderRight: '1px solid var(--bg-border)', paddingRight: '16px' }}>
+              <span style={{ fontSize: 'var(--text-xs)', color: 'var(--text-muted)' }}>Routing Pool:</span>
+              {connectedProviders.map((prov) => {
+                const isSelected = selectedProviders.includes(prov);
+                // With nothing selected every provider is in the pool, so all
+                // buttons render as active.
+                const isActive = selectedProviders.length === 0 || isSelected;
+                return (
+                  <button
+                    key={prov}
+                    className="btn btn-xs"
+                    style={{
+                      textTransform: 'capitalize',
+                      fontSize: '10px',
+                      padding: '2px 8px',
+                      borderColor: isActive ? 'var(--accent-cyan)' : 'var(--bg-border)',
+                      color: isActive ? 'var(--accent-cyan)' : 'var(--text-muted)',
+                      opacity: isActive ? 1 : 0.4,
+                      transition: 'all 0.2s ease',
+                      background: 'transparent',
+                    }}
+                    onClick={() => {
+                      // Toggle this provider in the selection
+                      setSelectedProviders(prev =>
+                        prev.includes(prov)
+                          ? prev.filter(p => p !== prov)
+                          : [...prev, prov]
+                      );
+                    }}
+                  >
+                    {prov}
+                  </button>
+                );
+              })}
+            </div>
+          )}
+          <span style={{ fontSize: 'var(--text-xs)', color: 'var(--text-muted)' }}>Budget:</span>
+          {(['cheap', 'balanced', 'quality'] as const).map((m) => (
+            <button
+              key={m}
+              className={`btn btn-ghost btn-sm ${budgetMode === m ? 'active' : ''}`}
+              style={budgetMode === m ? { borderColor: 'var(--accent-cyan)', color: 'var(--accent-cyan)' } : {}}
+              onClick={() => useAppStore.getState().setBudgetMode(m)}
+            >
+              {m}
+            </button>
+          ))}
+        </div>
+      </div>
+      <div className="page-content" style={{ display: 'flex', gap: 'var(--sp-4)', alignItems: 'flex-start', overflow: 'auto' }}>
+        {/* Left panel: Input + Examples + Result */}
+        <div style={{ flex: 1, display: 'flex', flexDirection: 'column', gap: 'var(--sp-4)', minWidth: 0 }}>
+          {/* Query input */}
+          <div className="card">
+            <div className="card-header">Query</div>
+            <textarea
+              id="playground-query-input"
+              value={query}
+              onChange={(e) => setQuery(e.target.value)}
+              onKeyDown={(e) => {
+                if (e.key === 'Enter' && (e.ctrlKey || e.metaKey)) handleSubmit();
+              }}
+              placeholder="Ask anything — LLMOpt will analyze complexity, route to the optimal model, and compress the prompt to save cost..."
+              style={{ minHeight: 120, resize: 'vertical', fontFamily: 'inherit' }}
+            />
+            <div style={{ display: 'flex', alignItems: 'center', justifyContent: 'space-between', marginTop: 'var(--sp-3)' }}>
+              <span style={{ fontSize: 'var(--text-xs)', color: 'var(--text-muted)' }}>
+                Ctrl+Enter to run
+              </span>
+              <button
+                id="playground-submit-btn"
+                className="btn btn-primary"
+                onClick={handleSubmit}
+                disabled={loading || !query.trim()}
+              >
+                {loading ? (
+                  <><span className="spinner" /> Optimizing…</>
+                ) : (
+                  <><Play size={14} /> Run</>
+                )}
+              </button>
+            </div>
+          </div>
+          {/* Example prompts: hidden while a run is in progress or a result is shown */}
+          {!result && !loading && (
+            <div className="card">
+              <div className="card-header">Example Prompts</div>
+              <div style={{ display: 'flex', flexDirection: 'column', gap: 'var(--sp-2)' }}>
+                {examplePrompts.map((p) => (
+                  <button
+                    key={p}
+                    className="example-prompt-btn"
+                    onClick={() => setQuery(p)}
+                  >
+                    <span className="example-prompt-icon">→</span>
+                    {p}
+                  </button>
+                ))}
+              </div>
+            </div>
+          )}
+          {/* Error */}
+          {error && (
+            <motion.div
+              className="auth-error"
+              initial={{ opacity: 0, y: -8 }}
+              animate={{ opacity: 1, y: 0 }}
+            >
+              ⚠ {error}
+            </motion.div>
+          )}
+          {/* Result */}
+          <AnimatePresence>
+            {result && (
+              <motion.div
+                className="card"
+                initial={{ opacity: 0, y: 12 }}
+                animate={{ opacity: 1, y: 0 }}
+                exit={{ opacity: 0, y: -8 }}
+              >
+                <div className="card-header" style={{ display: 'flex', justifyContent: 'space-between', alignItems: 'center' }}>
+                  <span>Response</span>
+                  <div style={{ display: 'flex', gap: 'var(--sp-2)' }}>
+                    <button className="btn btn-ghost btn-sm" onClick={() => setShowExplain((v) => !v)}>
+                      {showExplain ? <EyeOff size={12} /> : <Eye size={12} />}
+                      {showExplain ? 'Hide Explain' : 'Explain'}
+                    </button>
+                    <button className="btn btn-ghost btn-sm" onClick={handleCopy}>
+                      {copied ? <Check size={12} /> : <Copy size={12} />}
+                      {copied ? 'Copied!' : 'Copy'}
+                    </button>
+                  </div>
+                </div>
+                <MetricsBar result={result} />
+                <div className="response-content">
+                  <ReactMarkdown>{result.response}</ReactMarkdown>
+                </div>
+                {/* Inline explain panel */}
+                <AnimatePresence>
+                  {showExplain && explain && (
+                    <motion.div
+                      initial={{ opacity: 0, height: 0 }}
+                      animate={{ opacity: 1, height: 'auto' }}
+                      exit={{ opacity: 0, height: 0 }}
+                      style={{ overflow: 'hidden' }}
+                    >
+                      <div style={{ borderTop: '1px solid var(--bg-border)', paddingTop: 'var(--sp-4)', marginTop: 'var(--sp-4)' }}>
+                        <RationaleCard explain={explain} />
+                      </div>
+                    </motion.div>
+                  )}
+                </AnimatePresence>
+              </motion.div>
+            )}
+          </AnimatePresence>
+        </div>
+        {/* Right panel: Pipeline */}
+        <div style={{ width: 280, flexShrink: 0 }}>
+          <div className="card" style={{ position: 'sticky', top: 0 }}>
+            <div className="card-header">Optimization Pipeline</div>
+            <PipelineVisualizer stages={stages} explainData={explain} />
+            {/* Hint shown until the first run populates `stages` */}
+            {!loading && stages.length === 0 && (
+              <div style={{ padding: 'var(--sp-4)', textAlign: 'center', color: 'var(--text-muted)', fontSize: 'var(--text-xs)' }}>
+                Run a query to see the pipeline in action
+              </div>
+            )}
+          </div>
+        </div>
+      </div>
+    </div>
+  );
+}

frontend/src/pages/Settings.tsx ADDED Viewed

	@@ -0,0 +1,349 @@

+import { useState, useEffect } from 'react';
+import { Settings, Eye, EyeOff, CheckCircle } from 'lucide-react';
+import { api } from '../api';
+import { useAppStore } from '../store';
+// One row of the API-key settings list.
+interface KeyEntry {
+  // Provider id; used as the key when saving/deleting and matched against connectedProviders.
+  provider: string;
+  // Display name shown in the row.
+  label: string;
+  // Placeholder text for the key input.
+  placeholder: string;
+  // Whether this provider currently appears in the connected-providers list.
+  connected: boolean;
+}
+// Static list of providers offered on the settings page. `connected` is left
+// out here and filled in at render time from the store's connectedProviders.
+const PROVIDERS: Omit<KeyEntry, 'connected'>[] = [
+  { provider: 'openai',    label: 'OpenAI',     placeholder: 'sk-...' },
+  { provider: 'anthropic', label: 'Anthropic',  placeholder: 'sk-ant-...' },
+  { provider: 'google',    label: 'Google AI',  placeholder: 'AI...' },
+  { provider: 'deepseek',  label: 'DeepSeek',   placeholder: 'sk-...' },
+  { provider: 'mistral',   label: 'Mistral',    placeholder: '...' },
+  { provider: 'cohere',    label: 'Cohere',     placeholder: 'co-...' },
+  // This entry's input takes a URL rather than a key (see label/placeholder).
+  { provider: 'ollama',    label: 'Ollama URL', placeholder: 'http://localhost:11434' },
+];
+/**
+ * One provider row on the settings page: connection status, a masked key
+ * input with a show/hide toggle, a Save button and, for connected providers,
+ * a two-step Clear button.
+ *
+ * @param entry    Provider metadata and connection state for this row.
+ * @param onSave   Called with (provider, key); the key is passed trimmed.
+ * @param onDelete Called with the provider id once deletion is confirmed.
+ *
+ * A rejection from `onSave` / `onDelete` is caught and shown inline in the
+ * row rather than escaping as an unhandled promise rejection.
+ */
+function ApiKeyRow({
+  entry,
+  onSave,
+  onDelete,
+}: {
+  entry: KeyEntry;
+  onSave: (provider: string, key: string) => Promise<void>;
+  onDelete: (provider: string) => Promise<void>;
+}) {
+  const [value, setValue] = useState('');
+  const [visible, setVisible] = useState(false);
+  const [saving, setSaving] = useState(false);
+  const [saved, setSaved] = useState(false);
+  const [deleting, setDeleting] = useState(false);
+  const [confirmDelete, setConfirmDelete] = useState(false);
+  const [error, setError] = useState('');
+  // Turn any thrown value into a short message for the row.
+  const describeError = (e: unknown, fallback: string) =>
+    e instanceof Error && e.message ? e.message : fallback;
+  const handleSave = async () => {
+    // Send the trimmed key: pasted secrets often carry stray whitespace, and
+    // the emptiness check is already done on the trimmed value.
+    const key = value.trim();
+    // `saving` guard: Enter in the input can fire while a save is in flight
+    // (only the button is disabled), which would submit twice.
+    if (!key || saving) return;
+    setSaving(true);
+    setError('');
+    try {
+      await onSave(entry.provider, key);
+      setSaved(true);
+      setValue('');
+      // Revert the "✓ Saved" indicator after three seconds
+      setTimeout(() => setSaved(false), 3000);
+    } catch (e) {
+      setError(describeError(e, 'Failed to save key'));
+    } finally {
+      setSaving(false);
+    }
+  };
+  const handleDelete = async () => {
+    // First click arms the confirmation ("Sure?") for three seconds; a second
+    // click within that window performs the deletion.
+    if (!confirmDelete) {
+      setConfirmDelete(true);
+      setTimeout(() => setConfirmDelete(false), 3000);
+      return;
+    }
+    setDeleting(true);
+    setError('');
+    try {
+      await onDelete(entry.provider);
+      setValue('');
+      setConfirmDelete(false);
+    } catch (e) {
+      setError(describeError(e, 'Failed to clear key'));
+    } finally {
+      setDeleting(false);
+    }
+  };
+  return (
+    <div className="settings-row">
+      <div className="settings-row-info">
+        <div className="settings-row-label">{entry.label}</div>
+        <div className="settings-row-desc">
+          {entry.connected ? (
+            <span style={{ color: 'var(--accent-green)', display: 'flex', alignItems: 'center', gap: 4 }}>
+              <CheckCircle size={12} /> Connected
+            </span>
+          ) : (
+            <span style={{ color: 'var(--text-muted)' }}>No key set</span>
+          )}
+          {error && (
+            <span role="alert" style={{ color: 'var(--accent-red)', display: 'block' }}>
+              {error}
+            </span>
+          )}
+        </div>
+      </div>
+      <div className="settings-key-input-wrapper">
+        <div style={{ position: 'relative', flex: 1 }}>
+          <input
+            type={visible ? 'text' : 'password'}
+            value={value}
+            onChange={(e) => setValue(e.target.value)}
+            placeholder={entry.placeholder}
+            onKeyDown={(e) => e.key === 'Enter' && handleSave()}
+            style={{ paddingRight: '40px' }}
+          />
+          {/* Icon-only button, so it needs an explicit accessible name */}
+          <button
+            type="button"
+            aria-label={visible ? 'Hide key' : 'Show key'}
+            onClick={() => setVisible((v) => !v)}
+            style={{
+              position: 'absolute', right: 10, top: '50%', transform: 'translateY(-50%)',
+              background: 'none', border: 'none', cursor: 'pointer',
+              color: 'var(--text-muted)', display: 'flex',
+            }}
+          >
+            {visible ? <EyeOff size={14} /> : <Eye size={14} />}
+          </button>
+        </div>
+        <button
+          className="btn btn-ghost btn-sm"
+          onClick={handleSave}
+          disabled={saving || !value.trim()}
+          style={saved ? { borderColor: 'var(--accent-green)', color: 'var(--accent-green)' } : {}}
+        >
+          {saving ? <span className="spinner" /> : saved ? '✓ Saved' : 'Save'}
+        </button>
+        {entry.connected && (
+          <button
+            className="btn btn-ghost btn-sm"
+            onClick={handleDelete}
+            disabled={deleting}
+            style={{
+              borderColor: confirmDelete ? 'var(--accent-red)' : 'var(--bg-border)',
+              color: 'var(--accent-red)',
+            }}
+          >
+            {deleting ? <span className="spinner" /> : confirmDelete ? 'Sure?' : 'Clear'}
+          </button>
+        )}
+      </div>
+    </div>
+  );
+}
+// Props for SliderRow. min/max/step/unit are optional; SliderRow defaults
+// them to 0, 1, 0.01 and '' respectively.
+interface SliderRowProps {
+  // Row title.
+  label: string;
+  // Secondary description shown under the title.
+  desc: string;
+  // Current slider value; displayed with two decimals.
+  value: number;
+  min?: number;
+  max?: number;
+  step?: number;
+  // Receives the parsed numeric value on every change.
+  onChange: (v: number) => void;
+  // Suffix appended to the displayed value.
+  unit?: string;
+}
+/**
+ * Settings row with a title, a description, the current value (two decimals
+ * plus `unit`) and a full-width range input. The range defaults to 0–1 in
+ * steps of 0.01; `onChange` receives the input value parsed as a float.
+ */
+function SliderRow({ label, desc, value, min = 0, max = 1, step = 0.01, onChange, unit = '' }: SliderRowProps) {
+  const headerStyle = { display: 'flex', justifyContent: 'space-between', width: '100%', alignItems: 'center' } as const;
+  const readoutStyle = {
+    fontFamily: 'JetBrains Mono, monospace',
+    fontSize: 'var(--text-sm)',
+    color: 'var(--accent-cyan)',
+    minWidth: 48,
+    textAlign: 'right',
+  } as const;
+  return (
+    <div className="settings-row" style={{ flexDirection: 'column', alignItems: 'flex-start', gap: 'var(--sp-3)' }}>
+      <div style={headerStyle}>
+        <div>
+          <div className="settings-row-label">{label}</div>
+          <div className="settings-row-desc">{desc}</div>
+        </div>
+        <span style={readoutStyle}>
+          {value.toFixed(2)}{unit}
+        </span>
+      </div>
+      <input
+        type="range"
+        min={min}
+        max={max}
+        step={step}
+        value={value}
+        onChange={(event) => {
+          // Range inputs report their value as a string
+          const next = parseFloat(event.target.value);
+          onChange(next);
+        }}
+        style={{ width: '100%' }}
+      />
+    </div>
+  );
+}
+/**
+ * Settings row with a title, a description and a checkbox styled as a toggle
+ * switch. `onChange` receives the checkbox's new checked state.
+ */
+function ToggleRow({
+  label,
+  desc,
+  value,
+  onChange,
+}: {
+  label: string;
+  desc: string;
+  value: boolean;
+  onChange: (v: boolean) => void;
+}) {
+  const info = (
+    <div className="settings-row-info">
+      <div className="settings-row-label">{label}</div>
+      <div className="settings-row-desc">{desc}</div>
+    </div>
+  );
+  return (
+    <div className="settings-row">
+      {info}
+      <label className="toggle-switch">
+        <input
+          type="checkbox"
+          checked={value}
+          onChange={(event) => {
+            onChange(event.target.checked);
+          }}
+        />
+        <span className="toggle-slider" />
+      </label>
+    </div>
+  );
+}
+export default function SettingsPage() {
+  const {
+    connectedProviders,
+    setConnectedProviders,
+    alphaWeight,
+    setAlphaWeight,
+    betaWeight,
+    setBetaWeight,
+    gammaWeight,
+    setGammaWeight,
+    compressionEnabled,
+    setCompressionEnabled,
+    compressionThreshold,
+    setCompressionThreshold,
+    evaluationEnabled,
+    setEvaluationEnabled,
+    redisUrl,
+    setRedisUrl,
+  } = useAppStore();
+  useEffect(() => {
+    // Refresh connected providers
+    api.getKeys().then((d) => setConnectedProviders(d.connected_providers)).catch(() => {});
+  }, []);
+  const handleSaveKey = async (provider: string, key: string) => {
+    await api.updateKeys({ [provider]: key });
+    const data = await api.getKeys();
+    setConnectedProviders(data.connected_providers);
+  };
+  const handleDeleteKey = async (provider: string) => {
+    await api.deleteKey(provider);
+    const data = await api.getKeys();
+    setConnectedProviders(data.connected_providers);
+  };
+  const providerEntries: KeyEntry[] = PROVIDERS.map((p) => ({
+    ...p,
+    connected: connectedProviders.includes(p.provider),
+  }));
+  const totalWeight = alphaWeight + betaWeight + gammaWeight;
+  const formulaDisplay = `score = ${alphaWeight.toFixed(2)}·cost + ${betaWeight.toFixed(2)}·tokens + ${gammaWeight.toFixed(2)}·quality`;
+  return (
+    <div style={{ display: 'flex', flexDirection: 'column', flex: 1, minHeight: 0, overflow: 'hidden' }}>
+      <div className="topbar">
+        <div className="topbar-breadcrumb">
+          <Settings size={14} style={{ color: 'var(--accent-cyan)' }} />
+          <strong>Settings</strong>
+          <span style={{ color: 'var(--text-muted)' }}>/ Configuration</span>
+        </div>
+      </div>
+      <div className="page-content" style={{ display: 'flex', flexDirection: 'column', gap: 'var(--sp-4)', flex: 1, overflowY: 'auto' }}>
+        {/* API Keys */}
+        <div className="settings-section">
+          <div className="settings-section-header">
+            <div className="settings-section-title">API Keys</div>
+            <div className="settings-section-desc">Provider credentials are encrypted and stored server-side in your session</div>
+          </div>
+          {providerEntries.map((entry) => (
+            <ApiKeyRow key={entry.provider} entry={entry} onSave={handleSaveKey} onDelete={handleDeleteKey} />
+          ))}
+        </div>
+        {/* Budget Weights */}
+        <div className="settings-section">
+          <div className="settings-section-header">
+            <div className="settings-section-title">Budget Optimization Weights</div>
+            <div className="settings-section-desc">
+              Control how the routing optimizer balances cost, tokens, and quality
+            </div>
+          </div>
+          <div style={{ padding: 'var(--sp-4) var(--sp-5)' }}>
+            <div style={{
+              background: 'var(--bg-base)',
+              border: '1px solid var(--bg-border)',
+              borderRadius: 'var(--radius-md)',
+              padding: 'var(--sp-3) var(--sp-4)',
+              fontFamily: 'Fira Code, monospace',
+              fontSize: 'var(--text-sm)',
+              color: 'var(--accent-cyan)',
+              marginBottom: 'var(--sp-4)',
+            }}>
+              {formulaDisplay}
+              {Math.abs(totalWeight - 1) > 0.01 && (
+                <span style={{ color: 'var(--accent-amber)', marginLeft: 12 }}>
+                  ⚠ sum = {totalWeight.toFixed(2)} (should be 1.0)
+                </span>
+              )}
+            </div>
+          </div>
+          <SliderRow label="α — Cost Weight"    desc="Penalize expensive routes"       value={alphaWeight} onChange={setAlphaWeight} />
+          <SliderRow label="β — Token Weight"   desc="Penalize high token usage"       value={betaWeight}  onChange={setBetaWeight}  />
+          <SliderRow label="γ — Quality Weight" desc="Reward high-capability models"   value={gammaWeight} onChange={setGammaWeight} />
+        </div>
+        {/* Compression */}
+        <div className="settings-section">
+          <div className="settings-section-header">
+            <div className="settings-section-title">Prompt Compression</div>
+            <div className="settings-section-desc">Automatically compress prompts to reduce token usage and cost</div>
+          </div>
+          <ToggleRow
+            label="Enable Compression"
+            desc="Apply LLM-based prompt compression before routing"
+            value={compressionEnabled}
+            onChange={setCompressionEnabled}
+          />
+          {compressionEnabled && (
+            <SliderRow
+              label="Compression Threshold"
+              desc="Minimum compression ratio to apply (lower = more aggressive)"
+              value={compressionThreshold}
+              min={0.05}
+              max={0.5}
+              step={0.01}
+              onChange={setCompressionThreshold}
+              unit=" ratio"
+            />
+          )}
+        </div>
+        {/* Evaluation */}
+        <div className="settings-section">
+          <div className="settings-section-header">
+            <div className="settings-section-title">LLM-as-Judge Evaluation</div>
+            <div className="settings-section-desc">Use a secondary LLM to evaluate response quality (adds cost)</div>
+          </div>
+          <ToggleRow
+            label="Enable Evaluation"
+            desc="Score each response using an independent judge model"
+            value={evaluationEnabled}
+            onChange={setEvaluationEnabled}
+          />
+        </div>
+        {/* Infrastructure */}
+        <div className="settings-section">
+          <div className="settings-section-header">
+            <div className="settings-section-title">Infrastructure</div>
+            <div className="settings-section-desc">Connection settings for cache and queue</div>
+          </div>
+          <div className="settings-row">
+            <div className="settings-row-info">
+              <div className="settings-row-label">Redis URL</div>
+              <div className="settings-row-desc">Used for session storage and response caching</div>
+            </div>
+            <div className="settings-key-input-wrapper">
+              <input
+                type="text"
+                value={redisUrl}
+                onChange={(e) => setRedisUrl(e.target.value)}
+                placeholder="redis://localhost:6379"
+              />
+            </div>
+          </div>
+        </div>
+      </div>
+    </div>
+  );
+}

frontend/src/store.ts ADDED Viewed

	@@ -0,0 +1,88 @@

+import { create } from 'zustand';
+import type { BudgetMode, GenerateResponse, HealthStatus } from './types';
+/**
+ * Read a persisted setting from localStorage, falling back to `def`.
+ *
+ * Values are normally stored as JSON; text that is not valid JSON is treated
+ * as a plain string (the Redis URL setter writes raw text). The stored value
+ * is only used when its runtime type matches the type of `def`, so a corrupted
+ * or hand-edited entry can never turn, say, a numeric weight into a string.
+ *
+ * @param key localStorage key to read
+ * @param def default returned when the key is missing, unreadable, or holds
+ *            a value of the wrong type
+ * @returns the stored value, or `def`
+ */
+const getLocal = <T>(key: string, def: T): T => {
+  let raw: string | null;
+  try {
+    raw = localStorage.getItem(key);
+  } catch {
+    // Storage can be unavailable (e.g. blocked by browser privacy settings).
+    return def;
+  }
+  if (raw === null) return def;
+  let parsed: unknown;
+  try {
+    parsed = JSON.parse(raw);
+  } catch {
+    // Not JSON: keep the stored text as-is.
+    parsed = raw;
+  }
+  if (typeof parsed === typeof def) return parsed as T;
+  // A raw string that happens to be valid JSON (e.g. "123") must stay a string.
+  if (typeof def === 'string') return raw as unknown as T;
+  // Stored value has the wrong type for this setting; ignore it.
+  return def;
+};
+/** Authentication slice of the store. */
+interface AuthState {
+  isLoggedIn: boolean;
+  // null when no session id is held (the store's initial value).
+  sessionId: string | null;
+}
+/** Shape of the global store: state fields together with their setter actions. */
+interface AppState {
+  auth: AuthState;
+  health: HealthStatus;
+  budgetMode: BudgetMode;
+  connectedProviders: string[];
+  sidebarCollapsed: boolean;
+  setBudgetMode: (mode: BudgetMode) => void;
+  // setAuth / setHealth take partial objects that the store merges into the current value.
+  setAuth: (auth: Partial<AuthState>) => void;
+  setHealth: (health: Partial<HealthStatus>) => void;
+  setConnectedProviders: (providers: string[]) => void;
+  setSidebarCollapsed: (v: boolean) => void;
+  lastResult: GenerateResponse | null;
+  setLastResult: (r: GenerateResponse | null) => void;
+  // Settings: the store seeds these from localStorage and writes them back on every change.
+  alphaWeight: number;
+  betaWeight: number;
+  gammaWeight: number;
+  compressionEnabled: boolean;
+  compressionThreshold: number;
+  evaluationEnabled: boolean;
+  redisUrl: string;
+  setAlphaWeight: (v: number) => void;
+  setBetaWeight: (v: number) => void;
+  setGammaWeight: (v: number) => void;
+  setCompressionEnabled: (v: boolean) => void;
+  setCompressionThreshold: (v: number) => void;
+  setEvaluationEnabled: (v: boolean) => void;
+  setRedisUrl: (v: string) => void;
+}
+/**
+ * Write a setting to localStorage on a best-effort basis.
+ * A storage failure (quota exceeded, storage blocked) is logged instead of
+ * thrown so the caller's in-memory state update still happens.
+ */
+const setLocal = (key: string, value: string): void => {
+  try {
+    localStorage.setItem(key, value);
+  } catch (err) {
+    console.warn(`Could not persist "${key}" to localStorage`, err);
+  }
+};
+/**
+ * Global application store.
+ *
+ * Session-level state (auth, health, budget mode, providers, sidebar, last
+ * result) lives in memory only. The settings fields are initialised from
+ * localStorage via `getLocal` and written back by their setters.
+ */
+export const useAppStore = create<AppState>((set) => ({
+  auth: {
+    isLoggedIn: false,
+    sessionId: null,
+  },
+  health: {
+    redis: 'unknown',
+    ml_deps: 'unknown',
+    api: 'ok',
+  },
+  budgetMode: 'balanced',
+  connectedProviders: [],
+  sidebarCollapsed: false,
+  lastResult: null,
+  setBudgetMode: (mode) => set({ budgetMode: mode }),
+  // Partial updates are shallow-merged into the existing object.
+  setAuth: (auth) => set((s) => ({ auth: { ...s.auth, ...auth } })),
+  setHealth: (health) => set((s) => ({ health: { ...s.health, ...health } })),
+  setConnectedProviders: (providers) => set({ connectedProviders: providers }),
+  setSidebarCollapsed: (v) => set({ sidebarCollapsed: v }),
+  setLastResult: (r) => set({ lastResult: r }),
+  // Settings initial state from localStorage
+  alphaWeight: getLocal('llmopt_alpha', 0.4),
+  betaWeight: getLocal('llmopt_beta', 0.3),
+  gammaWeight: getLocal('llmopt_gamma', 0.3),
+  compressionEnabled: getLocal('llmopt_compression', true),
+  compressionThreshold: getLocal('llmopt_compression_threshold', 0.15),
+  evaluationEnabled: getLocal('llmopt_evaluation', false),
+  redisUrl: getLocal('llmopt_redis_url', 'redis://localhost:6379'),
+  // Settings setters: persist first (best effort), then update the store.
+  setAlphaWeight: (v) => { setLocal('llmopt_alpha', JSON.stringify(v)); set({ alphaWeight: v }); },
+  setBetaWeight: (v) => { setLocal('llmopt_beta', JSON.stringify(v)); set({ betaWeight: v }); },
+  setGammaWeight: (v) => { setLocal('llmopt_gamma', JSON.stringify(v)); set({ gammaWeight: v }); },
+  setCompressionEnabled: (v) => { setLocal('llmopt_compression', JSON.stringify(v)); set({ compressionEnabled: v }); },
+  setCompressionThreshold: (v) => { setLocal('llmopt_compression_threshold', JSON.stringify(v)); set({ compressionThreshold: v }); },
+  setEvaluationEnabled: (v) => { setLocal('llmopt_evaluation', JSON.stringify(v)); set({ evaluationEnabled: v }); },
+  // The Redis URL is stored as raw text (not JSON); the stored format is kept unchanged.
+  setRedisUrl: (v) => { setLocal('llmopt_redis_url', v); set({ redisUrl: v }); },
+}));

frontend/src/theme.css ADDED Viewed

	@@ -0,0 +1,1982 @@

+/* ============================================================
+   LLMOpt Design System — CSS Custom Properties
+   Aesthetic: Dark Industrial Dashboard ("Bloomberg meets cyberpunk")
+   ============================================================ */
+/* ---- Google Fonts (loaded in index.html) ---- */
+:root {
+  /* Backgrounds */
+  --bg-base:     #0A0B0E;
+  --bg-surface:  #111318;
+  --bg-elevated: #1A1D26;
+  --bg-border:   #252A38;
+  /* Accents */
+  --accent-cyan:   #00E5FF;
+  --accent-green:  #00FF94;
+  --accent-amber:  #FFB300;
+  --accent-red:    #FF3D57;
+  --accent-purple: #7C4DFF;
+  /* Text */
+  --text-primary:   #E8ECF4;
+  --text-secondary: #C5CBE0;
+  --text-muted:     #9098B0;
+  /* Gradients */
+  --gradient-glow: linear-gradient(135deg, #00E5FF22, #7C4DFF11);
+  --gradient-card: linear-gradient(145deg, #111318, #1A1D26);
+  /* Typography sizes */
+  --text-xs:   11px;
+  --text-sm:   13px;
+  --text-base: 15px;
+  --text-lg:   18px;
+  --text-xl:   24px;
+  --text-2xl:  32px;
+  --text-3xl:  48px;
+  /* Spacing */
+  --sp-1: 4px;
+  --sp-2: 8px;
+  --sp-3: 12px;
+  --sp-4: 16px;
+  --sp-5: 20px;
+  --sp-6: 24px;
+  --sp-8: 32px;
+  --sp-10: 40px;
+  --sp-12: 48px;
+  /* Border radius */
+  --radius-sm:  4px;
+  --radius-md:  8px;
+  --radius-lg:  12px;
+  --radius-xl:  16px;
+  --radius-full: 9999px;
+  /* Transitions */
+  --transition-fast:   150ms ease;
+  --transition-normal: 250ms ease;
+  --transition-slow:   400ms ease;
+  /* Z-index layers */
+  --z-sidebar: 100;
+  --z-modal:   200;
+  --z-toast:   300;
+  /* Sidebar */
+  --sidebar-w:        280px;
+  --sidebar-w-collapsed: 56px;
+}
+/* ============================================================
+   Reset & Base
+   ============================================================ */
+*, *::before, *::after {
+  box-sizing: border-box;
+  margin: 0;
+  padding: 0;
+}
+html {
+  font-size: 16px;
+}
+/* Animated scrolling only for users who have not requested reduced motion. */
+@media (prefers-reduced-motion: no-preference) {
+  html {
+    scroll-behavior: smooth;
+  }
+}
+body {
+  background-color: var(--bg-base);
+  color: var(--text-primary);
+  font-family: 'DM Sans', -apple-system, BlinkMacSystemFont, sans-serif;
+  font-size: var(--text-base);
+  line-height: 1.6;
+  -webkit-font-smoothing: antialiased;
+  -moz-osx-font-smoothing: grayscale;
+  overflow-x: hidden;
+}
+/* ============================================================
+   Typography
+   ============================================================ */
+h1, h2, h3, h4, h5, h6 {
+  font-family: 'JetBrains Mono', monospace;
+  line-height: 1.2;
+  letter-spacing: -0.02em;
+}
+code, pre, .mono {
+  font-family: 'Fira Code', 'JetBrains Mono', monospace;
+}
+/* ============================================================
+   Scrollbar
+   ============================================================ */
+/* WebKit/Blink: thin custom scrollbar drawn in the theme's border colour. */
+::-webkit-scrollbar { width: 6px; height: 6px; }
+::-webkit-scrollbar-track { background: var(--bg-base); }
+::-webkit-scrollbar-thumb { background: var(--bg-border); border-radius: 3px; }
+::-webkit-scrollbar-thumb:hover { background: var(--text-muted); }
+/* Engines without ::-webkit-scrollbar (e.g. Firefox): fall back to the standard
+   properties. Guarded with @supports so browsers that honour the rules above
+   keep their existing appearance. */
+@supports not selector(::-webkit-scrollbar) {
+  * {
+    scrollbar-width: thin;
+    scrollbar-color: var(--bg-border) var(--bg-base);
+  }
+}
+/* ============================================================
+   Layout
+   ============================================================ */
+/* Full-viewport shell; scrolling happens inside .page-content, not on the shell. */
+.app-layout {
+  display: flex;
+  height: 100vh;
+  overflow: hidden;
+}
+/* The sidebar is position: fixed, so the content column is offset by the
+   sidebar width through margin-left. */
+.main-content {
+  flex: 1;
+  display: flex;
+  flex-direction: column;
+  overflow: hidden;
+  margin-left: var(--sidebar-w);
+  transition: margin-left var(--transition-normal);
+}
+/* Offset shrinks to the collapsed sidebar width. */
+.main-content.sidebar-collapsed {
+  margin-left: var(--sidebar-w-collapsed);
+}
+/* Scrollable page body. */
+.page-content {
+  flex: 1;
+  overflow-y: auto;
+  padding: var(--sp-6);
+  background: var(--bg-base);
+}
+/* ============================================================
+   Sidebar
+   ============================================================ */
+.sidebar {
+  width: var(--sidebar-w);
+  min-width: var(--sidebar-w);
+  background: var(--bg-surface);
+  border-right: 1px solid var(--bg-border);
+  display: flex;
+  flex-direction: column;
+  position: fixed;
+  left: 0;
+  top: 0;
+  height: 100vh;
+  z-index: var(--z-sidebar);
+  transition: width var(--transition-normal), min-width var(--transition-normal);
+  overflow: hidden;
+}
+.sidebar.collapsed {
+  width: var(--sidebar-w-collapsed);
+  min-width: var(--sidebar-w-collapsed);
+}
+.sidebar-logo {
+  display: flex;
+  align-items: center;
+  gap: var(--sp-3);
+  padding: var(--sp-5) var(--sp-5);
+  border-bottom: 1px solid var(--bg-border);
+  min-height: 64px;
+}
+.sidebar-logo-icon {
+  color: var(--accent-cyan);
+  font-size: 22px;
+  flex-shrink: 0;
+  display: flex;
+  align-items: center;
+}
+.sidebar-logo-text {
+  font-family: 'JetBrains Mono', monospace;
+  font-size: var(--text-lg);
+  font-weight: 700;
+  color: var(--text-primary);
+  white-space: nowrap;
+}
+.sidebar-logo-text span {
+  color: var(--accent-cyan);
+}
+.sidebar-nav {
+  flex: 1;
+  padding: var(--sp-4) var(--sp-3);
+  display: flex;
+  flex-direction: column;
+  gap: var(--sp-1);
+}
+.sidebar-nav-item {
+  display: flex;
+  align-items: center;
+  gap: var(--sp-3);
+  padding: var(--sp-3) var(--sp-3);
+  border-radius: var(--radius-md);
+  color: var(--text-secondary);
+  text-decoration: none;
+  font-size: var(--text-sm);
+  font-weight: 500;
+  transition: background var(--transition-fast), color var(--transition-fast), border-color var(--transition-fast);
+  cursor: pointer;
+  border: none;
+  background: transparent;
+  width: 100%;
+  text-align: left;
+  border-left: 2px solid transparent;
+  white-space: nowrap;
+}
+.sidebar-nav-item:hover {
+  background: var(--bg-elevated);
+  color: var(--text-primary);
+}
+.sidebar-nav-item.active {
+  background: rgba(0, 229, 255, 0.08);
+  color: var(--accent-cyan);
+  border-left-color: var(--accent-cyan);
+}
+.sidebar-nav-icon {
+  flex-shrink: 0;
+  width: 20px;
+  display: flex;
+  align-items: center;
+  justify-content: center;
+}
+.sidebar-section-label {
+  font-size: var(--text-xs);
+  font-weight: 600;
+  text-transform: uppercase;
+  letter-spacing: 0.1em;
+  color: var(--text-muted);
+  padding: var(--sp-4) var(--sp-3) var(--sp-2);
+  white-space: nowrap;
+}
+.sidebar-status {
+  padding: var(--sp-4);
+  border-top: 1px solid var(--bg-border);
+}
+.sidebar-status-title {
+  font-size: var(--text-xs);
+  font-weight: 600;
+  text-transform: uppercase;
+  letter-spacing: 0.1em;
+  color: var(--text-muted);
+  margin-bottom: var(--sp-3);
+}
+.sidebar-status-item {
+  display: flex;
+  align-items: center;
+  gap: var(--sp-2);
+  padding: var(--sp-1) 0;
+  font-size: var(--text-xs);
+  font-family: 'JetBrains Mono', monospace;
+  color: var(--text-secondary);
+}
+/* ============================================================
+   Status Dot
+   ============================================================ */
+.dot {
+  width: 8px;
+  height: 8px;
+  border-radius: 50%;
+  flex-shrink: 0;
+}
+.dot-live {
+  background: var(--accent-green);
+  box-shadow: 0 0 8px var(--accent-green);
+  animation: pulse 2s infinite;
+}
+.dot-warning {
+  background: var(--accent-amber);
+  box-shadow: 0 0 8px var(--accent-amber);
+}
+.dot-error {
+  background: var(--accent-red);
+  box-shadow: 0 0 8px var(--accent-red);
+}
+.dot-muted {
+  background: var(--text-muted);
+}
+/* Gentle fade/shrink loop used by .dot-live. */
+@keyframes pulse {
+  0%, 100% { opacity: 1; transform: scale(1); }
+  50%       { opacity: 0.7; transform: scale(0.9); }
+}
+/* Honour the user's reduced-motion preference: keep the dot, drop the loop. */
+@media (prefers-reduced-motion: reduce) {
+  .dot-live { animation: none; }
+}
+/* ============================================================
+   Metric Cards
+   ============================================================ */
+.metric-card {
+  background: var(--bg-surface);
+  border: 1px solid var(--bg-border);
+  border-radius: var(--radius-lg);
+  padding: var(--sp-5) var(--sp-6);
+  position: relative;
+  overflow: hidden;
+  transition: border-color var(--transition-normal), box-shadow var(--transition-normal);
+}
+.metric-card:hover {
+  border-color: rgba(0, 229, 255, 0.3);
+  box-shadow: 0 0 20px rgba(0, 229, 255, 0.05);
+}
+.metric-card::before {
+  content: '';
+  position: absolute;
+  top: 0; left: 0; right: 0;
+  height: 2px;
+}
+.metric-card.cyan::before   { background: var(--accent-cyan); }
+.metric-card.green::before  { background: var(--accent-green); }
+.metric-card.amber::before  { background: var(--accent-amber); }
+.metric-card.purple::before { background: var(--accent-purple); }
+.metric-card-label {
+  font-size: var(--text-xs);
+  font-weight: 600;
+  text-transform: uppercase;
+  letter-spacing: 0.1em;
+  color: var(--text-secondary);
+  margin-bottom: var(--sp-2);
+}
+.metric-card-value {
+  font-family: 'JetBrains Mono', monospace;
+  font-size: var(--text-2xl);
+  font-weight: 700;
+  color: var(--text-primary);
+  line-height: 1;
+  margin-bottom: var(--sp-2);
+}
+.metric-card-delta {
+  font-size: var(--text-xs);
+  font-weight: 600;
+  display: flex;
+  align-items: center;
+  gap: 4px;
+}
+.delta-up   { color: var(--accent-green); }
+.delta-down { color: var(--accent-red); }
+.delta-neutral { color: var(--text-secondary); }
+/* ============================================================
+   Buttons
+   ============================================================ */
+.btn {
+  display: inline-flex;
+  align-items: center;
+  justify-content: center;
+  gap: var(--sp-2);
+  padding: var(--sp-2) var(--sp-5);
+  border-radius: var(--radius-md);
+  font-family: 'JetBrains Mono', monospace;
+  font-size: var(--text-sm);
+  font-weight: 600;
+  letter-spacing: 0.05em;
+  cursor: pointer;
+  border: none;
+  transition: all var(--transition-fast);
+  text-transform: uppercase;
+  white-space: nowrap;
+}
+.btn:disabled {
+  opacity: 0.4;
+  cursor: not-allowed;
+}
+.btn-primary {
+  background: var(--accent-cyan);
+  color: #000;
+}
+.btn-primary:hover:not(:disabled) {
+  background: #33EAFF;
+  box-shadow: 0 0 20px rgba(0, 229, 255, 0.4);
+  transform: scale(1.02);
+}
+.btn-ghost {
+  background: transparent;
+  color: var(--text-secondary);
+  border: 1px solid var(--bg-border);
+}
+.btn-ghost:hover:not(:disabled) {
+  border-color: var(--accent-cyan);
+  color: var(--accent-cyan);
+}
+.btn-danger {
+  background: rgba(255, 61, 87, 0.1);
+  color: var(--accent-red);
+  border: 1px solid rgba(255, 61, 87, 0.3);
+}
+.btn-danger:hover:not(:disabled) {
+  background: var(--accent-red);
+  color: #fff;
+}
+.btn-sm {
+  padding: 6px 12px;
+  font-size: var(--text-xs);
+}
+.btn-lg {
+  padding: var(--sp-3) var(--sp-8);
+  font-size: var(--text-base);
+}
+/* ============================================================
+   Budget Pills
+   ============================================================ */
+.budget-pills {
+  display: flex;
+  gap: var(--sp-2);
+}
+.budget-pill {
+  padding: 6px 14px;
+  border-radius: var(--radius-full);
+  font-family: 'JetBrains Mono', monospace;
+  font-size: var(--text-xs);
+  font-weight: 700;
+  letter-spacing: 0.08em;
+  cursor: pointer;
+  border: 1px solid var(--bg-border);
+  background: var(--bg-elevated);
+  color: var(--text-secondary);
+  transition: all var(--transition-fast);
+  text-transform: uppercase;
+}
+.budget-pill:hover {
+  border-color: var(--text-secondary);
+  color: var(--text-primary);
+}
+.budget-pill.cheap.active   { background: rgba(0, 255, 148, 0.15); border-color: var(--accent-green);  color: var(--accent-green);  }
+.budget-pill.balanced.active { background: rgba(255, 179, 0, 0.15);  border-color: var(--accent-amber);  color: var(--accent-amber);  }
+.budget-pill.quality.active  { background: rgba(0, 229, 255, 0.15); border-color: var(--accent-cyan);   color: var(--accent-cyan);   }
+/* ============================================================
+   Form elements
+   ============================================================ */
+.input-group {
+  display: flex;
+  flex-direction: column;
+  gap: var(--sp-2);
+}
+.input-label {
+  font-size: var(--text-xs);
+  font-weight: 600;
+  text-transform: uppercase;
+  letter-spacing: 0.08em;
+  color: var(--text-secondary);
+}
+input[type="text"],
+input[type="email"],
+input[type="password"],
+input[type="url"],
+input[type="number"],
+textarea,
+select {
+  width: 100%;
+  background: var(--bg-elevated);
+  border: 1px solid var(--bg-border);
+  border-radius: var(--radius-md);
+  color: var(--text-primary);
+  font-family: 'JetBrains Mono', monospace;
+  font-size: var(--text-sm);
+  padding: var(--sp-3) var(--sp-4);
+  transition: border-color var(--transition-fast), box-shadow var(--transition-fast);
+  outline: none;
+  resize: vertical;
+}
+input[type="text"]:focus,
+input[type="email"]:focus,
+input[type="password"]:focus,
+input[type="url"]:focus,
+input[type="number"]:focus,
+textarea:focus,
+select:focus {
+  border-color: var(--accent-cyan);
+  box-shadow: 0 0 0 3px rgba(0, 229, 255, 0.1);
+}
+input::placeholder,
+textarea::placeholder {
+  color: var(--text-muted);
+}
+/* Replace the native dropdown arrow with an inline SVG chevron. */
+select {
+  cursor: pointer;
+  /* Prefixed form kept alongside the standard one, matching the range-input rule. */
+  -webkit-appearance: none;
+  appearance: none;
+  background-image: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' fill='none' viewBox='0 0 24 24' stroke='%237A8299'%3E%3Cpath stroke-linecap='round' stroke-linejoin='round' stroke-width='2' d='M19 9l-7 7-7-7'/%3E%3C/svg%3E");
+  background-repeat: no-repeat;
+  background-position: right 12px center;
+  background-size: 16px;
+  /* Leave room so option text never runs under the chevron. */
+  padding-right: 40px;
+}
+/* ============================================================
+   Toggle Switch
+   ============================================================ */
+/* Wrapper for a checkbox immediately followed by a .toggle-slider element. */
+.toggle-switch {
+  position: relative;
+  display: inline-block;
+  width: 44px;
+  height: 24px;
+  flex-shrink: 0;
+}
+/* The native checkbox is invisible but still focusable and clickable. */
+.toggle-switch input {
+  opacity: 0;
+  width: 0;
+  height: 0;
+}
+/* Track */
+.toggle-slider {
+  position: absolute;
+  cursor: pointer;
+  top: 0; left: 0; right: 0; bottom: 0;
+  background: var(--bg-border);
+  border-radius: var(--radius-full);
+  transition: var(--transition-fast);
+}
+/* Knob: 18px circle with a 3px gap inside the 24px-high track. */
+.toggle-slider::before {
+  content: '';
+  position: absolute;
+  height: 18px;
+  width: 18px;
+  left: 3px;
+  bottom: 3px;
+  background: var(--text-secondary);
+  border-radius: 50%;
+  transition: var(--transition-fast);
+}
+.toggle-switch input:checked + .toggle-slider {
+  background: rgba(0, 229, 255, 0.2);
+  /* Inset ring rather than a border: a border would shrink the knob's
+     containing block and push the knob off-centre when toggled on. */
+  box-shadow: inset 0 0 0 1px var(--accent-cyan);
+}
+.toggle-switch input:checked + .toggle-slider::before {
+  transform: translateX(20px);
+  background: var(--accent-cyan);
+}
+/* Keyboard focus indicator: the real input is invisible, so show it on the track. */
+.toggle-switch input:focus-visible + .toggle-slider {
+  outline: 2px solid var(--accent-cyan);
+  outline-offset: 2px;
+}
+/* ============================================================
+   Range Slider
+   ============================================================ */
+/* Track */
+input[type="range"] {
+  -webkit-appearance: none;
+  appearance: none;
+  width: 100%;
+  height: 4px;
+  background: var(--bg-border);
+  border-radius: 2px;
+  outline: none;
+  padding: 0;
+}
+/* Thumb: WebKit/Blink and Gecko need separate rules, because an unknown
+   pseudo-element in a selector list invalidates the whole rule. */
+input[type="range"]::-webkit-slider-thumb {
+  -webkit-appearance: none;
+  appearance: none;
+  width: 16px;
+  height: 16px;
+  border-radius: 50%;
+  background: var(--accent-cyan);
+  cursor: pointer;
+  box-shadow: 0 0 8px rgba(0, 229, 255, 0.5);
+}
+input[type="range"]::-moz-range-thumb {
+  width: 16px;
+  height: 16px;
+  border-radius: 50%;
+  background: var(--accent-cyan);
+  cursor: pointer;
+  border: none;
+  /* Same glow as the WebKit thumb. */
+  box-shadow: 0 0 8px rgba(0, 229, 255, 0.5);
+}
+/* The default outline is removed above, so give keyboard users a visible
+   focus ring on the thumb instead. */
+input[type="range"]:focus-visible::-webkit-slider-thumb {
+  box-shadow: 0 0 0 3px rgba(0, 229, 255, 0.35), 0 0 8px rgba(0, 229, 255, 0.5);
+}
+input[type="range"]:focus-visible::-moz-range-thumb {
+  box-shadow: 0 0 0 3px rgba(0, 229, 255, 0.35), 0 0 8px rgba(0, 229, 255, 0.5);
+}
+/* ============================================================
+   Pipeline Visualizer
+   ============================================================ */
+.pipeline-wrapper {
+  background: var(--bg-surface);
+  border: 1px solid var(--bg-border);
+  border-radius: var(--radius-lg);
+  padding: var(--sp-5);
+  overflow-x: auto;
+}
+.pipeline-stages {
+  display: flex;
+  align-items: center;
+  gap: 0;
+  min-width: max-content;
+  padding: var(--sp-2) 0;
+}
+.pipeline-stage {
+  display: flex;
+  flex-direction: column;
+  align-items: center;
+  gap: var(--sp-2);
+  min-width: 90px;
+}
+.pipeline-node {
+  display: flex;
+  flex-direction: column;
+  align-items: center;
+  justify-content: center;
+  gap: 4px;
+  width: 80px;
+  height: 64px;
+  border-radius: var(--radius-md);
+  border: 1px solid var(--bg-border);
+  background: var(--bg-elevated);
+  color: var(--text-muted);
+  font-size: var(--text-xs);
+  font-family: 'JetBrains Mono', monospace;
+  font-weight: 600;
+  text-transform: uppercase;
+  letter-spacing: 0.05em;
+  transition: all var(--transition-normal);
+  position: relative;
+  cursor: default;
+  text-align: center;
+}
+.pipeline-node-icon {
+  font-size: 18px;
+  line-height: 1;
+}
+.pipeline-node.active {
+  border-color: var(--accent-cyan);
+  box-shadow: 0 0 20px rgba(0, 229, 255, 0.3);
+  color: var(--accent-cyan);
+  animation: nodePulse 1s ease-in-out infinite;
+}
+.pipeline-node.complete {
+  border-color: var(--accent-green);
+  background: rgba(0, 255, 148, 0.08);
+  color: var(--accent-green);
+}
+.pipeline-node.skipped {
+  border-color: var(--accent-amber);
+  background: rgba(255, 179, 0, 0.08);
+  color: var(--accent-amber);
+}
+.pipeline-node.error {
+  border-color: var(--accent-red);
+  background: rgba(255, 61, 87, 0.08);
+  color: var(--accent-red);
+}
+.pipeline-latency {
+  font-size: var(--text-xs);
+  font-family: 'JetBrains Mono', monospace;
+  color: var(--text-muted);
+  min-height: 16px;
+}
+.pipeline-latency.visible {
+  color: var(--accent-green);
+}
+.pipeline-connector {
+  flex: 1;
+  height: 2px;
+  background: var(--bg-border);
+  position: relative;
+  min-width: 20px;
+  overflow: hidden;
+}
+.pipeline-connector-fill {
+  position: absolute;
+  left: 0; top: 0; bottom: 0;
+  background: var(--accent-cyan);
+  transition: width 0.3s ease;
+  box-shadow: 0 0 6px var(--accent-cyan);
+}
+/* Breathing glow for the pipeline stage that is currently running. */
+@keyframes nodePulse {
+  0%, 100% { box-shadow: 0 0 10px rgba(0, 229, 255, 0.3); }
+  50%       { box-shadow: 0 0 25px rgba(0, 229, 255, 0.6); }
+}
+/* Honour the user's reduced-motion preference: the active node keeps its
+   static glow and colours but stops pulsing. */
+@media (prefers-reduced-motion: reduce) {
+  .pipeline-node.active { animation: none; }
+}
+/* ============================================================
+   Query Input Area
+   ============================================================ */
+.query-box-wrapper {
+  background: var(--bg-surface);
+  border: 1px solid var(--bg-border);
+  border-radius: var(--radius-lg);
+  overflow: hidden;
+  transition: border-color var(--transition-fast), box-shadow var(--transition-fast);
+}
+.query-box-wrapper:focus-within {
+  border-color: var(--accent-cyan);
+  box-shadow: 0 0 0 3px rgba(0, 229, 255, 0.1);
+}
+.query-textarea {
+  width: 100%;
+  background: transparent;
+  border: none;
+  padding: var(--sp-5);
+  color: var(--text-primary);
+  font-family: 'JetBrains Mono', monospace;
+  font-size: var(--text-base);
+  resize: none;
+  min-height: 140px;
+  outline: none;
+  line-height: 1.7;
+}
+.query-toolbar {
+  display: flex;
+  align-items: center;
+  justify-content: space-between;
+  padding: var(--sp-3) var(--sp-4);
+  border-top: 1px solid var(--bg-border);
+  background: var(--bg-elevated);
+}
+.query-toolbar-left {
+  display: flex;
+  align-items: center;
+  gap: var(--sp-4);
+}
+.query-token-count {
+  font-family: 'JetBrains Mono', monospace;
+  font-size: var(--text-xs);
+  color: var(--text-muted);
+}
+/* ============================================================
+   Response Panel
+   ============================================================ */
+.response-panel {
+  background: var(--bg-surface);
+  border: 1px solid var(--bg-border);
+  border-radius: var(--radius-lg);
+  overflow: hidden;
+}
+.response-header {
+  display: flex;
+  align-items: center;
+  justify-content: space-between;
+  padding: var(--sp-3) var(--sp-5);
+  border-bottom: 1px solid var(--bg-border);
+  background: var(--bg-elevated);
+}
+.response-meta {
+  display: flex;
+  align-items: center;
+  gap: var(--sp-4);
+}
+.response-meta-item {
+  font-family: 'JetBrains Mono', monospace;
+  font-size: var(--text-xs);
+  color: var(--text-secondary);
+}
+.response-meta-item span {
+  color: var(--accent-cyan);
+}
+.response-body {
+  padding: var(--sp-5);
+  font-size: var(--text-base);
+  line-height: 1.8;
+  max-height: 500px;
+  overflow-y: auto;
+}
+.response-body p { margin-bottom: var(--sp-4); }
+.response-body code {
+  background: var(--bg-elevated);
+  padding: 2px 6px;
+  border-radius: var(--radius-sm);
+  font-family: 'Fira Code', monospace;
+  font-size: 0.9em;
+  color: var(--accent-cyan);
+}
+.response-body pre {
+  background: var(--bg-elevated);
+  border: 1px solid var(--bg-border);
+  border-radius: var(--radius-md);
+  padding: var(--sp-4);
+  overflow-x: auto;
+  margin-bottom: var(--sp-4);
+}
+.response-body pre code {
+  background: transparent;
+  padding: 0;
+  color: var(--text-primary);
+}
+/* ============================================================
+   Explain Card
+   ============================================================ */
+.explain-card {
+  background: var(--bg-elevated);
+  border: 1px solid var(--bg-border);
+  border-radius: var(--radius-md);
+  overflow: hidden;
+}
+.explain-card-header {
+  display: flex;
+  align-items: center;
+  justify-content: space-between;
+  padding: var(--sp-3) var(--sp-4);
+  border-bottom: 1px solid var(--bg-border);
+  cursor: pointer;
+  user-select: none;
+}
+.explain-card-header:hover {
+  background: rgba(0, 229, 255, 0.04);
+}
+.explain-title {
+  font-family: 'JetBrains Mono', monospace;
+  font-size: var(--text-xs);
+  font-weight: 700;
+  text-transform: uppercase;
+  letter-spacing: 0.1em;
+  color: var(--accent-cyan);
+}
+.explain-body {
+  padding: var(--sp-4);
+  font-family: 'JetBrains Mono', monospace;
+  font-size: var(--text-xs);
+  line-height: 2;
+  color: var(--text-secondary);
+}
+.explain-line::before {
+  content: '> ';
+  color: var(--accent-cyan);
+}
+.explain-line {
+  display: block;
+}
+.explain-highlight {
+  color: var(--accent-green);
+}
+/* ============================================================
+   Tables
+   ============================================================ */
+.data-table {
+  width: 100%;
+  border-collapse: collapse;
+  font-size: var(--text-sm);
+}
+.data-table th {
+  padding: var(--sp-3) var(--sp-4);
+  text-align: left;
+  font-size: var(--text-xs);
+  font-weight: 600;
+  text-transform: uppercase;
+  letter-spacing: 0.08em;
+  color: var(--text-secondary);
+  border-bottom: 1px solid var(--bg-border);
+  white-space: nowrap;
+}
+.data-table td {
+  padding: var(--sp-3) var(--sp-4);
+  border-bottom: 1px solid rgba(37, 42, 56, 0.5);
+  vertical-align: middle;
+}
+.data-table tbody tr:nth-child(odd)  { background: var(--bg-surface); }
+.data-table tbody tr:nth-child(even) { background: var(--bg-elevated); }
+.data-table tbody tr:hover { background: rgba(0, 229, 255, 0.04); cursor: pointer; }
+.complexity-bar {
+  display: flex;
+  align-items: center;
+  gap: var(--sp-2);
+}
+.complexity-bar-track {
+  width: 60px;
+  height: 4px;
+  background: var(--bg-border);
+  border-radius: 2px;
+  overflow: hidden;
+  flex-shrink: 0;
+}
+.complexity-bar-fill {
+  height: 100%;
+  border-radius: 2px;
+  transition: width var(--transition-slow);
+}
+/* ============================================================
+   Badge / Pill
+   ============================================================ */
+.badge {
+  display: inline-flex;
+  align-items: center;
+  padding: 2px 8px;
+  border-radius: var(--radius-full);
+  font-size: var(--text-xs);
+  font-weight: 600;
+  font-family: 'JetBrains Mono', monospace;
+  letter-spacing: 0.04em;
+  text-transform: uppercase;
+}
+.badge-cyan   { background: rgba(0, 229, 255, 0.12);  color: var(--accent-cyan);   border: 1px solid rgba(0, 229, 255, 0.3);  }
+.badge-green  { background: rgba(0, 255, 148, 0.12);  color: var(--accent-green);  border: 1px solid rgba(0, 255, 148, 0.3);  }
+.badge-amber  { background: rgba(255, 179, 0, 0.12);  color: var(--accent-amber);  border: 1px solid rgba(255, 179, 0, 0.3);  }
+.badge-red    { background: rgba(255, 61, 87, 0.12);  color: var(--accent-red);    border: 1px solid rgba(255, 61, 87, 0.3);  }
+.badge-purple { background: rgba(124, 77, 255, 0.12); color: var(--accent-purple); border: 1px solid rgba(124, 77, 255, 0.3); }
+.badge-muted  { background: rgba(61, 67, 87, 0.3);   color: var(--text-secondary); border: 1px solid var(--bg-border); }
+/* ============================================================
+   Cards
+   ============================================================ */
+.card {
+  background: var(--bg-surface);
+  border: 1px solid var(--bg-border);
+  border-radius: var(--radius-lg);
+  overflow: hidden;
+  transition: border-color var(--transition-normal), box-shadow var(--transition-normal);
+  flex-shrink: 0;
+}
+.card:hover {
+  border-color: rgba(0, 229, 255, 0.2);
+  box-shadow: 0 4px 20px rgba(0, 0, 0, 0.3);
+}
+.card-header {
+  padding: var(--sp-4) var(--sp-5);
+  border-bottom: 1px solid var(--bg-border);
+  display: flex;
+  align-items: center;
+  justify-content: space-between;
+}
+.card-title {
+  font-family: 'JetBrains Mono', monospace;
+  font-size: var(--text-base);
+  font-weight: 700;
+  color: var(--text-primary);
+}
+.card-subtitle {
+  font-size: var(--text-xs);
+  color: var(--text-secondary);
+  margin-top: 2px;
+}
+.card-body {
+  padding: var(--sp-5);
+}
+/* ============================================================
+   Grid layouts
+   ============================================================ */
+.grid-2 { display: grid; grid-template-columns: repeat(2, 1fr); gap: var(--sp-4); }
+.grid-3 { display: grid; grid-template-columns: repeat(3, 1fr); gap: var(--sp-4); }
+.grid-4 { display: grid; grid-template-columns: repeat(4, 1fr); gap: var(--sp-4); }
+@media (max-width: 1280px) {
+  .grid-4 { grid-template-columns: repeat(2, 1fr); }
+}
+@media (max-width: 1024px) {
+  /* Settings responsive overrides - stacked at 1024px to prevent overlapping when sidebar is open */
+  .settings-row {
+    flex-direction: column;
+    align-items: stretch;
+    gap: var(--sp-3);
+  }
+  .settings-key-input-wrapper {
+    width: 100%;
+    max-width: none;
+  }
+}
+@media (max-width: 768px) { /* small screens: single-column grids and a collapsed sidebar */
+  .grid-2, /* NOTE: a later plain .grid-2 rule in this file has equal specificity and comes after this one, so it overrides this collapse for .grid-2 */
+  .grid-3,
+  .grid-4 { grid-template-columns: 1fr; }
+  .main-content { margin-left: var(--sidebar-w-collapsed) !important; }
+  .sidebar { width: var(--sidebar-w-collapsed) !important; }
+  .sidebar-logo-text,
+  .sidebar-nav-item span,
+  .sidebar-section-label,
+  .sidebar-status { display: none; } /* hide the sidebar's text parts at the collapsed width */
+  .page-content { padding: var(--sp-4); }
+}
+/* ============================================================
+   Page headings
+   ============================================================ */
+.page-header {
+  margin-bottom: var(--sp-6);
+}
+.page-title {
+  font-family: 'JetBrains Mono', monospace;
+  font-size: var(--text-xl);
+  font-weight: 700;
+  color: var(--text-primary);
+  margin-bottom: var(--sp-1);
+}
+.page-subtitle {
+  font-size: var(--text-sm);
+  color: var(--text-secondary);
+}
+/* ============================================================
+   Section
+   ============================================================ */
+.section {
+  margin-bottom: var(--sp-6);
+}
+.section-title {
+  font-family: 'JetBrains Mono', monospace;
+  font-size: var(--text-sm);
+  font-weight: 700;
+  text-transform: uppercase;
+  letter-spacing: 0.1em;
+  color: var(--text-secondary);
+  margin-bottom: var(--sp-4);
+  display: flex;
+  align-items: center;
+  gap: var(--sp-3);
+}
+.section-title::after {
+  content: '';
+  flex: 1;
+  height: 1px;
+  background: var(--bg-border);
+}
+/* ============================================================
+   Auth / Login page
+   ============================================================ */
+.auth-page {
+  min-height: 100vh;
+  background: var(--bg-base);
+  display: flex;
+  align-items: center;
+  justify-content: center;
+  position: relative;
+  overflow: hidden;
+}
+.auth-bg-grid {
+  position: absolute;
+  inset: 0;
+  background-image:
+    linear-gradient(rgba(0, 229, 255, 0.03) 1px, transparent 1px),
+    linear-gradient(90deg, rgba(0, 229, 255, 0.03) 1px, transparent 1px);
+  background-size: 40px 40px;
+  pointer-events: none;
+}
+.auth-bg-glow {
+  position: absolute;
+  width: 600px;
+  height: 600px;
+  border-radius: 50%;
+  background: radial-gradient(circle, rgba(0, 229, 255, 0.06) 0%, transparent 70%);
+  top: -100px;
+  left: -100px;
+  pointer-events: none;
+}
+.auth-bg-glow-2 {
+  position: absolute;
+  width: 400px;
+  height: 400px;
+  border-radius: 50%;
+  background: radial-gradient(circle, rgba(124, 77, 255, 0.06) 0%, transparent 70%);
+  bottom: -50px;
+  right: -50px;
+  pointer-events: none;
+}
+.auth-card {
+  background: var(--bg-surface);
+  border: 1px solid var(--bg-border);
+  border-radius: var(--radius-xl);
+  padding: var(--sp-10);
+  width: 100%;
+  max-width: 440px;
+  position: relative;
+  z-index: 1;
+}
+.auth-logo {
+  display: flex;
+  align-items: center;
+  gap: var(--sp-3);
+  margin-bottom: var(--sp-8);
+}
+.auth-logo-icon {
+  color: var(--accent-cyan);
+  font-size: 28px;
+}
+.auth-logo-text {
+  font-family: 'JetBrains Mono', monospace;
+  font-size: var(--text-xl);
+  font-weight: 700;
+}
+.auth-logo-text span {
+  color: var(--accent-cyan);
+}
+.auth-title {
+  font-family: 'JetBrains Mono', monospace;
+  font-size: var(--text-lg);
+  font-weight: 700;
+  margin-bottom: var(--sp-1);
+}
+.auth-subtitle {
+  font-size: var(--text-sm);
+  color: var(--text-secondary);
+  margin-bottom: var(--sp-8);
+}
+.auth-form {
+  display: flex;
+  flex-direction: column;
+  gap: var(--sp-4);
+}
+.auth-divider {
+  display: flex;
+  align-items: center;
+  gap: var(--sp-3);
+  margin: var(--sp-5) 0;
+}
+.auth-divider-line {
+  flex: 1;
+  height: 1px;
+  background: var(--bg-border);
+}
+.auth-divider-text {
+  font-size: var(--text-xs);
+  color: var(--text-muted);
+  text-transform: uppercase;
+  letter-spacing: 0.08em;
+}
+.oauth-btn {
+  display: flex;
+  align-items: center;
+  justify-content: center;
+  gap: var(--sp-3);
+  padding: var(--sp-3);
+  border: 1px solid var(--bg-border);
+  border-radius: var(--radius-md);
+  background: var(--bg-elevated);
+  color: var(--text-primary);
+  font-size: var(--text-sm);
+  font-weight: 500;
+  cursor: pointer;
+  transition: all var(--transition-fast);
+  text-decoration: none;
+  width: 100%;
+}
+.oauth-btn:hover {
+  border-color: var(--text-muted);
+  background: rgba(255, 255, 255, 0.04);
+}
+.auth-footer {
+  margin-top: var(--sp-6);
+  text-align: center;
+  font-size: var(--text-sm);
+  color: var(--text-secondary);
+}
+.auth-footer a, .auth-link {
+  color: var(--accent-cyan);
+  cursor: pointer;
+  text-decoration: none;
+}
+.auth-footer a:hover, .auth-link:hover {
+  text-decoration: underline;
+}
+.auth-error {
+  padding: var(--sp-3) var(--sp-4);
+  background: rgba(255, 61, 87, 0.1);
+  border: 1px solid rgba(255, 61, 87, 0.3);
+  border-radius: var(--radius-md);
+  color: var(--accent-red);
+  font-size: var(--text-sm);
+}
+/* ============================================================
+   Model Registry Card
+   ============================================================ */
+.model-card {
+  background: var(--bg-surface);
+  border: 1px solid var(--bg-border);
+  border-radius: var(--radius-lg);
+  padding: var(--sp-5);
+  display: flex;
+  flex-direction: column;
+  gap: var(--sp-4);
+  transition: border-color var(--transition-normal), box-shadow var(--transition-normal);
+}
+.model-card:hover {
+  border-color: rgba(0, 229, 255, 0.3);
+  box-shadow: 0 0 20px rgba(0, 229, 255, 0.06);
+}
+.model-card-name {
+  font-family: 'JetBrains Mono', monospace;
+  font-size: var(--text-base);
+  font-weight: 700;
+  color: var(--text-primary);
+}
+.model-card-pricing {
+  display: flex;
+  gap: var(--sp-6);
+}
+.model-card-price-item {
+  display: flex;
+  flex-direction: column;
+  gap: 2px;
+}
+.model-card-price-label {
+  font-size: var(--text-xs);
+  color: var(--text-muted);
+  text-transform: uppercase;
+  letter-spacing: 0.08em;
+}
+.model-card-price-value {
+  font-family: 'JetBrains Mono', monospace;
+  font-size: var(--text-sm);
+  font-weight: 600;
+  color: var(--accent-green);
+}
+.capability-gauge {
+  position: relative;
+  width: 60px;
+  height: 60px;
+  flex-shrink: 0;
+}
+/* ============================================================
+   Settings
+   ============================================================ */
+.settings-section {
+  background: var(--bg-surface);
+  border: 1px solid var(--bg-border);
+  border-radius: var(--radius-lg);
+  overflow: hidden;
+  margin-bottom: var(--sp-5);
+  flex-shrink: 0;
+}
+.settings-section-header {
+  padding: var(--sp-4) var(--sp-5);
+  border-bottom: 1px solid var(--bg-border);
+  background: var(--bg-elevated);
+}
+.settings-section-title {
+  font-family: 'JetBrains Mono', monospace;
+  font-size: var(--text-sm);
+  font-weight: 700;
+  text-transform: uppercase;
+  letter-spacing: 0.1em;
+  color: var(--text-primary);
+}
+.settings-section-desc {
+  font-size: var(--text-xs);
+  color: var(--text-secondary);
+  margin-top: 2px;
+}
+.settings-row {
+  display: flex;
+  align-items: center;
+  justify-content: space-between;
+  gap: var(--sp-4);
+  padding: var(--sp-4) var(--sp-5);
+  border-bottom: 1px solid rgba(37, 42, 56, 0.5);
+}
+.settings-row:last-child {
+  border-bottom: none;
+}
+.settings-row-info {
+  flex: 1;
+}
+.settings-row-label {
+  font-size: var(--text-sm);
+  font-weight: 500;
+  color: var(--text-primary);
+  margin-bottom: 2px;
+}
+.settings-row-desc {
+  font-size: var(--text-xs);
+  color: var(--text-secondary);
+}
+.settings-key-input-wrapper {
+  position: relative;
+  display: flex;
+  gap: var(--sp-2);
+  align-items: center;
+  width: 100%;
+  max-width: 450px;
+}
+/* ============================================================
+   Loading spinner
+   ============================================================ */
+.spinner {
+  width: 16px;
+  height: 16px;
+  border: 2px solid var(--bg-border);
+  border-top-color: var(--accent-cyan);
+  border-radius: 50%;
+  animation: spin 0.7s linear infinite;
+}
+@keyframes spin {
+  to { transform: rotate(360deg); }
+}
+/* ============================================================
+   Empty states
+   ============================================================ */
+.empty-state {
+  display: flex;
+  flex-direction: column;
+  align-items: center;
+  justify-content: center;
+  padding: var(--sp-12) var(--sp-8);
+  text-align: center;
+  color: var(--text-secondary);
+  gap: var(--sp-4);
+}
+.empty-state-icon {
+  font-size: 48px;
+  opacity: 0.3;
+}
+.empty-state-title {
+  font-family: 'JetBrains Mono', monospace;
+  font-size: var(--text-lg);
+  font-weight: 700;
+  color: var(--text-muted);
+}
+.empty-state-desc {
+  font-size: var(--text-sm);
+  max-width: 320px;
+}
+/* ============================================================
+   Topbar / Header strip
+   ============================================================ */
+.topbar {
+  height: 52px;
+  background: var(--bg-surface);
+  border-bottom: 1px solid var(--bg-border);
+  display: flex;
+  align-items: center;
+  justify-content: space-between;
+  padding: 0 var(--sp-6);
+  flex-shrink: 0;
+}
+.topbar-breadcrumb {
+  font-family: 'JetBrains Mono', monospace;
+  font-size: var(--text-xs);
+  color: var(--text-secondary);
+  display: flex;
+  align-items: center;
+  gap: var(--sp-2);
+}
+.topbar-breadcrumb strong {
+  color: var(--text-primary);
+  font-weight: 700;
+}
+.topbar-actions {
+  display: flex;
+  align-items: center;
+  gap: var(--sp-3);
+}
+.topbar-health {
+  display: flex;
+  align-items: center;
+  gap: var(--sp-2);
+  font-family: 'JetBrains Mono', monospace;
+  font-size: var(--text-xs);
+  color: var(--text-secondary);
+}
+/* ============================================================
+   Tooltip
+   ============================================================ */
+[data-tooltip] {
+  position: relative;
+}
+[data-tooltip]::after {
+  content: attr(data-tooltip);
+  position: absolute;
+  bottom: calc(100% + 8px);
+  left: 50%;
+  transform: translateX(-50%);
+  background: var(--bg-elevated);
+  border: 1px solid var(--bg-border);
+  border-radius: var(--radius-sm);
+  padding: 4px 8px;
+  font-size: var(--text-xs);
+  white-space: nowrap;
+  pointer-events: none;
+  opacity: 0;
+  transition: opacity var(--transition-fast);
+  z-index: 100;
+}
+[data-tooltip]:hover::after {
+  opacity: 1;
+}
+/* ============================================================
+   Misc utilities
+   ============================================================ */
+.flex { display: flex; }
+.flex-col { display: flex; flex-direction: column; }
+.items-center { align-items: center; }
+.justify-between { justify-content: space-between; }
+.gap-2 { gap: var(--sp-2); }
+.gap-3 { gap: var(--sp-3); }
+.gap-4 { gap: var(--sp-4); }
+.gap-6 { gap: var(--sp-6); }
+.flex-1 { flex: 1; }
+.w-full { width: 100%; }
+.text-cyan   { color: var(--accent-cyan); }
+.text-green  { color: var(--accent-green); }
+.text-amber  { color: var(--accent-amber); }
+.text-red    { color: var(--accent-red); }
+.text-muted  { color: var(--text-muted); }
+.text-secondary { color: var(--text-secondary); }
+.font-mono { font-family: 'JetBrains Mono', monospace; }
+.font-sm   { font-size: var(--text-sm); }
+.font-xs   { font-size: var(--text-xs); }
+.truncate  { overflow: hidden; text-overflow: ellipsis; white-space: nowrap; }
+.mb-4 { margin-bottom: var(--sp-4); }
+.mb-6 { margin-bottom: var(--sp-6); }
+.mt-4 { margin-top: var(--sp-4); }
+/* ============================================================
+   Badge Variants
+   ============================================================ */
+.badge-red { /* second declaration of .badge-red; it comes after the base badge rules, so these values win */
+  color: var(--accent-red);
+  border-color: rgba(255, 61, 87, 0.4); /* alpha 0.4, vs 0.3 in the base rule's border shorthand */
+  background: rgba(255, 61, 87, 0.08); /* alpha 0.08, vs 0.12 in the base rule */
+}
+/* ============================================================
+   Pipeline Visualizer — Vertical Layout (sidebar)
+   ============================================================ */
+.pipeline-wrapper {
+  display: flex;
+  flex-direction: column;
+  gap: 0;
+  position: relative;
+  padding: var(--sp-3) 0;
+}
+.pipeline-stage {
+  display: flex;
+  align-items: flex-start;
+  gap: var(--sp-3);
+  padding: var(--sp-3) var(--sp-4);
+  position: relative;
+  transition: background var(--transition-fast);
+  border-radius: var(--radius-md);
+}
+.pipeline-stage.active {
+  background: rgba(0, 229, 255, 0.05);
+}
+.pipeline-stage.complete .pipeline-stage-label {
+  color: var(--accent-green);
+}
+.pipeline-stage.error .pipeline-stage-label {
+  color: var(--accent-red);
+}
+.pipeline-stage-icon {
+  width: 24px;
+  height: 24px;
+  display: flex;
+  align-items: center;
+  justify-content: center;
+  flex-shrink: 0;
+  font-size: 14px;
+}
+.pipeline-stage-body {
+  flex: 1;
+  min-width: 0;
+}
+.pipeline-stage-label {
+  font-size: var(--text-xs);
+  font-weight: 600;
+  font-family: 'JetBrains Mono', monospace;
+  color: var(--text-secondary);
+  transition: color var(--transition-fast);
+  margin-bottom: 3px;
+}
+.pipeline-stage.active .pipeline-stage-label {
+  color: var(--accent-cyan);
+}
+.pipeline-stage-detail {
+  font-size: 10px;
+  color: var(--text-muted);
+  line-height: 1.4;
+  word-break: break-word;
+}
+.pipeline-stage.complete .pipeline-stage-detail {
+  color: var(--text-secondary);
+}
+.pipeline-connector {
+  position: absolute;
+  left: calc(var(--sp-4) + 11px);
+  top: calc(100% - var(--sp-3));
+  width: 2px;
+  height: var(--sp-3);
+  background: var(--bg-border);
+  z-index: 0;
+}
+/* ============================================================
+   Metrics Bar (Response)
+   ============================================================ */
+.metrics-bar {
+  display: flex;
+  align-items: center;
+  flex-wrap: wrap;
+  gap: 0;
+  background: var(--bg-base);
+  border: 1px solid var(--bg-border);
+  border-radius: var(--radius-md);
+  overflow: hidden;
+  margin-bottom: var(--sp-4);
+}
+.metric-item {
+  flex: 1;
+  min-width: 80px;
+  padding: var(--sp-3) var(--sp-4);
+  text-align: center;
+}
+.metric-label {
+  font-size: 10px;
+  text-transform: uppercase;
+  letter-spacing: 0.08em;
+  color: var(--text-muted);
+  margin-bottom: 4px;
+}
+.metric-value {
+  font-family: 'JetBrains Mono', monospace;
+  font-size: var(--text-xs);
+  font-weight: 700;
+  color: var(--text-primary);
+}
+.metric-divider {
+  width: 1px;
+  height: 40px;
+  background: var(--bg-border);
+  flex-shrink: 0;
+}
+/* ============================================================
+   Response Content (markdown rendering)
+   ============================================================ */
+.response-content {
+  font-size: var(--text-sm);
+  line-height: 1.8;
+  color: var(--text-primary);
+}
+.response-content p { margin-bottom: var(--sp-4); }
+.response-content p:last-child { margin-bottom: 0; }
+.response-content h1,
+.response-content h2,
+.response-content h3 {
+  font-family: 'JetBrains Mono', monospace;
+  color: var(--accent-cyan);
+  margin: var(--sp-5) 0 var(--sp-3);
+  font-size: var(--text-base);
+}
+.response-content code {
+  font-family: 'Fira Code', 'JetBrains Mono', monospace;
+  font-size: 12px;
+  background: var(--bg-base);
+  border: 1px solid var(--bg-border);
+  border-radius: var(--radius-sm);
+  padding: 2px 6px;
+  color: var(--accent-cyan);
+}
+.response-content pre {
+  background: var(--bg-base);
+  border: 1px solid var(--bg-border);
+  border-radius: var(--radius-md);
+  padding: var(--sp-4);
+  overflow-x: auto;
+  margin: var(--sp-4) 0;
+}
+.response-content pre code {
+  background: none;
+  border: none;
+  padding: 0;
+  color: var(--text-primary);
+  font-size: 13px;
+  line-height: 1.6;
+}
+.response-content ul,
+.response-content ol {
+  padding-left: var(--sp-6);
+  margin-bottom: var(--sp-4);
+}
+.response-content li {
+  margin-bottom: var(--sp-2);
+}
+.response-content blockquote {
+  border-left: 3px solid var(--accent-cyan);
+  padding-left: var(--sp-4);
+  color: var(--text-secondary);
+  margin: var(--sp-4) 0;
+}
+.response-content table {
+  width: 100%;
+  border-collapse: collapse;
+  margin: var(--sp-4) 0;
+  font-size: var(--text-xs);
+}
+.response-content th,
+.response-content td {
+  padding: var(--sp-2) var(--sp-3);
+  border: 1px solid var(--bg-border);
+  text-align: left;
+}
+.response-content th {
+  background: var(--bg-elevated);
+  color: var(--accent-cyan);
+  font-weight: 700;
+}
+/* ============================================================
+   Example Prompt Buttons
+   ============================================================ */
+.example-prompt-btn {
+  display: flex;
+  align-items: flex-start;
+  gap: var(--sp-3);
+  padding: var(--sp-3) var(--sp-4);
+  background: var(--bg-elevated);
+  border: 1px solid var(--bg-border);
+  border-radius: var(--radius-md);
+  color: var(--text-secondary);
+  font-size: var(--text-sm);
+  text-align: left;
+  cursor: pointer;
+  transition: all var(--transition-fast);
+  width: 100%;
+  font-family: inherit;
+}
+.example-prompt-btn:hover {
+  border-color: var(--accent-cyan);
+  color: var(--text-primary);
+  background: rgba(0, 229, 255, 0.04);
+}
+.example-prompt-icon {
+  color: var(--accent-cyan);
+  font-weight: 700;
+  flex-shrink: 0;
+}
+/* ============================================================
+   Rationale Card
+   ============================================================ */
+.rationale-card {
+  background: var(--bg-surface);
+  border: 1px solid var(--bg-border);
+  border-radius: var(--radius-lg);
+  overflow: hidden;
+}
+.rationale-card-title {
+  display: flex;
+  align-items: center;
+  gap: var(--sp-2);
+  padding: var(--sp-4) var(--sp-5);
+  border-bottom: 1px solid var(--bg-border);
+  font-size: var(--text-xs);
+  font-weight: 700;
+  font-family: 'JetBrains Mono', monospace;
+  text-transform: uppercase;
+  letter-spacing: 0.08em;
+  color: var(--accent-cyan);
+}
+.rationale-section {
+  padding: var(--sp-4) var(--sp-5);
+  border-bottom: 1px solid var(--bg-border);
+}
+.rationale-section:last-child {
+  border-bottom: none;
+}
+.rationale-label {
+  font-size: 10px;
+  font-weight: 700;
+  text-transform: uppercase;
+  letter-spacing: 0.1em;
+  color: var(--text-muted);
+  margin-bottom: var(--sp-3);
+}
+/* ============================================================
+   Card component
+   ============================================================ */
+.card {
+  /* Second .card layer: same surface as the "Cards" section, plus inner padding. */
+  background: var(--bg-surface);
+  border: 1px solid var(--bg-border);
+  border-radius: var(--radius-lg);
+  padding: var(--sp-5);
+  /* Keep box-shadow in the transition list: the earlier .card:hover rule still
+     applies a shadow, and a border-color-only transition here (which overrides
+     the earlier one) would make that shadow snap on and off instead of fading. */
+  transition: border-color var(--transition-normal), box-shadow var(--transition-normal);
+  flex-shrink: 0;
+}
+.card:hover {
+  border-color: rgba(0, 229, 255, 0.15);
+}
+.card-header {
+  font-family: 'JetBrains Mono', monospace;
+  font-size: var(--text-xs);
+  font-weight: 700;
+  text-transform: uppercase;
+  letter-spacing: 0.08em;
+  color: var(--text-muted);
+  margin-bottom: var(--sp-4);
+}
+/* Grid layout helpers */
+.grid-2 {
+  display: grid;
+  /* Auto-fill as many columns of at least 340px as fit. min(340px, 100%) lets
+     the last remaining column shrink with a container narrower than 340px
+     instead of overflowing it. This rule comes after the max-width: 768px
+     override of .grid-2, so it is also the one in effect on small screens. */
+  grid-template-columns: repeat(auto-fill, minmax(min(340px, 100%), 1fr));
+  gap: var(--sp-4);
+}
+/* Model card */
+.model-card {
+  background: var(--bg-surface);
+  border: 1px solid var(--bg-border);
+  border-radius: var(--radius-lg);
+  padding: var(--sp-5);
+  display: flex;
+  flex-direction: column;
+  gap: var(--sp-4);
+  transition: border-color var(--transition-normal), box-shadow var(--transition-normal);
+}
+.model-card:hover {
+  border-color: rgba(0, 229, 255, 0.2);
+}
+.model-card-name {
+  font-family: 'JetBrains Mono', monospace;
+  font-size: var(--text-sm);
+  font-weight: 700;
+  color: var(--text-primary);
+  word-break: break-all;
+}
+.model-card-pricing {
+  display: flex;
+  gap: var(--sp-4);
+  padding: var(--sp-3) 0;
+  border-top: 1px solid var(--bg-border);
+  border-bottom: 1px solid var(--bg-border);
+}
+.model-card-price-item {
+  flex: 1;
+  text-align: center;
+}
+.model-card-price-label {
+  font-size: 10px;
+  text-transform: uppercase;
+  letter-spacing: 0.08em;
+  color: var(--text-muted);
+  margin-bottom: 4px;
+}
+.model-card-price-value {
+  font-family: 'JetBrains Mono', monospace;
+  font-size: var(--text-xs);
+  font-weight: 700;
+  color: var(--text-primary);
+}
+/* Prevent browser autofill style overrides */
+input:-webkit-autofill,
+input:-webkit-autofill:hover,
+input:-webkit-autofill:focus,
+textarea:-webkit-autofill,
+textarea:-webkit-autofill:hover,
+textarea:-webkit-autofill:focus,
+select:-webkit-autofill,
+select:-webkit-autofill:hover,
+select:-webkit-autofill:focus {
+  border: 1px solid var(--accent-cyan) !important;
+  -webkit-text-fill-color: var(--text-primary) !important;
+  -webkit-box-shadow: 0 0 0px 1000px var(--bg-elevated) inset !important;
+  transition: background-color 5000s ease-in-out 0s;
+}

frontend/src/types.ts ADDED Viewed

	@@ -0,0 +1,170 @@

+// Global types for LLMOpt frontend — aligned with backend schemas
+export type BudgetMode = 'cheap' | 'balanced' | 'quality'; // type of GenerateRequest.budget_mode
+export type PipelineStageStatus = 'idle' | 'active' | 'complete' | 'skipped' | 'error'; // type of PipelineStage.status
+export type ComplexityTier = 'trivial' | 'easy' | 'medium' | 'hard' | 'expert'; // type of the complexity_tier / complexity.tier fields
+export interface PipelineStage { // One pipeline step: display fields plus its current status
+  id: string;
+  label: string;
+  icon: string;
+  status: PipelineStageStatus; // 'idle' | 'active' | 'complete' | 'skipped' | 'error'
+  latencyMs?: number; // optional; milliseconds, going by the name
+  detail?: string; // optional free-text detail
+}
+export interface GenerateRequest { // Generation request; only query and budget_mode are required
+  query: string;
+  budget_mode: BudgetMode; // 'cheap' | 'balanced' | 'quality'
+  max_cost_per_request?: number; // NOTE(review): currency/unit is not visible here — confirm against backend
+  quality_threshold?: number;
+  exclude_providers?: string[];
+  only_providers?: string[];
+  prefer_local?: boolean;
+  conversation_history?: { role: string; content: string }[]; // list of role/content pairs
+  temperature?: number;
+  api_keys?: Record<string, string>; // provider name -> API key, e.g. { openai: 'sk-...' }
+  alpha?: number; // NOTE(review): meaning of alpha/beta/gamma is not visible here — confirm against backend
+  beta?: number;
+  gamma?: number;
+  compression_enabled?: boolean;
+  evaluate?: boolean;
+}
+export interface GenerateResponse { // Generation result: response text plus token, cost, compression and complexity metrics
+  response: string;
+  model_used: string;
+  provider: string;
+  input_tokens: number;
+  output_tokens: number;
+  total_tokens: number;
+  estimated_cost: number;
+  tokens_saved: number;
+  cost_saved: number;
+  compression_ratio: number;
+  complexity_score: number;
+  complexity_tier: ComplexityTier; // 'trivial' | 'easy' | 'medium' | 'hard' | 'expert'
+  latency_ms: number; // milliseconds, going by the name
+}
+// The /explain endpoint returns nested objects from core.py
+export interface ExplainResponse { // Four nested sections: features, complexity, optimization, optimized_prompt
+  query: string;
+  features: { // counts and boolean flags describing the query
+    token_count: number;
+    sentence_count: number;
+    primary_domain: string;
+    estimated_output_length: string; // a string label, not a token count
+    domain_code: boolean;
+    domain_math: boolean;
+    domain_science: boolean;
+    domain_reasoning: boolean;
+    domain_creative: boolean;
+    multi_step: boolean;
+    requires_comparison: boolean;
+    requires_generation: boolean;
+    requires_analysis: boolean;
+    requires_debate: boolean;
+    has_math_notation: boolean;
+    has_code_block: boolean;
+    [key: string]: unknown; // index signature: any additional keys are typed as unknown
+  };
+  complexity: {
+    score: number;
+    tier: ComplexityTier; // 'trivial' | 'easy' | 'medium' | 'hard' | 'expert'
+    required_reasoning: number;
+    required_coding: number;
+    required_math: number;
+    rationale: string[];
+    estimated_input_tokens: number;
+    estimated_output_tokens: number;
+    [key: string]: unknown; // index signature: any additional keys are typed as unknown
+  };
+  optimization: {
+    selected_model: string;
+    provider: string;
+    fallback_model: string | null; // null is an allowed value
+    compression_enabled: boolean;
+    system_prompt_style: string;
+    estimated_input_tokens: number;
+    estimated_output_tokens: number;
+    estimated_cost: number;
+    rationale: string[];
+    budget_mode: string; // plain string here, not the BudgetMode union
+    [key: string]: unknown; // index signature: any additional keys are typed as unknown
+  };
+  optimized_prompt: { // token counts before/after and the resulting savings
+    tokens_before: number;
+    tokens_after: number;
+    tokens_saved: number;
+    compression_ratio: number;
+    [key: string]: unknown; // index signature: any additional keys are typed as unknown
+  };
+}
+export interface HistoryItem { // A past query and its response, with GenerateResponse's metric fields except compression_ratio
+  id: number; // numeric id
+  query: string;
+  response: string;
+  model_used: string;
+  provider: string;
+  input_tokens: number;
+  output_tokens: number;
+  total_tokens: number;
+  estimated_cost: number;
+  tokens_saved: number;
+  cost_saved: number;
+  latency_ms: number;
+  complexity_score: number;
+  complexity_tier: ComplexityTier; // 'trivial' | 'easy' | 'medium' | 'hard' | 'expert'
+  time_ago: string; // presumably a preformatted relative-time label — confirm against backend
+}
+export interface DashboardStats { // Dashboard summary values plus two recent-activity lists
+  tokens_saved: string; // a string here, whereas tokens_saved is a number on GenerateResponse and HistoryItem
+  prompts_improved: number;
+  routing_savings: string;
+  avg_boost: string;
+  distribution: Record<string, number>; // NOTE(review): what the keys name is not visible here — confirm against backend
+  recent_decisions: {
+    id: string; // string id, unlike the numeric HistoryItem.id
+    time_ago: string;
+    model: string;
+    provider: string;
+    tier: string; // plain string here, not the ComplexityTier union
+    score: number;
+    reason: string;
+  }[];
+  recent_optimizations: {
+    name: string;
+    model_used: string;
+    time_ago: string;
+    score: string; // a string here, whereas recent_decisions[].score is a number
+    tokens_saved: string;
+  }[];
+}
+// ModelSpec from registry — uses model_name not id
+export interface ModelSpec { // One model registry entry: pricing, context window and score fields
+  model_name: string; // identifying field; there is no separate id
+  provider: string;
+  input_cost_per_1k: number; // presumably cost per 1,000 input tokens; currency not visible here — confirm
+  output_cost_per_1k: number; // presumably cost per 1,000 output tokens — confirm
+  context_window: number;
+  reasoning_score: number; // NOTE(review): score ranges are not visible here — confirm against the registry data
+  coding_score: number;
+  math_score: number;
+  instruction_following_score: number;
+  latency_score: number;
+  max_complexity: number;
+  capability_score: number;
+  notes: string;
+}
+export interface HealthStatus { // Per-component health flags
+  redis: 'ok' | 'error' | 'unknown';
+  ml_deps: 'ok' | 'error' | 'unknown';
+  api: 'ok' | 'error'; // no 'unknown' state, unlike redis and ml_deps
+}

frontend/src/vite-env.d.ts ADDED Viewed

	@@ -0,0 +1 @@


+/// <reference types="vite/client" />

frontend/tsconfig.app.json ADDED Viewed

	@@ -0,0 +1,26 @@

+{
+  "compilerOptions": {
+    "tsBuildInfoFile": "./node_modules/.tmp/tsconfig.app.tsbuildinfo",
+    "target": "ES2020",
+    "useDefineForClassFields": true,
+    "lib": ["ES2020", "DOM", "DOM.Iterable"],
+    "module": "ESNext",
+    "skipLibCheck": true,
+    /* Bundler mode */
+    "moduleResolution": "Bundler",
+    "allowImportingTsExtensions": true,
+    "isolatedModules": true,
+    "moduleDetection": "force",
+    "noEmit": true,
+    "jsx": "react-jsx",
+    /* Linting */
+    "strict": true,
+    "noUnusedLocals": true,
+    "noUnusedParameters": true,
+    "noFallthroughCasesInSwitch": true,
+    "noUncheckedSideEffectImports": true
+  },
+  "include": ["src"]
+}

frontend/tsconfig.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "files": [],
+  "references": [
+    { "path": "./tsconfig.app.json" },
+    { "path": "./tsconfig.node.json" }
+  ]
+}

frontend/tsconfig.node.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "compilerOptions": {
+    "tsBuildInfoFile": "./node_modules/.tmp/tsconfig.node.tsbuildinfo",
+    "target": "ES2022",
+    "lib": ["ES2023"],
+    "module": "ESNext",
+    "skipLibCheck": true,
+    /* Bundler mode */
+    "moduleResolution": "Bundler",
+    "allowImportingTsExtensions": true,
+    "isolatedModules": true,
+    "moduleDetection": "force",
+    "noEmit": true,
+    /* Linting */
+    "strict": true,
+    "noUnusedLocals": true,
+    "noUnusedParameters": true,
+    "noFallthroughCasesInSwitch": true,
+    "noUncheckedSideEffectImports": true
+  },
+  "include": ["vite.config.ts"]
+}

frontend/vite.config.ts ADDED Viewed

	@@ -0,0 +1,40 @@

+import { defineConfig } from 'vite'
+import react from '@vitejs/plugin-react'
+import path from 'path'
+// https://vite.dev/config/
+export default defineConfig({ // Vite config for the frontend: React plugin, /ui/ base path, build output and dev proxy
+  plugins: [react()],
+  base: '/ui/', // public base path: built asset URLs are prefixed with /ui/
+  build: {
+    outDir: '../static', // resolved from the frontend project root, i.e. a sibling directory of frontend/
+    emptyOutDir: true, // clear outDir before each build
+    chunkSizeWarningLimit: 1600, // in kB
+    rollupOptions: {
+      output: {
+        manualChunks: { // group the listed packages into named chunks
+          vendor: ['react', 'react-dom', 'react-router-dom'],
+          charts: ['recharts'],
+          motion: ['framer-motion'],
+          markdown: ['react-markdown', 'react-syntax-highlighter'],
+        },
+      },
+    },
+  },
+  resolve: {
+    alias: {
+      '@': path.resolve(__dirname, './src'), // '@' imports resolve to the src directory next to this config
+    },
+  },
+  server: {
+    proxy: { // dev server only: requests starting with these paths are forwarded to http://localhost:8000
+      '/generate': 'http://localhost:8000',
+      '/explain': 'http://localhost:8000',
+      '/models': 'http://localhost:8000',
+      '/health': 'http://localhost:8000',
+      '/auth': 'http://localhost:8000',
+      '/stream': 'http://localhost:8000',
+    },
+  },
+})

llmopt/analyzer/query_analyzer.py CHANGED Viewed

@@ -161,18 +161,21 @@ class QueryAnalyzer:
             "code", "math", "science", "creative",
             "reasoning", "summarization", "translation", "factual"
         ]
-        try:
-            from transformers import pipeline  # type: ignore
-            logger.info("Loading ML Zero-Shot Classifier for Query Analyzer...")
-            self.ml_classifier = pipeline(
-                "zero-shot-classification",
-                model="cross-encoder/nli-distilroberta-base",
-                device=-1
-            )
-        except ImportError:
-            logger.info("transformers not found, using V1 heuristic Query Analyzer.")
-        except Exception as e:
-            logger.warning(f"Failed to load ML classifier: {e}. Falling back to V1.")
     def analyze(self, query: str) -> QueryFeatures:
         q = query.strip()

             "code", "math", "science", "creative",
             "reasoning", "summarization", "translation", "factual"
         ]
+        import os
+        if os.getenv("USE_ML_ANALYZER", "false").lower() == "true":
+            try:
+                from transformers import pipeline  # type: ignore
+                logger.info("Loading ML Zero-Shot Classifier for Query Analyzer...")
+                self.ml_classifier = pipeline(
+                    "zero-shot-classification",
+                    model="cross-encoder/nli-distilroberta-base",
+                    device=-1,
+                    local_files_only=True
+                )
+            except ImportError:
+                logger.info("transformers not found, using V1 heuristic Query Analyzer.")
+            except Exception as e:
+                logger.warning(f"Failed to load ML classifier: {e}. Falling back to V1.")
     def analyze(self, query: str) -> QueryFeatures:
         q = query.strip()

llmopt/api/app.py CHANGED Viewed

@@ -11,15 +11,31 @@ Endpoints:
 from __future__ import annotations
 import os
 import logging
 from typing import Optional, Dict
-from fastapi import FastAPI, HTTPException
-from fastapi.responses import StreamingResponse
 from pydantic import BaseModel, Field
 from llmopt.core import LLMOpt
 logger = logging.getLogger(__name__)
 # ---------------------------------------------------------------------------
@@ -32,9 +48,39 @@ app = FastAPI(
     version="0.1.0",
 )
-# Single shared client (stateless — safe for concurrent use)
 _client = LLMOpt(log_level=os.getenv("LOG_LEVEL", "WARNING"))
 # ---------------------------------------------------------------------------
 # Request / Response schemas
@@ -57,6 +103,11 @@ class GenerateRequest(BaseModel):
         None,
         description="Optional provider API keys (e.g. {'openai': 'sk-...', 'anthropic': '...' })"
     )
 class GenerateResponse(BaseModel):
@@ -78,20 +129,469 @@ class GenerateResponse(BaseModel):
 class ExplainRequest(BaseModel):
     query: str = Field(..., min_length=1, max_length=32000)
     budget_mode: str = Field("balanced")
 # ---------------------------------------------------------------------------
 # Endpoints
 # ---------------------------------------------------------------------------
-@app.get("/")
-def root():
     return {
-        "message": "LLMOpt V2 API is running!",
-        "docs": "/docs",
-        "health": "/health"
     }
 @app.get("/health")
 def health():
@@ -105,11 +605,24 @@ def list_models():
 @app.post("/generate", response_model=GenerateResponse)
-def generate(req: GenerateRequest):
     """
     Full pipeline: analyze → optimize → route → return response + metrics.
     """
     try:
         result = _client.generate(
             query=req.query,
             budget_mode=req.budget_mode,
@@ -122,31 +635,85 @@ def generate(req: GenerateRequest):
             temperature=req.temperature,
             dry_run=req.dry_run,
             api_keys=req.api_keys,  # Pass BYOK keys
         )
         return GenerateResponse(**result.to_dict())
     except KeyError as e:
         raise HTTPException(status_code=400, detail=f"Model not found: {e}")
     except Exception as e:
         logger.exception("generate() failed")
         raise HTTPException(status_code=500, detail=str(e))
 @app.post("/explain")
-def explain(req: ExplainRequest):
     """
     Returns the full routing decision for a query WITHOUT making an LLM API call.
     Useful for debugging, testing, and understanding optimization decisions.
     """
     try:
-        return _client.explain(query=req.query, budget_mode=req.budget_mode)
     except Exception as e:
         logger.exception("explain() failed")
         raise HTTPException(status_code=500, detail=str(e))
 @app.post("/stream")
-def stream_generate(req: GenerateRequest):
     """Server-sent stream of response tokens."""
     def token_generator():
         try:
             for chunk in _client.stream(

 from __future__ import annotations
 import os
+from dotenv import load_dotenv
+load_dotenv(os.path.join(os.path.dirname(__file__), "..", "..", "config", ".env"))
 import logging
 from typing import Optional, Dict
+from fastapi import FastAPI, HTTPException, Depends, Request, Response, status
+from fastapi.responses import StreamingResponse, RedirectResponse
+from fastapi.staticfiles import StaticFiles
+from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel, Field
 from llmopt.core import LLMOpt
+from llmopt.cache.redis_client import redis_manager
+from llmopt.api.security import (
+    create_session,
+    delete_session,
+    get_session_payload,
+    check_rate_limit,
+    get_session_id_from_request
+)
+from sqlalchemy.orm import Session
+from llmopt.db.session import engine, get_db
+from llmopt.db import models
+from llmopt.api import crud
 logger = logging.getLogger(__name__)
 # ---------------------------------------------------------------------------
     version="0.1.0",
 )
+# Detect dev mode — disable secure cookies on localhost
+IS_DEV = os.getenv("ENVIRONMENT", "development").lower() in ("development", "dev", "local")
+COOKIE_SECURE = not IS_DEV  # True only in production (HTTPS)
+COOKIE_SAMESITE = "lax" if IS_DEV else "none"  # lax works on HTTP localhost
+# Configure CORS — allow localhost in dev, full regex in prod
+if IS_DEV:
+    app.add_middleware(
+        CORSMiddleware,
+        allow_origins=["http://localhost:5173", "http://localhost:8000", "http://127.0.0.1:8000"],
+        allow_credentials=True,
+        allow_methods=["*"],
+        allow_headers=["*"],
+    )
+else:
+    app.add_middleware(
+        CORSMiddleware,
+        allow_origin_regex=r"https?://.*",
+        allow_credentials=True,
+        allow_methods=["*"],
+        allow_headers=["*"],
+    )
 _client = LLMOpt(log_level=os.getenv("LOG_LEVEL", "WARNING"))
+@app.on_event("startup")
+async def startup_event():
+    """Create any missing database tables, then open the Redis connection."""
+    # create_all only issues CREATE TABLE for tables that do not exist yet.
+    models.Base.metadata.create_all(bind=engine)
+    await redis_manager.connect()
+@app.on_event("shutdown")
+async def shutdown_event():
+    """Close the Redis connection when the application shuts down."""
+    await redis_manager.close()
 # ---------------------------------------------------------------------------
 # Request / Response schemas
         None,
         description="Optional provider API keys (e.g. {'openai': 'sk-...', 'anthropic': '...' })"
     )
+    alpha: Optional[float] = Field(None, description="Custom cost weight")
+    beta: Optional[float] = Field(None, description="Custom token weight")
+    gamma: Optional[float] = Field(None, description="Custom quality weight")
+    compression_enabled: Optional[bool] = Field(None, description="Force enable/disable prompt compression")
+    evaluate: bool = Field(False, description="Enable LLM-as-judge evaluation")
 class GenerateResponse(BaseModel):
 class ExplainRequest(BaseModel):
+    # Request body for POST /explain; every field below is forwarded to
+    # LLMOpt.explain() by the endpoint.
     query: str = Field(..., min_length=1, max_length=32000)
     budget_mode: str = Field("balanced")
+    # Optional scoring-weight overrides; None is passed through unchanged
+    # (presumably meaning "use the engine default" — confirm in LLMOpt.explain).
+    alpha: Optional[float] = Field(None)
+    beta: Optional[float] = Field(None)
+    gamma: Optional[float] = Field(None)
+    compression_enabled: Optional[bool] = Field(None)
+    # Provider filters; both default to an empty list.
+    exclude_providers: list[str] = Field(default_factory=list)
+    only_providers: list[str] = Field(default_factory=list)
+class AuthRequest(BaseModel):
+    # Request body for POST /auth/keys: a required mapping of provider name
+    # to that provider's plaintext API key.
+    api_keys: Dict[str, str] = Field(..., description="Provider API keys")
 # ---------------------------------------------------------------------------
 # Endpoints
 # ---------------------------------------------------------------------------
+@app.post("/auth/register")
+async def register(user: crud.UserCreate, db: Session = Depends(get_db)):
+    db_user = crud.get_user_by_email(db, email=user.email)
+    if db_user:
+        raise HTTPException(status_code=400, detail="Email already registered")
+    crud.create_user(db=db, user=user)
+    return {"message": "User created successfully"}
+@app.post("/auth/login")
+async def login_user(user: crud.UserLogin, response: Response, db: Session = Depends(get_db)):
+    db_user = crud.get_user_by_email(db, email=user.email)
+    if not db_user or not crud.verify_password(user.password, db_user.hashed_password):
+        raise HTTPException(status_code=400, detail="Incorrect email or password")
+    api_keys = {}
+    from llmopt.api.security import decrypt_string
+    user_keys_encrypted = crud.get_user_api_keys(db, db_user.id)
+    if user_keys_encrypted:
+        try:
+            api_keys = {p: decrypt_string(k) for p, k in user_keys_encrypted.items()}
+        except Exception:
+            pass
+    session_id = await create_session(api_keys, user_id=db_user.id)
+    response.set_cookie(
+        key="session_id",
+        value=session_id,
+        httponly=True,
+        secure=COOKIE_SECURE,
+        samesite=COOKIE_SAMESITE,
+        max_age=int(os.getenv("SESSION_TTL", 7200))
+    )
+    return {"message": "Logged in successfully", "session_id": session_id}
+@app.post("/auth/keys")
+async def update_keys(req: AuthRequest, session_id: str = Depends(get_session_id_from_request), db: Session = Depends(get_db)):
+    """
+    Securely store API keys in Redis and the persistent database.
+    """
+    if not req.api_keys:
+        raise HTTPException(status_code=400, detail="No API keys provided.")
+    payload = await get_session_payload(session_id)
+    user_id = payload.get("user_id")
+    # Merge keys with existing ones in Redis session
+    current_keys = payload.get("api_keys", {})
+    updated_keys = {**current_keys, **req.api_keys}
+    # Update redis session in-place
+    payload["api_keys"] = updated_keys
+    from llmopt.api.security import update_session_payload
+    await update_session_payload(session_id, payload)
+    # Save to db if authenticated
+    if user_id:
+        from llmopt.api.security import encrypt_string
+        encrypted_keys = {p: encrypt_string(k) for p, k in req.api_keys.items()}
+        crud.update_user_api_keys(db, user_id, encrypted_keys)
+    return {"message": "Keys updated securely"}
+@app.delete("/auth/keys/{provider}")
+async def delete_key(provider: str, session_id: str = Depends(get_session_id_from_request), db: Session = Depends(get_db)):
+    """
+    Delete an API key for a specific provider.
+    """
+    provider = provider.lower()
+    payload = await get_session_payload(session_id)
+    user_id = payload.get("user_id")
+    current_keys = payload.get("api_keys", {})
+    if provider in current_keys:
+        del current_keys[provider]
+    payload["api_keys"] = current_keys
+    from llmopt.api.security import update_session_payload
+    await update_session_payload(session_id, payload)
+    if user_id:
+        crud.delete_user_api_key(db, user_id, provider)
+    return {"message": f"Key for {provider} deleted successfully"}
+@app.get("/auth/keys")
+async def get_keys(session_id: str = Depends(get_session_id_from_request)):
+    """
+    Get the list of providers that have API keys configured in the current session.
+    """
+    payload = await get_session_payload(session_id)
+    api_keys = payload.get("api_keys", {})
+    connected = [provider for provider, key in api_keys.items() if key]
+    return {"connected_providers": connected}
+# OAuth configuration
+# Client credentials are read once at import time; a provider whose values
+# are unset is rejected with HTTP 400 by its login/callback endpoints.
+GOOGLE_CLIENT_ID = os.getenv("GOOGLE_CLIENT_ID")
+GOOGLE_CLIENT_SECRET = os.getenv("GOOGLE_CLIENT_SECRET")
+GITHUB_CLIENT_ID = os.getenv("GITHUB_CLIENT_ID")
+GITHUB_CLIENT_SECRET = os.getenv("GITHUB_CLIENT_SECRET")
+# Scheme + host used to build the OAuth redirect URIs and the post-login redirect.
+REDIRECT_URI_HOST = os.getenv("REDIRECT_URI_HOST", "http://localhost:8000")
+@app.get("/auth/login/google")
+def login_google():
+    if not GOOGLE_CLIENT_ID:
+        raise HTTPException(status_code=400, detail="Google Auth is not configured. Please set GOOGLE_CLIENT_ID env variable.")
+    redirect_uri = f"{REDIRECT_URI_HOST}/auth/callback/google"
+    auth_url = (
+        "https://accounts.google.com/o/oauth2/v2/auth"
+        f"?response_type=code"
+        f"&client_id={GOOGLE_CLIENT_ID}"
+        f"&redirect_uri={redirect_uri}"
+        f"&scope=openid%20email%20profile"
+        f"&state=google_auth_state"
+    )
+    return RedirectResponse(url=auth_url)
+@app.get("/auth/callback/google")
+async def callback_google(code: str, response: Response, db: Session = Depends(get_db)):
+    if not GOOGLE_CLIENT_ID or not GOOGLE_CLIENT_SECRET:
+        raise HTTPException(status_code=400, detail="Google Auth credentials missing.")
+    redirect_uri = f"{REDIRECT_URI_HOST}/auth/callback/google"
+    token_url = "https://oauth2.googleapis.com/token"
+    data = {
+        "code": code,
+        "client_id": GOOGLE_CLIENT_ID,
+        "client_secret": GOOGLE_CLIENT_SECRET,
+        "redirect_uri": redirect_uri,
+        "grant_type": "authorization_code",
+    }
+    import urllib.request
+    import urllib.parse
+    import json
+    try:
+        req_data = urllib.parse.urlencode(data).encode("utf-8")
+        req = urllib.request.Request(token_url, data=req_data, method="POST")
+        with urllib.request.urlopen(req) as r:
+            token_res = json.loads(r.read().decode("utf-8"))
+        access_token = token_res.get("access_token")
+        if not access_token:
+            raise HTTPException(status_code=400, detail="Failed to retrieve access token from Google.")
+        userinfo_url = "https://www.googleapis.com/oauth2/v3/userinfo"
+        req_user = urllib.request.Request(
+            userinfo_url,
+            headers={"Authorization": f"Bearer {access_token}"}
+        )
+        with urllib.request.urlopen(req_user) as r_user:
+            user_info = json.loads(r_user.read().decode("utf-8"))
+        email = user_info.get("email")
+        if not email:
+            raise HTTPException(status_code=400, detail="Google account has no email associated.")
+        db_user = crud.get_user_by_email(db, email=email)
+        if not db_user:
+            import secrets
+            random_pw = secrets.token_hex(16)
+            user_in = crud.UserCreate(email=email, password=random_pw)
+            db_user = crud.create_user(db, user_in)
+        api_keys = {}
+        from llmopt.api.security import decrypt_string
+        user_keys_encrypted = crud.get_user_api_keys(db, db_user.id)
+        if user_keys_encrypted:
+            try:
+                api_keys = {p: decrypt_string(k) for p, k in user_keys_encrypted.items()}
+            except Exception:
+                pass
+        session_id = await create_session(api_keys, user_id=db_user.id)
+        redirect_url = f"{REDIRECT_URI_HOST}/ui/workspace.html#api"
+        res = RedirectResponse(url=redirect_url)
+        res.set_cookie(
+            key="session_id",
+            value=session_id,
+            httponly=True,
+            secure=COOKIE_SECURE,
+            samesite=COOKIE_SAMESITE,
+            max_age=int(os.getenv("SESSION_TTL", 7200))
+        )
+        return res
+    except Exception as e:
+        logger.error(f"Google OAuth failed: {e}")
+        raise HTTPException(status_code=500, detail=f"Google OAuth failed: {str(e)}")
+@app.get("/auth/login/github")
+def login_github():
+    if not GITHUB_CLIENT_ID:
+        raise HTTPException(status_code=400, detail="GitHub Auth is not configured. Please set GITHUB_CLIENT_ID env variable.")
+    redirect_uri = f"{REDIRECT_URI_HOST}/auth/callback/github"
+    auth_url = (
+        "https://github.com/login/oauth/authorize"
+        f"?client_id={GITHUB_CLIENT_ID}"
+        f"&redirect_uri={redirect_uri}"
+        f"&scope=user:email"
+        f"&state=github_auth_state"
+    )
+    return RedirectResponse(url=auth_url)
+@app.get("/auth/callback/github")
+async def callback_github(code: str, response: Response, db: Session = Depends(get_db)):
+    if not GITHUB_CLIENT_ID or not GITHUB_CLIENT_SECRET:
+        raise HTTPException(status_code=400, detail="GitHub Auth credentials missing.")
+    redirect_uri = f"{REDIRECT_URI_HOST}/auth/callback/github"
+    token_url = "https://github.com/login/oauth/access_token"
+    data = {
+        "code": code,
+        "client_id": GITHUB_CLIENT_ID,
+        "client_secret": GITHUB_CLIENT_SECRET,
+        "redirect_uri": redirect_uri,
+    }
+    import urllib.request
+    import urllib.parse
+    import json
+    try:
+        req_data = urllib.parse.urlencode(data).encode("utf-8")
+        req = urllib.request.Request(
+            token_url,
+            data=req_data,
+            headers={"Accept": "application/json"},
+            method="POST"
+        )
+        with urllib.request.urlopen(req) as r:
+            token_res = json.loads(r.read().decode("utf-8"))
+        access_token = token_res.get("access_token")
+        if not access_token:
+            raise HTTPException(status_code=400, detail="Failed to retrieve access token from GitHub.")
+        email_url = "https://api.github.com/user/emails"
+        req_email = urllib.request.Request(
+            email_url,
+            headers={
+                "Authorization": f"token {access_token}",
+                "User-Agent": "LLMOpt-Server"
+            }
+        )
+        with urllib.request.urlopen(req_email) as r_email:
+            emails = json.loads(r_email.read().decode("utf-8"))
+        email = None
+        for email_info in emails:
+            if email_info.get("primary"):
+                email = email_info.get("email")
+                break
+        if not email and emails:
+            email = emails[0].get("email")
+        if not email:
+            raise HTTPException(status_code=400, detail="GitHub account has no email associated.")
+        db_user = crud.get_user_by_email(db, email=email)
+        if not db_user:
+            import secrets
+            random_pw = secrets.token_hex(16)
+            user_in = crud.UserCreate(email=email, password=random_pw)
+            db_user = crud.create_user(db, user_in)
+        api_keys = {}
+        from llmopt.api.security import decrypt_string
+        user_keys_encrypted = crud.get_user_api_keys(db, db_user.id)
+        if user_keys_encrypted:
+            try:
+                api_keys = {p: decrypt_string(k) for p, k in user_keys_encrypted.items()}
+            except Exception:
+                pass
+        session_id = await create_session(api_keys, user_id=db_user.id)
+        redirect_url = f"{REDIRECT_URI_HOST}/ui/workspace.html#api"
+        res = RedirectResponse(url=redirect_url)
+        res.set_cookie(
+            key="session_id",
+            value=session_id,
+            httponly=True,
+            secure=COOKIE_SECURE,
+            samesite=COOKIE_SAMESITE,
+            max_age=int(os.getenv("SESSION_TTL", 7200))
+        )
+        return res
+    except Exception as e:
+        logger.error(f"GitHub OAuth failed: {e}")
+        raise HTTPException(status_code=500, detail=f"GitHub OAuth failed: {str(e)}")
+@app.get("/auth/dashboard-stats")
+async def get_dashboard_stats(session_id: str = Depends(get_session_id_from_request), db: Session = Depends(get_db)):
+    """Calculate dashboard statistics dynamically from the generation logs in the database."""
+    session_payload = await get_session_payload(session_id)
+    user_id = session_payload.get("user_id")
+    logs = db.query(models.GenerationLog).filter(models.GenerationLog.user_id == user_id).order_by(models.GenerationLog.created_at.desc()).all()
+    prompts_improved = len(logs)
+    total_tokens_saved = sum(log.tokens_saved for log in logs if log.tokens_saved)
+    total_cost_saved = sum(log.cost_saved for log in logs if log.cost_saved)
+    distribution = {}
+    total_with_provider = 0
+    for log in logs:
+        if log.provider:
+            provider = log.provider.lower()
+            distribution[provider] = distribution.get(provider, 0) + 1
+            total_with_provider += 1
+    distribution_percentages = {}
+    if total_with_provider > 0:
+        for provider, count in distribution.items():
+            distribution_percentages[provider] = round((count / total_with_provider) * 100, 1)
+    recent_decisions = []
+    from datetime import datetime
+    for log in logs[:5]:
+        time_diff = datetime.utcnow() - log.created_at
+        if time_diff.days > 0:
+            time_str = f"{time_diff.days}d ago"
+        elif time_diff.seconds // 3600 > 0:
+            time_str = f"{time_diff.seconds // 3600}h ago"
+        else:
+            time_str = f"{(time_diff.seconds % 3600) // 60}m ago"
+            if time_str == "0m ago":
+                time_str = "just now"
+        recent_decisions.append({
+            "id": f"PROMPT_{log.id}",
+            "time_ago": time_str,
+            "model": log.model_used,
+            "provider": log.provider,
+            "tier": log.complexity_tier or "standard",
+            "score": round((log.complexity_score or 0.72) * 100, 1),
+            "reason": f"Routed based on {log.complexity_tier or 'standard'} tier (complexity score: {round((log.complexity_score or 0.72)*100)}/100)."
+        })
+    recent_optimizations = []
+    for log in logs[:3]:
+        time_diff = datetime.utcnow() - log.created_at
+        if time_diff.days > 0:
+            time_str = f"{time_diff.days}d ago"
+        elif time_diff.seconds // 3600 > 0:
+            time_str = f"{time_diff.seconds // 3600}h ago"
+        else:
+            time_str = f"{(time_diff.seconds % 3600) // 60}m ago"
+            if time_str == "0m ago":
+                time_str = "just now"
+        recent_optimizations.append({
+            "name": log.query[:40] + ("..." if len(log.query) > 40 else ""),
+            "model_used": log.model_used,
+            "time_ago": time_str,
+            "score": f"{round((log.complexity_score or 0.72) * 100, 1)}%",
+            "tokens_saved": f"-{log.tokens_saved or 0} tokens/avg"
+        })
+    avg_complexity = 0.0
+    valid_scores = [log.complexity_score for log in logs if log.complexity_score is not None]
+    if valid_scores:
+        avg_complexity = sum(valid_scores) / len(valid_scores)
+    avg_boost = f"+{round(avg_complexity * 30, 1)}%" if avg_complexity > 0 else "0%"
     return {
+        "tokens_saved": f"{total_tokens_saved:,}" if total_tokens_saved > 0 else "0",
+        "prompts_improved": prompts_improved,
+        "routing_savings": f"${total_cost_saved:,.2f}" if total_cost_saved > 0 else "$0.00",
+        "avg_boost": avg_boost,
+        "distribution": distribution_percentages,
+        "recent_decisions": recent_decisions,
+        "recent_optimizations": recent_optimizations,
+        "running_workflows": 0,
+        "queued_workflows": 0
     }
+@app.get("/auth/history")
+async def get_history(session_id: str = Depends(get_session_id_from_request), db: Session = Depends(get_db)):
+    """Fetch the list of recent generation logs for the authenticated user."""
+    session_payload = await get_session_payload(session_id)
+    user_id = session_payload.get("user_id")
+    if not user_id:
+        raise HTTPException(status_code=401, detail="Not authenticated")
+    logs = db.query(models.GenerationLog).filter(
+        models.GenerationLog.user_id == user_id
+    ).order_by(models.GenerationLog.created_at.desc()).limit(20).all()
+    # Calculate time-ago strings for frontend
+    from datetime import datetime
+    history_items = []
+    for log in logs:
+        time_diff = datetime.utcnow() - log.created_at
+        if time_diff.days > 0:
+            time_str = f"{time_diff.days}d ago"
+        elif time_diff.seconds // 3600 > 0:
+            time_str = f"{time_diff.seconds // 3600}h ago"
+        else:
+            time_str = f"{(time_diff.seconds % 3600) // 60}m ago"
+            if time_str == "0m ago":
+                time_str = "just now"
+        history_items.append({
+            "id": log.id,
+            "query": log.query,
+            "response": log.response,
+            "model_used": log.model_used,
+            "provider": log.provider,
+            "input_tokens": log.input_tokens,
+            "output_tokens": log.output_tokens,
+            "total_tokens": log.total_tokens,
+            "estimated_cost": log.estimated_cost,
+            "tokens_saved": log.tokens_saved,
+            "cost_saved": log.cost_saved,
+            "latency_ms": log.latency_ms,
+            "complexity_score": log.complexity_score,
+            "complexity_tier": log.complexity_tier,
+            "time_ago": time_str
+        })
+    return history_items
+@app.post("/auth/logout")
+async def logout(response: Response, session_id: str = Depends(get_session_id_from_request)):
+    """Clear the session from Redis and remove the cookie."""
+    await delete_session(session_id)
+    # The cookie is deleted with the same samesite/secure attributes it was set with.
+    response.delete_cookie("session_id", samesite=COOKIE_SAMESITE, secure=COOKIE_SECURE)
+    return {"message": "Logged out"}
+@app.get("/")
+def root():
+    if IS_DEV:
+        return RedirectResponse(url="http://localhost:5173/ui/")
+    return RedirectResponse(url="/ui/")
+# Outside development the UI is served from the local "static" directory
+# under /ui; in development every /ui request is redirected to the dev
+# server on localhost:5173 instead.
+if not IS_DEV:
+    app.mount("/ui", StaticFiles(directory="static", html=True), name="static")
+else:
+    @app.get("/ui")
+    @app.get("/ui/{path:path}")
+    def redirect_to_vite(path: str = ""):
+        """Redirect a /ui request to the same path on the local dev server."""
+        return RedirectResponse(url=f"http://localhost:5173/ui/{path}")
 @app.get("/health")
 def health():
 @app.post("/generate", response_model=GenerateResponse)
+async def generate(req: GenerateRequest, session_id: str = Depends(get_session_id_from_request), db: Session = Depends(get_db)):
     """
     Full pipeline: analyze → optimize → route → return response + metrics.
     """
+    await check_rate_limit(session_id)
+    # Override req.api_keys with the ones securely stored in the session
+    session_payload = await get_session_payload(session_id)
+    session_keys = session_payload.get("api_keys", {})
+    user_id = session_payload.get("user_id")
+    if not req.api_keys:
+        req.api_keys = {}
+    req.api_keys.update(session_keys)
     try:
+        # LLMOpt core relies on synchronous execution right now (litellm async is separate)
+        # Assuming _client.generate is synchronous, we run it normally
+        # In a high-concurrency async app, we might want run_in_threadpool
         result = _client.generate(
             query=req.query,
             budget_mode=req.budget_mode,
             temperature=req.temperature,
             dry_run=req.dry_run,
             api_keys=req.api_keys,  # Pass BYOK keys
+            alpha=req.alpha,
+            beta=req.beta,
+            gamma=req.gamma,
+            compression_enabled=req.compression_enabled,
+            evaluate=req.evaluate,
         )
+        # Save generation log to database
+        try:
+            log_entry = models.GenerationLog(
+                user_id=user_id,
+                query=req.query,
+                response=result.response,
+                model_used=result.model_used,
+                provider=result.provider,
+                input_tokens=result.input_tokens,
+                output_tokens=result.output_tokens,
+                total_tokens=result.total_tokens,
+                estimated_cost=result.estimated_cost,
+                tokens_saved=result.tokens_saved,
+                cost_saved=result.cost_saved,
+                latency_ms=result.latency_ms,
+                complexity_score=result.complexity.score,
+                complexity_tier=result.complexity.tier
+            )
+            db.add(log_entry)
+            db.commit()
+        except Exception as log_err:
+            logger.error(f"Failed to save generation log: {log_err}")
         return GenerateResponse(**result.to_dict())
     except KeyError as e:
         raise HTTPException(status_code=400, detail=f"Model not found: {e}")
     except Exception as e:
         logger.exception("generate() failed")
+        error_msg = str(e).lower()
+        if "authentication" in error_msg or "unauthorized" in error_msg or "invalid api key" in error_msg or "401" in error_msg:
+            raise HTTPException(status_code=401, detail="API is expired or token limit ended")
+        elif "rate limit" in error_msg or "429" in error_msg:
+            raise HTTPException(status_code=429, detail="API is expired or token limit ended")
         raise HTTPException(status_code=500, detail=str(e))
 @app.post("/explain")
+async def explain(req: ExplainRequest, session_id: str = Depends(get_session_id_from_request)):
     """
     Returns the full routing decision for a query WITHOUT making an LLM API call.
     Useful for debugging, testing, and understanding optimization decisions.
     """
+    # Only the API keys stored in the session are passed on; ExplainRequest
+    # has no api_keys field of its own.
+    session_payload = await get_session_payload(session_id)
+    session_keys = session_payload.get("api_keys", {})
     try:
+        return _client.explain(
+            query=req.query,
+            budget_mode=req.budget_mode,
+            alpha=req.alpha,
+            beta=req.beta,
+            gamma=req.gamma,
+            compression_enabled=req.compression_enabled,
+            exclude_providers=req.exclude_providers,
+            only_providers=req.only_providers,
+            api_keys=session_keys,
+        )
     except Exception as e:
         logger.exception("explain() failed")
         raise HTTPException(status_code=500, detail=str(e))
 @app.post("/stream")
+async def stream_generate(req: GenerateRequest, session_id: str = Depends(get_session_id_from_request)):
     """Server-sent stream of response tokens."""
+    await check_rate_limit(session_id)
+    session_payload = await get_session_payload(session_id)
+    session_keys = session_payload.get("api_keys", {})
+    if not req.api_keys:
+        req.api_keys = {}
+    req.api_keys.update(session_keys)
     def token_generator():
         try:
             for chunk in _client.stream(

llmopt/api/crud.py ADDED Viewed

	@@ -0,0 +1,59 @@

+from sqlalchemy.orm import Session
+from passlib.context import CryptContext
+from llmopt.db import models
+from pydantic import BaseModel
+# Shared passlib context: passwords are hashed and verified with bcrypt.
+pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")
+class UserCreate(BaseModel):
+    # Input for creating a user; `password` is the plaintext password and is
+    # hashed by create_user before being stored.
+    # NOTE(review): `email` is a plain str, so its format is not validated here.
+    email: str
+    password: str
+class UserLogin(BaseModel):
+    # Credentials submitted at login; `password` is the plaintext password.
+    email: str
+    password: str
+def get_password_hash(password):
+    """Return the hash of *password* produced by the module's passlib context."""
+    return pwd_context.hash(password)
+def verify_password(plain_password, hashed_password):
+    """Return whether *plain_password* matches the stored *hashed_password*."""
+    return pwd_context.verify(plain_password, hashed_password)
+def get_user_by_email(db: Session, email: str):
+    return db.query(models.User).filter(models.User.email == email).first()
+def create_user(db: Session, user: UserCreate):
+    hashed_password = get_password_hash(user.password)
+    db_user = models.User(email=user.email, hashed_password=hashed_password)
+    db.add(db_user)
+    db.commit()
+    db.refresh(db_user)
+    return db_user
+def get_user_by_id(db: Session, user_id: int):
+    return db.query(models.User).filter(models.User.id == user_id).first()
+def update_user_api_keys(db: Session, user_id: int, provider_keys: dict):
+    for provider, encrypted_key in provider_keys.items():
+        existing = db.query(models.UserAPIKey).filter(
+            models.UserAPIKey.user_id == user_id,
+            models.UserAPIKey.provider == provider
+        ).first()
+        if existing:
+            existing.encrypted_key = encrypted_key
+        else:
+            api_key_record = models.UserAPIKey(user_id=user_id, provider=provider, encrypted_key=encrypted_key)
+            db.add(api_key_record)
+    db.commit()
+def get_user_api_keys(db: Session, user_id: int) -> dict:
+    records = db.query(models.UserAPIKey).filter(models.UserAPIKey.user_id == user_id).all()
+    return {r.provider: r.encrypted_key for r in records}
+def delete_user_api_key(db: Session, user_id: int, provider: str) -> None:
+    db.query(models.UserAPIKey).filter(
+        models.UserAPIKey.user_id == user_id,
+        models.UserAPIKey.provider == provider
+    ).delete()
+    db.commit()

llmopt/api/security.py ADDED Viewed

	@@ -0,0 +1,186 @@

+import os
+import json
+import uuid
+import logging
+from typing import Dict, Optional
+from cryptography.fernet import Fernet
+from fastapi import Request, HTTPException, status
+from llmopt.cache.redis_client import get_redis
+import jwt
+import datetime
+from llmopt.db.session import SessionLocal
+from llmopt.api import crud
+logger = logging.getLogger(__name__)
+# Master key. In this module it is both the Fernet key (session payloads and
+# the encrypt_string/decrypt_string helpers) and the HS256 secret that signs
+# the session JWT.
+# In production, this MUST be set via environment variable. Fernet expects a
+# url-safe base64-encoded 32-byte key, so other values fail at import time.
+# Without it a fresh key is generated per process: anything encrypted or
+# signed by an earlier process can then no longer be decrypted or verified.
+_SECRET_KEY = os.getenv("SESSION_SECRET_KEY")
+if not _SECRET_KEY:
+    logger.warning("SESSION_SECRET_KEY not set. Generating a temporary one for this process.")
+    _SECRET_KEY = Fernet.generate_key().decode("utf-8")
+fernet = Fernet(_SECRET_KEY.encode("utf-8"))
+# Default session TTL: 2 hours
+SESSION_TTL = int(os.getenv("SESSION_TTL", 7200))
+def encrypt_payload(payload: dict) -> str:
+    """Encrypts the dictionary payload into a secure string."""
+    json_data = json.dumps(payload)
+    return fernet.encrypt(json_data.encode("utf-8")).decode("utf-8")
+def decrypt_payload(encrypted_data: str) -> dict:
+    """Decrypts the secure string back into a dictionary."""
+    json_data = fernet.decrypt(encrypted_data.encode("utf-8")).decode("utf-8")
+    return json.loads(json_data)
+def encrypt_string(data: str) -> str:
+    return fernet.encrypt(data.encode("utf-8")).decode("utf-8")
+def decrypt_string(encrypted_data: str) -> str:
+    return fernet.decrypt(encrypted_data.encode("utf-8")).decode("utf-8")
+async def create_session(api_keys: Dict[str, str], user_id: Optional[int] = None) -> str:
+    """Stores encrypted API keys and user_id in Redis and returns a session ID (JWT)."""
+    redis = await get_redis()
+    if not redis:
+        raise HTTPException(
+            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
+            detail="Redis cache is unavailable. Cannot create session."
+        )
+    # Generate JWT for session_id
+    jti = str(uuid.uuid4())
+    jwt_payload = {"jti": jti}
+    if user_id is not None:
+        jwt_payload["user_id"] = user_id
+    session_id = jwt.encode(jwt_payload, _SECRET_KEY, algorithm="HS256")
+    payload = {"api_keys": api_keys, "user_id": user_id}
+    encrypted_payload = encrypt_payload(payload)
+    # Store with TTL
+    await redis.setex(f"session:{session_id}", SESSION_TTL, encrypted_payload)
+    return session_id
+async def update_session_payload(session_id: str, payload: dict) -> None:
+    """Updates the encrypted session payload in Redis under the existing session ID."""
+    redis = await get_redis()
+    if not redis:
+        raise HTTPException(
+            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
+            detail="Redis cache is unavailable. Cannot update session."
+        )
+    encrypted_payload = encrypt_payload(payload)
+    await redis.setex(f"session:{session_id}", SESSION_TTL, encrypted_payload)
+async def get_session_payload(session_id: str) -> dict:
+    """Retrieves and decrypts the payload from Redis. On cache miss, restores from DB using JWT.
+    Returns a dict with "api_keys" and "user_id". Raises HTTP 503 when Redis is
+    unavailable, HTTP 500 when a cached payload cannot be decrypted, and
+    HTTP 401 when the session is missing and cannot be restored.
+    """
+    redis = await get_redis()
+    if not redis:
+        raise HTTPException(
+            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
+            detail="Redis cache is unavailable."
+        )
+    encrypted_keys = await redis.get(f"session:{session_id}")
+    # Cache hit
+    if encrypted_keys:
+        # Sliding expiry: every read pushes the TTL out again.
+        await redis.expire(f"session:{session_id}", SESSION_TTL)
+        try:
+            return decrypt_payload(encrypted_keys)
+        except Exception as e:
+            logger.error(f"Failed to decrypt session keys: {e}")
+            raise HTTPException(
+                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+                detail="Failed to decrypt session."
+            )
+    # Cache miss: attempt to decode JWT and recover from database
+    # NOTE(review): a session whose Redis key was removed (e.g. by
+    # delete_session on logout) also lands here, so a still-valid JWT that
+    # carries a user_id is silently restored — confirm this is intended.
+    try:
+        jwt_payload = jwt.decode(session_id, _SECRET_KEY, algorithms=["HS256"])
+        user_id = jwt_payload.get("user_id")
+        if not user_id:
+            raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="Session expired. No user context.")
+        # Fetch from database
+        db = SessionLocal()
+        try:
+            user_keys_encrypted = crud.get_user_api_keys(db, user_id)
+        finally:
+            db.close()
+        # Decrypt keys from DB
+        api_keys = {p: decrypt_string(k) for p, k in user_keys_encrypted.items()}
+        payload = {"api_keys": api_keys, "user_id": user_id}
+        encrypted_payload = encrypt_payload(payload)
+        # Repopulate Redis
+        await redis.setex(f"session:{session_id}", SESSION_TTL, encrypted_payload)
+        return payload
+    except jwt.InvalidTokenError:
+        raise HTTPException(
+            status_code=status.HTTP_401_UNAUTHORIZED,
+            detail="Session expired or invalid."
+        )
+async def delete_session(session_id: str) -> bool:
+    """Removes the session from Redis."""
+    redis = await get_redis()
+    if not redis:
+        return False
+    await redis.delete(f"session:{session_id}")
+    return True
+async def check_rate_limit(session_id: str) -> None:
+    """
+    Basic rate limiting: max 20 requests per minute per session.
+    """
+    redis = await get_redis()
+    if not redis:
+        return
+    key = f"ratelimit:{session_id}"
+    requests = await redis.incr(key)
+    if requests == 1:
+        await redis.expire(key, 60)
+    if requests > 20:
+        raise HTTPException(
+            status_code=status.HTTP_429_TOO_MANY_REQUESTS,
+            detail="Rate limit exceeded. Please try again later."
+        )
+def get_session_id_from_request(request: Request) -> str:
+    """Extracts session ID from cookies or Authorization header."""
+    # First try cookie
+    session_id = request.cookies.get("session_id")
+    if session_id:
+        return session_id
+    # Then try Authorization header (Bearer token)
+    auth_header = request.headers.get("Authorization")
+    if auth_header and auth_header.startswith("Bearer "):
+        return auth_header.split(" ")[1]
+    raise HTTPException(
+        status_code=status.HTTP_401_UNAUTHORIZED,
+        detail="Missing session_id cookie or Bearer token."
+    )

llmopt/cache/redis_client.py ADDED Viewed

	@@ -0,0 +1,41 @@

+import os
+import logging
+from typing import Optional
+from redis import asyncio as aioredis
+logger = logging.getLogger(__name__)
+class RedisManager:
+    def __init__(self):
+        self.redis: Optional[aioredis.Redis] = None
+    async def connect(self):
+        redis_url = os.getenv("REDIS_URL")
+        if not redis_url:
+            logger.warning("REDIS_URL environment variable is not set. Redis features will be disabled.")
+            return
+        try:
+            self.redis = aioredis.from_url(
+                redis_url,
+                encoding="utf-8",
+                decode_responses=True,
+                socket_timeout=5.0,
+                socket_connect_timeout=5.0,
+                retry_on_timeout=True,
+                max_connections=10
+            )
+            await self.redis.ping()
+            logger.info("Successfully connected to Redis.")
+        except Exception as e:
+            logger.error(f"Failed to connect to Redis: {e}")
+            self.redis = None
+    async def close(self):
+        if self.redis:
+            await self.redis.close()
+redis_manager = RedisManager()
+async def get_redis():
+    return redis_manager.redis

llmopt/core.py CHANGED Viewed

@@ -18,17 +18,18 @@ import time
 import os
 from dataclasses import dataclass
 from pathlib import Path
-from typing import Optional
 from llmopt.analyzer.query_analyzer import QueryAnalyzer, QueryFeatures
 from llmopt.estimator.complexity_estimator import ComplexityEstimator, ComplexityResult
 from llmopt.engine.optimization_engine import OptimizationEngine, OptimizationResult, UserConstraints
 from llmopt.optimizer.prompt_optimizer import PromptOptimizer, OptimizedPrompt
 from llmopt.router.model_router import ModelRouter, RoutedResponse
 from llmopt.registry.model_registry import ModelRegistry
 from llmopt.cache.semantic_cache import SemanticCache
 from llmopt.evaluation.evaluator import LLMJudge, EvaluationResult
-import os
 logger = logging.getLogger(__name__)
@@ -136,30 +137,40 @@ class LLMOpt:
         registry_path: Optional[Path] = None,
         ollama_base_url: Optional[str] = None,
         log_level: str = "WARNING",
     ):
         logging.basicConfig(level=getattr(logging, log_level.upper(), logging.WARNING))
         self.registry  = ModelRegistry(registry_path)
         self.analyzer  = QueryAnalyzer()
         self.estimator = ComplexityEstimator()
-        self.engine    = OptimizationEngine(self.registry)
         self.optimizer = PromptOptimizer()
         self.router    = ModelRouter(ollama_base_url=ollama_base_url)
         # Initialize Semantic Cache (reads REDIS_URL from env if available)
-        # Using python-dotenv to ensure .env is loaded
         try:
             from dotenv import load_dotenv  # type: ignore
-            # Attempt to load from both the root and config/.env
             load_dotenv()
             load_dotenv("config/.env")
         except ImportError:
             pass
         redis_url = os.environ.get("REDIS_URL")
         self.cache = SemanticCache(redis_url=redis_url)
         self.judge = LLMJudge(judge_model="gpt-4o-mini")
     # ------------------------------------------------------------------
     # Primary API
     # ------------------------------------------------------------------
@@ -178,6 +189,10 @@ class LLMOpt:
         dry_run: bool = False,
         evaluate: bool = False,
         api_keys: Optional[Dict[str, str]] = None,
     ) -> GenerateResult:
         """
         Full pipeline: analyze → estimate → optimize → compress → route → return.
@@ -212,11 +227,17 @@ class LLMOpt:
                 latency_ms = (time.perf_counter() - t0) * 1000
                 logger.info("Returning cached response directly.")
-                constraints = UserConstraints(budget_mode=budget_mode)
                 optimization = self.engine.optimize(
                     complexity=complexity,
                     output_length_bucket=features.estimated_output_length,
                     constraints=constraints,
                 )
                 optimized_prompt = self.optimizer.optimize(
                     query=query,
@@ -257,16 +278,40 @@ class LLMOpt:
             exclude_providers=exclude_providers or [],
             only_providers=only_providers or [],
             prefer_local=prefer_local,
         )
         if prefer_local:
             constraints.only_providers = ["ollama"]
         # 4. Optimize (select model + config)
-        optimization = self.engine.optimize(
-            complexity=complexity,
-            output_length_bucket=features.estimated_output_length,
-            constraints=constraints,
-        )
         logger.debug(f"Selected: {optimization.selected_model}")
         # 5. Optimize prompt
@@ -284,16 +329,26 @@ class LLMOpt:
         if dry_run:
             routed = self._mock_response(optimization)
         else:
-            model_spec = self.registry.get(optimization.selected_model)
             routed = self.router.route(
                 model_name=optimization.selected_model,
                 provider=optimization.provider,
                 messages=messages,
                 max_tokens=optimization.max_tokens,
                 temperature=temperature,
-                input_cost_per_1k=model_spec.input_cost_per_1k,
-                output_cost_per_1k=model_spec.output_cost_per_1k,
-                api_keys=api_keys,  # Pass BYOK keys
             )
         latency_ms = (time.perf_counter() - t0) * 1000
@@ -308,11 +363,21 @@ class LLMOpt:
         )
         cost_saved = max(0.0, baseline_cost - routed.estimated_cost)
-        # 9. Evaluate (if requested) and feed Bayesian optimizer
         evaluation = None
         if evaluate and not dry_run:
             evaluation = self.judge.evaluate(query, routed.content)
-            if evaluation:
                 α, β, γ = self.engine.bayes.get_weights(constraints.budget_mode)
                 self.engine.bayes.record_outcome(
                     budget_mode=constraints.budget_mode,
@@ -351,15 +416,26 @@ class LLMOpt:
         api_keys: Optional[Dict[str, str]] = None,
         **kwargs,
     ):
-        """Yields text chunks.  Pipeline still runs fully before streaming."""
-        features    = self.analyzer.analyze(query)
-        complexity  = self.estimator.estimate(features)
-        constraints = UserConstraints(budget_mode=budget_mode)
-        optimization = self.engine.optimize(
-            complexity=complexity,
-            output_length_bucket=features.estimated_output_length,
-            constraints=constraints,
-        )
         optimized_prompt = self.optimizer.optimize(
             query=query,
             system_prompt_style=optimization.system_prompt_style,
@@ -370,27 +446,62 @@ class LLMOpt:
             model_name=optimization.selected_model,
             messages=messages,
             max_tokens=optimization.max_tokens,
-            provider=optimization.provider,  # Pass provider
-            api_keys=api_keys,  # Pass BYOK keys
         )
     # ------------------------------------------------------------------
     # Explainability (standalone)
     # ------------------------------------------------------------------
-    def explain(self, query: str, budget_mode: str = "balanced") -> dict:
         """
         Returns a structured explanation of what LLMOpt would do for a query,
         without making an actual API call.
         """
         features   = self.analyzer.analyze(query)
         complexity = self.estimator.estimate(features)
-        constraints = UserConstraints(budget_mode=budget_mode)
-        optimization = self.engine.optimize(
-            complexity=complexity,
-            output_length_bucket=features.estimated_output_length,
-            constraints=constraints,
-        )
         optimized_prompt = self.optimizer.optimize(
             query=query,
             system_prompt_style=optimization.system_prompt_style,
@@ -404,6 +515,7 @@ class LLMOpt:
             "optimized_prompt": optimized_prompt.to_dict(),
         }
     # ------------------------------------------------------------------
     # Helpers
     # ------------------------------------------------------------------
@@ -433,3 +545,62 @@ class LLMOpt:
             latency_ms=0.0,
             estimated_cost=optimization.estimated_cost,
         )

 import os
 from dataclasses import dataclass
 from pathlib import Path
+from typing import Optional, Dict, List
 from llmopt.analyzer.query_analyzer import QueryAnalyzer, QueryFeatures
 from llmopt.estimator.complexity_estimator import ComplexityEstimator, ComplexityResult
 from llmopt.engine.optimization_engine import OptimizationEngine, OptimizationResult, UserConstraints
+from llmopt.engine.llmopt_engine import LLMOptEngine
+from llmopt.engine.utility_engine import RoutingDecision
 from llmopt.optimizer.prompt_optimizer import PromptOptimizer, OptimizedPrompt
 from llmopt.router.model_router import ModelRouter, RoutedResponse
 from llmopt.registry.model_registry import ModelRegistry
 from llmopt.cache.semantic_cache import SemanticCache
 from llmopt.evaluation.evaluator import LLMJudge, EvaluationResult
 logger = logging.getLogger(__name__)
         registry_path: Optional[Path] = None,
         ollama_base_url: Optional[str] = None,
         log_level: str = "WARNING",
+        use_v2_engine: bool = True,
     ):
         logging.basicConfig(level=getattr(logging, log_level.upper(), logging.WARNING))
         self.registry  = ModelRegistry(registry_path)
         self.analyzer  = QueryAnalyzer()
         self.estimator = ComplexityEstimator()
+        self.engine    = OptimizationEngine(self.registry)   # V1 — kept for fallback
         self.optimizer = PromptOptimizer()
         self.router    = ModelRouter(ollama_base_url=ollama_base_url)
         # Initialize Semantic Cache (reads REDIS_URL from env if available)
         try:
             from dotenv import load_dotenv  # type: ignore
             load_dotenv()
             load_dotenv("config/.env")
         except ImportError:
             pass
         redis_url = os.environ.get("REDIS_URL")
         self.cache = SemanticCache(redis_url=redis_url)
         self.judge = LLMJudge(judge_model="gpt-4o-mini")
+        # V2 Utility Engine — default active
+        self._use_v2 = use_v2_engine
+        self._v2_engine: Optional[LLMOptEngine] = None
+        if use_v2_engine:
+            self._v2_engine = LLMOptEngine(
+                available_keys={},          # populated per-request via update_keys()
+                include_ollama=True,
+                log_level=logging.WARNING,
+            )
+            logger.info("[LLMOpt] V2 utility engine active.")
     # ------------------------------------------------------------------
     # Primary API
     # ------------------------------------------------------------------
         dry_run: bool = False,
         evaluate: bool = False,
         api_keys: Optional[Dict[str, str]] = None,
+        alpha: Optional[float] = None,
+        beta: Optional[float] = None,
+        gamma: Optional[float] = None,
+        compression_enabled: Optional[bool] = None,
     ) -> GenerateResult:
         """
         Full pipeline: analyze → estimate → optimize → compress → route → return.
                 latency_ms = (time.perf_counter() - t0) * 1000
                 logger.info("Returning cached response directly.")
+                constraints = UserConstraints(
+                    budget_mode=budget_mode,
+                    compression_enabled=compression_enabled,
+                )
                 optimization = self.engine.optimize(
                     complexity=complexity,
                     output_length_bucket=features.estimated_output_length,
                     constraints=constraints,
+                    alpha=alpha,
+                    beta=beta,
+                    gamma=gamma,
                 )
                 optimized_prompt = self.optimizer.optimize(
                     query=query,
             exclude_providers=exclude_providers or [],
             only_providers=only_providers or [],
             prefer_local=prefer_local,
+            compression_enabled=compression_enabled,
         )
         if prefer_local:
             constraints.only_providers = ["ollama"]
         # 4. Optimize (select model + config)
+        if self._use_v2 and self._v2_engine is not None:
+            # Update BYOK keys for this request
+            if api_keys:
+                self._v2_engine.update_keys(api_keys)
+            # Build constraints dict for V2 engine
+            v2_constraints = {
+                "exclude_providers":     exclude_providers or [],
+                "only_providers":        only_providers or [],
+            }
+            if max_cost_per_request is not None:
+                v2_constraints["max_cost_per_request"] = max_cost_per_request
+            if prefer_local:
+                v2_constraints["only_providers"] = ["ollama"]
+            decision = self._v2_engine.route(
+                query_features=features,
+                budget_mode=budget_mode,
+                constraints=v2_constraints,
+            )
+            optimization = self._v2_to_optimization_result(decision, complexity, features)
+        else:
+            optimization = self.engine.optimize(
+                complexity=complexity,
+                output_length_bucket=features.estimated_output_length,
+                constraints=constraints,
+                alpha=alpha,
+                beta=beta,
+                gamma=gamma,
+            )
         logger.debug(f"Selected: {optimization.selected_model}")
         # 5. Optimize prompt
         if dry_run:
             routed = self._mock_response(optimization)
         else:
+            # Fetch model spec from appropriate registry
+            if self._use_v2 and self._v2_engine is not None:
+                # V2: look up from the merged V2 registry (knows all new model IDs)
+                v2_spec = self._v2_engine._registry.get_model(optimization.selected_model)
+                in_cost  = v2_spec["input_cost_per_1k"]  if v2_spec else optimization.estimated_cost / 2
+                out_cost = v2_spec["output_cost_per_1k"] if v2_spec else optimization.estimated_cost / 2
+            else:
+                # V1: look up from the old ModelRegistry
+                model_spec = self.registry.get(optimization.selected_model)
+                in_cost  = model_spec.input_cost_per_1k
+                out_cost = model_spec.output_cost_per_1k
             routed = self.router.route(
                 model_name=optimization.selected_model,
                 provider=optimization.provider,
                 messages=messages,
                 max_tokens=optimization.max_tokens,
                 temperature=temperature,
+                input_cost_per_1k=in_cost,
+                output_cost_per_1k=out_cost,
+                api_keys=api_keys,
             )
         latency_ms = (time.perf_counter() - t0) * 1000
         )
         cost_saved = max(0.0, baseline_cost - routed.estimated_cost)
+        # 9. Evaluate (if requested) and feed optimizer
         evaluation = None
         if evaluate and not dry_run:
             evaluation = self.judge.evaluate(query, routed.content)
+            if self._use_v2 and self._v2_engine is not None:
+                # Feed outcome back into adaptive EMA updater
+                self._v2_engine.record_outcome(
+                    model_id=routed.model_used,
+                    latency_ms=routed.latency_ms,
+                    success=True,
+                    quality_score=evaluation.overall if evaluation else None,
+                    cost_usd=routed.estimated_cost,
+                )
+            elif evaluation:
+                # V1 path: feed Bayesian optimizer
                 α, β, γ = self.engine.bayes.get_weights(constraints.budget_mode)
                 self.engine.bayes.record_outcome(
                     budget_mode=constraints.budget_mode,
         api_keys: Optional[Dict[str, str]] = None,
         **kwargs,
     ):
+        """Yields text chunks. Pipeline still runs fully before streaming."""
+        features   = self.analyzer.analyze(query)
+        complexity = self.estimator.estimate(features)
+        if self._use_v2 and self._v2_engine is not None:
+            if api_keys:
+                self._v2_engine.update_keys(api_keys)
+            decision = self._v2_engine.route(
+                query_features=features,
+                budget_mode=budget_mode,
+            )
+            optimization = self._v2_to_optimization_result(decision, complexity, features)
+        else:
+            constraints = UserConstraints(budget_mode=budget_mode)
+            optimization = self.engine.optimize(
+                complexity=complexity,
+                output_length_bucket=features.estimated_output_length,
+                constraints=constraints,
+            )
         optimized_prompt = self.optimizer.optimize(
             query=query,
             system_prompt_style=optimization.system_prompt_style,
             model_name=optimization.selected_model,
             messages=messages,
             max_tokens=optimization.max_tokens,
+            provider=optimization.provider,
+            api_keys=api_keys,
         )
     # ------------------------------------------------------------------
     # Explainability (standalone)
     # ------------------------------------------------------------------
+    def explain(
+        self,
+        query: str,
+        budget_mode: str = "balanced",
+        alpha: Optional[float] = None,
+        beta: Optional[float] = None,
+        gamma: Optional[float] = None,
+        compression_enabled: Optional[bool] = None,
+        exclude_providers: Optional[list[str]] = None,
+        only_providers: Optional[list[str]] = None,
+        api_keys: Optional[Dict[str, str]] = None,
+    ) -> dict:
         """
         Returns a structured explanation of what LLMOpt would do for a query,
         without making an actual API call.
         """
         features   = self.analyzer.analyze(query)
         complexity = self.estimator.estimate(features)
+        if self._use_v2 and self._v2_engine is not None:
+            if api_keys:
+                self._v2_engine.update_keys(api_keys)
+            v2_constraints = {
+                "exclude_providers":     exclude_providers or [],
+                "only_providers":        only_providers or [],
+            }
+            decision = self._v2_engine.route(
+                query_features=features,
+                budget_mode=budget_mode,
+                constraints=v2_constraints,
+            )
+            optimization = self._v2_to_optimization_result(decision, complexity, features)
+        else:
+            constraints = UserConstraints(
+                budget_mode=budget_mode,
+                compression_enabled=compression_enabled,
+                exclude_providers=exclude_providers or [],
+                only_providers=only_providers or [],
+            )
+            optimization = self.engine.optimize(
+                complexity=complexity,
+                output_length_bucket=features.estimated_output_length,
+                constraints=constraints,
+                alpha=alpha,
+                beta=beta,
+                gamma=gamma,
+            )
         optimized_prompt = self.optimizer.optimize(
             query=query,
             system_prompt_style=optimization.system_prompt_style,
             "optimized_prompt": optimized_prompt.to_dict(),
         }
     # ------------------------------------------------------------------
     # Helpers
     # ------------------------------------------------------------------
             latency_ms=0.0,
             estimated_cost=optimization.estimated_cost,
         )
+    @staticmethod
+    def _v2_to_optimization_result(
+        decision: RoutingDecision,
+        complexity: ComplexityResult,
+        features: QueryFeatures,
+    ) -> OptimizationResult:
+        """
+        Compatibility shim: maps RoutingDecision (V2) → OptimizationResult (V1 shape).
+        This allows all downstream pipeline stages (PromptOptimizer, ModelRouter,
+        logging, GenerateResult) to remain completely unchanged while the routing
+        layer has been replaced by the utility engine.
+        OptimizationResult fields (from optimization_engine.py):
+            selected_model, provider, estimated_cost, estimated_input_tokens,
+            estimated_output_tokens, max_tokens, compression_enabled,
+            system_prompt_style, rationale, fallback_model, objective_score
+        """
+        ex = decision.explanation
+        # Build a rationale list from the V2 explanation dict
+        rationale = [
+            f"engine=utility_v2  domain={ex.get('primary_domain', 'general')}",
+            f"utility_score={decision.utility_score:.4f}  budget_lambda={ex.get('lambda', '?')}",
+            f"top_dims={list(ex.get('query_dimensions', {}).keys())[:3]}",
+            f"candidates_evaluated={ex.get('candidates_evaluated', '?')}",
+            f"registry_source={ex.get('registry_source', 'baseline')}",
+        ]
+        if decision.fallback_model_id:
+            rationale.append(f"fallback={decision.fallback_model_id} ({decision.fallback_provider})")
+        # Output length → token estimate lookup
+        output_token_map = {"short": 300, "medium": 700, "long": 1500, "very_long": 3000}
+        est_output = output_token_map.get(
+            str(getattr(features, 'estimated_output_length', 'medium')).lower(), 700
+        )
+        est_input = max(getattr(features, 'token_count', 100), 100)
+        # Budget mode drives compression and prompt style
+        budget_mode = ex.get("budget_mode", "balanced")
+        compression = (budget_mode == "cheap")
+        system_prompt_style = "minimal" if budget_mode == "cheap" else "standard"
+        max_tokens = min(est_output + 200, 4096)
+        return OptimizationResult(
+            selected_model=decision.model_id,
+            provider=decision.provider,
+            estimated_cost=decision.estimated_cost,
+            estimated_input_tokens=est_input,
+            estimated_output_tokens=est_output,
+            max_tokens=max_tokens,
+            compression_enabled=compression,
+            system_prompt_style=system_prompt_style,
+            rationale=rationale,
+            fallback_model=decision.fallback_model_id,
+            objective_score=1.0 - decision.utility_score,  # invert: lower is better (V1 convention)
+        )

llmopt/db/models.py ADDED Viewed

	@@ -0,0 +1,47 @@

+from sqlalchemy import Column, Integer, String, DateTime, ForeignKey, Float
+from sqlalchemy.orm import relationship
+from datetime import datetime
+from llmopt.db.session import Base
+class User(Base):
+    """ORM model for the ``users`` table: an account that owns API keys and generation logs."""
+    __tablename__ = "users"
+    id = Column(Integer, primary_key=True, index=True)
+    # Unique and indexed.
+    email = Column(String, unique=True, index=True)
+    hashed_password = Column(String)
+    # Default is the callable datetime.utcnow, which yields a naive (tz-less) datetime.
+    created_at = Column(DateTime, default=datetime.utcnow)
+    # "all, delete-orphan": child rows are deleted together with the user, and
+    # also when they are removed from these collections.
+    api_keys = relationship("UserAPIKey", back_populates="user", cascade="all, delete-orphan")
+    generation_logs = relationship("GenerationLog", back_populates="user", cascade="all, delete-orphan")
+class UserAPIKey(Base):
+    """ORM model for the ``user_api_keys`` table: one stored provider key belonging to a user."""
+    __tablename__ = "user_api_keys"
+    id = Column(Integer, primary_key=True, index=True)
+    user_id = Column(Integer, ForeignKey("users.id"))
+    provider = Column(String, index=True)
+    # NOTE(review): presumably the ciphertext produced by the API layer's
+    # encrypt_string helper — confirm against the CRUD code that writes it.
+    encrypted_key = Column(String)
+    # Reverse side of User.api_keys.
+    user = relationship("User", back_populates="api_keys")
+class GenerationLog(Base):
+    """ORM model for the ``generation_logs`` table: one row of request/response metrics."""
+    __tablename__ = "generation_logs"
+    id = Column(Integer, primary_key=True, index=True)
+    # Nullable, so a log row may exist without an associated user.
+    user_id = Column(Integer, ForeignKey("users.id"), nullable=True)
+    query = Column(String)
+    # Nullable, so a row can be stored without response text.
+    response = Column(String, nullable=True)
+    model_used = Column(String)
+    provider = Column(String)
+    # Token accounting.
+    input_tokens = Column(Integer)
+    output_tokens = Column(Integer)
+    total_tokens = Column(Integer)
+    estimated_cost = Column(Float)
+    # NOTE(review): savings are presumably measured against a baseline chosen by the caller — confirm.
+    tokens_saved = Column(Integer)
+    cost_saved = Column(Float)
+    # Milliseconds, per the column name.
+    latency_ms = Column(Float)
+    complexity_score = Column(Float)
+    complexity_tier = Column(String)
+    # Default is the callable datetime.utcnow, which yields a naive (tz-less) datetime.
+    created_at = Column(DateTime, default=datetime.utcnow)
+    # Reverse side of User.generation_logs.
+    user = relationship("User", back_populates="generation_logs")

llmopt/db/session.py ADDED Viewed

	@@ -0,0 +1,34 @@

+import os
+from sqlalchemy import create_engine
+from sqlalchemy.orm import sessionmaker, declarative_base
+# Default to local SQLite if DATABASE_URL is not set
+SQLALCHEMY_DATABASE_URL = os.getenv("DATABASE_URL", "sqlite:///./llmopt.db")
+# For SQLite, we need connect_args={"check_same_thread": False}
+if SQLALCHEMY_DATABASE_URL.startswith("sqlite"):
+    engine = create_engine(
+        SQLALCHEMY_DATABASE_URL, connect_args={"check_same_thread": False}
+    )
+else:
+    # For Postgres (e.g. Neon, Supabase)
+    # SQLAlchemy requires `postgresql://` instead of `postgres://`
+    if SQLALCHEMY_DATABASE_URL.startswith("postgres://"):
+        SQLALCHEMY_DATABASE_URL = SQLALCHEMY_DATABASE_URL.replace("postgres://", "postgresql://", 1)
+    engine = create_engine(
+        SQLALCHEMY_DATABASE_URL,
+        pool_pre_ping=True,
+        pool_recycle=300,
+    )
+SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
+Base = declarative_base()
+# Dependency for FastAPI
+def get_db():
+    db = SessionLocal()
+    try:
+        yield db
+    finally:
+        db.close()

llmopt/engine/__init__.py CHANGED Viewed

	@@ -0,0 +1,15 @@

+"""LLMOpt engine package."""
+# V1 (legacy, kept for compatibility)
+from llmopt.engine.optimization_engine import OptimizationEngine, OptimizationResult, UserConstraints
+# V2 — utility-based routing
+from llmopt.engine.utility_engine import UtilityOptimizationEngine, RoutingDecision, QueryUtilityProfile
+from llmopt.engine.llmopt_engine import LLMOptEngine
+__all__ = [
+    # V1
+    "OptimizationEngine", "OptimizationResult", "UserConstraints",
+    # V2
+    "UtilityOptimizationEngine", "RoutingDecision", "QueryUtilityProfile",
+    "LLMOptEngine",
+]

llmopt/engine/llmopt_engine.py ADDED Viewed

	@@ -0,0 +1,275 @@

+"""
+LLMOpt — Main Facade (V2 Engine Entry Point)
+=============================================
+Wires the utility engine into the existing LLMOpt pipeline.
+Replaces: OptimizationEngine
+Keeps intact: QueryAnalyzer, ComplexityEstimator, SemanticCache,
+              PromptOptimizer, ModelRouter, LLMJudge
+Usage (identical to old OptimizationEngine interface):
+------------------------------------------------------
+    from llmopt.engine.llmopt_engine import LLMOptEngine
+    engine = LLMOptEngine(
+        available_keys={
+            "openai":    "sk-...",
+            "anthropic": "sk-ant-...",
+        }
+    )
+    decision = engine.route(query_features, budget_mode="balanced")
+    # decision.model_id        → "claude-sonnet-4-5"
+    # decision.provider        → "anthropic"
+    # decision.utility_score   → 0.8241
+    # decision.estimated_cost  → 0.00312
+    # decision.explanation     → {...full reasoning...}
+    # decision.fallback_model_id → "gpt-4.1-mini"
+    # After getting a response, record outcome for adaptive updating:
+    engine.record_outcome(
+        model_id="claude-sonnet-4-5",
+        latency_ms=1340,
+        success=True,
+        quality_score=8.5,
+        cost_usd=0.00312,
+    )
+"""
+import logging
+import os
+from typing import Optional
+from llmopt.registry.hybrid_updater import HybridRegistryUpdater
+from llmopt.engine.utility_engine import UtilityOptimizationEngine, RoutingDecision
+from llmopt.updater.adaptive_updater import AdaptiveRuntimeUpdater
+logger = logging.getLogger(__name__)
+class LLMOptEngine:
+    """
+    Main entry point for the utility-based routing engine.
+    Pipeline position: slots in between QueryAnalyzer/ComplexityEstimator
+    and PromptOptimizer/ModelRouter — identical interface to old OptimizationEngine.
+    Args:
+        available_keys:       Dict of provider → API key. Only provided keys are routed to.
+        openrouter_api_key:   Optional OpenRouter key for live pricing patches.
+        include_ollama:       Whether to include local Ollama as a routing option.
+        log_level:            Logging verbosity.
+    """
+    def __init__(
+        self,
+        available_keys:      Optional[dict] = None,
+        openrouter_api_key:  Optional[str]  = None,
+        include_ollama:      bool = True,
+        log_level:           int  = logging.WARNING,
+    ):
+        logging.basicConfig(level=log_level)
+        # Resolve API keys: constructor args > environment variables
+        resolved_keys = self._resolve_keys(available_keys or {})
+        # Layer 1: Registry (baseline JSON + live OpenRouter patch)
+        or_key = openrouter_api_key or os.getenv("OPENROUTER_API_KEY", "")
+        self._registry = HybridRegistryUpdater(openrouter_api_key=or_key)
+        # Layer 2: Utility Engine (BYOK-aware routing)
+        self._engine = UtilityOptimizationEngine(
+            registry_updater=self._registry,
+            available_keys=resolved_keys,
+            include_ollama=include_ollama,
+        )
+        # Layer 3: Adaptive Runtime Stats (EMA-based, no RL)
+        self._runtime = AdaptiveRuntimeUpdater()
+        logger.info(
+            f"[LLMOptEngine] Initialized. "
+            f"Providers: {list(resolved_keys.keys())}"
+        )
+    # ── Main routing method — drop-in for old OptimizationEngine ─────────────
+    def route(
+        self,
+        query_features,
+        budget_mode:            str   = "balanced",
+        constraints:            dict  = None,
+        force_refresh_registry: bool  = False,
+    ) -> RoutingDecision:
+        """
+        Route a query to the best available model.
+        Args:
+            query_features:  QueryFeatures dataclass (from QueryAnalyzer) or dict.
+            budget_mode:     "cheap" | "balanced" | "quality"
+            constraints:     Optional hard overrides. See UtilityOptimizationEngine.route().
+            force_refresh_registry: Force live registry refresh from OpenRouter.
+        Returns:
+            RoutingDecision — same fields as old OptimizationEngine output.
+        """
+        decision = self._engine.route(
+            query_features=query_features,
+            budget_mode=budget_mode,
+            constraints=constraints or {},
+            force_refresh_registry=force_refresh_registry,
+        )
+        # Inject runtime adjustment into utility score
+        adj = self._runtime.get_utility_adjustment(decision.model_id)
+        if adj != 0.0:
+            decision.utility_score = round(decision.utility_score + adj, 4)
+            decision.explanation["runtime_adjustment"] = adj
+            logger.debug(
+                f"[LLMOptEngine] Runtime adj for {decision.model_id}: {adj:+.4f}"
+            )
+        # Override latency estimate if we have runtime data
+        runtime_lat = self._runtime.get_latency_estimate(decision.model_id)
+        if runtime_lat:
+            decision.explanation["observed_latency_ms"] = round(runtime_lat, 0)
+        return decision
+    # ── Outcome recording — call after each LLM API response ─────────────────
+    def record_outcome(
+        self,
+        model_id:      str,
+        latency_ms:    Optional[float] = None,
+        success:       bool = True,
+        quality_score: Optional[float] = None,
+        cost_usd:      Optional[float] = None,
+    ):
+        """
+        Record the outcome of a routing decision for adaptive updating.
+        Call this after the LLM API call completes (in ModelRouter or the main
+        generate() method). quality_score comes from LLMJudge if evaluate=True.
+        Args:
+            model_id:      The model that was used.
+            latency_ms:    Actual end-to-end latency.
+            success:       Whether the API call succeeded.
+            quality_score: Optional 1–10 quality score from LLMJudge.
+            cost_usd:      Actual cost of the request.
+        """
+        self._runtime.record_outcome(
+            model_id=model_id,
+            latency_ms=latency_ms,
+            success=success,
+            quality_score=quality_score,
+            cost_usd=cost_usd,
+        )
+    # ── Key management ────────────────────────────────────────────────────────
+    def update_keys(self, keys: dict):
+        """
+        Update available API keys mid-session.
+        Use this when keys are passed per-request (BYOK REST API mode).
+        """
+        resolved = self._resolve_keys(keys)
+        self._engine.update_available_keys(resolved)
+    # ── Observability ─────────────────────────────────────────────────────────
+    def explain(
+        self,
+        query_features,
+        budget_mode: str = "balanced",
+        constraints: dict = None,
+    ) -> str:
+        """
+        Dry-run routing — returns formatted explanation without making an API call.
+        Drop-in replacement for old client.explain() method.
+        """
+        decision = self.route(query_features, budget_mode, constraints)
+        return self._format_explanation(decision)
+    def get_registry_info(self) -> dict:
+        """Returns registry runtime metadata."""
+        return self._registry.get_registry().get("_runtime_meta", {})
+    def get_model_stats(self, model_id: str) -> dict:
+        """Returns runtime stats for a specific model."""
+        return self._runtime.get_stats_summary(model_id)
+    def save_runtime_stats(self):
+        """Persist runtime stats to disk (call on shutdown)."""
+        self._runtime.save()
+    # ── Internal helpers ──────────────────────────────────────────────────────
+    @staticmethod
+    def _resolve_keys(keys: dict) -> dict:
+        """
+        Merge provided keys with environment variables.
+        Provided keys take precedence over env vars.
+        """
+        env_map = {
+            "openai":    "OPENAI_API_KEY",
+            "anthropic": "ANTHROPIC_API_KEY",
+            "google":    "GEMINI_API_KEY",
+            "mistral":   "MISTRAL_API_KEY",
+            "deepseek":  "DEEPSEEK_API_KEY",
+        }
+        resolved = {}
+        for provider, env_var in env_map.items():
+            # Explicit key takes priority; fall back to env
+            val = keys.get(provider) or os.getenv(env_var, "")
+            if val and not val.startswith("your_") and val != env_var:
+                resolved[provider] = val
+        # Pass through any extra keys provided (custom providers)
+        for k, v in keys.items():
+            if k not in resolved and v and not str(v).startswith("your_"):
+                resolved[k] = v
+        return resolved
+    @staticmethod
+    def _format_explanation(decision: RoutingDecision) -> str:
+        """Formats a RoutingDecision as a human-readable explanation string."""
+        ex = decision.explanation
+        dims = ex.get("query_dimensions", {})
+        shortlist = ex.get("shortlist", [])
+        lines = [
+            "=" * 55,
+            "LLMOpt — Routing Decision",
+            "=" * 55,
+            f"Selected model   : {decision.model_id} ({decision.provider})",
+            f"Utility score    : {decision.utility_score:.4f}",
+            f"Estimated cost   : ${decision.estimated_cost:.6f}",
+            f"Budget mode      : {ex.get('budget_mode')} (λ={ex.get('lambda')})",
+            f"Primary domain   : {ex.get('primary_domain')}",
+            "",
+            "Query dimensions (active weights):",
+        ]
+        for dim, weight in dims.items():
+            lines.append(f"  {dim:<25} weight={weight:.2f}")
+        if shortlist:
+            lines += ["", "Top candidates:"]
+            for item in shortlist:
+                lines.append(
+                    f"  {item['model_id']:<35} U={item['utility_score']:.4f}  "
+                    f"cap={item['capability']:.3f}  ${item['est_cost_usd']:.6f}"
+                )
+        if decision.fallback_model_id:
+            lines.append(f"\nFallback model   : {decision.fallback_model_id}")
+        adj = ex.get("runtime_adjustment")
+        if adj:
+            lines.append(f"Runtime adj      : {adj:+.4f} (from observed outcomes)")
+        lines.append("=" * 55)
+        return "\n".join(lines)

llmopt/engine/optimization_engine.py CHANGED Viewed

@@ -240,12 +240,19 @@ class OptimizationEngine:
         complexity: ComplexityResult,
         output_length_bucket: str,
         constraints: Optional[UserConstraints] = None,
     ) -> OptimizationResult:
         if constraints is None:
             constraints = UserConstraints()
-        α, β, γ = self.bayes.get_weights(constraints.budget_mode)
-        logger.debug(f"Using weights α={α:.3f} β={β:.3f} γ={γ:.3f} for mode '{constraints.budget_mode}'")
         # --- 1. Build candidate set ---
         candidates = self.registry.capable_of(

         complexity: ComplexityResult,
         output_length_bucket: str,
         constraints: Optional[UserConstraints] = None,
+        alpha: Optional[float] = None,
+        beta: Optional[float] = None,
+        gamma: Optional[float] = None,
     ) -> OptimizationResult:
         if constraints is None:
             constraints = UserConstraints()
+        if alpha is not None and beta is not None and gamma is not None:
+            α, β, γ = alpha, beta, gamma
+            logger.debug(f"Using custom weights α={α:.3f} β={β:.3f} γ={γ:.3f}")
+        else:
+            α, β, γ = self.bayes.get_weights(constraints.budget_mode)
+            logger.debug(f"Using weights α={α:.3f} β={β:.3f} γ={γ:.3f} for mode '{constraints.budget_mode}'")
         # --- 1. Build candidate set ---
         candidates = self.registry.capable_of(

llmopt/engine/utility_engine.py ADDED Viewed

	@@ -0,0 +1,665 @@

+"""
+LLMOpt — Utility-Based Optimization Engine (V2 Drop-in Replacement)
+====================================================================
+Replaces the old J(x) = α·Cost + β·Tokens − γ·Quality complexity-routing engine.
+NEW APPROACH — Utility-Constrained Routing:
+--------------------------------------------
+Instead of routing on query complexity alone, the engine:
+  1. Resolves available models from the user's actual API keys (BYOK)
+  2. Applies hard constraints (context window, required features, cost cap)
+  3. Builds a query utility profile — what dimensions matter FOR THIS QUERY
+  4. Scores each candidate: U(m, q) = (Σ wᵢ · capabilityᵢ(m)) / (Σ wᵢ) − (λ / (1 + λ)) · cost_norm(m)
+     where weights wᵢ come from the query profile, not global defaults
+  5. Returns the best model + fallback + full explanation
+Key differences from old engine:
+  - Routing is driven by WHAT THE QUERY NEEDS, not a global complexity score
+  - Only models with available API keys are considered (BYOK)
+  - Weights are query-derived, not budget-mode static
+  - Budget mode adjusts λ (cost penalty), not the capability weights
+  - No Bayesian/Optuna dependency — deterministic, debuggable, stable
+Drop-in interface:
+  engine = UtilityOptimizationEngine(registry_updater, available_keys={"openai": "sk-...", ...})
+  result = engine.route(query_features, budget_mode="balanced", constraints={})
+  # result.model_id, result.utility_score, result.explanation, result.fallback_model_id
+"""
+import math
+import logging
+from dataclasses import dataclass, field
+from typing import Optional
+logger = logging.getLogger(__name__)
+# ── Data contracts (same shape as old engine output) ─────────────────────────
+@dataclass
+class QueryUtilityProfile:
+    """
+    What this query actually needs — extracted from QueryFeatures.
+    Each weight ∈ [0.0, 1.0] indicating how important that dimension is.
+    Weights do NOT need to sum to 1 — they're importance signals, not probabilities.
+    A weight of 0.0 marks the dimension as inactive; inactive dimensions are
+    skipped when candidates are scored.
+    """
+    # Capability dimension weights
+    reasoning:            float = 0.0
+    coding:               float = 0.0
+    math:                 float = 0.0
+    creativity:           float = 0.0
+    factuality:           float = 0.0
+    instruction_following: float = 0.5   # always baseline important
+    long_context:         float = 0.0
+    multilingual:         float = 0.0
+    tool_use:             float = 0.0
+    summarization:        float = 0.0
+    conversation:         float = 0.0
+    # Hard requirements (boolean) — models lacking the feature are filtered out
+    requires_tool_calling:  bool = False
+    requires_image_input:   bool = False
+    requires_json_mode:     bool = False
+    # Smallest acceptable model context window, in tokens (0 = no requirement)
+    min_context_tokens:     int  = 0
+    # Estimated token budget for this query (drives the per-request cost estimate)
+    estimated_input_tokens:  int = 500
+    estimated_output_tokens: int = 500
+    # Primary domain label (for logging/explainability)
+    primary_domain: str = "general"
+@dataclass
+class RoutingDecision:
+    """
+    Output of the engine — same fields the old OptimizationEngine returned,
+    plus richer explanation. Drop-in compatible.
+    The first seven fields are required; the rest default to "not set".
+    """
+    model_id:          str
+    provider:          str
+    utility_score:     float          # U(m,q) — higher is better
+    estimated_cost:    float          # USD for this request
+    input_cost_per_1k: float          # copied from the selected model's registry entry
+    output_cost_per_1k: float         # copied from the selected model's registry entry
+    context_window:    int            # copied from the selected model's registry entry
+    # Second-best scored model, or None when only one candidate was scored
+    fallback_model_id: Optional[str] = None
+    fallback_provider: Optional[str] = None
+    # Explainability — replaces old "rationale" string
+    explanation: dict = field(default_factory=dict)
+    # Mirrors old engine fields for pipeline compatibility
+    capability_score:  float = 0.0   # overall capability of selected model
+    # NOTE(review): UtilityOptimizationEngine.route() does not populate the three
+    # fields below; they keep their defaults unless a caller sets them.
+    complexity_score:  float = 0.0   # pass-through from QueryFeatures if available
+    tokens_saved:      int   = 0
+    compression_ratio: float = 0.0
+# ── Budget mode → cost penalty λ ─────────────────────────────────────────────
+# Unknown budget modes are replaced by "balanced" in route() before lookup.
+BUDGET_LAMBDA = {
+    # λ scales how much cost penalizes utility score
+    # higher λ = cost matters more = cheaper models win more often
+    # (the effective penalty coefficient applied in scoring is λ / (1 + λ))
+    "cheap":    3.0,
+    "balanced": 1.2,
+    "quality":  0.3,
+}
+# Minimum acceptable utility score for a model to be considered.
+# Candidates scoring below this are dropped during scoring, except when the
+# candidate pool holds two or fewer models — then every candidate is kept.
+MIN_UTILITY_THRESHOLD = 0.25
+# ── Core Engine ───────────────────────────────────────────────────────────────
+class UtilityOptimizationEngine:
+    """
+    Drop-in replacement for the old OptimizationEngine.
+    Instantiate once per request session (or per server lifecycle if keys are static).
+    Keys can be updated between requests via update_available_keys().
+    """
+    def __init__(
+        self,
+        registry_updater,           # HybridRegistryUpdater instance
+        available_keys: dict,       # {"openai": "sk-...", "anthropic": "sk-ant-..."}
+        include_ollama: bool = True  # whether local Ollama counts as available
+    ):
+        self._updater = registry_updater
+        self._available_keys = {}
+        self._include_ollama = include_ollama
+        self.update_available_keys(available_keys)
+    def update_available_keys(self, keys: dict):
+        """
+        Call this when user's API keys change.
+        keys format: {"openai": "sk-...", "anthropic": "...", "ollama": "local", ...}
+        Providers with empty/None values are treated as unavailable.
+        Ollama is included only if:
+          - "ollama" key is explicitly in keys dict, OR
+          - include_ollama=True AND keys dict is empty (no cloud keys at all)
+        This prevents Ollama from silently dominating routing when the user
+        only provided cloud API keys.
+        """
+        self._available_keys = {
+            provider.lower(): key
+            for provider, key in keys.items()
+            if key and str(key).strip()
+        }
+        # Include Ollama only when explicitly requested or as last-resort fallback
+        ollama_explicitly_set = "ollama" in {k.lower() for k in keys}
+        no_cloud_keys = not any(
+            p in self._available_keys
+            for p in ("openai", "anthropic", "google", "mistral", "deepseek")
+        )
+        if self._include_ollama and (ollama_explicitly_set or no_cloud_keys):
+            self._available_keys.setdefault("ollama", "__local__")
+        logger.info(f"[Engine] Available providers: {list(self._available_keys.keys())}")
+    # ── Main routing method ───────────────────────────────────────────────────
+    def route(
+        self,
+        query_features,          # QueryFeatures dataclass from QueryAnalyzer
+        budget_mode: str = "balanced",
+        constraints: Optional[dict] = None,
+        force_refresh_registry: bool = False,
+    ) -> RoutingDecision:
+        """
+        Route a query to the best available model.
+        Args:
+            query_features:  Output of QueryAnalyzer (QueryFeatures dataclass or dict)
+            budget_mode:     "cheap" | "balanced" | "quality"
+                             (any other value is treated as "balanced")
+            constraints:     Optional hard overrides:
+                               max_cost_per_request: float (USD)
+                               max_latency_ms: int
+                               min_context_tokens: int
+                               exclude_providers: list[str]
+                               only_providers: list[str]
+                               require_tool_calling: bool
+                               require_image_input: bool
+                               require_json_mode: bool
+            force_refresh_registry: Force live registry refresh
+        Returns:
+            RoutingDecision (drop-in compatible with old engine output)
+        Raises:
+            RuntimeError: no model belongs to a provider with an available key,
+                          or scoring returned no candidates.
+        Note:
+            Hard constraints are relaxed in two stages when nothing passes:
+            first the cost/latency caps are dropped, then ALL hard constraints
+            (including feature and context-window requirements) are ignored.
+        """
+        constraints = constraints or {}
+        budget_mode = budget_mode if budget_mode in BUDGET_LAMBDA else "balanced"
+        # 1. Get merged registry (baseline + live patch)
+        registry = self._updater.get_registry(force_refresh=force_refresh_registry)
+        all_models = registry.get("models", {})
+        # 2. Build query utility profile from query features
+        profile = self._build_utility_profile(query_features, constraints)
+        # 3. Resolve available candidate pool (BYOK filter)
+        candidates = self._filter_by_availability(all_models, constraints)
+        if not candidates:
+            raise RuntimeError(
+                "No models available. Please provide at least one valid API key "
+                "(OpenAI, Anthropic, Google, Mistral, DeepSeek) or run Ollama locally."
+            )
+        # 4. Apply hard constraints (context window, features, cost cap)
+        viable = self._apply_hard_constraints(candidates, profile, constraints)
+        if not viable:
+            # Relax hard constraints partially — fall back to best available
+            logger.warning(
+                "[Engine] No models passed hard constraints. "
+                "Relaxing cost/latency caps and retrying."
+            )
+            # Feature and context requirements live on `profile`, so they still apply here
+            relaxed_constraints = {
+                k: v for k, v in constraints.items()
+                if k not in ("max_cost_per_request", "max_latency_ms")
+            }
+            viable = self._apply_hard_constraints(candidates, profile, relaxed_constraints)
+        if not viable:
+            # Last resort: use all available candidates
+            logger.warning("[Engine] Using all available candidates (no constraints).")
+            viable = candidates
+        # 5. Score each viable model by utility
+        scored = self._score_candidates(viable, profile, budget_mode)
+        if not scored:
+            raise RuntimeError("Scoring produced no results. Check model registry integrity.")
+        # Sort: highest utility first
+        scored.sort(key=lambda x: x[1], reverse=True)
+        best_id,    best_score    = scored[0]
+        best_spec = viable[best_id]
+        # The runner-up (if any) becomes the fallback model
+        fallback_id   = None
+        fallback_prov = None
+        if len(scored) > 1:
+            fallback_id   = scored[1][0]
+            fallback_prov = viable[fallback_id]["provider"]
+        # 6. Estimate request cost
+        est_cost = self._estimate_cost(best_spec, profile)
+        # 7. Build explanation
+        explanation = self._build_explanation(
+            scored, viable, profile, budget_mode, constraints, best_id
+        )
+        # NOTE(review): the cost/context fields below are read with [] and raise
+        # KeyError if the registry entry lacks them, unlike _estimate_cost's .get().
+        return RoutingDecision(
+            model_id=best_id,
+            provider=best_spec["provider"],
+            utility_score=round(best_score, 4),
+            estimated_cost=round(est_cost, 8),
+            input_cost_per_1k=best_spec["input_cost_per_1k"],
+            output_cost_per_1k=best_spec["output_cost_per_1k"],
+            context_window=best_spec["context_window"],
+            fallback_model_id=fallback_id,
+            fallback_provider=fallback_prov,
+            explanation=explanation,
+            capability_score=self._overall_capability(best_spec),
+        )
+    # ── Step 2: Build Query Utility Profile ───────────────────────────────────
+    def _build_utility_profile(self, qf, constraints: dict) -> QueryUtilityProfile:
+        """
+        Convert QueryFeatures → QueryUtilityProfile.
+        Works with both QueryFeatures dataclass and plain dict.
+        Uses domain flags to derive per-dimension importance weights.
+        Args:
+            qf:          QueryFeatures-like object or dict; missing flags read as False.
+            constraints: Routing constraints; the require_* and min_context_tokens
+                         entries are folded into the profile's hard requirements.
+        Returns:
+            A new QueryUtilityProfile (inputs are not modified).
+        """
+        # Normalize input — support both dataclass and dict
+        def g(attr, default=False):
+            if isinstance(qf, dict):
+                return qf.get(attr, default)
+            return getattr(qf, attr, default)
+        profile = QueryUtilityProfile()
+        # ── Dimension weights from domain flags ──────────────────────────────
+        # These are NOT boolean — they express HOW IMPORTANT each dim is.
+        # Multiple domains can be active simultaneously.
+        # Order matters: later blocks use max(...) so they never lower a weight
+        # set by an earlier block.
+        if g("domain_reasoning") or g("requires_analysis") or g("requires_debate"):
+            profile.reasoning = 0.85
+            profile.factuality = 0.70
+        if g("domain_code") or g("domain_coding"):
+            profile.coding = 0.90
+            profile.reasoning = max(profile.reasoning, 0.60)
+            profile.instruction_following = max(profile.instruction_following, 0.70)
+        if g("domain_math"):
+            profile.math = 0.90
+            profile.reasoning = max(profile.reasoning, 0.70)
+        if g("domain_creative") or g("domain_creative_writing"):
+            profile.creativity = 0.88
+            profile.instruction_following = max(profile.instruction_following, 0.60)
+        if g("domain_factual") or g("domain_science"):
+            profile.factuality = max(profile.factuality, 0.80)
+            profile.reasoning = max(profile.reasoning, 0.55)
+        if g("domain_summarization"):
+            profile.summarization = 0.85
+            profile.long_context = 0.60
+        if g("domain_translation") or g("domain_multilingual"):
+            profile.multilingual = 0.90
+            profile.factuality = max(profile.factuality, 0.60)
+        # NOTE(review): domain_factual also activates the conversation weight here —
+        # confirm this is intended and not a copy/paste of the flag name.
+        if g("domain_conversational") or g("domain_factual"):
+            profile.conversation = 0.70
+        # Multi-step / complex reasoning boost
+        if g("multi_step") or g("requires_comparison"):
+            profile.reasoning = min(1.0, profile.reasoning + 0.15)
+        # Expert-level signal — raise the bar on the active reasoning/coding/math
+        # dimensions only (+0.10, capped at 1.0); other dimensions are untouched
+        if g("_expert_signal") or g("expert_signal"):
+            for dim in ["reasoning", "coding", "math"]:
+                val = getattr(profile, dim)
+                if val > 0:
+                    setattr(profile, dim, min(1.0, val + 0.10))
+        # Tool use requirement
+        if g("requires_tool_use") or g("has_tool_calls") or constraints.get("require_tool_calling"):
+            profile.tool_use = 0.80
+            profile.requires_tool_calling = True
+        # Image input requirement
+        if g("has_image") or constraints.get("require_image_input"):
+            profile.requires_image_input = True
+        # JSON mode requirement
+        if g("requires_json") or constraints.get("require_json_mode"):
+            profile.requires_json_mode = True
+        # Context window requirement
+        # NOTE(review): assumes token_count is numeric; a present-but-None value
+        # would raise in the arithmetic below — confirm against QueryAnalyzer.
+        token_count = g("token_count", 0)
+        min_ctx = constraints.get("min_context_tokens", 0)
+        profile.min_context_tokens = max(
+            int(min_ctx),
+            int(token_count * 3)   # conservative: input tokens × 3 headroom
+        )
+        # Never estimate fewer than 100 input tokens
+        profile.estimated_input_tokens = max(int(token_count), 100)
+        # Estimate output length
+        output_len_map = {
+            "short":     300,
+            "medium":    700,
+            "long":      1500,
+            "very_long": 3000,
+        }
+        est_output = g("estimated_output_length", "medium")
+        # Unrecognized bucket names fall back to the "medium" estimate (700)
+        profile.estimated_output_tokens = output_len_map.get(
+            str(est_output).lower(), 700
+        )
+        # Primary domain label — first matching flag in this priority order wins;
+        # with no match the dataclass default ("general") is kept
+        domain_priority = [
+            ("domain_code", "coding"),
+            ("domain_coding", "coding"),
+            ("domain_math", "math"),
+            ("domain_reasoning", "reasoning"),
+            ("domain_creative", "creative"),
+            ("domain_science", "science"),
+            ("domain_summarization", "summarization"),
+            ("domain_translation", "translation"),
+            ("domain_factual", "factual"),
+        ]
+        for flag, label in domain_priority:
+            if g(flag):
+                profile.primary_domain = label
+                break
+        return profile
+    # ── Step 3: BYOK Provider Filter ─────────────────────────────────────────
+    def _filter_by_availability(self, all_models: dict, constraints: dict) -> dict:
+        """
+        Filter models to only those whose provider has an available API key.
+        Respects:
+          - available_keys (BYOK)
+          - constraints["exclude_providers"]
+          - constraints["only_providers"]
+        """
+        exclude = {p.lower() for p in constraints.get("exclude_providers", [])}
+        only    = {p.lower() for p in constraints.get("only_providers", [])} \
+                  if constraints.get("only_providers") else None
+        available = {}
+        for mid, spec in all_models.items():
+            provider = spec.get("provider", "").lower()
+            # Must have a key for this provider
+            if provider not in self._available_keys:
+                continue
+            # Respect exclude list
+            if provider in exclude:
+                continue
+            # Respect only list
+            if only and provider not in only:
+                continue
+            available[mid] = spec
+        logger.debug(
+            f"[Engine] Available candidate pool: {len(available)} models "
+            f"from providers: {set(s['provider'] for s in available.values())}"
+        )
+        return available
+    # ── Step 4: Hard Constraints Filter ──────────────────────────────────────
+    def _apply_hard_constraints(
+        self, candidates: dict, profile: QueryUtilityProfile, constraints: dict
+    ) -> dict:
+        """
+        Filter candidates by hard constraints that are non-negotiable.
+        Returns a potentially empty dict — caller handles the empty case.
+        """
+        viable = {}
+        max_cost = constraints.get("max_cost_per_request")  # USD
+        max_latency = constraints.get("max_latency_ms")      # ms
+        for mid, spec in candidates.items():
+            # Context window check
+            if spec.get("context_window", 0) < profile.min_context_tokens:
+                logger.debug(f"[Filter] {mid}: context too small "
+                             f"({spec['context_window']} < {profile.min_context_tokens})")
+                continue
+            # Feature: tool calling
+            if profile.requires_tool_calling and not spec.get("features", {}).get("tool_calling"):
+                logger.debug(f"[Filter] {mid}: no tool_calling support")
+                continue
+            # Feature: image input
+            if profile.requires_image_input and not spec.get("features", {}).get("image_input"):
+                logger.debug(f"[Filter] {mid}: no image_input support")
+                continue
+            # Feature: json mode
+            if profile.requires_json_mode and not spec.get("features", {}).get("json_mode"):
+                logger.debug(f"[Filter] {mid}: no json_mode support")
+                continue
+            # Cost cap
+            if max_cost is not None:
+                est = self._estimate_cost(spec, profile)
+                if est > max_cost:
+                    logger.debug(f"[Filter] {mid}: cost {est:.6f} > cap {max_cost}")
+                    continue
+            # Latency cap
+            if max_latency is not None:
+                if spec.get("avg_latency_ms", 99999) > max_latency:
+                    logger.debug(f"[Filter] {mid}: latency too high")
+                    continue
+            viable[mid] = spec
+        return viable
+    # ── Step 5: Utility Scoring ───────────────────────────────────────────────
+    def _score_candidates(
+        self, candidates: dict, profile: QueryUtilityProfile, budget_mode: str
+    ) -> list:
+        """
+        Score each candidate model with:
+          U(m, q) = (Σ wᵢ · capᵢ(m)) / (Σ wᵢ)  −  (λ / (1 + λ)) · cost_norm(m)
+        where:
+          wᵢ       = importance weight for capability dimension i (from profile)
+          capᵢ(m)  = model m's score on dimension i (from registry; missing → 0.0)
+          λ        = budget penalty (from BUDGET_LAMBDA)
+          cost_norm = model's estimated request cost, log-scaled and min-max
+                      normalized across the candidates
+        Models whose provider is "ollama" get an extra 0.12 / (1 + λ) deduction.
+        Candidates below MIN_UTILITY_THRESHOLD are dropped unless the pool has
+        two or fewer models, so the result may be empty for larger pools.
+        Args:
+            candidates:  Model id → spec dict; must be non-empty.
+            profile:     Query profile supplying weights and token estimates.
+            budget_mode: Key into BUDGET_LAMBDA (validated by the caller).
+        Returns list of (model_id, utility_score) tuples, unsorted.
+        """
+        lam = BUDGET_LAMBDA[budget_mode]
+        # Dimension weights from profile
+        dimension_weights = {
+            "reasoning":             profile.reasoning,
+            "coding":                profile.coding,
+            "math":                  profile.math,
+            "creativity":            profile.creativity,
+            "factuality":            profile.factuality,
+            "instruction_following": profile.instruction_following,
+            "long_context":          profile.long_context,
+            "multilingual":          profile.multilingual,
+            "tool_use":              profile.tool_use,
+            "summarization":         profile.summarization,
+            "conversation":          profile.conversation,
+        }
+        # Only keep dimensions with non-zero weight
+        active_dims = {k: w for k, w in dimension_weights.items() if w > 0}
+        total_weight = sum(active_dims.values())
+        if total_weight == 0:
+            # Pathological case: no signals — use instruction_following as baseline
+            active_dims = {"instruction_following": 1.0, "conversation": 0.5}
+            total_weight = 1.5
+        # Compute raw costs for normalization
+        costs = {
+            mid: self._estimate_cost(spec, profile)
+            for mid, spec in candidates.items()
+        }
+        # Log-scale normalization: separates $0.0001 from $0.003 from $0.020
+        # meaningfully — linear scale collapses these differences when one
+        # expensive model anchors the range.
+        # Free models (Ollama, cost=0) stay at cost_norm=0.
+        LOG_EPS = 1e-7   # prevents log(0); smaller than any real API cost
+        log_costs = {mid: math.log(c + LOG_EPS) for mid, c in costs.items()}
+        log_max = max(log_costs.values())
+        log_min = min(log_costs.values())
+        # Floor the range so equal-cost pools (or a single candidate) do not divide by zero
+        log_range = max(log_max - log_min, 1e-9)
+        scored = []
+        for mid, spec in candidates.items():
+            caps = spec.get("capabilities", {})
+            # Weighted capability sum
+            cap_sum = sum(
+                w * caps.get(dim, 0.0)
+                for dim, w in active_dims.items()
+            )
+            # Weighted average; stays within [0, 1] provided registry capabilities do
+            cap_score = cap_sum / total_weight
+            # Cost normalization on log scale (0 = cheapest, 1 = most expensive)
+            cost_norm = (log_costs[mid] - log_min) / log_range
+            # Final utility
+            utility = cap_score - (lam * cost_norm) / (1 + lam)
+            # Dividing by (1+lam) keeps the effective cost coefficient λ/(1+λ)
+            # below 1, so the cost penalty alone can never exceed cost_norm
+            # Provider-tier adjustment:
+            # Ollama (free, local) is great for "cheap" mode but should not
+            # dominate "balanced" or "quality" modes — local inference has
+            # higher latency variance and lower reliability than cloud APIs.
+            provider = spec.get("provider", "")
+            if provider == "ollama":
+                # At cheap(λ=3): penalty=0.03 | balanced(λ=1.2): ≈0.055 | quality(λ=0.3): ≈0.092
+                ollama_penalty = 0.12 / (1 + lam)
+                utility -= ollama_penalty
+            # Small pools (≤ 2 candidates) bypass the threshold so something is always returned
+            if utility >= MIN_UTILITY_THRESHOLD or len(candidates) <= 2:
+                scored.append((mid, utility))
+            logger.debug(
+                f"[Score] {mid}: cap={cap_score:.3f} cost_norm={cost_norm:.3f} "
+                f"U={utility:.4f} (λ={lam})"
+            )
+        return scored
+    # ── Helpers ───────────────────────────────────────────────────────────────
+    def _estimate_cost(self, spec: dict, profile: QueryUtilityProfile) -> float:
+        """Estimate USD cost for one request with this model."""
+        in_cost  = spec.get("input_cost_per_1k", 0)  * profile.estimated_input_tokens  / 1000
+        out_cost = spec.get("output_cost_per_1k", 0) * profile.estimated_output_tokens / 1000
+        return in_cost + out_cost
+    def _overall_capability(self, spec: dict) -> float:
+        """Single overall capability score for a model (for legacy field compatibility)."""
+        caps = spec.get("capabilities", {})
+        weights = {"reasoning": 0.30, "coding": 0.25, "math": 0.15,
+                   "instruction_following": 0.15, "factuality": 0.15}
+        return round(
+            sum(caps.get(k, 0) * w for k, w in weights.items()), 4
+        )
+    def _build_explanation(
+        self,
+        scored: list,
+        viable: dict,
+        profile: QueryUtilityProfile,
+        budget_mode: str,
+        constraints: dict,
+        winner_id: str,
+    ) -> dict:
+        """Build the full explainability dict — replaces old rationale string."""
+        winner_spec = viable[winner_id]
+        # Top 4 capability dimensions that drove this decision
+        dim_weights = {
+            "reasoning":             profile.reasoning,
+            "coding":                profile.coding,
+            "math":                  profile.math,
+            "creativity":            profile.creativity,
+            "factuality":            profile.factuality,
+            "instruction_following": profile.instruction_following,
+            "long_context":          profile.long_context,
+            "multilingual":          profile.multilingual,
+            "tool_use":              profile.tool_use,
+            "summarization":         profile.summarization,
+            "conversation":          profile.conversation,
+        }
+        top_dims = sorted(
+            [(k, v) for k, v in dim_weights.items() if v > 0],
+            key=lambda x: x[1], reverse=True
+        )[:4]
+        # Shortlist with scores
+        shortlist = [
+            {
+                "model_id": mid,
+                "provider": viable[mid]["provider"],
+                "utility_score": round(score, 4),
+                "capability": self._overall_capability(viable[mid]),
+                "est_cost_usd": round(self._estimate_cost(viable[mid], profile), 8),
+            }
+            for mid, score in scored[:5]
+        ]
+        return {
+            "selected_model":  winner_id,
+            "provider":        winner_spec["provider"],
+            "budget_mode":     budget_mode,
+            "lambda":          BUDGET_LAMBDA[budget_mode],
+            "primary_domain":  profile.primary_domain,
+            "query_dimensions": {k: round(v, 2) for k, v in top_dims},
+            "hard_constraints_applied": {
+                k: v for k, v in constraints.items()
+                if k in ("max_cost_per_request", "max_latency_ms", "min_context_tokens")
+            },
+            "feature_requirements": {
+                "tool_calling":  profile.requires_tool_calling,
+                "image_input":   profile.requires_image_input,
+                "json_mode":     profile.requires_json_mode,
+                "min_context":   profile.min_context_tokens,
+            },
+            "estimated_tokens": {
+                "input":  profile.estimated_input_tokens,
+                "output": profile.estimated_output_tokens,
+            },
+            "shortlist":           shortlist,
+            "candidates_evaluated": len(scored),
+            "registry_source":     winner_spec.get("live_patch", {}).get("source", "baseline"),
+        }

llmopt/registry/__init__.py CHANGED Viewed

	@@ -0,0 +1,4 @@

+"""LLMOpt registry package — hybrid model registry."""
+from llmopt.registry.hybrid_updater import HybridRegistryUpdater
+__all__ = ["HybridRegistryUpdater"]

llmopt/registry/hybrid_updater.py ADDED Viewed

	@@ -0,0 +1,267 @@

+"""
+LLMOpt — Hybrid Registry Updater
+=================================
+Strategy:
+  1. Load data/model_registry_v2.json as the authoritative capability baseline
+     (benchmark scores, context windows, feature support)
+  2. Fetch live data from OpenRouter API to patch:
+     - Current pricing (input/output cost per 1k)
+     - Model availability (is it still listed?)
+     - Any new models to flag for manual addition
+  3. Merge: registry baseline + live patch → runtime model pool
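+  4. Cache the merged result for CACHE_TTL_SECONDS (30 minutes) to avoid hammering the API
+The baseline is loaded at construction; the live patch is refreshed lazily by
+get_registry() once the cache has expired, so callers can drive it at startup
+and from a background refresh cycle.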
+"""
+import json
+import time
+import logging
+import os
+import copy
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Optional
+try:
+    import requests
+    REQUESTS_AVAILABLE = True
+except ImportError:
+    REQUESTS_AVAILABLE = False
+logger = logging.getLogger(__name__)
+# ── Constants ────────────────────────────────────────────────────────────────
+# V2 registry — benchmark-derived capability vectors
+REGISTRY_PATH = Path(__file__).parent.parent.parent / "data" / "model_registry_v2.json"
+OPENROUTER_MODELS_URL = "https://openrouter.ai/api/v1/models"
+CACHE_TTL_SECONDS = 1800  # 30 minutes — pricing changes infrequently
+# ── Provider → OpenRouter prefix map ─────────────────────────────────────────
+# OpenRouter model IDs follow the pattern: "provider/model-name"
+# This maps our registry provider names to OpenRouter's prefix scheme
+# NOTE(review): the code of this module shown here never reads
+# PROVIDER_PREFIX_MAP; lookups go through MODEL_ID_ALIASES only. Confirm
+# whether another module uses it.
+PROVIDER_PREFIX_MAP = {
+    "openai":    "openai/",
+    "anthropic": "anthropic/",
+    "google":    "google/",
+    "mistral":   "mistral/",
+    "deepseek":  "deepseek/",
+    "meta":      "meta-llama/",
+}
+# Maps our registry model_id → OpenRouter model id (where they differ)
+# A registry model is live-patched only if it has an entry here AND that
+# OpenRouter id appears in the fetched listing; anything else stays on
+# baseline pricing.
+# NOTE(review): verify each id against the live /models listing. OpenRouter
+# appears to publish Mistral models under "mistralai/", in which case the
+# "mistral/..." entries would never match. TODO confirm.
+MODEL_ID_ALIASES = {
+    "gpt-4o":               "openai/gpt-4o",
+    "gpt-4o-mini":          "openai/gpt-4o-mini",
+    "gpt-4.1":              "openai/gpt-4.1",
+    "gpt-4.1-mini":         "openai/gpt-4.1-mini",
+    "claude-opus-4-5":      "anthropic/claude-opus-4-5",
+    "claude-sonnet-4-5":    "anthropic/claude-sonnet-4-5",
+    "claude-haiku-3-5":     "anthropic/claude-3-5-haiku",
+    "gemini-2.5-pro":       "google/gemini-2.5-pro",
+    "gemini-2.5-flash":     "google/gemini-2.5-flash",
+    # NOTE(review): gemini-1.5-flash is aliased to the 2.5-flash listing, so it
+    # takes 2.5-flash pricing and context window. Presumably deliberate; confirm.
+    "gemini-1.5-flash":     "google/gemini-2.5-flash",
+    "mistral-large-latest": "mistral/mistral-large",
+    "mistral-small-latest": "mistral/mistral-small",
+    "deepseek-chat":        "deepseek/deepseek-chat",
+    "deepseek-reasoner":    "deepseek/deepseek-r1",
+    # Ollama is local — no OpenRouter equivalent
+}
+# ── Main Updater Class ────────────────────────────────────────────────────────
+class HybridRegistryUpdater:
+    """
+    Loads the registry JSON baseline and patches it with live OpenRouter data.
+    Usage:
+        updater = HybridRegistryUpdater()
+        registry = updater.get_registry()   # always returns a valid registry
+    """
+    def __init__(self, openrouter_api_key: Optional[str] = None):
+        self._baseline: dict = {}
+        self._live_patch: dict = {}          # openrouter_model_id → pricing dict
+        self._merged: dict = {}              # final merged runtime registry
+        self._cache_timestamp: float = 0.0
+        self._openrouter_key = openrouter_api_key or os.getenv("OPENROUTER_API_KEY", "")
+        # Load baseline immediately (synchronous — always available)
+        self._load_baseline()
+    # ── Public API ────────────────────────────────────────────────────────────
+    def get_registry(self, force_refresh: bool = False) -> dict:
+        """
+        Returns the merged registry dict.
+        Refreshes live patch if cache is stale or force_refresh=True.
+        Falls back gracefully to baseline if live fetch fails.
+        """
+        now = time.time()
+        cache_expired = (now - self._cache_timestamp) > CACHE_TTL_SECONDS
+        if force_refresh or cache_expired or not self._merged:
+            self._refresh_live_patch()
+            self._build_merged()
+            self._cache_timestamp = now
+        return self._merged
+    def get_model(self, model_id: str) -> Optional[dict]:
+        """Returns a single model's merged spec, or None if not found."""
+        registry = self.get_registry()
+        return registry.get("models", {}).get(model_id)
+    def list_available_for_providers(self, available_providers: set) -> dict:
+        """
+        Returns only models whose provider is in available_providers.
+        'ollama' is always included if it's in the registry (local, no key needed unless specified).
+        """
+        registry = self.get_registry()
+        return {
+            mid: spec
+            for mid, spec in registry.get("models", {}).items()
+            if spec.get("provider") in available_providers
+        }
+    def get_last_updated(self) -> str:
+        return datetime.fromtimestamp(self._cache_timestamp, tz=timezone.utc).isoformat() \
+               if self._cache_timestamp else "never"
+    # ── Internal: Load Baseline ───────────────────────────────────────────────
+    def _load_baseline(self):
+        """Load registry JSON from disk. Dies loudly if missing — it's required."""
+        if not REGISTRY_PATH.exists():
+            raise FileNotFoundError(
+                f"Model registry not found at {REGISTRY_PATH}. "
+                "This file is required for LLMOpt to function."
+            )
+        with open(REGISTRY_PATH, "r") as f:
+            self._baseline = json.load(f)
+        logger.info(
+            f"[Registry] Loaded baseline: {len(self._baseline.get('models', {}))} models"
+        )
+    # ── Internal: Live Patch from OpenRouter ─────────────────────────────────
+    def _refresh_live_patch(self):
+        """
+        Fetch current model list + pricing from OpenRouter.
+        Stores results in self._live_patch keyed by openrouter model id.
+        Silently skips on any error — baseline is always the fallback.
+        """
+        if not REQUESTS_AVAILABLE:
+            logger.warning("[Registry] 'requests' not installed. Skipping live patch.")
+            return
+        headers = {"Content-Type": "application/json"}
+        if self._openrouter_key:
+            headers["Authorization"] = f"Bearer {self._openrouter_key}"
+        try:
+            resp = requests.get(
+                OPENROUTER_MODELS_URL,
+                headers=headers,
+                timeout=8
+            )
+            resp.raise_for_status()
+            data = resp.json()
+        except Exception as e:
+            logger.warning(f"[Registry] Live fetch failed: {e}. Using baseline only.")
+            return
+        patch = {}
+        for model in data.get("data", []):
+            model_id = model.get("id", "")
+            pricing = model.get("pricing", {})
+            # OpenRouter returns pricing as strings like "0.000002" per token
+            # We normalize to per-1k-token cost (float)
+            try:
+                input_per_token  = float(pricing.get("prompt", 0) or 0)
+                output_per_token = float(pricing.get("completion", 0) or 0)
+                input_per_1k  = round(input_per_token * 1000, 8)
+                output_per_1k = round(output_per_token * 1000, 8)
+            except (ValueError, TypeError):
+                continue
+            patch[model_id] = {
+                "input_cost_per_1k":  input_per_1k,
+                "output_cost_per_1k": output_per_1k,
+                "context_window":     model.get("context_length"),
+                "available_on_openrouter": True,
+                "fetched_at": datetime.now(timezone.utc).isoformat(),
+            }
+        self._live_patch = patch
+        logger.info(f"[Registry] Live patch: {len(patch)} models from OpenRouter")
+        # Flag new models we don't have in registry (for manual review)
+        known_or_ids = set(MODEL_ID_ALIASES.values())
+        for or_id in patch:
+            if or_id not in known_or_ids:
+                logger.debug(f"[Registry] Unknown OpenRouter model (not in registry): {or_id}")
+    # ── Internal: Build Merged Registry ──────────────────────────────────────
+    def _build_merged(self):
+        """
+        Merge baseline + live_patch into self._merged.
+        Merge rules:
+        - Capability scores:     always from baseline (benchmark-sourced, stable)
+        - Feature support:       always from baseline
+        - Pricing (cost/1k):     live_patch wins if available, else baseline
+        - Context window:        live_patch wins if non-null, else baseline
+        - live_patch metadata:   stored in model["live_patch"] for observability
+        """
+        merged = copy.deepcopy(self._baseline)
+        models = merged.get("models", {})
+        for our_model_id, spec in models.items():
+            # Find the OpenRouter ID for this model
+            or_id = MODEL_ID_ALIASES.get(our_model_id)
+            if or_id and or_id in self._live_patch:
+                patch = self._live_patch[or_id]
+                # Price override
+                if patch.get("input_cost_per_1k") is not None:
+                    spec["input_cost_per_1k"]  = patch["input_cost_per_1k"]
+                if patch.get("output_cost_per_1k") is not None:
+                    spec["output_cost_per_1k"] = patch["output_cost_per_1k"]
+                # Context window override (OpenRouter may have more accurate values)
+                if patch.get("context_window"):
+                    spec["context_window"] = patch["context_window"]
+                # Store patch metadata for explainability
+                spec["live_patch"] = {
+                    "source": "openrouter",
+                    "fetched_at": patch.get("fetched_at"),
+                    "input_cost_per_1k":  patch["input_cost_per_1k"],
+                    "output_cost_per_1k": patch["output_cost_per_1k"],
+                }
+            else:
+                spec["live_patch"] = {"source": "baseline_only"}
+        merged["_runtime_meta"] = {
+            "last_live_fetch": datetime.now(timezone.utc).isoformat(),
+            "live_models_patched": sum(
+                1 for s in models.values()
+                if s.get("live_patch", {}).get("source") == "openrouter"
+            ),
+            "total_models": len(models),
+        }
+        self._merged = merged
+        logger.info(
+            f"[Registry] Merged registry ready: "
+            f"{merged['_runtime_meta']['live_models_patched']} live-patched, "
+            f"{merged['_runtime_meta']['total_models']} total"
+        )

llmopt/router/model_router.py CHANGED Viewed

@@ -53,26 +53,44 @@ class RoutedResponse:
 # LiteLLM uses "provider/model" strings for non-OpenAI providers
 _LITELLM_MODEL_MAP = {
-    # OpenAI — no prefix needed
-    "gpt-4o": "gpt-4o",
-    "gpt-4o-mini": "gpt-4o-mini",
-    "gpt-3.5-turbo": "gpt-3.5-turbo",
-    # Anthropic
-    "claude-3-5-haiku-20241022": "claude-3-5-haiku-20241022",
-    "claude-3-5-sonnet-20241022": "claude-3-5-sonnet-20241022",
-    "claude-3-haiku-20240307": "claude-3-haiku-20240307",
-    # Google
-    "gemini-1.5-flash": "gemini/gemini-1.5-flash",
-    "gemini-1.5-pro": "gemini/gemini-1.5-pro",
-    # Mistral
-    "mistral-small-latest": "mistral/mistral-small-latest",
     "mistral-large-latest": "mistral/mistral-large-latest",
-    # DeepSeek
-    "deepseek-chat": "deepseek/deepseek-chat",
-    # Ollama — handled separately
-    "llama3.2:3b": "ollama/llama3.2:3b",
-    "llama3.1:8b": "ollama/llama3.1:8b",
-    "llama3.1:70b": "ollama/llama3.1:70b",
 }
 _OLLAMA_PROVIDER = "ollama"

 # LiteLLM uses "provider/model" strings for non-OpenAI providers
 _LITELLM_MODEL_MAP = {
+    # ── OpenAI ───────────────────────────────────────────────────────────────
+    # no prefix needed for OpenAI models
+    "gpt-4o":               "gpt-4o",
+    "gpt-4o-mini":          "gpt-4o-mini",
+    "gpt-4.1":              "gpt-4.1",
+    "gpt-4.1-mini":         "gpt-4.1-mini",
+    "gpt-3.5-turbo":        "gpt-3.5-turbo",
+    # ── Anthropic ────────────────────────────────────────────────────────────
+    # NOTE(review): the V2 IDs below carry an "anthropic/" prefix while the
+    # legacy V1 IDs do not. Presumably LiteLLM resolves both forms; confirm.
+    "claude-opus-4-5":               "anthropic/claude-opus-4-5",
+    "claude-sonnet-4-5":             "anthropic/claude-sonnet-4-5",
+    "claude-haiku-3-5":              "anthropic/claude-3-5-haiku-20241022",
+    # Legacy Anthropic IDs (V1 registry)
+    "claude-3-5-haiku-20241022":     "claude-3-5-haiku-20241022",
+    "claude-3-5-sonnet-20241022":    "claude-3-5-sonnet-20241022",
+    "claude-3-haiku-20240307":       "claude-3-haiku-20240307",
+    # ── Google ───────────────────────────────────────────────────────────────
+    # NOTE(review): the gemini-1.5-* IDs are routed to the gemini-2.5-* models,
+    # so a request for 1.5 is served by 2.5. Presumably a deliberate upgrade
+    # alias; confirm.
+    "gemini-2.5-pro":       "gemini/gemini-2.5-pro",
+    "gemini-2.5-flash":     "gemini/gemini-2.5-flash",
+    "gemini-1.5-flash":     "gemini/gemini-2.5-flash",
+    "gemini-1.5-pro":       "gemini/gemini-2.5-pro",
+    # ── Mistral ──────────────────────────────────────────────────────────────
     "mistral-large-latest": "mistral/mistral-large-latest",
+    "mistral-small-latest": "mistral/mistral-small-latest",
+    # ── DeepSeek ─────────────────────────────────────────────────────────────
+    "deepseek-chat":        "deepseek/deepseek-chat",
+    "deepseek-reasoner":    "deepseek/deepseek-reasoner",
+    # ── Ollama (local) ───────────────────────────────────────────────────────
+    "llama3.3-70b":         "ollama/llama3.3:70b",
+    "llama3.2-vision":      "ollama/llama3.2-vision",
+    # Legacy Ollama IDs (V1 registry)
+    "llama3.2:3b":          "ollama/llama3.2:3b",
+    "llama3.1:8b":          "ollama/llama3.1:8b",
+    "llama3.1:70b":         "ollama/llama3.1:70b",
 }
 _OLLAMA_PROVIDER = "ollama"

llmopt/updater/__init__.py ADDED Viewed

	@@ -0,0 +1,4 @@

+"""LLMOpt updater package — adaptive runtime statistics."""
+from llmopt.updater.adaptive_updater import AdaptiveRuntimeUpdater
+__all__ = ["AdaptiveRuntimeUpdater"]

llmopt/updater/adaptive_updater.py ADDED Viewed

	@@ -0,0 +1,268 @@

+"""
+LLMOpt — Adaptive Runtime Statistics (EMA-Based)
+=================================================
+Lightweight online learning — NO RL, NO Optuna, NO GPU.
+What this updates at runtime:
+  - avg_latency_ms        (per model, exponential moving average)
+  - provider_reliability  (rolling success rate)
+  - observed_utility      (quality × cost-efficiency product, EMA)
+These stats are combined with registry capability scores at routing time
+to produce small dynamic adjustments. They do NOT overwrite benchmark scores.
+Formula:
+  EMA update:  s_new = α · s_old + (1 − α) · x_observed
+  where α = momentum (0.85–0.95 for stability)
+Confidence decay:
+  If a model hasn't been observed recently, its runtime adjustment
+  fades back toward 0 (no adjustment), so baseline registry scores take over.
+Storage: Simple JSON file (no DB needed for MVP).
+         Can be swapped for Redis or SQLite later.
+"""
+import json
+import math
+import logging
+import os
+import time
+from dataclasses import dataclass, field, asdict
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Optional
+logger = logging.getLogger(__name__)
+# Runtime stats persisted to data/ at project root
+STATS_PATH = Path(__file__).parent.parent.parent / "data" / "runtime_stats.json"
+# EMA momentum — higher = slower to update (more stable)
+# 0.90 means each new observation contributes 10% of the updated value
+LATENCY_ALPHA      = 0.90
+RELIABILITY_ALPHA  = 0.92
+UTILITY_ALPHA      = 0.88
+# Half-life of the confidence time decay: after this many seconds without an
+# observation the decay factor is 0.5. It keeps halving toward 0 but never
+# reaches it.
+CONFIDENCE_DECAY_HALF_LIFE_SECONDS = 3 * 24 * 3600  # 3 days
+# Minimum observations before runtime stats influence routing
+MIN_OBS_FOR_INFLUENCE = 5
+@dataclass
+class ModelRuntimeStats:
+    """
+    Per-model runtime statistics kept by AdaptiveRuntimeUpdater.
+    Instances are serialized with asdict() and rebuilt with
+    ModelRuntimeStats(**data), so field names are part of the on-disk format.
+    """
+    model_id:               str
+    # Incremented once per recorded outcome.
+    obs_count:              int   = 0
+    ema_latency_ms:         float = 0.0    # 0 = no data yet
+    ema_reliability:        float = 1.0    # starts optimistic
+    ema_utility:            float = 0.0    # 0 = no feedback yet
+    last_observed_ts:       float = 0.0    # unix timestamp
+    confidence:             float = 0.0    # 0–1, grows with observations
+    # Raw accumulators for logging
+    total_successes:        int   = 0
+    total_failures:         int   = 0
+    total_requests:         int   = 0
+class AdaptiveRuntimeUpdater:
+    """
+    Tracks per-model runtime statistics and provides small adjustments
+    to utility scores during routing.
+    Usage:
+        updater = AdaptiveRuntimeUpdater()
+        updater.record_outcome(model_id, latency_ms=1200, success=True, quality_score=8.5)
+        adjustment = updater.get_utility_adjustment(model_id)
+    """
+    def __init__(self, stats_path: Optional[Path] = None):
+        self._path = stats_path or STATS_PATH
+        self._stats: dict[str, ModelRuntimeStats] = {}
+        self._load()
+    # ── Public API ────────────────────────────────────────────────────────────
+    def record_outcome(
+        self,
+        model_id:        str,
+        latency_ms:      Optional[float] = None,
+        success:         bool = True,
+        quality_score:   Optional[float] = None,   # 1–10 from LLMJudge, or None
+        cost_usd:        Optional[float] = None,
+    ):
+        """
+        Record a single routing outcome for a model.
+        Called after each LLM API response.
+        quality_score: optional 1–10 score (from LLMJudge or user feedback)
+        """
+        stats = self._get_or_create(model_id)
+        now   = time.time()
+        stats.obs_count      += 1
+        stats.total_requests += 1
+        stats.last_observed_ts = now
+        # ── Latency EMA ──────────────────────────────────────────────────────
+        if latency_ms is not None and latency_ms > 0:
+            if stats.ema_latency_ms == 0.0:
+                # Cold start: initialize to first observation
+                stats.ema_latency_ms = latency_ms
+            else:
+                stats.ema_latency_ms = (
+                    LATENCY_ALPHA * stats.ema_latency_ms +
+                    (1 - LATENCY_ALPHA) * latency_ms
+                )
+        # ── Reliability EMA ──────────────────────────────────────────────────
+        outcome_val = 1.0 if success else 0.0
+        if success:
+            stats.total_successes += 1
+        else:
+            stats.total_failures  += 1
+        stats.ema_reliability = (
+            RELIABILITY_ALPHA * stats.ema_reliability +
+            (1 - RELIABILITY_ALPHA) * outcome_val
+        )
+        # ── Utility EMA (from quality + cost efficiency) ──────────────────
+        if quality_score is not None and cost_usd is not None and cost_usd > 0:
+            # Observed utility = quality (normalized to 0–1) × cost-efficiency
+            # cost_efficiency: higher means cheaper relative to quality delivered
+            q_norm    = quality_score / 10.0
+            cost_eff  = 1.0 / (1.0 + cost_usd * 100)   # sigmoid-like penalty
+            obs_util  = q_norm * (0.7 + 0.3 * cost_eff)  # quality-dominant
+            if stats.ema_utility == 0.0:
+                stats.ema_utility = obs_util
+            else:
+                stats.ema_utility = (
+                    UTILITY_ALPHA * stats.ema_utility +
+                    (1 - UTILITY_ALPHA) * obs_util
+                )
+        elif quality_score is not None:
+            obs_util = quality_score / 10.0
+            if stats.ema_utility == 0.0:
+                stats.ema_utility = obs_util
+            else:
+                stats.ema_utility = (
+                    UTILITY_ALPHA * stats.ema_utility +
+                    (1 - UTILITY_ALPHA) * obs_util
+                )
+        # ── Confidence ───────────────────────────────────────────────────────
+        # Grows with observations (saturates at 1.0 after ~50 obs)
+        stats.confidence = min(1.0, stats.obs_count / MIN_OBS_FOR_INFLUENCE) * \
+                           self._time_decay_factor(stats.last_observed_ts)
+        logger.debug(
+            f"[Runtime] {model_id}: lat={stats.ema_latency_ms:.0f}ms "
+            f"rel={stats.ema_reliability:.3f} util={stats.ema_utility:.3f} "
+            f"conf={stats.confidence:.3f} n={stats.obs_count}"
+        )
+        # Persist every 10 observations to avoid too many writes
+        if stats.obs_count % 10 == 0:
+            self._save()
+    def get_utility_adjustment(self, model_id: str) -> float:
+        """
+        Returns a small adjustment ∈ [-0.15, +0.15] to add to the
+        utility score during routing.
+        Returns 0.0 if we don't have enough observations yet
+        (< MIN_OBS_FOR_INFLUENCE), ensuring cold start doesn't distort routing.
+        The adjustment is intentionally small — runtime observations refine
+        the routing, they don't override benchmark-based capability scores.
+        """
+        stats = self._stats.get(model_id)
+        if not stats or stats.obs_count < MIN_OBS_FOR_INFLUENCE:
+            return 0.0
+        conf = stats.confidence
+        if conf < 0.1:
+            return 0.0
+        # Reliability penalty (poor reliability → negative adjustment)
+        reliability_adj = (stats.ema_reliability - 0.95) * 0.5
+        # e.g. 90% reliability → (0.90 - 0.95) * 0.5 = -0.025
+        # Utility signal (if we have quality feedback)
+        utility_adj = 0.0
+        if stats.ema_utility > 0:
+            utility_adj = (stats.ema_utility - 0.7) * 0.2
+            # e.g. avg quality 8/10 = 0.8 → (0.8 - 0.7) * 0.2 = +0.02
+        total_adj = (reliability_adj + utility_adj) * conf
+        return max(-0.15, min(0.15, total_adj))
+    def get_latency_estimate(self, model_id: str) -> Optional[float]:
+        """Returns EMA latency estimate if available, else None."""
+        stats = self._stats.get(model_id)
+        if stats and stats.ema_latency_ms > 0 and stats.obs_count >= 3:
+            return stats.ema_latency_ms
+        return None
+    def get_stats_summary(self, model_id: str) -> dict:
+        """Returns full stats dict for observability / logging."""
+        stats = self._stats.get(model_id)
+        if not stats:
+            return {"model_id": model_id, "obs_count": 0, "status": "no_data"}
+        return {
+            **asdict(stats),
+            "success_rate": (
+                stats.total_successes / stats.total_requests
+                if stats.total_requests > 0 else None
+            ),
+        }
+    def save(self):
+        """Explicitly save stats to disk."""
+        self._save()
+    # ── Internal ──────────────────────────────────────────────────────────────
+    def _get_or_create(self, model_id: str) -> ModelRuntimeStats:
+        if model_id not in self._stats:
+            self._stats[model_id] = ModelRuntimeStats(model_id=model_id)
+        return self._stats[model_id]
+    def _time_decay_factor(self, last_ts: float) -> float:
+        """
+        Returns 1.0 if recently observed, decays toward 0 if stale.
+        Uses exponential decay with CONFIDENCE_DECAY_HALF_LIFE_SECONDS.
+        """
+        if last_ts == 0:
+            return 0.0
+        elapsed = time.time() - last_ts
+        half_life = CONFIDENCE_DECAY_HALF_LIFE_SECONDS
+        return math.exp(-math.log(2) * elapsed / half_life)
+    def _load(self):
+        if not self._path.exists():
+            logger.info("[Runtime] No existing stats file. Starting fresh.")
+            return
+        try:
+            with open(self._path, "r") as f:
+                raw = json.load(f)
+            for mid, data in raw.items():
+                self._stats[mid] = ModelRuntimeStats(**data)
+            logger.info(f"[Runtime] Loaded stats for {len(self._stats)} models.")
+        except Exception as e:
+            logger.warning(f"[Runtime] Failed to load stats: {e}. Starting fresh.")
+    def _save(self):
+        self._path.parent.mkdir(parents=True, exist_ok=True)
+        try:
+            with open(self._path, "w") as f:
+                json.dump(
+                    {mid: asdict(s) for mid, s in self._stats.items()},
+                    f, indent=2
+                )
+        except Exception as e:
+            logger.warning(f"[Runtime] Failed to save stats: {e}")