Spaces:
Running
Running
feat: initialize core LLMOpt framework including model routing, optimization engines, and frontend dashboard infrastructure.
Browse files. This view is limited to 50 files because it contains too many changes. See raw diff
- .dockerignore +9 -0
- .env.example +38 -8
- .gitignore +8 -0
- Dockerfile +0 -2
- data/model_registry.json +20 -20
- data/model_registry_v2.json +507 -0
- docs/design.md +361 -0
- frontend/.gitignore +24 -0
- frontend/README.md +50 -0
- frontend/eslint.config.js +28 -0
- frontend/index.html +16 -0
- frontend/package-lock.json +0 -0
- frontend/package.json +38 -0
- frontend/public/vite.svg +1 -0
- frontend/src/App.css +42 -0
- frontend/src/App.tsx +217 -0
- frontend/src/api.ts +153 -0
- frontend/src/assets/react.svg +1 -0
- frontend/src/index.css +68 -0
- frontend/src/main.tsx +10 -0
- frontend/src/pages/Analytics.tsx +385 -0
- frontend/src/pages/Login.tsx +209 -0
- frontend/src/pages/ModelRegistry.tsx +354 -0
- frontend/src/pages/Playground.tsx +606 -0
- frontend/src/pages/Settings.tsx +349 -0
- frontend/src/store.ts +88 -0
- frontend/src/theme.css +1982 -0
- frontend/src/types.ts +170 -0
- frontend/src/vite-env.d.ts +1 -0
- frontend/tsconfig.app.json +26 -0
- frontend/tsconfig.json +7 -0
- frontend/tsconfig.node.json +24 -0
- frontend/vite.config.ts +40 -0
- llmopt/analyzer/query_analyzer.py +15 -12
- llmopt/api/app.py +580 -13
- llmopt/api/crud.py +59 -0
- llmopt/api/security.py +186 -0
- llmopt/cache/redis_client.py +41 -0
- llmopt/core.py +208 -37
- llmopt/db/models.py +47 -0
- llmopt/db/session.py +34 -0
- llmopt/engine/__init__.py +15 -0
- llmopt/engine/llmopt_engine.py +275 -0
- llmopt/engine/optimization_engine.py +9 -2
- llmopt/engine/utility_engine.py +665 -0
- llmopt/registry/__init__.py +4 -0
- llmopt/registry/hybrid_updater.py +267 -0
- llmopt/router/model_router.py +37 -19
- llmopt/updater/__init__.py +4 -0
- llmopt/updater/adaptive_updater.py +268 -0
.dockerignore
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
.git
|
| 2 |
+
.github
|
| 3 |
+
__pycache__/
|
| 4 |
+
*.py[cod]
|
| 5 |
+
.pytest_cache/
|
| 6 |
+
llmopt.db
|
| 7 |
+
config/.env
|
| 8 |
+
tests/
|
| 9 |
+
.env
|
.env.example
CHANGED
|
@@ -1,12 +1,42 @@
|
|
| 1 |
# LLMOpt Environment Variables
|
| 2 |
|
| 3 |
-
#
|
| 4 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
|
| 6 |
-
#
|
| 7 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
|
| 9 |
-
#
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
# LLMOpt Environment Variables
|
| 2 |
|
| 3 |
+
# ==========================================
|
| 4 |
+
# 1. Database & Redis Session Cache (Production)
|
| 5 |
+
# ==========================================
|
| 6 |
+
# PostgreSQL Database URL (e.g. Neon, Supabase, etc.)
|
| 7 |
+
# If not set, LLMOpt defaults to local SQLite.
|
| 8 |
+
DATABASE_URL=postgresql://user:password@ep-cool-fog-12345.aws.neon.tech/neondb?sslmode=require
|
| 9 |
|
| 10 |
+
# Upstash Redis or Redis Cloud connection string (Mandatory for sessions)
|
| 11 |
+
REDIS_URL=redis://default:password@endpoint.upstash.io:30000
|
| 12 |
+
|
| 13 |
+
# 32-byte base64-encoded session key for encrypting user API keys in transit/at rest.
|
| 14 |
+
# Generate in python with: cryptography.fernet.Fernet.generate_key().decode()
|
| 15 |
+
SESSION_SECRET_KEY=generate_your_own_32_byte_base64_key_here
|
| 16 |
+
|
| 17 |
+
# Session Time-to-Live (TTL) in seconds (default: 7200 seconds / 2 hours)
|
| 18 |
+
SESSION_TTL=7200
|
| 19 |
+
|
| 20 |
+
# ==========================================
|
| 21 |
+
# 2. Third-Party OAuth Sign-In (Optional)
|
| 22 |
+
# ==========================================
|
| 23 |
+
# Google OAuth
|
| 24 |
+
GOOGLE_CLIENT_ID=your_google_client_id.apps.googleusercontent.com
|
| 25 |
+
GOOGLE_CLIENT_SECRET=GOCSPX-your_google_client_secret_here
|
| 26 |
|
| 27 |
+
# GitHub OAuth
|
| 28 |
+
GITHUB_CLIENT_ID=your_github_client_id_here
|
| 29 |
+
GITHUB_CLIENT_SECRET=your_github_client_secret_here
|
| 30 |
+
|
| 31 |
+
# The base URL of the frontend for OAuth redirect callbacks (e.g. your Vercel URL)
|
| 32 |
+
REDIRECT_URI_HOST=https://your-frontend.vercel.app
|
| 33 |
+
|
| 34 |
+
# ==========================================
|
| 35 |
+
# 3. Direct LLM Provider Keys (Fallback / Local run only)
|
| 36 |
+
# ==========================================
|
| 37 |
+
# In Bring Your Own Key (BYOK) mode, these are not stored on the server.
|
| 38 |
+
# Provide them here only if running locally or using server-wide default keys.
|
| 39 |
+
OPENAI_API_KEY=your_openai_api_key_here
|
| 40 |
+
ANTHROPIC_API_KEY=your_anthropic_api_key_here
|
| 41 |
+
GEMINI_API_KEY=your_gemini_api_key_here
|
| 42 |
+
OLLAMA_API_BASE=http://localhost:11434
|
.gitignore
CHANGED
|
@@ -51,3 +51,11 @@ coverage.xml
|
|
| 51 |
.idea/
|
| 52 |
*.swp
|
| 53 |
*.swo
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
.idea/
|
| 52 |
*.swp
|
| 53 |
*.swo
|
| 54 |
+
|
| 55 |
+
# Runtime/Database files
|
| 56 |
+
llmopt.db
|
| 57 |
+
data/runtime_stats.json
|
| 58 |
+
|
| 59 |
+
# Local Environment secrets
|
| 60 |
+
/config/.env
|
| 61 |
+
|
Dockerfile
CHANGED
|
@@ -2,10 +2,8 @@ FROM python:3.10-slim
|
|
| 2 |
|
| 3 |
# Install system dependencies
|
| 4 |
# build-essential is needed for some ML package wheels
|
| 5 |
-
# redis-server is needed for the local Semantic Caching layer
|
| 6 |
RUN apt-get update && apt-get install -y \
|
| 7 |
build-essential \
|
| 8 |
-
redis-server \
|
| 9 |
&& rm -rf /var/lib/apt/lists/*
|
| 10 |
|
| 11 |
# Set up a new user named "user" with user ID 1000 (Mandatory for Hugging Face Spaces)
|
|
|
|
| 2 |
|
| 3 |
# Install system dependencies
|
| 4 |
# build-essential is needed for some ML package wheels
|
|
|
|
| 5 |
RUN apt-get update && apt-get install -y \
|
| 6 |
build-essential \
|
|
|
|
| 7 |
&& rm -rf /var/lib/apt/lists/*
|
| 8 |
|
| 9 |
# Set up a new user named "user" with user ID 1000 (Mandatory for Hugging Face Spaces)
|
data/model_registry.json
CHANGED
|
@@ -84,32 +84,32 @@
|
|
| 84 |
"notes": "Cheapest Anthropic model. Good for classification, summarization."
|
| 85 |
},
|
| 86 |
{
|
| 87 |
-
"model_name": "gemini-
|
| 88 |
"provider": "google",
|
| 89 |
-
"input_cost_per_1k": 0.
|
| 90 |
-
"output_cost_per_1k": 0.
|
| 91 |
-
"context_window":
|
| 92 |
-
"reasoning_score": 0.
|
| 93 |
-
"coding_score": 0.
|
| 94 |
-
"math_score": 0.
|
| 95 |
-
"instruction_following_score": 0.
|
| 96 |
-
"latency_score": 0.
|
| 97 |
-
"max_complexity": 0.
|
| 98 |
-
"notes": "
|
| 99 |
},
|
| 100 |
{
|
| 101 |
-
"model_name": "gemini-
|
| 102 |
"provider": "google",
|
| 103 |
"input_cost_per_1k": 0.00125,
|
| 104 |
-
"output_cost_per_1k": 0.
|
| 105 |
-
"context_window":
|
| 106 |
-
"reasoning_score": 0.
|
| 107 |
-
"coding_score": 0.
|
| 108 |
-
"math_score": 0.
|
| 109 |
-
"instruction_following_score": 0.
|
| 110 |
-
"latency_score": 0.
|
| 111 |
"max_complexity": 0.95,
|
| 112 |
-
"notes": "
|
| 113 |
},
|
| 114 |
{
|
| 115 |
"model_name": "mistral-small-latest",
|
|
|
|
| 84 |
"notes": "Cheapest Anthropic model. Good for classification, summarization."
|
| 85 |
},
|
| 86 |
{
|
| 87 |
+
"model_name": "gemini-2.5-flash",
|
| 88 |
"provider": "google",
|
| 89 |
+
"input_cost_per_1k": 0.00015,
|
| 90 |
+
"output_cost_per_1k": 0.0006,
|
| 91 |
+
"context_window": 1048576,
|
| 92 |
+
"reasoning_score": 0.83,
|
| 93 |
+
"coding_score": 0.82,
|
| 94 |
+
"math_score": 0.84,
|
| 95 |
+
"instruction_following_score": 0.85,
|
| 96 |
+
"latency_score": 0.90,
|
| 97 |
+
"max_complexity": 0.83,
|
| 98 |
+
"notes": "Very cheap and fast Gemini 2.5 model."
|
| 99 |
},
|
| 100 |
{
|
| 101 |
+
"model_name": "gemini-2.5-pro",
|
| 102 |
"provider": "google",
|
| 103 |
"input_cost_per_1k": 0.00125,
|
| 104 |
+
"output_cost_per_1k": 0.010,
|
| 105 |
+
"context_window": 1048576,
|
| 106 |
+
"reasoning_score": 0.94,
|
| 107 |
+
"coding_score": 0.92,
|
| 108 |
+
"math_score": 0.93,
|
| 109 |
+
"instruction_following_score": 0.92,
|
| 110 |
+
"latency_score": 0.75,
|
| 111 |
"max_complexity": 0.95,
|
| 112 |
+
"notes": "Powerful Gemini 2.5 model with massive context window."
|
| 113 |
},
|
| 114 |
{
|
| 115 |
"model_name": "mistral-small-latest",
|
data/model_registry_v2.json
ADDED
|
@@ -0,0 +1,507 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_meta": {
|
| 3 |
+
"version": "2.0.0",
|
| 4 |
+
"description": "LLMOpt Utility-Based Model Registry. Scores sourced from LMSYS Arena, Artificial Analysis, HumanEval, MMLU-Pro, MATH, IFEval benchmarks. Pricing from provider docs + OpenRouter. Updated via hybrid fetcher.",
|
| 5 |
+
"last_updated": "2025-01-01T00:00:00Z",
|
| 6 |
+
"score_range": "All capability scores normalized 0.0–1.0",
|
| 7 |
+
"pricing_unit": "USD per 1000 tokens"
|
| 8 |
+
},
|
| 9 |
+
"models": {
|
| 10 |
+
"gpt-4o": {
|
| 11 |
+
"provider": "openai",
|
| 12 |
+
"model_family": "gpt-4o",
|
| 13 |
+
"context_window": 128000,
|
| 14 |
+
"max_output_tokens": 16384,
|
| 15 |
+
"input_cost_per_1k": 0.0025,
|
| 16 |
+
"output_cost_per_1k": 0.010,
|
| 17 |
+
"avg_latency_ms": 1800,
|
| 18 |
+
"tokens_per_second": 80,
|
| 19 |
+
"capabilities": {
|
| 20 |
+
"reasoning": 0.92,
|
| 21 |
+
"coding": 0.91,
|
| 22 |
+
"math": 0.87,
|
| 23 |
+
"creativity": 0.88,
|
| 24 |
+
"factuality": 0.89,
|
| 25 |
+
"instruction_following": 0.94,
|
| 26 |
+
"long_context": 0.85,
|
| 27 |
+
"multilingual": 0.84,
|
| 28 |
+
"tool_use": 0.93,
|
| 29 |
+
"summarization": 0.90,
|
| 30 |
+
"conversation": 0.91
|
| 31 |
+
},
|
| 32 |
+
"features": {
|
| 33 |
+
"tool_calling": true,
|
| 34 |
+
"json_mode": true,
|
| 35 |
+
"streaming": true,
|
| 36 |
+
"image_input": true,
|
| 37 |
+
"function_calling": true
|
| 38 |
+
},
|
| 39 |
+
"benchmark_sources": ["lmsys_arena", "mmlu_pro", "humaneval", "math_benchmark", "ifeval"],
|
| 40 |
+
"pricing_last_updated": "2025-01-01T00:00:00Z",
|
| 41 |
+
"live_patch": {}
|
| 42 |
+
},
|
| 43 |
+
"gpt-4o-mini": {
|
| 44 |
+
"provider": "openai",
|
| 45 |
+
"model_family": "gpt-4o",
|
| 46 |
+
"context_window": 128000,
|
| 47 |
+
"max_output_tokens": 16384,
|
| 48 |
+
"input_cost_per_1k": 0.00015,
|
| 49 |
+
"output_cost_per_1k": 0.0006,
|
| 50 |
+
"avg_latency_ms": 900,
|
| 51 |
+
"tokens_per_second": 120,
|
| 52 |
+
"capabilities": {
|
| 53 |
+
"reasoning": 0.78,
|
| 54 |
+
"coding": 0.76,
|
| 55 |
+
"math": 0.72,
|
| 56 |
+
"creativity": 0.74,
|
| 57 |
+
"factuality": 0.75,
|
| 58 |
+
"instruction_following": 0.82,
|
| 59 |
+
"long_context": 0.76,
|
| 60 |
+
"multilingual": 0.72,
|
| 61 |
+
"tool_use": 0.80,
|
| 62 |
+
"summarization": 0.78,
|
| 63 |
+
"conversation": 0.82
|
| 64 |
+
},
|
| 65 |
+
"features": {
|
| 66 |
+
"tool_calling": true,
|
| 67 |
+
"json_mode": true,
|
| 68 |
+
"streaming": true,
|
| 69 |
+
"image_input": true,
|
| 70 |
+
"function_calling": true
|
| 71 |
+
},
|
| 72 |
+
"benchmark_sources": ["lmsys_arena", "mmlu_pro", "humaneval"],
|
| 73 |
+
"pricing_last_updated": "2025-01-01T00:00:00Z",
|
| 74 |
+
"live_patch": {}
|
| 75 |
+
},
|
| 76 |
+
"gpt-4.1": {
|
| 77 |
+
"provider": "openai",
|
| 78 |
+
"model_family": "gpt-4.1",
|
| 79 |
+
"context_window": 1047576,
|
| 80 |
+
"max_output_tokens": 32768,
|
| 81 |
+
"input_cost_per_1k": 0.002,
|
| 82 |
+
"output_cost_per_1k": 0.008,
|
| 83 |
+
"avg_latency_ms": 1600,
|
| 84 |
+
"tokens_per_second": 85,
|
| 85 |
+
"capabilities": {
|
| 86 |
+
"reasoning": 0.93,
|
| 87 |
+
"coding": 0.95,
|
| 88 |
+
"math": 0.88,
|
| 89 |
+
"creativity": 0.87,
|
| 90 |
+
"factuality": 0.90,
|
| 91 |
+
"instruction_following": 0.95,
|
| 92 |
+
"long_context": 0.97,
|
| 93 |
+
"multilingual": 0.85,
|
| 94 |
+
"tool_use": 0.95,
|
| 95 |
+
"summarization": 0.92,
|
| 96 |
+
"conversation": 0.90
|
| 97 |
+
},
|
| 98 |
+
"features": {
|
| 99 |
+
"tool_calling": true,
|
| 100 |
+
"json_mode": true,
|
| 101 |
+
"streaming": true,
|
| 102 |
+
"image_input": true,
|
| 103 |
+
"function_calling": true
|
| 104 |
+
},
|
| 105 |
+
"benchmark_sources": ["openai_evals", "swe_bench", "humaneval"],
|
| 106 |
+
"pricing_last_updated": "2025-01-01T00:00:00Z",
|
| 107 |
+
"live_patch": {}
|
| 108 |
+
},
|
| 109 |
+
"gpt-4.1-mini": {
|
| 110 |
+
"provider": "openai",
|
| 111 |
+
"model_family": "gpt-4.1",
|
| 112 |
+
"context_window": 1047576,
|
| 113 |
+
"max_output_tokens": 32768,
|
| 114 |
+
"input_cost_per_1k": 0.0004,
|
| 115 |
+
"output_cost_per_1k": 0.0016,
|
| 116 |
+
"avg_latency_ms": 750,
|
| 117 |
+
"tokens_per_second": 140,
|
| 118 |
+
"capabilities": {
|
| 119 |
+
"reasoning": 0.80,
|
| 120 |
+
"coding": 0.82,
|
| 121 |
+
"math": 0.75,
|
| 122 |
+
"creativity": 0.76,
|
| 123 |
+
"factuality": 0.78,
|
| 124 |
+
"instruction_following": 0.85,
|
| 125 |
+
"long_context": 0.92,
|
| 126 |
+
"multilingual": 0.74,
|
| 127 |
+
"tool_use": 0.83,
|
| 128 |
+
"summarization": 0.80,
|
| 129 |
+
"conversation": 0.83
|
| 130 |
+
},
|
| 131 |
+
"features": {
|
| 132 |
+
"tool_calling": true,
|
| 133 |
+
"json_mode": true,
|
| 134 |
+
"streaming": true,
|
| 135 |
+
"image_input": true,
|
| 136 |
+
"function_calling": true
|
| 137 |
+
},
|
| 138 |
+
"benchmark_sources": ["openai_evals"],
|
| 139 |
+
"pricing_last_updated": "2025-01-01T00:00:00Z",
|
| 140 |
+
"live_patch": {}
|
| 141 |
+
},
|
| 142 |
+
"claude-opus-4-5": {
|
| 143 |
+
"provider": "anthropic",
|
| 144 |
+
"model_family": "claude-4",
|
| 145 |
+
"context_window": 200000,
|
| 146 |
+
"max_output_tokens": 32000,
|
| 147 |
+
"input_cost_per_1k": 0.015,
|
| 148 |
+
"output_cost_per_1k": 0.075,
|
| 149 |
+
"avg_latency_ms": 2500,
|
| 150 |
+
"tokens_per_second": 65,
|
| 151 |
+
"capabilities": {
|
| 152 |
+
"reasoning": 0.96,
|
| 153 |
+
"coding": 0.95,
|
| 154 |
+
"math": 0.91,
|
| 155 |
+
"creativity": 0.95,
|
| 156 |
+
"factuality": 0.93,
|
| 157 |
+
"instruction_following": 0.96,
|
| 158 |
+
"long_context": 0.94,
|
| 159 |
+
"multilingual": 0.87,
|
| 160 |
+
"tool_use": 0.94,
|
| 161 |
+
"summarization": 0.95,
|
| 162 |
+
"conversation": 0.96
|
| 163 |
+
},
|
| 164 |
+
"features": {
|
| 165 |
+
"tool_calling": true,
|
| 166 |
+
"json_mode": true,
|
| 167 |
+
"streaming": true,
|
| 168 |
+
"image_input": true,
|
| 169 |
+
"function_calling": true
|
| 170 |
+
},
|
| 171 |
+
"benchmark_sources": ["lmsys_arena", "mmlu_pro", "swe_bench", "humaneval", "math_benchmark"],
|
| 172 |
+
"pricing_last_updated": "2025-01-01T00:00:00Z",
|
| 173 |
+
"live_patch": {}
|
| 174 |
+
},
|
| 175 |
+
"claude-sonnet-4-5": {
|
| 176 |
+
"provider": "anthropic",
|
| 177 |
+
"model_family": "claude-4",
|
| 178 |
+
"context_window": 200000,
|
| 179 |
+
"max_output_tokens": 16000,
|
| 180 |
+
"input_cost_per_1k": 0.003,
|
| 181 |
+
"output_cost_per_1k": 0.015,
|
| 182 |
+
"avg_latency_ms": 1400,
|
| 183 |
+
"tokens_per_second": 90,
|
| 184 |
+
"capabilities": {
|
| 185 |
+
"reasoning": 0.91,
|
| 186 |
+
"coding": 0.93,
|
| 187 |
+
"math": 0.86,
|
| 188 |
+
"creativity": 0.90,
|
| 189 |
+
"factuality": 0.90,
|
| 190 |
+
"instruction_following": 0.93,
|
| 191 |
+
"long_context": 0.91,
|
| 192 |
+
"multilingual": 0.84,
|
| 193 |
+
"tool_use": 0.92,
|
| 194 |
+
"summarization": 0.91,
|
| 195 |
+
"conversation": 0.92
|
| 196 |
+
},
|
| 197 |
+
"features": {
|
| 198 |
+
"tool_calling": true,
|
| 199 |
+
"json_mode": true,
|
| 200 |
+
"streaming": true,
|
| 201 |
+
"image_input": true,
|
| 202 |
+
"function_calling": true
|
| 203 |
+
},
|
| 204 |
+
"benchmark_sources": ["lmsys_arena", "mmlu_pro", "swe_bench", "humaneval"],
|
| 205 |
+
"pricing_last_updated": "2025-01-01T00:00:00Z",
|
| 206 |
+
"live_patch": {}
|
| 207 |
+
},
|
| 208 |
+
"claude-haiku-3-5": {
|
| 209 |
+
"provider": "anthropic",
|
| 210 |
+
"model_family": "claude-3.5",
|
| 211 |
+
"context_window": 200000,
|
| 212 |
+
"max_output_tokens": 8192,
|
| 213 |
+
"input_cost_per_1k": 0.0008,
|
| 214 |
+
"output_cost_per_1k": 0.004,
|
| 215 |
+
"avg_latency_ms": 700,
|
| 216 |
+
"tokens_per_second": 150,
|
| 217 |
+
"capabilities": {
|
| 218 |
+
"reasoning": 0.74,
|
| 219 |
+
"coding": 0.77,
|
| 220 |
+
"math": 0.68,
|
| 221 |
+
"creativity": 0.72,
|
| 222 |
+
"factuality": 0.73,
|
| 223 |
+
"instruction_following": 0.80,
|
| 224 |
+
"long_context": 0.78,
|
| 225 |
+
"multilingual": 0.72,
|
| 226 |
+
"tool_use": 0.78,
|
| 227 |
+
"summarization": 0.76,
|
| 228 |
+
"conversation": 0.80
|
| 229 |
+
},
|
| 230 |
+
"features": {
|
| 231 |
+
"tool_calling": true,
|
| 232 |
+
"json_mode": true,
|
| 233 |
+
"streaming": true,
|
| 234 |
+
"image_input": true,
|
| 235 |
+
"function_calling": true
|
| 236 |
+
},
|
| 237 |
+
"benchmark_sources": ["lmsys_arena", "humaneval"],
|
| 238 |
+
"pricing_last_updated": "2025-01-01T00:00:00Z",
|
| 239 |
+
"live_patch": {}
|
| 240 |
+
},
|
| 241 |
+
"gemini-2.5-pro": {
|
| 242 |
+
"provider": "google",
|
| 243 |
+
"model_family": "gemini-2.5",
|
| 244 |
+
"context_window": 1048576,
|
| 245 |
+
"max_output_tokens": 65536,
|
| 246 |
+
"input_cost_per_1k": 0.00125,
|
| 247 |
+
"output_cost_per_1k": 0.010,
|
| 248 |
+
"avg_latency_ms": 2000,
|
| 249 |
+
"tokens_per_second": 75,
|
| 250 |
+
"capabilities": {
|
| 251 |
+
"reasoning": 0.94,
|
| 252 |
+
"coding": 0.92,
|
| 253 |
+
"math": 0.93,
|
| 254 |
+
"creativity": 0.88,
|
| 255 |
+
"factuality": 0.91,
|
| 256 |
+
"instruction_following": 0.92,
|
| 257 |
+
"long_context": 0.98,
|
| 258 |
+
"multilingual": 0.90,
|
| 259 |
+
"tool_use": 0.89,
|
| 260 |
+
"summarization": 0.92,
|
| 261 |
+
"conversation": 0.89
|
| 262 |
+
},
|
| 263 |
+
"features": {
|
| 264 |
+
"tool_calling": true,
|
| 265 |
+
"json_mode": true,
|
| 266 |
+
"streaming": true,
|
| 267 |
+
"image_input": true,
|
| 268 |
+
"function_calling": true
|
| 269 |
+
},
|
| 270 |
+
"benchmark_sources": ["lmsys_arena", "mmlu_pro", "math_benchmark", "humaneval"],
|
| 271 |
+
"pricing_last_updated": "2025-01-01T00:00:00Z",
|
| 272 |
+
"live_patch": {}
|
| 273 |
+
},
|
| 274 |
+
"gemini-2.5-flash": {
|
| 275 |
+
"provider": "google",
|
| 276 |
+
"model_family": "gemini-2.5",
|
| 277 |
+
"context_window": 1048576,
|
| 278 |
+
"max_output_tokens": 65536,
|
| 279 |
+
"input_cost_per_1k": 0.00015,
|
| 280 |
+
"output_cost_per_1k": 0.0006,
|
| 281 |
+
"avg_latency_ms": 800,
|
| 282 |
+
"tokens_per_second": 130,
|
| 283 |
+
"capabilities": {
|
| 284 |
+
"reasoning": 0.83,
|
| 285 |
+
"coding": 0.82,
|
| 286 |
+
"math": 0.84,
|
| 287 |
+
"creativity": 0.80,
|
| 288 |
+
"factuality": 0.82,
|
| 289 |
+
"instruction_following": 0.85,
|
| 290 |
+
"long_context": 0.95,
|
| 291 |
+
"multilingual": 0.84,
|
| 292 |
+
"tool_use": 0.82,
|
| 293 |
+
"summarization": 0.83,
|
| 294 |
+
"conversation": 0.84
|
| 295 |
+
},
|
| 296 |
+
"features": {
|
| 297 |
+
"tool_calling": true,
|
| 298 |
+
"json_mode": true,
|
| 299 |
+
"streaming": true,
|
| 300 |
+
"image_input": true,
|
| 301 |
+
"function_calling": true
|
| 302 |
+
},
|
| 303 |
+
"benchmark_sources": ["lmsys_arena", "mmlu_pro", "humaneval"],
|
| 304 |
+
"pricing_last_updated": "2025-01-01T00:00:00Z",
|
| 305 |
+
"live_patch": {}
|
| 306 |
+
},
|
| 307 |
+
|
| 308 |
+
"mistral-large-latest": {
|
| 309 |
+
"provider": "mistral",
|
| 310 |
+
"model_family": "mistral-large",
|
| 311 |
+
"context_window": 128000,
|
| 312 |
+
"max_output_tokens": 8192,
|
| 313 |
+
"input_cost_per_1k": 0.003,
|
| 314 |
+
"output_cost_per_1k": 0.009,
|
| 315 |
+
"avg_latency_ms": 1600,
|
| 316 |
+
"tokens_per_second": 75,
|
| 317 |
+
"capabilities": {
|
| 318 |
+
"reasoning": 0.82,
|
| 319 |
+
"coding": 0.82,
|
| 320 |
+
"math": 0.78,
|
| 321 |
+
"creativity": 0.78,
|
| 322 |
+
"factuality": 0.80,
|
| 323 |
+
"instruction_following": 0.84,
|
| 324 |
+
"long_context": 0.78,
|
| 325 |
+
"multilingual": 0.88,
|
| 326 |
+
"tool_use": 0.82,
|
| 327 |
+
"summarization": 0.82,
|
| 328 |
+
"conversation": 0.82
|
| 329 |
+
},
|
| 330 |
+
"features": {
|
| 331 |
+
"tool_calling": true,
|
| 332 |
+
"json_mode": true,
|
| 333 |
+
"streaming": true,
|
| 334 |
+
"image_input": false,
|
| 335 |
+
"function_calling": true
|
| 336 |
+
},
|
| 337 |
+
"benchmark_sources": ["lmsys_arena", "mmlu_pro", "humaneval"],
|
| 338 |
+
"pricing_last_updated": "2025-01-01T00:00:00Z",
|
| 339 |
+
"live_patch": {}
|
| 340 |
+
},
|
| 341 |
+
"mistral-small-latest": {
|
| 342 |
+
"provider": "mistral",
|
| 343 |
+
"model_family": "mistral-small",
|
| 344 |
+
"context_window": 32000,
|
| 345 |
+
"max_output_tokens": 8192,
|
| 346 |
+
"input_cost_per_1k": 0.0001,
|
| 347 |
+
"output_cost_per_1k": 0.0003,
|
| 348 |
+
"avg_latency_ms": 700,
|
| 349 |
+
"tokens_per_second": 140,
|
| 350 |
+
"capabilities": {
|
| 351 |
+
"reasoning": 0.68,
|
| 352 |
+
"coding": 0.68,
|
| 353 |
+
"math": 0.62,
|
| 354 |
+
"creativity": 0.66,
|
| 355 |
+
"factuality": 0.65,
|
| 356 |
+
"instruction_following": 0.72,
|
| 357 |
+
"long_context": 0.60,
|
| 358 |
+
"multilingual": 0.80,
|
| 359 |
+
"tool_use": 0.68,
|
| 360 |
+
"summarization": 0.70,
|
| 361 |
+
"conversation": 0.72
|
| 362 |
+
},
|
| 363 |
+
"features": {
|
| 364 |
+
"tool_calling": true,
|
| 365 |
+
"json_mode": true,
|
| 366 |
+
"streaming": true,
|
| 367 |
+
"image_input": false,
|
| 368 |
+
"function_calling": true
|
| 369 |
+
},
|
| 370 |
+
"benchmark_sources": ["lmsys_arena"],
|
| 371 |
+
"pricing_last_updated": "2025-01-01T00:00:00Z",
|
| 372 |
+
"live_patch": {}
|
| 373 |
+
},
|
| 374 |
+
"deepseek-chat": {
|
| 375 |
+
"provider": "deepseek",
|
| 376 |
+
"model_family": "deepseek-v3",
|
| 377 |
+
"context_window": 64000,
|
| 378 |
+
"max_output_tokens": 8192,
|
| 379 |
+
"input_cost_per_1k": 0.00014,
|
| 380 |
+
"output_cost_per_1k": 0.00028,
|
| 381 |
+
"avg_latency_ms": 1200,
|
| 382 |
+
"tokens_per_second": 95,
|
| 383 |
+
"capabilities": {
|
| 384 |
+
"reasoning": 0.87,
|
| 385 |
+
"coding": 0.90,
|
| 386 |
+
"math": 0.91,
|
| 387 |
+
"creativity": 0.78,
|
| 388 |
+
"factuality": 0.82,
|
| 389 |
+
"instruction_following": 0.85,
|
| 390 |
+
"long_context": 0.72,
|
| 391 |
+
"multilingual": 0.75,
|
| 392 |
+
"tool_use": 0.82,
|
| 393 |
+
"summarization": 0.82,
|
| 394 |
+
"conversation": 0.82
|
| 395 |
+
},
|
| 396 |
+
"features": {
|
| 397 |
+
"tool_calling": true,
|
| 398 |
+
"json_mode": true,
|
| 399 |
+
"streaming": true,
|
| 400 |
+
"image_input": false,
|
| 401 |
+
"function_calling": true
|
| 402 |
+
},
|
| 403 |
+
"benchmark_sources": ["lmsys_arena", "humaneval", "math_benchmark", "mmlu_pro"],
|
| 404 |
+
"pricing_last_updated": "2025-01-01T00:00:00Z",
|
| 405 |
+
"live_patch": {}
|
| 406 |
+
},
|
| 407 |
+
"deepseek-reasoner": {
|
| 408 |
+
"provider": "deepseek",
|
| 409 |
+
"model_family": "deepseek-r1",
|
| 410 |
+
"context_window": 64000,
|
| 411 |
+
"max_output_tokens": 8192,
|
| 412 |
+
"input_cost_per_1k": 0.00055,
|
| 413 |
+
"output_cost_per_1k": 0.00219,
|
| 414 |
+
"avg_latency_ms": 3500,
|
| 415 |
+
"tokens_per_second": 40,
|
| 416 |
+
"capabilities": {
|
| 417 |
+
"reasoning": 0.95,
|
| 418 |
+
"coding": 0.91,
|
| 419 |
+
"math": 0.96,
|
| 420 |
+
"creativity": 0.72,
|
| 421 |
+
"factuality": 0.88,
|
| 422 |
+
"instruction_following": 0.83,
|
| 423 |
+
"long_context": 0.70,
|
| 424 |
+
"multilingual": 0.72,
|
| 425 |
+
"tool_use": 0.75,
|
| 426 |
+
"summarization": 0.78,
|
| 427 |
+
"conversation": 0.72
|
| 428 |
+
},
|
| 429 |
+
"features": {
|
| 430 |
+
"tool_calling": false,
|
| 431 |
+
"json_mode": true,
|
| 432 |
+
"streaming": true,
|
| 433 |
+
"image_input": false,
|
| 434 |
+
"function_calling": false
|
| 435 |
+
},
|
| 436 |
+
"benchmark_sources": ["aime", "math_benchmark", "humaneval", "mmlu_pro"],
|
| 437 |
+
"pricing_last_updated": "2025-01-01T00:00:00Z",
|
| 438 |
+
"live_patch": {}
|
| 439 |
+
},
|
| 440 |
+
"llama3.3-70b": {
|
| 441 |
+
"provider": "ollama",
|
| 442 |
+
"model_family": "llama3",
|
| 443 |
+
"context_window": 128000,
|
| 444 |
+
"max_output_tokens": 8192,
|
| 445 |
+
"input_cost_per_1k": 0.0,
|
| 446 |
+
"output_cost_per_1k": 0.0,
|
| 447 |
+
"avg_latency_ms": 2000,
|
| 448 |
+
"tokens_per_second": 50,
|
| 449 |
+
"capabilities": {
|
| 450 |
+
"reasoning": 0.80,
|
| 451 |
+
"coding": 0.79,
|
| 452 |
+
"math": 0.74,
|
| 453 |
+
"creativity": 0.78,
|
| 454 |
+
"factuality": 0.76,
|
| 455 |
+
"instruction_following": 0.82,
|
| 456 |
+
"long_context": 0.76,
|
| 457 |
+
"multilingual": 0.72,
|
| 458 |
+
"tool_use": 0.76,
|
| 459 |
+
"summarization": 0.80,
|
| 460 |
+
"conversation": 0.82
|
| 461 |
+
},
|
| 462 |
+
"features": {
|
| 463 |
+
"tool_calling": true,
|
| 464 |
+
"json_mode": true,
|
| 465 |
+
"streaming": true,
|
| 466 |
+
"image_input": false,
|
| 467 |
+
"function_calling": true
|
| 468 |
+
},
|
| 469 |
+
"benchmark_sources": ["lmsys_arena", "mmlu_pro", "humaneval"],
|
| 470 |
+
"pricing_last_updated": "2025-01-01T00:00:00Z",
|
| 471 |
+
"live_patch": {}
|
| 472 |
+
},
|
| 473 |
+
"llama3.2-vision": {
|
| 474 |
+
"provider": "ollama",
|
| 475 |
+
"model_family": "llama3",
|
| 476 |
+
"context_window": 128000,
|
| 477 |
+
"max_output_tokens": 8192,
|
| 478 |
+
"input_cost_per_1k": 0.0,
|
| 479 |
+
"output_cost_per_1k": 0.0,
|
| 480 |
+
"avg_latency_ms": 2500,
|
| 481 |
+
"tokens_per_second": 40,
|
| 482 |
+
"capabilities": {
|
| 483 |
+
"reasoning": 0.70,
|
| 484 |
+
"coding": 0.66,
|
| 485 |
+
"math": 0.62,
|
| 486 |
+
"creativity": 0.70,
|
| 487 |
+
"factuality": 0.68,
|
| 488 |
+
"instruction_following": 0.74,
|
| 489 |
+
"long_context": 0.70,
|
| 490 |
+
"multilingual": 0.65,
|
| 491 |
+
"tool_use": 0.65,
|
| 492 |
+
"summarization": 0.72,
|
| 493 |
+
"conversation": 0.75
|
| 494 |
+
},
|
| 495 |
+
"features": {
|
| 496 |
+
"tool_calling": false,
|
| 497 |
+
"json_mode": true,
|
| 498 |
+
"streaming": true,
|
| 499 |
+
"image_input": true,
|
| 500 |
+
"function_calling": false
|
| 501 |
+
},
|
| 502 |
+
"benchmark_sources": ["lmsys_arena"],
|
| 503 |
+
"pricing_last_updated": "2025-01-01T00:00:00Z",
|
| 504 |
+
"live_patch": {}
|
| 505 |
+
}
|
| 506 |
+
}
|
| 507 |
+
}
|
docs/design.md
ADDED
|
@@ -0,0 +1,361 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# LLMOpt UI Design Specification
|
| 2 |
+
|
| 3 |
+
## Project Overview
|
| 4 |
+
**LLMOpt** is an enterprise-grade LLM middleware that intelligently routes queries to the most cost-effective model. The UI must communicate: intelligence, efficiency, cost savings, and observability — all in real time.
|
| 5 |
+
|
| 6 |
+
---
|
| 7 |
+
|
| 8 |
+
## Aesthetic Direction: "Dark Industrial Dashboard"
|
| 9 |
+
|
| 10 |
+
**Concept**: Think Bloomberg Terminal meets cyberpunk command center. Utilitarian precision with electric accents. Every pixel earns its place. Data-dense but crystal clear.
|
| 11 |
+
|
| 12 |
+
**Mood**: Authoritative. Efficient. Technical. Like a cockpit for LLM operations.
|
| 13 |
+
|
| 14 |
+
**One unforgettable thing**: A real-time animated pipeline that lights up as a query flows through each stage — users *watch* the optimization happen.
|
| 15 |
+
|
| 16 |
+
---
|
| 17 |
+
|
| 18 |
+
## Color Palette
|
| 19 |
+
|
| 20 |
+
```
|
| 21 |
+
--bg-base: #0A0B0E /* Near-black base */
|
| 22 |
+
--bg-surface: #111318 /* Card/panel surface */
|
| 23 |
+
--bg-elevated: #1A1D26 /* Elevated panels */
|
| 24 |
+
--bg-border: #252A38 /* Borders */
|
| 25 |
+
|
| 26 |
+
--accent-cyan: #00E5FF /* Primary accent — pipeline glow */
|
| 27 |
+
--accent-green: #00FF94 /* Success, savings, cache hits */
|
| 28 |
+
--accent-amber: #FFB300 /* Warnings, "balanced" tier */
|
| 29 |
+
--accent-red: #FF3D57 /* Errors, expensive routes */
|
| 30 |
+
--accent-purple: #7C4DFF /* ML / AI stage indicators */
|
| 31 |
+
|
| 32 |
+
--text-primary: #E8ECF4 /* Main text */
|
| 33 |
+
--text-secondary: #7A8299 /* Labels, metadata */
|
| 34 |
+
--text-muted: #3D4357 /* Disabled / placeholder */
|
| 35 |
+
|
| 36 |
+
--gradient-glow: linear-gradient(135deg, #00E5FF22, #7C4DFF11)
|
| 37 |
+
```
|
| 38 |
+
|
| 39 |
+
---
|
| 40 |
+
|
| 41 |
+
## Typography
|
| 42 |
+
|
| 43 |
+
```
|
| 44 |
+
Display / Headers : "JetBrains Mono" (monospace — fits the terminal DNA)
|
| 45 |
+
Body / UI Labels : "DM Sans" (clean, readable, modern)
|
| 46 |
+
Data / Numbers : "JetBrains Mono" (monospace alignment for metrics)
|
| 47 |
+
Code Blocks : "Fira Code" with ligatures
|
| 48 |
+
|
| 49 |
+
Sizes:
|
| 50 |
+
--text-xs: 11px
|
| 51 |
+
--text-sm: 13px
|
| 52 |
+
--text-base: 15px
|
| 53 |
+
--text-lg: 18px
|
| 54 |
+
--text-xl: 24px
|
| 55 |
+
--text-2xl: 32px
|
| 56 |
+
--text-3xl: 48px
|
| 57 |
+
```
|
| 58 |
+
|
| 59 |
+
---
|
| 60 |
+
|
| 61 |
+
## Layout Structure
|
| 62 |
+
|
| 63 |
+
```
|
| 64 |
+
βββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 65 |
+
β TOPBAR: Logo | Nav Tabs | Status Indicators β
|
| 66 |
+
ββββββββββββββββ¬βββββββββββββββββββββββββββββββββββββββ€
|
| 67 |
+
β β β
|
| 68 |
+
β LEFT PANEL β MAIN CONTENT AREA β
|
| 69 |
+
β (280px) β β
|
| 70 |
+
β β [Query Input + Pipeline Visualizer] β
|
| 71 |
+
β β’ Config β [Response Output] β
|
| 72 |
+
β β’ Budget β [Explainability Card] β
|
| 73 |
+
β β’ Providers β β
|
| 74 |
+
β β’ History β β
|
| 75 |
+
β ββββββββββββββββββββββββββββββββββββββββ€
|
| 76 |
+
β β METRICS STRIP (bottom) β
|
| 77 |
+
β β Cost | Tokens | Latency | Savings β
|
| 78 |
+
ββββββββββββββββ΄βββββββββββββββββββββββββββββββββββββββ
|
| 79 |
+
```
|
| 80 |
+
|
| 81 |
+
---
|
| 82 |
+
|
| 83 |
+
## Page / View Breakdown
|
| 84 |
+
|
| 85 |
+
### 1. `/` — Playground (Main View)
|
| 86 |
+
|
| 87 |
+
The core query interface. This is what users interact with daily.
|
| 88 |
+
|
| 89 |
+
**Components:**
|
| 90 |
+
|
| 91 |
+
#### Query Input Box
|
| 92 |
+
- Large dark textarea with subtle cyan border-glow on focus
|
| 93 |
+
- Font: JetBrains Mono
|
| 94 |
+
- Placeholder: `// Enter your query...`
|
| 95 |
+
- Right side: Budget Mode selector (3 pills: `CHEAP` / `BALANCED` / `QUALITY`)
|
| 96 |
+
- Bottom bar inside textarea: token count estimate, `[RUN]` button (cyan, full-right)
|
| 97 |
+
|
| 98 |
+
#### Pipeline Visualizer (HERO COMPONENT)
|
| 99 |
+
A horizontal animated flow diagram that activates on query submission:
|
| 100 |
+
|
| 101 |
+
```
|
| 102 |
+
[CACHE] ──► [NLI ANALYZE] ──► [GBR ESTIMATE] ──► [BAYESIAN OPT] ──► [COMPRESS] ──► [ROUTE] ──► [LLM]
|
| 103 |
+
```
|
| 104 |
+
|
| 105 |
+
- Each stage is a pill/node with icon + label
|
| 106 |
+
- Inactive: `--bg-elevated` fill, `--text-muted` text
|
| 107 |
+
- Active (processing): Cyan pulsing border + glow, animated spinner inside
|
| 108 |
+
- Complete: Green fill, checkmark icon, latency badge underneath (e.g., `12ms`)
|
| 109 |
+
- Skipped (cache hit): Amber fill with "CACHED" label — flow skips to end
|
| 110 |
+
- Connecting lines animate left-to-right as each stage completes
|
| 111 |
+
|
| 112 |
+
**Stage Icons:**
|
| 113 |
+
| Stage | Icon |
|
| 114 |
+
|-------|------|
|
| 115 |
+
| Cache | ⚡ (lightning) |
|
| 116 |
+
| NLI Analyze | 🔍 |
|
| 117 |
+
| GBR Estimate | 📊 |
|
| 118 |
+
| Bayesian Opt | ⚙️ |
|
| 119 |
+
| Compress | 🗜️ |
|
| 120 |
+
| Route | 🔀 |
|
| 121 |
+
| LLM | 🤖 |
|
| 122 |
+
|
| 123 |
+
#### Response Panel
|
| 124 |
+
- Appears below pipeline after completion
|
| 125 |
+
- Markdown rendering with syntax highlighting (dark theme)
|
| 126 |
+
- Header strip: `Model: claude-3-5-haiku` | `Provider: Anthropic` | copy button
|
| 127 |
+
- Subtle fade-in animation on arrival
|
| 128 |
+
|
| 129 |
+
#### Explainability Card (collapsible)
|
| 130 |
+
- Monospace font block styled like a terminal output
|
| 131 |
+
- Cyan `>` prefix on each line
|
| 132 |
+
- Shows: complexity score, domain, selected model, scoring rationale, cost saved
|
| 133 |
+
- Toggle with `[EXPLAIN]` button next to Run
|
| 134 |
+
|
| 135 |
+
---
|
| 136 |
+
|
| 137 |
+
### 2. `/analytics` — Observability Dashboard
|
| 138 |
+
|
| 139 |
+
**Components:**
|
| 140 |
+
|
| 141 |
+
#### KPI Row (top 4 cards)
|
| 142 |
+
```
|
| 143 |
+
ββββββββββββββββ ββββββββββββββββ ββββββββββββββββ ββββββββββββββββ
|
| 144 |
+
β Total Saved β β Avg Latency β β Cache Hit % β β Total Queriesβ
|
| 145 |
+
β $12.48 β β 840ms β β 34% β β 1,204 β
|
| 146 |
+
β β 18% today β β β 12% β β β 5% β β β
|
| 147 |
+
ββββββββββββββββ ββββββββββββββββ ββββββββββββββββ ββββββββββββββββ
|
| 148 |
+
```
|
| 149 |
+
- Micro sparklines inside each card (7-day trend)
|
| 150 |
+
- Green arrows = good, Red = bad
|
| 151 |
+
|
| 152 |
+
#### Model Usage Breakdown
|
| 153 |
+
- Horizontal stacked bar chart
|
| 154 |
+
- Each provider has a distinct color segment
|
| 155 |
+
- Hover shows: model name, % of queries, avg cost
|
| 156 |
+
|
| 157 |
+
#### Cost Over Time
|
| 158 |
+
- Area chart, cyan fill with glow
|
| 159 |
+
- X-axis: time (last 7 days / 30 days toggle)
|
| 160 |
+
- Y-axis: USD
|
| 161 |
+
- Dotted line showing "cost if all GPT-4o" — dramatic visual of savings
|
| 162 |
+
|
| 163 |
+
#### Query Log Table
|
| 164 |
+
```
|
| 165 |
+
Timestamp | Query Preview | Complexity | Model Used | Cost | Latency | Score
|
| 166 |
+
```
|
| 167 |
+
- Zebra striping with `--bg-surface` / `--bg-elevated`
|
| 168 |
+
- Complexity shown as colored bar (green → amber → red)
|
| 169 |
+
- Clickable rows expand to show full explainability output
|
| 170 |
+
|
| 171 |
+
---
|
| 172 |
+
|
| 173 |
+
### 3. `/models` — Model Registry
|
| 174 |
+
|
| 175 |
+
**Components:**
|
| 176 |
+
|
| 177 |
+
#### Model Cards Grid (2-col)
|
| 178 |
+
Each card:
|
| 179 |
+
- Model name (large, monospace)
|
| 180 |
+
- Provider badge (colored pill)
|
| 181 |
+
- Capability score as radial gauge (0–1)
|
| 182 |
+
- Pricing: Input / Output per 1k tokens
|
| 183 |
+
- "Best For" tag
|
| 184 |
+
- Toggle: Enable / Disable this model
|
| 185 |
+
|
| 186 |
+
#### Comparison Table
|
| 187 |
+
- Sortable columns: Capability, Input Cost, Output Cost, Best For
|
| 188 |
+
- Highlight the "Best Value" row with cyan left border
|
| 189 |
+
|
| 190 |
+
---
|
| 191 |
+
|
| 192 |
+
### 4. `/settings` — Configuration
|
| 193 |
+
|
| 194 |
+
**Components:**
|
| 195 |
+
- API Key inputs per provider (masked, with test button)
|
| 196 |
+
- Redis URL config
|
| 197 |
+
- Budget weight sliders (α Cost / β Tokens / γ Quality) with live formula display
|
| 198 |
+
- Compression toggle + threshold slider
|
| 199 |
+
- Evaluation (LLM-as-Judge) toggle
|
| 200 |
+
|
| 201 |
+
---
|
| 202 |
+
|
| 203 |
+
## Component Design Details
|
| 204 |
+
|
| 205 |
+
### Sidebar Navigation
|
| 206 |
+
```
|
| 207 |
+
βββββββββββββββββββ
|
| 208 |
+
β β‘ LLMOpt β β Logo: monospace, cyan accent
|
| 209 |
+
βββββββββββββββββββ€
|
| 210 |
+
β βΈ Playground β β Active: cyan left border + bg highlight
|
| 211 |
+
β βΈ Analytics β
|
| 212 |
+
β βΈ Models β
|
| 213 |
+
β βΈ Settings β
|
| 214 |
+
βββββββββββββββββββ€
|
| 215 |
+
β SYSTEM STATUS β
|
| 216 |
+
β β Redis OK β β Green dot
|
| 217 |
+
β β ML Deps OK β
|
| 218 |
+
β β Cache 34% β
|
| 219 |
+
βββββββββββββββββββ
|
| 220 |
+
```
|
| 221 |
+
|
| 222 |
+
### Budget Mode Pills
|
| 223 |
+
```
|
| 224 |
+
[ CHEAP ] [ BALANCED ] [ QUALITY ]
|
| 225 |
+
```
|
| 226 |
+
- Inactive: `--bg-elevated` + `--text-secondary`
|
| 227 |
+
- Active CHEAP: Green fill
|
| 228 |
+
- Active BALANCED: Amber fill
|
| 229 |
+
- Active QUALITY: Cyan fill
|
| 230 |
+
|
| 231 |
+
### Metric Cards
|
| 232 |
+
```css
|
| 233 |
+
.metric-card {
|
| 234 |
+
background: var(--bg-surface);
|
| 235 |
+
border: 1px solid var(--bg-border);
|
| 236 |
+
border-radius: 8px;
|
| 237 |
+
padding: 20px 24px;
|
| 238 |
+
position: relative;
|
| 239 |
+
overflow: hidden;
|
| 240 |
+
}
|
| 241 |
+
.metric-card::before {
|
| 242 |
+
content: '';
|
| 243 |
+
position: absolute;
|
| 244 |
+
top: 0; left: 0; right: 0;
|
| 245 |
+
height: 2px;
|
| 246 |
+
background: var(--accent-cyan); /* or green/amber/purple per card */
|
| 247 |
+
}
|
| 248 |
+
```
|
| 249 |
+
|
| 250 |
+
### Status Dots
|
| 251 |
+
```css
|
| 252 |
+
.dot-live {
|
| 253 |
+
width: 8px; height: 8px;
|
| 254 |
+
border-radius: 50%;
|
| 255 |
+
background: var(--accent-green);
|
| 256 |
+
box-shadow: 0 0 8px var(--accent-green);
|
| 257 |
+
animation: pulse 2s infinite;
|
| 258 |
+
}
|
| 259 |
+
```
|
| 260 |
+
|
| 261 |
+
---
|
| 262 |
+
|
| 263 |
+
## Animation Spec
|
| 264 |
+
|
| 265 |
+
### Pipeline Stage Activation
|
| 266 |
+
```
|
| 267 |
+
Trigger: query submitted
|
| 268 |
+
Sequence:
|
| 269 |
+
t=0ms → CACHE node: border glows cyan, spinner starts
|
| 270 |
+
t=~200ms → CACHE completes (hit/miss), NLI node activates
|
| 271 |
+
t=~400ms → NLI completes, GBR node activates
|
| 272 |
+
...and so on until ROUTE
|
| 273 |
+
Final → Response panel fades in (opacity 0→1, translateY 8px→0, 300ms ease)
|
| 274 |
+
```
|
| 275 |
+
|
| 276 |
+
### Page Load
|
| 277 |
+
- Sidebar slides in from left (translateX -100% → 0, 400ms ease-out)
|
| 278 |
+
- KPI cards stagger in with 80ms delay each (opacity 0→1, translateY 16px→0)
|
| 279 |
+
- Chart areas draw from left (width 0→100%, 600ms ease-in-out)
|
| 280 |
+
|
| 281 |
+
### Hover States
|
| 282 |
+
- Cards: `border-color` transitions to `--accent-cyan` at 30% opacity
|
| 283 |
+
- Buttons: subtle scale(1.02) + glow intensification
|
| 284 |
+
- Table rows: `--bg-elevated` background fill
|
| 285 |
+
|
| 286 |
+
---
|
| 287 |
+
|
| 288 |
+
## Responsive Breakpoints
|
| 289 |
+
|
| 290 |
+
```
|
| 291 |
+
Desktop (≥1280px) : Full 2-panel layout as described
|
| 292 |
+
Tablet (≥768px) : Sidebar collapses to icon rail (48px)
|
| 293 |
+
Mobile (<768px) : Full-screen single column, bottom tab nav
|
| 294 |
+
```
|
| 295 |
+
|
| 296 |
+
---
|
| 297 |
+
|
| 298 |
+
## Tech Stack Recommendation
|
| 299 |
+
|
| 300 |
+
```
|
| 301 |
+
Framework : React 18 + TypeScript
|
| 302 |
+
Styling : Tailwind CSS + CSS custom properties for theming
|
| 303 |
+
Charts : Recharts (area, bar, sparklines)
|
| 304 |
+
Animation : Framer Motion (pipeline, page transitions)
|
| 305 |
+
Markdown : react-markdown + react-syntax-highlighter
|
| 306 |
+
Icons : Lucide React
|
| 307 |
+
API Client : axios / fetch with React Query for caching
|
| 308 |
+
State : Zustand (lightweight global state)
|
| 309 |
+
```
|
| 310 |
+
|
| 311 |
+
---
|
| 312 |
+
|
| 313 |
+
## Key UX Principles
|
| 314 |
+
|
| 315 |
+
1. **Show, don't tell** — the pipeline animation IS the explainability
|
| 316 |
+
2. **Every number has context** — cost shown alongside "vs GPT-4o baseline"
|
| 317 |
+
3. **Progressive disclosure** — simple by default, deep data on demand
|
| 318 |
+
4. **Zero loading skeletons** — use optimistic UI and instant local feedback
|
| 319 |
+
5. **Error states are designed** — not afterthoughts. Red glow on failed stages, clear recovery path.
|
| 320 |
+
|
| 321 |
+
---
|
| 322 |
+
|
| 323 |
+
## Sample Data / Placeholders
|
| 324 |
+
|
| 325 |
+
Use these for mockups:
|
| 326 |
+
|
| 327 |
+
```json
|
| 328 |
+
{
|
| 329 |
+
"query": "Write a recursive Fibonacci function in Rust",
|
| 330 |
+
"model_used": "claude-3-5-haiku-20241022",
|
| 331 |
+
"provider": "anthropic",
|
| 332 |
+
"complexity_score": 0.62,
|
| 333 |
+
"complexity_tier": "hard",
|
| 334 |
+
"estimated_cost": 0.001452,
|
| 335 |
+
"tokens_saved": 28,
|
| 336 |
+
"compression_ratio": 0.21,
|
| 337 |
+
"latency_ms": 1140,
|
| 338 |
+
"evaluation": {
|
| 339 |
+
"overall": 9.5,
|
| 340 |
+
"accuracy": 10.0,
|
| 341 |
+
"feedback": "The code is idiomatic and correctly implements recursion."
|
| 342 |
+
}
|
| 343 |
+
}
|
| 344 |
+
```
|
| 345 |
+
|
| 346 |
+
---
|
| 347 |
+
|
| 348 |
+
## Deliverables Checklist for Agent
|
| 349 |
+
|
| 350 |
+
- [ ] `App.tsx` — root layout with sidebar + router
|
| 351 |
+
- [ ] `Playground.tsx` — main query interface
|
| 352 |
+
- [ ] `PipelineVisualizer.tsx` — animated stage flow
|
| 353 |
+
- [ ] `ResponsePanel.tsx` — markdown response display
|
| 354 |
+
- [ ] `ExplainCard.tsx` — monospace terminal-style explanation
|
| 355 |
+
- [ ] `Analytics.tsx` — dashboard with charts
|
| 356 |
+
- [ ] `ModelRegistry.tsx` — model cards + table
|
| 357 |
+
- [ ] `Settings.tsx` — config form
|
| 358 |
+
- [ ] `theme.css` — all CSS variables
|
| 359 |
+
- [ ] `components/MetricCard.tsx`
|
| 360 |
+
- [ ] `components/BudgetPills.tsx`
|
| 361 |
+
- [ ] `components/StatusDot.tsx`
|
frontend/.gitignore
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Logs
|
| 2 |
+
logs
|
| 3 |
+
*.log
|
| 4 |
+
npm-debug.log*
|
| 5 |
+
yarn-debug.log*
|
| 6 |
+
yarn-error.log*
|
| 7 |
+
pnpm-debug.log*
|
| 8 |
+
lerna-debug.log*
|
| 9 |
+
|
| 10 |
+
node_modules
|
| 11 |
+
dist
|
| 12 |
+
dist-ssr
|
| 13 |
+
*.local
|
| 14 |
+
|
| 15 |
+
# Editor directories and files
|
| 16 |
+
.vscode/*
|
| 17 |
+
!.vscode/extensions.json
|
| 18 |
+
.idea
|
| 19 |
+
.DS_Store
|
| 20 |
+
*.suo
|
| 21 |
+
*.ntvs*
|
| 22 |
+
*.njsproj
|
| 23 |
+
*.sln
|
| 24 |
+
*.sw?
|
frontend/README.md
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# React + TypeScript + Vite
|
| 2 |
+
|
| 3 |
+
This template provides a minimal setup to get React working in Vite with HMR and some ESLint rules.
|
| 4 |
+
|
| 5 |
+
Currently, two official plugins are available:
|
| 6 |
+
|
| 7 |
+
- [@vitejs/plugin-react](https://github.com/vitejs/vite-plugin-react/blob/main/packages/plugin-react/README.md) uses [Babel](https://babeljs.io/) for Fast Refresh
|
| 8 |
+
- [@vitejs/plugin-react-swc](https://github.com/vitejs/vite-plugin-react-swc) uses [SWC](https://swc.rs/) for Fast Refresh
|
| 9 |
+
|
| 10 |
+
## Expanding the ESLint configuration
|
| 11 |
+
|
| 12 |
+
If you are developing a production application, we recommend updating the configuration to enable type aware lint rules:
|
| 13 |
+
|
| 14 |
+
- Configure the top-level `parserOptions` property like this:
|
| 15 |
+
|
| 16 |
+
```js
|
| 17 |
+
export default tseslint.config({
|
| 18 |
+
languageOptions: {
|
| 19 |
+
// other options...
|
| 20 |
+
parserOptions: {
|
| 21 |
+
project: ['./tsconfig.node.json', './tsconfig.app.json'],
|
| 22 |
+
tsconfigRootDir: import.meta.dirname,
|
| 23 |
+
},
|
| 24 |
+
},
|
| 25 |
+
})
|
| 26 |
+
```
|
| 27 |
+
|
| 28 |
+
- Replace `tseslint.configs.recommended` with `tseslint.configs.recommendedTypeChecked` or `tseslint.configs.strictTypeChecked`
|
| 29 |
+
- Optionally add `...tseslint.configs.stylisticTypeChecked`
|
| 30 |
+
- Install [eslint-plugin-react](https://github.com/jsx-eslint/eslint-plugin-react) and update the config:
|
| 31 |
+
|
| 32 |
+
```js
|
| 33 |
+
// eslint.config.js
|
| 34 |
+
import react from 'eslint-plugin-react'
|
| 35 |
+
|
| 36 |
+
export default tseslint.config({
|
| 37 |
+
// Set the react version
|
| 38 |
+
settings: { react: { version: '18.3' } },
|
| 39 |
+
plugins: {
|
| 40 |
+
// Add the react plugin
|
| 41 |
+
react,
|
| 42 |
+
},
|
| 43 |
+
rules: {
|
| 44 |
+
// other rules...
|
| 45 |
+
// Enable its recommended rules
|
| 46 |
+
...react.configs.recommended.rules,
|
| 47 |
+
...react.configs['jsx-runtime'].rules,
|
| 48 |
+
},
|
| 49 |
+
})
|
| 50 |
+
```
|
frontend/eslint.config.js
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import js from '@eslint/js'
|
| 2 |
+
import globals from 'globals'
|
| 3 |
+
import reactHooks from 'eslint-plugin-react-hooks'
|
| 4 |
+
import reactRefresh from 'eslint-plugin-react-refresh'
|
| 5 |
+
import tseslint from 'typescript-eslint'
|
| 6 |
+
|
| 7 |
+
export default tseslint.config(
|
| 8 |
+
{ ignores: ['dist'] },
|
| 9 |
+
{
|
| 10 |
+
extends: [js.configs.recommended, ...tseslint.configs.recommended],
|
| 11 |
+
files: ['**/*.{ts,tsx}'],
|
| 12 |
+
languageOptions: {
|
| 13 |
+
ecmaVersion: 2020,
|
| 14 |
+
globals: globals.browser,
|
| 15 |
+
},
|
| 16 |
+
plugins: {
|
| 17 |
+
'react-hooks': reactHooks,
|
| 18 |
+
'react-refresh': reactRefresh,
|
| 19 |
+
},
|
| 20 |
+
rules: {
|
| 21 |
+
...reactHooks.configs.recommended.rules,
|
| 22 |
+
'react-refresh/only-export-components': [
|
| 23 |
+
'warn',
|
| 24 |
+
{ allowConstantExport: true },
|
| 25 |
+
],
|
| 26 |
+
},
|
| 27 |
+
},
|
| 28 |
+
)
|
frontend/index.html
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!doctype html>
|
| 2 |
+
<html lang="en">
|
| 3 |
+
<head>
|
| 4 |
+
<meta charset="UTF-8" />
|
| 5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
| 6 |
+
<title>LLMOpt β Adaptive LLM Inference Optimization</title>
|
| 7 |
+
<meta name="description" content="LLMOpt is an enterprise-grade LLM gateway that intelligently routes queries to the most cost-effective model, saving costs while maintaining quality." />
|
| 8 |
+
<link rel="preconnect" href="https://fonts.googleapis.com" />
|
| 9 |
+
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin />
|
| 10 |
+
<link href="https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@400;500;600;700&family=DM+Sans:wght@300;400;500;600;700&family=Fira+Code:wght@400;500&display=swap" rel="stylesheet" />
|
| 11 |
+
</head>
|
| 12 |
+
<body>
|
| 13 |
+
<div id="root"></div>
|
| 14 |
+
<script type="module" src="/src/main.tsx"></script>
|
| 15 |
+
</body>
|
| 16 |
+
</html>
|
frontend/package-lock.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
frontend/package.json
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "frontend",
|
| 3 |
+
"private": true,
|
| 4 |
+
"version": "0.0.0",
|
| 5 |
+
"type": "module",
|
| 6 |
+
"scripts": {
|
| 7 |
+
"dev": "vite",
|
| 8 |
+
"build": "tsc -b && vite build",
|
| 9 |
+
"lint": "eslint .",
|
| 10 |
+
"preview": "vite preview"
|
| 11 |
+
},
|
| 12 |
+
"dependencies": {
|
| 13 |
+
"@types/react-syntax-highlighter": "^15.5.13",
|
| 14 |
+
"framer-motion": "^12.40.0",
|
| 15 |
+
"lucide-react": "^1.16.0",
|
| 16 |
+
"react": "^18.3.1",
|
| 17 |
+
"react-dom": "^18.3.1",
|
| 18 |
+
"react-markdown": "^10.1.0",
|
| 19 |
+
"react-router-dom": "^7.15.1",
|
| 20 |
+
"react-syntax-highlighter": "^16.1.1",
|
| 21 |
+
"recharts": "^3.8.1",
|
| 22 |
+
"zustand": "^5.0.13"
|
| 23 |
+
},
|
| 24 |
+
"devDependencies": {
|
| 25 |
+
"@eslint/js": "^9.13.0",
|
| 26 |
+
"@types/node": "^25.9.1",
|
| 27 |
+
"@types/react": "^18.3.12",
|
| 28 |
+
"@types/react-dom": "^18.3.1",
|
| 29 |
+
"@vitejs/plugin-react": "^4.3.3",
|
| 30 |
+
"eslint": "^9.13.0",
|
| 31 |
+
"eslint-plugin-react-hooks": "^5.0.0",
|
| 32 |
+
"eslint-plugin-react-refresh": "^0.4.14",
|
| 33 |
+
"globals": "^15.11.0",
|
| 34 |
+
"typescript": "~5.6.2",
|
| 35 |
+
"typescript-eslint": "^8.11.0",
|
| 36 |
+
"vite": "^5.4.10"
|
| 37 |
+
}
|
| 38 |
+
}
|
frontend/public/vite.svg
ADDED
|
|
frontend/src/App.css
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#root {
|
| 2 |
+
max-width: 1280px;
|
| 3 |
+
margin: 0 auto;
|
| 4 |
+
padding: 2rem;
|
| 5 |
+
text-align: center;
|
| 6 |
+
}
|
| 7 |
+
|
| 8 |
+
.logo {
|
| 9 |
+
height: 6em;
|
| 10 |
+
padding: 1.5em;
|
| 11 |
+
will-change: filter;
|
| 12 |
+
transition: filter 300ms;
|
| 13 |
+
}
|
| 14 |
+
.logo:hover {
|
| 15 |
+
filter: drop-shadow(0 0 2em #646cffaa);
|
| 16 |
+
}
|
| 17 |
+
.logo.react:hover {
|
| 18 |
+
filter: drop-shadow(0 0 2em #61dafbaa);
|
| 19 |
+
}
|
| 20 |
+
|
| 21 |
+
@keyframes logo-spin {
|
| 22 |
+
from {
|
| 23 |
+
transform: rotate(0deg);
|
| 24 |
+
}
|
| 25 |
+
to {
|
| 26 |
+
transform: rotate(360deg);
|
| 27 |
+
}
|
| 28 |
+
}
|
| 29 |
+
|
| 30 |
+
@media (prefers-reduced-motion: no-preference) {
|
| 31 |
+
a:nth-of-type(2) .logo {
|
| 32 |
+
animation: logo-spin infinite 20s linear;
|
| 33 |
+
}
|
| 34 |
+
}
|
| 35 |
+
|
| 36 |
+
.card {
|
| 37 |
+
padding: 2em;
|
| 38 |
+
}
|
| 39 |
+
|
| 40 |
+
.read-the-docs {
|
| 41 |
+
color: #888;
|
| 42 |
+
}
|
frontend/src/App.tsx
ADDED
|
@@ -0,0 +1,217 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { BrowserRouter, Routes, Route, NavLink, useLocation } from 'react-router-dom';
|
| 2 |
+
import { motion, AnimatePresence } from 'framer-motion';
|
| 3 |
+
import { useEffect } from 'react';
|
| 4 |
+
import { Zap, LayoutDashboard, BarChart3, Database, Settings, LogOut, ChevronLeft, ChevronRight } from 'lucide-react';
|
| 5 |
+
import { useAppStore } from './store';
|
| 6 |
+
import { api, getStoredSession, setStoredSession } from './api';
|
| 7 |
+
|
| 8 |
+
import Playground from './pages/Playground';
|
| 9 |
+
import Analytics from './pages/Analytics';
|
| 10 |
+
import ModelRegistry from './pages/ModelRegistry';
|
| 11 |
+
import SettingsPage from './pages/Settings';
|
| 12 |
+
import LoginPage from './pages/Login';
|
| 13 |
+
|
| 14 |
+
function Sidebar({ collapsed, setCollapsed }: { collapsed: boolean; setCollapsed: (v: boolean) => void }) {
|
| 15 |
+
const { health, auth, setAuth } = useAppStore();
|
| 16 |
+
const location = useLocation();
|
| 17 |
+
|
| 18 |
+
const navItems = [
|
| 19 |
+
{ to: '/', icon: <LayoutDashboard size={18} />, label: 'Playground', exact: true },
|
| 20 |
+
{ to: '/analytics', icon: <BarChart3 size={18} />, label: 'Analytics' },
|
| 21 |
+
{ to: '/models', icon: <Database size={18} />, label: 'Models' },
|
| 22 |
+
{ to: '/settings', icon: <Settings size={18} />, label: 'Settings' },
|
| 23 |
+
];
|
| 24 |
+
|
| 25 |
+
const handleLogout = async () => {
|
| 26 |
+
try {
|
| 27 |
+
await api.logout();
|
| 28 |
+
} catch (_) { /* ignore */ }
|
| 29 |
+
setStoredSession(null);
|
| 30 |
+
setAuth({ isLoggedIn: false, sessionId: null });
|
| 31 |
+
};
|
| 32 |
+
|
| 33 |
+
const dotClass = (s: string) =>
|
| 34 |
+
s === 'ok' ? 'dot dot-live' : s === 'error' ? 'dot dot-error' : 'dot dot-muted';
|
| 35 |
+
|
| 36 |
+
return (
|
| 37 |
+
<motion.aside
|
| 38 |
+
className={`sidebar${collapsed ? ' collapsed' : ''}`}
|
| 39 |
+
initial={false}
|
| 40 |
+
animate={{ width: collapsed ? 56 : 280 }}
|
| 41 |
+
transition={{ duration: 0.25, ease: 'easeInOut' }}
|
| 42 |
+
>
|
| 43 |
+
{/* Logo */}
|
| 44 |
+
<div className="sidebar-logo">
|
| 45 |
+
<div className="sidebar-logo-icon"><Zap size={22} fill="currentColor" /></div>
|
| 46 |
+
{!collapsed && (
|
| 47 |
+
<div className="sidebar-logo-text">LLM<span>Opt</span></div>
|
| 48 |
+
)}
|
| 49 |
+
<button
|
| 50 |
+
onClick={() => setCollapsed(!collapsed)}
|
| 51 |
+
style={{
|
| 52 |
+
marginLeft: 'auto',
|
| 53 |
+
background: 'transparent',
|
| 54 |
+
border: 'none',
|
| 55 |
+
color: 'var(--text-muted)',
|
| 56 |
+
cursor: 'pointer',
|
| 57 |
+
display: 'flex',
|
| 58 |
+
alignItems: 'center',
|
| 59 |
+
padding: '4px',
|
| 60 |
+
borderRadius: '4px',
|
| 61 |
+
flexShrink: 0,
|
| 62 |
+
}}
|
| 63 |
+
>
|
| 64 |
+
{collapsed ? <ChevronRight size={16} /> : <ChevronLeft size={16} />}
|
| 65 |
+
</button>
|
| 66 |
+
</div>
|
| 67 |
+
|
| 68 |
+
{/* Nav */}
|
| 69 |
+
<nav className="sidebar-nav">
|
| 70 |
+
{!collapsed && <div className="sidebar-section-label">Navigation</div>}
|
| 71 |
+
{navItems.map((item) => {
|
| 72 |
+
const isActive = item.exact
|
| 73 |
+
? location.pathname === item.to
|
| 74 |
+
: location.pathname.startsWith(item.to) && item.to !== '/';
|
| 75 |
+
return (
|
| 76 |
+
<NavLink
|
| 77 |
+
key={item.to}
|
| 78 |
+
to={item.to}
|
| 79 |
+
className={`sidebar-nav-item${isActive ? ' active' : ''}`}
|
| 80 |
+
data-tooltip={collapsed ? item.label : undefined}
|
| 81 |
+
>
|
| 82 |
+
<span className="sidebar-nav-icon">{item.icon}</span>
|
| 83 |
+
{!collapsed && <span>{item.label}</span>}
|
| 84 |
+
</NavLink>
|
| 85 |
+
);
|
| 86 |
+
})}
|
| 87 |
+
|
| 88 |
+
<div style={{ flex: 1 }} />
|
| 89 |
+
|
| 90 |
+
{auth.isLoggedIn && (
|
| 91 |
+
<button
|
| 92 |
+
className="sidebar-nav-item"
|
| 93 |
+
onClick={handleLogout}
|
| 94 |
+
data-tooltip={collapsed ? 'Sign Out' : undefined}
|
| 95 |
+
>
|
| 96 |
+
<span className="sidebar-nav-icon"><LogOut size={18} /></span>
|
| 97 |
+
{!collapsed && <span>Sign Out</span>}
|
| 98 |
+
</button>
|
| 99 |
+
)}
|
| 100 |
+
</nav>
|
| 101 |
+
|
| 102 |
+
{/* Status */}
|
| 103 |
+
{!collapsed && (
|
| 104 |
+
<div className="sidebar-status">
|
| 105 |
+
<div className="sidebar-status-title">System Status</div>
|
| 106 |
+
<div className="sidebar-status-item">
|
| 107 |
+
<span className={dotClass(health.api)} />
|
| 108 |
+
<span>API</span>
|
| 109 |
+
<span style={{ marginLeft: 'auto', color: health.api === 'ok' ? 'var(--accent-green)' : 'var(--accent-red)' }}>
|
| 110 |
+
{health.api.toUpperCase()}
|
| 111 |
+
</span>
|
| 112 |
+
</div>
|
| 113 |
+
<div className="sidebar-status-item">
|
| 114 |
+
<span className={dotClass(health.redis)} />
|
| 115 |
+
<span>Redis</span>
|
| 116 |
+
<span style={{ marginLeft: 'auto', color: health.redis === 'ok' ? 'var(--accent-green)' : 'var(--text-muted)' }}>
|
| 117 |
+
{health.redis.toUpperCase()}
|
| 118 |
+
</span>
|
| 119 |
+
</div>
|
| 120 |
+
<div className="sidebar-status-item">
|
| 121 |
+
<span className={dotClass(health.ml_deps)} />
|
| 122 |
+
<span>ML Deps</span>
|
| 123 |
+
<span style={{ marginLeft: 'auto', color: health.ml_deps === 'ok' ? 'var(--accent-green)' : 'var(--text-muted)' }}>
|
| 124 |
+
{health.ml_deps.toUpperCase()}
|
| 125 |
+
</span>
|
| 126 |
+
</div>
|
| 127 |
+
</div>
|
| 128 |
+
)}
|
| 129 |
+
</motion.aside>
|
| 130 |
+
);
|
| 131 |
+
}
|
| 132 |
+
|
| 133 |
+
function AppShell() {
|
| 134 |
+
const { auth, setAuth, sidebarCollapsed, setSidebarCollapsed, setConnectedProviders } = useAppStore();
|
| 135 |
+
const location = useLocation();
|
| 136 |
+
|
| 137 |
+
// Restore session from localStorage on startup
|
| 138 |
+
useEffect(() => {
|
| 139 |
+
const stored = getStoredSession();
|
| 140 |
+
if (stored) {
|
| 141 |
+
setAuth({ isLoggedIn: true, sessionId: stored });
|
| 142 |
+
}
|
| 143 |
+
}, []);
|
| 144 |
+
|
| 145 |
+
// Poll health
|
| 146 |
+
useEffect(() => {
|
| 147 |
+
const check = async () => {
|
| 148 |
+
try {
|
| 149 |
+
await api.health();
|
| 150 |
+
} catch { /* ignore */ }
|
| 151 |
+
};
|
| 152 |
+
check();
|
| 153 |
+
const t = setInterval(check, 30000);
|
| 154 |
+
return () => clearInterval(t);
|
| 155 |
+
}, []);
|
| 156 |
+
|
| 157 |
+
// Poll connected providers if logged in
|
| 158 |
+
useEffect(() => {
|
| 159 |
+
if (!auth.isLoggedIn) return;
|
| 160 |
+
const check = async () => {
|
| 161 |
+
try {
|
| 162 |
+
const data = await api.getKeys();
|
| 163 |
+
setConnectedProviders(data.connected_providers);
|
| 164 |
+
} catch (_) {}
|
| 165 |
+
};
|
| 166 |
+
check();
|
| 167 |
+
}, [auth.isLoggedIn, setConnectedProviders]);
|
| 168 |
+
|
| 169 |
+
if (!auth.isLoggedIn) {
|
| 170 |
+
return (
|
| 171 |
+
<AnimatePresence mode="wait">
|
| 172 |
+
<motion.div
|
| 173 |
+
key="login"
|
| 174 |
+
initial={{ opacity: 0 }}
|
| 175 |
+
animate={{ opacity: 1 }}
|
| 176 |
+
exit={{ opacity: 0 }}
|
| 177 |
+
transition={{ duration: 0.3 }}
|
| 178 |
+
>
|
| 179 |
+
<LoginPage />
|
| 180 |
+
</motion.div>
|
| 181 |
+
</AnimatePresence>
|
| 182 |
+
);
|
| 183 |
+
}
|
| 184 |
+
|
| 185 |
+
return (
|
| 186 |
+
<div className="app-layout">
|
| 187 |
+
<Sidebar collapsed={sidebarCollapsed} setCollapsed={setSidebarCollapsed} />
|
| 188 |
+
<main className={`main-content${sidebarCollapsed ? ' sidebar-collapsed' : ''}`}>
|
| 189 |
+
<AnimatePresence mode="wait">
|
| 190 |
+
<motion.div
|
| 191 |
+
key={location.pathname}
|
| 192 |
+
initial={{ opacity: 0, y: 8 }}
|
| 193 |
+
animate={{ opacity: 1, y: 0 }}
|
| 194 |
+
exit={{ opacity: 0, y: -8 }}
|
| 195 |
+
transition={{ duration: 0.2 }}
|
| 196 |
+
style={{ flex: 1, overflow: 'hidden', display: 'flex', flexDirection: 'column', minHeight: 0 }}
|
| 197 |
+
>
|
| 198 |
+
<Routes>
|
| 199 |
+
<Route path="/" element={<Playground />} />
|
| 200 |
+
<Route path="/analytics" element={<Analytics />} />
|
| 201 |
+
<Route path="/models" element={<ModelRegistry />} />
|
| 202 |
+
<Route path="/settings" element={<SettingsPage />} />
|
| 203 |
+
</Routes>
|
| 204 |
+
</motion.div>
|
| 205 |
+
</AnimatePresence>
|
| 206 |
+
</main>
|
| 207 |
+
</div>
|
| 208 |
+
);
|
| 209 |
+
}
|
| 210 |
+
|
| 211 |
+
export default function App() {
|
| 212 |
+
return (
|
| 213 |
+
<BrowserRouter basename="/ui">
|
| 214 |
+
<AppShell />
|
| 215 |
+
</BrowserRouter>
|
| 216 |
+
);
|
| 217 |
+
}
|
frontend/src/api.ts
ADDED
|
@@ -0,0 +1,153 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// API client for LLMOpt backend
|
| 2 |
+
// Session ID is stored in localStorage and sent as Authorization: Bearer <token>
|
| 3 |
+
// This avoids httponly cookie issues on localhost
|
| 4 |
+
import type {
|
| 5 |
+
GenerateRequest,
|
| 6 |
+
GenerateResponse,
|
| 7 |
+
ExplainResponse,
|
| 8 |
+
HistoryItem,
|
| 9 |
+
DashboardStats,
|
| 10 |
+
ModelSpec,
|
| 11 |
+
} from './types';
|
| 12 |
+
|
| 13 |
+
const BASE = ''; // same-origin (served by FastAPI or proxied by Vite)
|
| 14 |
+
|
| 15 |
+
// Get session token from localStorage
|
| 16 |
+
/**
 * Read the persisted session token.
 *
 * @returns the value stored under the `llmopt_session` key, or `null` when
 *          nothing is stored.
 */
export function getStoredSession(): string | null {
  const token = localStorage.getItem('llmopt_session');
  return token;
}
|
| 19 |
+
|
| 20 |
+
/**
 * Persist or clear the session token.
 *
 * @param id token to store under `llmopt_session`; any falsy value
 *           (`null` or an empty string) removes the stored entry instead.
 */
export function setStoredSession(id: string | null) {
  if (!id) {
    localStorage.removeItem('llmopt_session');
    return;
  }
  localStorage.setItem('llmopt_session', id);
}
|
| 24 |
+
|
| 25 |
+
/**
 * Turn an error-response body into a message suitable for `Error`.
 *
 * A non-empty string `detail` is used as-is. Any other truthy `detail`
 * (for example a list of validation problems) is serialized as JSON so it
 * does not collapse to "[object Object]". Everything else yields `fallback`.
 */
function describeErrorDetail(body: unknown, fallback: string): string {
  const detail = (body as { detail?: unknown } | null | undefined)?.detail;
  if (!detail) return fallback;
  if (typeof detail === 'string') return detail;
  try {
    return JSON.stringify(detail) ?? fallback;
  } catch {
    return fallback;
  }
}

/**
 * Perform a JSON request against the backend.
 *
 * @param path         URL path appended to `BASE`.
 * @param options      extra `fetch` options; caller-supplied headers take
 *                     precedence over the default `Content-Type`.
 * @param requiresAuth when true and a session token is stored, it is sent as
 *                     `Authorization: Bearer <token>`.
 * @returns the parsed JSON body, or `undefined` when the response carries no
 *          body (204/205 or an empty payload).
 * @throws Error with a numeric `status` property when the response is not ok;
 *         the message comes from the body's `detail` field when available.
 */
async function request<T>(
  path: string,
  options: RequestInit = {},
  requiresAuth = true,
): Promise<T> {
  // `Headers` accepts every HeadersInit shape (plain object, tuple list, or
  // another Headers instance), so caller-supplied headers are never dropped.
  const headers = new Headers(options.headers);
  if (!headers.has('Content-Type')) {
    headers.set('Content-Type', 'application/json');
  }

  // Inject session token as Bearer header
  if (requiresAuth) {
    const session = getStoredSession();
    if (session) {
      headers.set('Authorization', `Bearer ${session}`);
    }
  }

  const res = await fetch(`${BASE}${path}`, {
    credentials: 'include',
    ...options,
    headers,
  });

  if (!res.ok) {
    // Fall back to the status text when the error body is not valid JSON.
    const body = await res.json().catch(() => ({ detail: res.statusText }));
    const err = new Error(describeErrorDetail(body, `HTTP ${res.status}`));
    (err as any).status = res.status;
    throw err;
  }

  // Successful responses without a body must not be fed to the JSON parser.
  if (res.status === 204 || res.status === 205) return undefined as T;
  const text = await res.text();
  return (text ? JSON.parse(text) : undefined) as T;
}
|
| 58 |
+
|
| 59 |
+
/**
 * Typed client for the LLMOpt backend endpoints.
 *
 * Every method except `stream` goes through `request`; `health`, `models`,
 * `register` and `login` are issued without the Authorization header.
 */
export const api = {
  health: () => request<{ status: string; version: string }>('/health', {}, false),

  generate: (req: GenerateRequest) =>
    request<GenerateResponse>('/generate', {
      method: 'POST',
      body: JSON.stringify(req),
    }),

  explain: (
    query: string,
    budget_mode: string,
    params?: {
      alpha?: number;
      beta?: number;
      gamma?: number;
      compression_enabled?: boolean;
      exclude_providers?: string[];
      only_providers?: string[];
    }
  ) =>
    request<ExplainResponse>('/explain', {
      method: 'POST',
      body: JSON.stringify({ query, budget_mode, ...params }),
    }), // explain requires auth to access session keys

  models: () =>
    request<{ models: ModelSpec[] }>('/models', {}, false),

  // Auth
  register: (email: string, password: string) =>
    request<{ message: string }>('/auth/register', {
      method: 'POST',
      body: JSON.stringify({ email, password }),
    }, false),

  login: (email: string, password: string) =>
    request<{ message: string; session_id: string }>('/auth/login', {
      method: 'POST',
      body: JSON.stringify({ email, password }),
    }, false),

  logout: () =>
    request<{ message: string }>('/auth/logout', { method: 'POST' }),

  getKeys: () =>
    request<{ connected_providers: string[] }>('/auth/keys'),

  updateKeys: (api_keys: Record<string, string>) =>
    request<{ message: string }>('/auth/keys', {
      method: 'POST',
      body: JSON.stringify({ api_keys }),
    }),

  deleteKey: (provider: string) =>
    // Escape the provider so characters such as "/" or "?" cannot alter the path.
    request<{ message: string }>(`/auth/keys/${encodeURIComponent(provider)}`, {
      method: 'DELETE',
    }),

  getDashboardStats: () =>
    request<DashboardStats>('/auth/dashboard-stats'),

  getHistory: () =>
    request<HistoryItem[]>('/auth/history'),

  /**
   * POST to `/stream` and forward each decoded text chunk to `onChunk`.
   *
   * Resolves once the response body is exhausted. Rejects with an Error
   * carrying a numeric `status` property when the response is not ok.
   */
  stream: async (req: GenerateRequest, onChunk: (chunk: string) => void) => {
    const session = getStoredSession();
    const headers: Record<string, string> = { 'Content-Type': 'application/json' };
    if (session) headers['Authorization'] = `Bearer ${session}`;

    // Use the same base prefix as every other endpoint.
    const res = await fetch(`${BASE}/stream`, {
      method: 'POST',
      credentials: 'include',
      headers,
      body: JSON.stringify(req),
    });

    if (!res.ok) {
      const body = await res.json().catch(() => ({ detail: res.statusText }));
      const err = new Error(body?.detail || `HTTP ${res.status}`);
      // Expose the HTTP status the same way `request` does.
      (err as any).status = res.status;
      throw err;
    }

    const reader = res.body?.getReader();
    if (!reader) return;
    const decoder = new TextDecoder();

    try {
      while (true) {
        const { done, value } = await reader.read();
        if (done) break;
        onChunk(decoder.decode(value, { stream: true }));
      }
      // Flush bytes the decoder buffered while waiting for the rest of a
      // multi-byte sequence.
      const tail = decoder.decode();
      if (tail) onChunk(tail);
    } catch (err) {
      // Stop the download when reading or the consumer callback fails.
      await reader.cancel().catch(() => undefined);
      throw err;
    }
  },
};
|
frontend/src/assets/react.svg
ADDED
|
|
frontend/src/index.css
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/* Document-wide typography and the default (dark) palette. */
:root {
  font-family: Inter, system-ui, Avenir, Helvetica, Arial, sans-serif;
  line-height: 1.5;
  font-weight: normal;

  /* Both schemes are supported; the values below are the dark defaults. */
  color-scheme: light dark;
  color: rgba(255, 255, 255, 0.87);
  background-color: #242424;

  /* Text rendering hints. */
  font-synthesis: none;
  text-rendering: optimizeLegibility;
  -webkit-font-smoothing: antialiased;
  -moz-osx-font-smoothing: grayscale;
}

/* Links */
a {
  font-weight: 500;
  color: #646cff;
  text-decoration: inherit;
}

a:hover {
  color: #535bf2;
}

/* Page body: centers its content and fills at least the viewport height. */
body {
  margin: 0;
  display: flex;
  place-items: center;
  min-width: 320px;
  min-height: 100vh;
}

h1 {
  font-size: 3.2em;
  line-height: 1.1;
}

/* Buttons */
button {
  border-radius: 8px;
  border: 1px solid transparent;
  padding: 0.6em 1.2em;
  font-size: 1em;
  font-weight: 500;
  font-family: inherit;
  background-color: #1a1a1a;
  cursor: pointer;
  transition: border-color 250ms;
}

button:hover {
  border-color: #646cff;
}

button:focus,
button:focus-visible {
  outline: 4px auto -webkit-focus-ring-color;
}

/* Light-scheme overrides for the defaults above. */
@media (prefers-color-scheme: light) {
  :root {
    color: #213547;
    background-color: #fff;
  }

  a:hover {
    color: #747bff;
  }

  button {
    background-color: #f9f9f9;
  }
}
|
frontend/src/main.tsx
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { StrictMode } from 'react'
|
| 2 |
+
import { createRoot } from 'react-dom/client'
|
| 3 |
+
import './theme.css'
|
| 4 |
+
import App from './App.tsx'
|
| 5 |
+
|
| 6 |
+
// Entry point: attach a React root to the #root element and render the app
// inside StrictMode.
const rootElement = document.getElementById('root')!;
const root = createRoot(rootElement);

root.render(
  <StrictMode>
    <App />
  </StrictMode>,
);
|
frontend/src/pages/Analytics.tsx
ADDED
|
@@ -0,0 +1,385 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { Fragment, useEffect, useMemo, useState } from 'react';
|
| 2 |
+
import { motion } from 'framer-motion';
|
| 3 |
+
import { BarChart3, TrendingUp, TrendingDown } from 'lucide-react';
|
| 4 |
+
import {
|
| 5 |
+
AreaChart,
|
| 6 |
+
Area,
|
| 7 |
+
BarChart,
|
| 8 |
+
Bar,
|
| 9 |
+
XAxis,
|
| 10 |
+
YAxis,
|
| 11 |
+
CartesianGrid,
|
| 12 |
+
Tooltip,
|
| 13 |
+
ResponsiveContainer,
|
| 14 |
+
Cell,
|
| 15 |
+
Legend,
|
| 16 |
+
} from 'recharts';
|
| 17 |
+
import { api } from '../api';
|
| 18 |
+
import type { DashboardStats, HistoryItem } from '../types';
|
| 19 |
+
|
| 20 |
+
// ─── Mock time-series data (since backend doesn't expose it yet) ──────────────
|
| 21 |
+
|
| 22 |
+
/**
 * Build a mock daily series that ends today.
 *
 * @param days     number of points to produce, oldest first.
 * @param base     center of the random `actual` value.
 * @param variance total width of the random spread around `base`.
 * @returns one entry per day with a short `en-US` date label, a non-negative
 *          random `actual`, and a constant `baseline` of `base * 8`.
 */
function generateDailyData(days: number, base: number, variance: number) {
  const today = new Date();
  const points: { date: string; actual: number; baseline: number }[] = [];

  // Walk from the oldest day (days - 1 ago) up to today.
  for (let daysAgo = days - 1; daysAgo >= 0; daysAgo -= 1) {
    const day = new Date(today);
    day.setDate(day.getDate() - daysAgo);

    const jitter = (Math.random() - 0.5) * variance;
    points.push({
      date: day.toLocaleDateString('en-US', { month: 'short', day: 'numeric' }),
      actual: Math.max(0, base + jitter),
      baseline: base * 8,
    });
  }

  return points;
}
|
| 34 |
+
|
| 35 |
+
// Chart color per provider key; `other` is the fallback entry used for
// providers that have no color of their own.
const PROVIDER_COLORS: Record<string, string> = {
  'openai': '#00E5FF',
  'anthropic': '#7C4DFF',
  'google': '#00FF94',
  'ollama': '#FFB300',
  'cohere': '#FF3D57',
  'other': '#7A8299',
};
|
| 43 |
+
|
| 44 |
+
// ─── Custom Tooltip ───────────────────────────────────────────────────────────
|
| 45 |
+
|
| 46 |
+
/**
 * Tooltip body for the cost chart.
 *
 * Renders nothing unless the tooltip is active and has at least one payload
 * entry; otherwise shows the label followed by one dollar-prefixed line per
 * series (numbers are formatted with four decimals).
 */
const CustomTooltip = ({ active, payload, label }: any) => {
  const hasEntries = Boolean(active) && Boolean(payload?.length);
  if (!hasEntries) return null;

  const formatValue = (raw: any) => (typeof raw === 'number' ? raw.toFixed(4) : raw);

  return (
    <div
      style={{
        background: 'var(--bg-elevated)',
        border: '1px solid var(--bg-border)',
        borderRadius: '8px',
        padding: '12px 16px',
        fontSize: 'var(--text-xs)',
        fontFamily: 'JetBrains Mono, monospace',
      }}
    >
      <div style={{ color: 'var(--text-secondary)', marginBottom: 8 }}>{label}</div>
      {payload.map((series: any) => (
        <div key={series.name} style={{ color: series.color, marginBottom: 4 }}>
          {series.name}: ${formatValue(series.value)}
        </div>
      ))}
    </div>
  );
};
|
| 66 |
+
|
| 67 |
+
// ─── KPI Card ─────────────────────────────────────────────────────────────────
|
| 68 |
+
|
| 69 |
+
/**
 * Animated KPI tile.
 *
 * Shows `label` and `value`; when `delta` is provided, a trend row is added
 * whose icon and CSS class follow `deltaPositive`. `color` is appended to the
 * card's class name and `delay` offsets the entrance animation.
 */
function KPICard({
  label,
  value,
  delta,
  deltaPositive,
  color,
  delay,
}: {
  label: string;
  value: string;
  delta?: string;
  deltaPositive?: boolean;
  color: string;
  delay: number;
}) {
  const TrendIcon = deltaPositive ? TrendingUp : TrendingDown;
  const trendClass = `metric-card-delta ${deltaPositive ? 'delta-up' : 'delta-down'}`;

  // The trend row is only rendered when a delta caption exists.
  const trendRow = delta ? (
    <div className={trendClass}>
      <TrendIcon size={12} />
      {delta}
    </div>
  ) : null;

  return (
    <motion.div
      className={`metric-card ${color}`}
      initial={{ opacity: 0, y: 16 }}
      animate={{ opacity: 1, y: 0 }}
      transition={{ delay, duration: 0.4 }}
    >
      <div className="metric-card-label">{label}</div>
      <div className="metric-card-value">{value}</div>
      {trendRow}
    </motion.div>
  );
}
|
| 102 |
+
|
| 103 |
+
// ─── Query Log Table ──────────────────────────────────────────────────────────
|
| 104 |
+
|
| 105 |
+
/**
 * Horizontal bar plus numeric label for a complexity score.
 *
 * The score is scaled by 100 for both the fill width and the label. Scores
 * below 0.4 are green, below 0.7 amber, and everything else red.
 */
function ComplexityBar({ score }: { score: number }) {
  let color = 'var(--accent-red)';
  if (score < 0.4) {
    color = 'var(--accent-green)';
  } else if (score < 0.7) {
    color = 'var(--accent-amber)';
  }

  const percent = score * 100;
  const labelStyle = {
    fontFamily: 'JetBrains Mono, monospace',
    fontSize: 'var(--text-xs)',
    color,
  };

  return (
    <div className="complexity-bar">
      <div className="complexity-bar-track">
        <div
          className="complexity-bar-fill"
          style={{ width: `${percent}%`, background: color }}
        />
      </div>
      <span style={labelStyle}>{percent.toFixed(0)}</span>
    </div>
  );
}
|
| 125 |
+
|
| 126 |
+
|
| 127 |
+
|
| 128 |
+
/**
 * Table of past queries; clicking a row toggles a detail panel beneath it.
 *
 * @param items history entries to list. An empty list renders an
 *              empty-state placeholder instead of the table.
 */
function QueryLogTable({ items }: { items: HistoryItem[] }) {
  // id of the row whose detail panel is open, or null when all are collapsed.
  const [expanded, setExpanded] = useState<number | null>(null);

  if (items.length === 0) {
    return (
      <div className="empty-state">
        <div className="empty-state-icon">π</div>
        <div className="empty-state-title">No Query History</div>
        <div className="empty-state-desc">Run queries in the Playground to see them here.</div>
      </div>
    );
  }

  return (
    <div style={{ overflowX: 'auto' }}>
      <table className="data-table">
        <thead>
          <tr>
            <th>Time</th>
            <th>Query</th>
            <th>Complexity</th>
            <th>Model</th>
            <th>Cost</th>
            <th>Latency</th>
            <th>Tier</th>
          </tr>
        </thead>
        <tbody>
          {items.map((item) => {
            const isOpen = expanded === item.id;
            const response = item.response || '';

            return (
              // The key must sit on the outermost element returned from map();
              // keys on the rows inside an unkeyed fragment are not enough.
              <Fragment key={item.id}>
                <tr onClick={() => setExpanded(isOpen ? null : item.id)}>
                  <td>
                    <span style={{ fontFamily: 'JetBrains Mono, monospace', fontSize: 'var(--text-xs)', color: 'var(--text-muted)' }}>
                      {item.time_ago}
                    </span>
                  </td>
                  <td>
                    <div className="truncate" style={{ maxWidth: 240, fontSize: 'var(--text-sm)' }}>
                      {item.query}
                    </div>
                  </td>
                  <td><ComplexityBar score={item.complexity_score || 0} /></td>
                  <td>
                    <div style={{ fontFamily: 'JetBrains Mono, monospace', fontSize: 'var(--text-xs)', color: 'var(--accent-cyan)' }}>
                      {/* Show only the last two dash-separated parts of the model id. */}
                      {item.model_used?.split('-').slice(-2).join('-') || 'β'}
                    </div>
                  </td>
                  <td>
                    <span style={{ fontFamily: 'JetBrains Mono, monospace', fontSize: 'var(--text-xs)', color: 'var(--accent-green)' }}>
                      ${(item.estimated_cost || 0).toFixed(6)}
                    </span>
                  </td>
                  <td>
                    <span style={{ fontFamily: 'JetBrains Mono, monospace', fontSize: 'var(--text-xs)' }}>
                      {item.latency_ms?.toFixed(0) || 'β'}ms
                    </span>
                  </td>
                  <td>
                    <span className={`badge badge-${item.complexity_tier === 'easy' ? 'green' : item.complexity_tier === 'hard' ? 'red' : 'amber'}`}>
                      {item.complexity_tier || 'std'}
                    </span>
                  </td>
                </tr>
                {isOpen && (
                  <tr>
                    <td colSpan={7} style={{ padding: 0 }}>
                      <div style={{
                        padding: 'var(--sp-4) var(--sp-5)',
                        background: 'var(--bg-base)',
                        borderTop: '1px solid var(--bg-border)',
                        fontFamily: 'JetBrains Mono, monospace',
                        fontSize: 'var(--text-xs)',
                        lineHeight: 1.8,
                      }}>
                        <div style={{ color: 'var(--accent-cyan)' }}>{'>'} Full query:</div>
                        <div style={{ color: 'var(--text-secondary)', margin: '4px 0 12px', whiteSpace: 'pre-wrap' }}>{item.query}</div>
                        <div style={{ color: 'var(--accent-cyan)' }}>{'>'} Response snippet:</div>
                        <div style={{ color: 'var(--text-secondary)', margin: '4px 0', whiteSpace: 'pre-wrap' }}>
                          {/* Cap the preview at 400 characters. */}
                          {response.slice(0, 400)}{response.length > 400 ? '...' : ''}
                        </div>
                        <div style={{ display: 'flex', gap: 'var(--sp-6)', marginTop: 12, color: 'var(--text-muted)' }}>
                          <span>Tokens in: {item.input_tokens}</span>
                          <span>Tokens out: {item.output_tokens}</span>
                          <span>Saved: {item.tokens_saved}</span>
                          <span>Cost saved: ${(item.cost_saved || 0).toFixed(6)}</span>
                        </div>
                      </div>
                    </td>
                  </tr>
                )}
              </Fragment>
            );
          })}
        </tbody>
      </table>
    </div>
  );
}
|
| 225 |
+
|
| 226 |
+
// ─── Analytics Page ───────────────────────────────────────────────────────────
|
| 227 |
+
|
| 228 |
+
/**
 * Analytics page: KPI cards, a cost-over-time chart, the provider
 * distribution chart, and the query log.
 *
 * Dashboard stats and history are fetched together once on mount. The cost
 * series comes from `generateDailyData` (random mock data), so it is memoized
 * per selected range to keep the chart stable across unrelated re-renders.
 */
export default function Analytics() {
  const [stats, setStats] = useState<DashboardStats | null>(null);
  const [history, setHistory] = useState<HistoryItem[]>([]);
  const [loading, setLoading] = useState(true);
  const [error, setError] = useState('');
  const [chartRange, setChartRange] = useState<7 | 30>(7);

  // Regenerate the random series only when the range toggle changes.
  const costData = useMemo(
    () => generateDailyData(chartRange, 0.05, 0.08),
    [chartRange],
  );

  // One bar per provider; unknown providers use the `other` color.
  const modelData = useMemo(
    () =>
      stats
        ? Object.entries(stats.distribution).map(([name, pct]) => ({
            name: name.charAt(0).toUpperCase() + name.slice(1),
            value: pct,
            fill: PROVIDER_COLORS[name] || PROVIDER_COLORS.other,
          }))
        : [],
    [stats],
  );

  useEffect(() => {
    // Set by the cleanup so a late response does not update an unmounted page.
    let cancelled = false;

    const load = async () => {
      try {
        const [s, h] = await Promise.all([
          api.getDashboardStats(),
          api.getHistory(),
        ]);
        if (cancelled) return;
        setStats(s);
        setHistory(h);
      } catch (e: any) {
        if (!cancelled) setError(e.message || 'Failed to load analytics');
      } finally {
        if (!cancelled) setLoading(false);
      }
    };
    load();

    return () => {
      cancelled = true;
    };
  }, []);

  if (loading) {
    return (
      <div style={{ display: 'flex', alignItems: 'center', justifyContent: 'center', height: '100%' }}>
        <span className="spinner" style={{ width: 32, height: 32, borderWidth: 3 }} />
      </div>
    );
  }

  return (
    <div style={{ display: 'flex', flexDirection: 'column', flex: 1, minHeight: 0, overflow: 'hidden' }}>
      <div className="topbar">
        <div className="topbar-breadcrumb">
          <BarChart3 size={14} style={{ color: 'var(--accent-cyan)' }} />
          <strong>Analytics</strong>
          <span style={{ color: 'var(--text-muted)' }}>/ Observability Dashboard</span>
        </div>
      </div>

      <div className="page-content" style={{ display: 'flex', flexDirection: 'column', gap: 'var(--sp-5)', flex: 1, overflowY: 'auto' }}>
        {error && <div className="auth-error">β {error}</div>}

        {/* KPI Row */}
        <div className="grid-4">
          <KPICard label="Total Saved" value={stats?.routing_savings || '$0.00'} delta="from routing" deltaPositive color="green" delay={0} />
          <KPICard label="Queries Run" value={String(stats?.prompts_improved || 0)} color="cyan" delay={0.08} />
          <KPICard label="Avg Quality Boost" value={stats?.avg_boost || '0%'} delta="complexity-adjusted" deltaPositive color="purple" delay={0.16} />
          <KPICard label="Tokens Saved" value={stats?.tokens_saved || '0'} delta="vs uncompressed" deltaPositive color="amber" delay={0.24} />
        </div>

        {/* Cost Over Time + Model Distribution */}
        <div className="grid-2">
          <div className="card">
            <div className="card-header">
              <div>
                <div className="card-title">Cost Over Time</div>
                <div className="card-subtitle">Actual vs GPT-4o baseline (USD)</div>
              </div>
              <div style={{ display: 'flex', gap: 'var(--sp-2)' }}>
                {([7, 30] as const).map((d) => (
                  <button
                    key={d}
                    className={`btn btn-ghost btn-sm${chartRange === d ? ' active' : ''}`}
                    style={chartRange === d ? { borderColor: 'var(--accent-cyan)', color: 'var(--accent-cyan)' } : {}}
                    onClick={() => setChartRange(d)}
                  >
                    {d}d
                  </button>
                ))}
              </div>
            </div>
            <div className="card-body">
              <ResponsiveContainer width="100%" height={200}>
                <AreaChart data={costData} margin={{ top: 5, right: 5, bottom: 5, left: 5 }}>
                  <defs>
                    <linearGradient id="costGrad" x1="0" y1="0" x2="0" y2="1">
                      <stop offset="5%" stopColor="#00E5FF" stopOpacity={0.3} />
                      <stop offset="95%" stopColor="#00E5FF" stopOpacity={0} />
                    </linearGradient>
                    <linearGradient id="baseGrad" x1="0" y1="0" x2="0" y2="1">
                      <stop offset="5%" stopColor="#FF3D57" stopOpacity={0.1} />
                      <stop offset="95%" stopColor="#FF3D57" stopOpacity={0} />
                    </linearGradient>
                  </defs>
                  <CartesianGrid strokeDasharray="3 3" stroke="var(--bg-border)" />
                  <XAxis dataKey="date" tick={{ fill: 'var(--text-muted)', fontSize: 11 }} />
                  <YAxis tick={{ fill: 'var(--text-muted)', fontSize: 11 }} tickFormatter={(v) => `$${v.toFixed(2)}`} />
                  <Tooltip content={<CustomTooltip />} />
                  <Legend wrapperStyle={{ fontSize: 12, color: 'var(--text-secondary)' }} />
                  <Area type="monotone" dataKey="baseline" name="GPT-4o Baseline" stroke="#FF3D57" strokeDasharray="5 5" fill="url(#baseGrad)" strokeWidth={2} />
                  <Area type="monotone" dataKey="actual" name="LLMOpt Actual" stroke="#00E5FF" fill="url(#costGrad)" strokeWidth={2} dot={{ fill: '#00E5FF', r: 3 }} />
                </AreaChart>
              </ResponsiveContainer>
            </div>
          </div>

          <div className="card">
            <div className="card-header">
              <div>
                <div className="card-title">Model Usage</div>
                <div className="card-subtitle">Distribution by provider (%)</div>
              </div>
            </div>
            <div className="card-body">
              {modelData.length > 0 ? (
                <ResponsiveContainer width="100%" height={200}>
                  <BarChart data={modelData} layout="vertical" margin={{ top: 5, right: 20, bottom: 5, left: 60 }}>
                    <CartesianGrid strokeDasharray="3 3" stroke="var(--bg-border)" horizontal={false} />
                    <XAxis type="number" tick={{ fill: 'var(--text-muted)', fontSize: 11 }} tickFormatter={(v) => `${v}%`} />
                    <YAxis type="category" dataKey="name" tick={{ fill: 'var(--text-secondary)', fontSize: 12, fontFamily: 'JetBrains Mono' }} />
                    <Tooltip
                      formatter={(v: any) => [`${v}%`, 'Share']}
                      contentStyle={{ background: 'var(--bg-elevated)', border: '1px solid var(--bg-border)', borderRadius: 8, fontSize: 12 }}
                    />
                    <Bar dataKey="value" radius={[0, 4, 4, 0]}>
                      {modelData.map((entry, i) => (
                        <Cell key={i} fill={entry.fill} />
                      ))}
                    </Bar>
                  </BarChart>
                </ResponsiveContainer>
              ) : (
                <div className="empty-state" style={{ padding: 'var(--sp-8)' }}>
                  <div className="empty-state-title">No data yet</div>
                  <div className="empty-state-desc">Run queries to see model distribution.</div>
                </div>
              )}
            </div>
          </div>
        </div>

        {/* Query Log */}
        <div className="card">
          <div className="card-header">
            <div>
              <div className="card-title">Query Log</div>
              <div className="card-subtitle">Last 20 requests β click to expand</div>
            </div>
          </div>
          <QueryLogTable items={history} />
        </div>
      </div>
    </div>
  );
}
|
frontend/src/pages/Login.tsx
ADDED
|
@@ -0,0 +1,209 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { useState } from 'react';
|
| 2 |
+
import { motion, AnimatePresence } from 'framer-motion';
|
| 3 |
+
import { Zap } from 'lucide-react';
|
| 4 |
+
import { api, setStoredSession } from '../api';
|
| 5 |
+
import { useAppStore } from '../store';
|
| 6 |
+
|
| 7 |
+
type Mode = 'login' | 'register';

/**
 * Extract a displayable message from whatever a failed auth call threw.
 *
 * Falls back to a generic message when the thrown value carries no non-empty
 * string `message` (including `null`/`undefined`, which would otherwise make
 * the error handler itself throw).
 */
function authErrorMessage(err: unknown): string {
  const message = (err as { message?: unknown } | null | undefined)?.message;
  return typeof message === 'string' && message ? message : 'Authentication failed';
}

/**
 * Sign-in / sign-up page.
 *
 * Submitting the form calls `api.register` first when in register mode, then
 * always calls `api.login`, persists the returned `session_id` with
 * `setStoredSession`, and marks the store as logged in via `setAuth`.
 * Afterwards it tries to load the connected providers with `api.getKeys`.
 * The two OAuth buttons navigate the browser to `/auth/login/google` and
 * `/auth/login/github`.
 */
export default function LoginPage() {
  const { setAuth, setConnectedProviders } = useAppStore();
  const [mode, setMode] = useState<Mode>('login');
  const [email, setEmail] = useState('');
  const [password, setPassword] = useState('');
  const [loading, setLoading] = useState(false);
  const [error, setError] = useState('');

  const handleSubmit = async (e: React.FormEvent) => {
    e.preventDefault();
    if (!email.trim() || !password.trim()) return;
    setLoading(true);
    setError('');

    try {
      if (mode === 'register') {
        // The register result is not used; the login call below establishes the session.
        await api.register(email, password);
      }
      const data = await api.login(email, password);
      setStoredSession(data.session_id);
      setAuth({ isLoggedIn: true, sessionId: data.session_id });

      // Best effort: failing to list provider keys must not undo a successful login.
      try {
        const keys = await api.getKeys();
        setConnectedProviders(keys.connected_providers);
      } catch {
        // Deliberately ignored; the connected-provider list simply stays as it was.
      }
    } catch (err: unknown) {
      setError(authErrorMessage(err));
    } finally {
      setLoading(false);
    }
  };

  const handleGoogleLogin = () => {
    window.location.href = '/auth/login/google';
  };

  const handleGithubLogin = () => {
    window.location.href = '/auth/login/github';
  };

  // Switch between the login and register variants and drop any stale error.
  const switchMode = (next: Mode) => {
    setMode(next);
    setError('');
  };

  // The mode links are styled spans, so Enter/Space must be wired up by hand
  // to make them operable from the keyboard.
  const handleLinkKey = (next: Mode) => (e: React.KeyboardEvent) => {
    if (e.key === 'Enter' || e.key === ' ') {
      e.preventDefault();
      switchMode(next);
    }
  };

  return (
    <div className="auth-page">
      {/* Background */}
      <div className="auth-bg-grid" />
      <div className="auth-bg-glow" />
      <div className="auth-bg-glow-2" />

      {/* Floating particles */}
      {[...Array(6)].map((_, i) => (
        <motion.div
          key={i}
          style={{
            position: 'absolute',
            width: `${4 + i * 2}px`,
            height: `${4 + i * 2}px`,
            borderRadius: '50%',
            background: i % 2 === 0 ? 'var(--accent-cyan)' : 'var(--accent-purple)',
            opacity: 0.3,
            left: `${15 + i * 14}%`,
            top: `${20 + (i % 3) * 25}%`,
          }}
          animate={{
            y: [0, -20, 0],
            opacity: [0.3, 0.6, 0.3],
          }}
          transition={{
            duration: 3 + i * 0.5,
            repeat: Infinity,
            ease: 'easeInOut',
            delay: i * 0.4,
          }}
        />
      ))}

      <AnimatePresence mode="wait">
        <motion.div
          key={mode}
          className="auth-card"
          initial={{ opacity: 0, y: 24, scale: 0.97 }}
          animate={{ opacity: 1, y: 0, scale: 1 }}
          exit={{ opacity: 0, y: -16, scale: 0.97 }}
          transition={{ duration: 0.3 }}
        >
          {/* Logo */}
          <div className="auth-logo">
            <div className="auth-logo-icon"><Zap size={28} fill="currentColor" /></div>
            <div className="auth-logo-text">LLM<span>Opt</span></div>
          </div>

          <div className="auth-title">
            {mode === 'login' ? 'Welcome back' : 'Create account'}
          </div>
          <div className="auth-subtitle">
            {mode === 'login'
              ? 'Sign in to your LLMOpt workspace'
              : 'Start optimizing your LLM costs today'}
          </div>

          {/* OAuth */}
          <div style={{ display: 'flex', gap: 'var(--sp-3)', marginBottom: 'var(--sp-4)' }}>
            <button type="button" className="oauth-btn" onClick={handleGoogleLogin}>
              <svg width="18" height="18" viewBox="0 0 24 24" aria-hidden="true">
                <path d="M22.56 12.25c0-.78-.07-1.53-.2-2.25H12v4.26h5.92c-.26 1.37-1.04 2.53-2.21 3.31v2.77h3.57c2.08-1.92 3.28-4.74 3.28-8.09z" fill="#4285F4"/>
                <path d="M12 23c2.97 0 5.46-.98 7.28-2.66l-3.57-2.77c-.98.66-2.23 1.06-3.71 1.06-2.86 0-5.29-1.93-6.16-4.53H2.18v2.84C3.99 20.53 7.7 23 12 23z" fill="#34A853"/>
                <path d="M5.84 14.09c-.22-.66-.35-1.36-.35-2.09s.13-1.43.35-2.09V7.07H2.18C1.43 8.55 1 10.22 1 12s.43 3.45 1.18 4.93l2.85-2.22.81-.62z" fill="#FBBC05"/>
                <path d="M12 5.38c1.62 0 3.06.56 4.21 1.64l3.15-3.15C17.45 2.09 14.97 1 12 1 7.7 1 3.99 3.47 2.18 7.07l3.66 2.84c.87-2.6 3.3-4.53 6.16-4.53z" fill="#EA4335"/>
              </svg>
              Continue with Google
            </button>
            <button type="button" className="oauth-btn" onClick={handleGithubLogin}>
              <svg width="18" height="18" viewBox="0 0 24 24" fill="currentColor" aria-hidden="true"><path d="M12 2C6.477 2 2 6.477 2 12c0 4.42 2.865 8.167 6.839 9.49.5.092.682-.217.682-.482 0-.237-.008-.866-.013-1.7-2.782.603-3.369-1.342-3.369-1.342-.454-1.155-1.11-1.462-1.11-1.462-.908-.62.069-.608.069-.608 1.003.07 1.531 1.03 1.531 1.03.892 1.529 2.341 1.087 2.91.832.092-.647.35-1.088.636-1.338-2.22-.253-4.555-1.11-4.555-4.943 0-1.091.39-1.984 1.029-2.683-.103-.253-.446-1.27.098-2.647 0 0 .84-.268 2.75 1.026A9.578 9.578 0 0112 6.836c.85.004 1.705.115 2.504.337 1.909-1.294 2.747-1.026 2.747-1.026.546 1.377.202 2.394.1 2.647.64.699 1.028 1.592 1.028 2.683 0 3.842-2.339 4.687-4.566 4.935.359.309.678.919.678 1.852 0 1.336-.012 2.415-.012 2.743 0 .267.18.578.688.48C19.138 20.163 22 16.418 22 12c0-5.523-4.477-10-10-10z"/></svg>
              GitHub
            </button>
          </div>

          <div className="auth-divider">
            <div className="auth-divider-line" />
            <div className="auth-divider-text">or</div>
            <div className="auth-divider-line" />
          </div>

          {/* Form */}
          <form className="auth-form" onSubmit={handleSubmit}>
            <div className="input-group">
              <label className="input-label" htmlFor="auth-email">Email</label>
              <input
                id="auth-email"
                type="email"
                value={email}
                onChange={(e) => setEmail(e.target.value)}
                placeholder="you@company.com"
                autoComplete="email"
                required
              />
            </div>
            <div className="input-group">
              <label className="input-label" htmlFor="auth-password">Password</label>
              <input
                id="auth-password"
                type="password"
                value={password}
                onChange={(e) => setPassword(e.target.value)}
                placeholder="••••••••"
                autoComplete={mode === 'login' ? 'current-password' : 'new-password'}
                required
              />
            </div>

            {error && (
              <motion.div
                className="auth-error"
                role="alert"
                initial={{ opacity: 0 }}
                animate={{ opacity: 1 }}
              >
                {error}
              </motion.div>
            )}

            <button
              id="auth-submit-btn"
              type="submit"
              className="btn btn-primary btn-lg"
              disabled={loading}
              style={{ width: '100%', marginTop: 'var(--sp-2)' }}
            >
              {loading ? (
                <>
                  <span className="spinner" />
                  {mode === 'login' ? 'Signing in...' : 'Creating account...'}
                </>
              ) : (
                mode === 'login' ? 'Sign In' : 'Create Account'
              )}
            </button>
          </form>

          <div className="auth-footer">
            {mode === 'login' ? (
              <>
                Don't have an account?{' '}
                <span
                  className="auth-link"
                  role="button"
                  tabIndex={0}
                  onClick={() => switchMode('register')}
                  onKeyDown={handleLinkKey('register')}
                >
                  Sign up
                </span>
              </>
            ) : (
              <>
                Already have an account?{' '}
                <span
                  className="auth-link"
                  role="button"
                  tabIndex={0}
                  onClick={() => switchMode('login')}
                  onKeyDown={handleLinkKey('login')}
                >
                  Sign in
                </span>
              </>
            )}
          </div>
        </motion.div>
      </AnimatePresence>
    </div>
  );
}
|
frontend/src/pages/ModelRegistry.tsx
ADDED
|
@@ -0,0 +1,354 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { useEffect, useState } from 'react';
|
| 2 |
+
import { motion } from 'framer-motion';
|
| 3 |
+
import { Database, Star, ArrowUpDown } from 'lucide-react';
|
| 4 |
+
import { api } from '../api';
|
| 5 |
+
import type { ModelSpec } from '../types';
|
| 6 |
+
|
| 7 |
+
// Badge CSS class per provider keyword. Insertion order matters: the first
// keyword found inside the provider string wins.
const PROVIDER_COLORS: Record<string, string> = {
  openai: 'badge-cyan',
  anthropic: 'badge-purple',
  google: 'badge-green',
  ollama: 'badge-amber',
  mistral: 'badge-muted',
  deepseek: 'badge-red',
  cohere: 'badge-muted',
};

/**
 * Resolve the badge CSS class for a provider name.
 *
 * The match is case-insensitive and by substring, so a provider string that
 * merely contains a known keyword still gets that keyword's class. A missing
 * or unrecognised provider yields `'badge-muted'`.
 */
function providerBadge(provider: string) {
  const needle = provider?.toLowerCase();
  if (!needle) return 'badge-muted';

  const match = Object.entries(PROVIDER_COLORS).find(([keyword]) => needle.includes(keyword));
  return match ? match[1] : 'badge-muted';
}
|
| 24 |
+
|
| 25 |
+
/**
 * Circular gauge (60x60 SVG) showing a capability score as a coloured arc
 * with the percentage printed in the centre.
 *
 * `score` is expected in [0, 1]. Values outside that range are clamped and
 * non-finite values are treated as 0, so the arc's dash lengths can never be
 * negative or NaN. The arc is cyan from 0.8, amber from 0.6, red below.
 */
function CapabilityGauge({ score }: { score: number }) {
  const r = 24;
  const circ = 2 * Math.PI * r;
  // Normalise once; every derived value below uses the safe fraction.
  const fraction = Number.isFinite(score) ? Math.min(1, Math.max(0, score)) : 0;
  const dash = circ * fraction;
  const color =
    fraction >= 0.8
      ? 'var(--accent-cyan)'
      : fraction >= 0.6
        ? 'var(--accent-amber)'
        : 'var(--accent-red)';

  return (
    <svg width={60} height={60}>
      {/* Track */}
      <circle cx={30} cy={30} r={r} fill="none" stroke="var(--bg-border)" strokeWidth={4} />
      {/* Value arc; rotated so it starts at 12 o'clock */}
      <circle
        cx={30} cy={30} r={r}
        fill="none"
        stroke={color}
        strokeWidth={4}
        strokeDasharray={`${dash} ${circ - dash}`}
        strokeLinecap="round"
        transform="rotate(-90 30 30)"
        style={{ transition: 'stroke-dasharray 0.6s ease' }}
      />
      <text
        x={30} y={35}
        textAnchor="middle"
        fill="var(--text-primary)"
        style={{
          fontSize: '11px',
          fontFamily: 'JetBrains Mono, monospace',
          fontWeight: 700,
        }}
      >
        {(fraction * 100).toFixed(0)}
      </text>
    </svg>
  );
}
|
| 58 |
+
|
| 59 |
+
/**
 * Grid-view card for a single model: name, provider badge, capability gauge,
 * pricing, context window, per-skill score bars and optional notes.
 *
 * A model is highlighted as "BEST VALUE" when its capability score is above
 * 0.7 and its input cost per 1k tokens is below 0.002. Prices render as FREE
 * for Ollama models (matched case-insensitively, like the provider badge) and
 * for any price that is exactly 0, matching the table view.
 *
 * `index` only staggers the entrance animation.
 */
function ModelCard({ model, index }: { model: ModelSpec; index: number }) {
  // Best value: high capability + low cost
  const isBestValue = model.capability_score > 0.7 && model.input_cost_per_1k < 0.002;
  const isLocalFree = model.provider?.toLowerCase() === 'ollama';

  const prices = [
    { label: 'Input /1k', cost: model.input_cost_per_1k },
    { label: 'Output /1k', cost: model.output_cost_per_1k },
  ];

  const scores = [
    { label: 'Reasoning', value: model.reasoning_score, color: 'var(--accent-cyan)' },
    { label: 'Coding', value: model.coding_score, color: 'var(--accent-purple)' },
    { label: 'Math', value: model.math_score, color: 'var(--accent-amber)' },
  ];

  return (
    <motion.div
      className="model-card"
      style={isBestValue ? { borderColor: 'var(--accent-cyan)', boxShadow: '0 0 20px rgba(0,229,255,0.08)' } : {}}
      initial={{ opacity: 0, y: 16 }}
      animate={{ opacity: 1, y: 0 }}
      transition={{ delay: index * 0.04, duration: 0.35 }}
    >
      {isBestValue && (
        <div style={{ display: 'flex', alignItems: 'center', gap: 6, marginBottom: 'var(--sp-2)' }}>
          <Star size={12} fill="var(--accent-cyan)" color="var(--accent-cyan)" />
          <span style={{ fontSize: 'var(--text-xs)', color: 'var(--accent-cyan)', fontFamily: 'JetBrains Mono, monospace', fontWeight: 700 }}>
            BEST VALUE
          </span>
        </div>
      )}

      <div style={{ display: 'flex', alignItems: 'flex-start', justifyContent: 'space-between', gap: 'var(--sp-3)' }}>
        <div style={{ flex: 1 }}>
          <div className="model-card-name">{model.model_name}</div>
          <div style={{ marginTop: 'var(--sp-2)', display: 'flex', gap: 'var(--sp-2)', flexWrap: 'wrap' }}>
            <span className={`badge ${providerBadge(model.provider)}`}>{model.provider}</span>
            {isLocalFree && <span className="badge badge-green">FREE LOCAL</span>}
          </div>
        </div>
        <CapabilityGauge score={model.capability_score} />
      </div>

      <div className="model-card-pricing">
        {prices.map(({ label, cost }) => (
          <div key={label} className="model-card-price-item">
            <div className="model-card-price-label">{label}</div>
            <div className="model-card-price-value">
              {isLocalFree || cost === 0
                ? <span style={{ color: 'var(--accent-green)' }}>FREE</span>
                : `$${cost.toFixed(5)}`}
            </div>
          </div>
        ))}
        <div className="model-card-price-item">
          <div className="model-card-price-label">Context</div>
          <div className="model-card-price-value" style={{ color: 'var(--accent-purple)' }}>
            {model.context_window >= 1000000
              ? `${(model.context_window / 1000000).toFixed(0)}M`
              : `${(model.context_window / 1000).toFixed(0)}k`}
          </div>
        </div>
      </div>

      {/* Capability breakdown bars */}
      <div style={{ display: 'flex', flexDirection: 'column', gap: 6 }}>
        {scores.map(({ label, value, color }) => (
          <div key={label} style={{ display: 'flex', alignItems: 'center', gap: 8 }}>
            <span style={{ fontSize: 'var(--text-xs)', color: 'var(--text-muted)', width: 62, flexShrink: 0 }}>{label}</span>
            <div style={{ flex: 1, height: 4, background: 'var(--bg-border)', borderRadius: 2, overflow: 'hidden' }}>
              <div style={{ width: `${value * 100}%`, height: '100%', background: color, borderRadius: 2 }} />
            </div>
            <span style={{ fontSize: 'var(--text-xs)', color, fontFamily: 'JetBrains Mono, monospace', width: 28, textAlign: 'right' }}>
              {(value * 100).toFixed(0)}
            </span>
          </div>
        ))}
      </div>

      {model.notes && (
        <div style={{ fontSize: 'var(--text-xs)', color: 'var(--text-secondary)', borderTop: '1px solid var(--bg-border)', paddingTop: 'var(--sp-3)', lineHeight: 1.5 }}>
          {model.notes}
        </div>
      )}
    </motion.div>
  );
}
|
| 142 |
+
|
| 143 |
+
type SortKey = 'capability_score' | 'input_cost_per_1k' | 'output_cost_per_1k' | 'max_complexity';

// Sortable numeric columns of the table view, in display order.
const SORTABLE_COLUMNS: { key: SortKey; label: string }[] = [
  { key: 'capability_score', label: 'Capability' },
  { key: 'input_cost_per_1k', label: 'Input /1k' },
  { key: 'output_cost_per_1k', label: 'Output /1k' },
  { key: 'max_complexity', label: 'Max Complexity' },
];

/**
 * Model registry page.
 *
 * Loads the model list once on mount via `api.models()`, then lets the user
 * filter by model name or provider (case-insensitive substring), switch
 * between a card grid and a table, and sort by one of the numeric columns.
 * Clicking the active sort column flips the direction; clicking another
 * column selects it in descending order.
 */
export default function ModelRegistry() {
  const [models, setModels] = useState<ModelSpec[]>([]);
  const [loading, setLoading] = useState(true);
  const [error, setError] = useState('');
  const [sortKey, setSortKey] = useState<SortKey>('capability_score');
  const [sortAsc, setSortAsc] = useState(false);
  const [view, setView] = useState<'grid' | 'table'>('grid');
  const [filter, setFilter] = useState('');

  useEffect(() => {
    // Guards against state updates after the page has been unmounted while
    // the request was still in flight.
    let cancelled = false;

    const load = async () => {
      try {
        const data = await api.models();
        if (!cancelled) setModels(data.models || []);
      } catch (err: unknown) {
        const message = (err as { message?: unknown } | null | undefined)?.message;
        if (!cancelled) {
          setError(typeof message === 'string' && message ? message : 'Failed to load models');
        }
      } finally {
        if (!cancelled) setLoading(false);
      }
    };
    load();

    return () => {
      cancelled = true;
    };
  }, []);

  // Lower-case the query once instead of once per model and field.
  const query = filter.toLowerCase();
  const filtered = models.filter(
    (m) =>
      (m.model_name ?? '').toLowerCase().includes(query) ||
      (m.provider ?? '').toLowerCase().includes(query)
  );

  // Missing numeric fields sort as 0 so the comparator never returns NaN.
  const sorted = [...filtered].sort((a, b) => {
    const d = (a[sortKey] ?? 0) - (b[sortKey] ?? 0);
    return sortAsc ? d : -d;
  });

  const toggleSort = (k: SortKey) => {
    if (sortKey === k) setSortAsc((v) => !v);
    else { setSortKey(k); setSortAsc(false); }
  };

  // Provider is part of the key so equally named models from different
  // providers do not collide.
  const rowKey = (m: ModelSpec) => `${m.provider}:${m.model_name}`;

  if (loading) {
    return (
      <div style={{ display: 'flex', alignItems: 'center', justifyContent: 'center', height: '100%' }}>
        <span className="spinner" style={{ width: 32, height: 32, borderWidth: 3 }} />
      </div>
    );
  }

  return (
    <div style={{ display: 'flex', flexDirection: 'column', flex: 1, minHeight: 0, overflow: 'hidden' }}>
      <div className="topbar">
        <div className="topbar-breadcrumb">
          <Database size={14} style={{ color: 'var(--accent-cyan)' }} />
          <strong>Model Registry</strong>
          <span style={{ color: 'var(--text-muted)' }}>/ {models.length} models registered</span>
        </div>
        <div className="topbar-actions">
          <input
            type="text"
            value={filter}
            onChange={(e) => setFilter(e.target.value)}
            placeholder="Filter models..."
            style={{ width: 180, padding: '6px 12px', fontSize: 'var(--text-xs)' }}
          />
          <button
            className="btn btn-ghost btn-sm"
            style={view === 'grid' ? { borderColor: 'var(--accent-cyan)', color: 'var(--accent-cyan)' } : {}}
            onClick={() => setView('grid')}
          >
            Grid
          </button>
          <button
            className="btn btn-ghost btn-sm"
            style={view === 'table' ? { borderColor: 'var(--accent-cyan)', color: 'var(--accent-cyan)' } : {}}
            onClick={() => setView('table')}
          >
            Table
          </button>
        </div>
      </div>

      <div className="page-content">
        {error && <div className="auth-error mb-4">⚠ {error}</div>}

        {sorted.length === 0 && !error ? (
          <div className="empty-state">
            <div className="empty-state-icon"><Database size={48} /></div>
            <div className="empty-state-title">No Models Found</div>
            <div className="empty-state-desc">
              {filter ? `No models match "${filter}"` : 'The model registry is empty.'}
            </div>
          </div>
        ) : view === 'grid' ? (
          <div className="grid-2">
            {sorted.map((m, i) => <ModelCard key={rowKey(m)} model={m} index={i} />)}
          </div>
        ) : (
          <div className="card">
            <div style={{ overflowX: 'auto' }}>
              <table className="data-table">
                <thead>
                  <tr>
                    <th>Model</th>
                    <th>Provider</th>
                    {SORTABLE_COLUMNS.map(({ key, label }) => (
                      <th
                        key={key}
                        style={{ cursor: 'pointer', userSelect: 'none' }}
                        aria-sort={sortKey === key ? (sortAsc ? 'ascending' : 'descending') : 'none'}
                        onClick={() => toggleSort(key)}
                      >
                        <div style={{ display: 'flex', alignItems: 'center', gap: 4 }}>
                          {label} <ArrowUpDown size={12} />
                        </div>
                      </th>
                    ))}
                    <th>Context</th>
                    <th>Notes</th>
                  </tr>
                </thead>
                <tbody>
                  {sorted.map((m) => {
                    const isBest = m.capability_score > 0.7 && m.input_cost_per_1k < 0.002;
                    const notes = m.notes ?? '';
                    return (
                      <tr
                        key={rowKey(m)}
                        style={isBest ? { borderLeft: '3px solid var(--accent-cyan)' } : {}}
                      >
                        <td>
                          <div style={{ fontFamily: 'JetBrains Mono, monospace', fontSize: 'var(--text-sm)', color: 'var(--text-primary)', display: 'flex', alignItems: 'center', gap: 8 }}>
                            {isBest && <Star size={11} fill="var(--accent-cyan)" color="var(--accent-cyan)" />}
                            {m.model_name}
                          </div>
                        </td>
                        <td>
                          <span className={`badge ${providerBadge(m.provider)}`}>{m.provider}</span>
                        </td>
                        <td>
                          <div style={{ display: 'flex', alignItems: 'center', gap: 8 }}>
                            <div style={{ width: 60, height: 4, background: 'var(--bg-border)', borderRadius: 2, overflow: 'hidden' }}>
                              <div style={{ width: `${m.capability_score * 100}%`, height: '100%', background: 'var(--accent-cyan)', borderRadius: 2 }} />
                            </div>
                            <span style={{ fontFamily: 'JetBrains Mono, monospace', fontSize: 'var(--text-xs)', color: 'var(--accent-cyan)' }}>
                              {(m.capability_score * 100).toFixed(0)}
                            </span>
                          </div>
                        </td>
                        <td>
                          <span style={{ fontFamily: 'JetBrains Mono, monospace', fontSize: 'var(--text-xs)', color: m.input_cost_per_1k === 0 ? 'var(--accent-green)' : 'var(--text-primary)' }}>
                            {m.input_cost_per_1k === 0 ? 'FREE' : `$${m.input_cost_per_1k.toFixed(5)}`}
                          </span>
                        </td>
                        <td>
                          <span style={{ fontFamily: 'JetBrains Mono, monospace', fontSize: 'var(--text-xs)', color: m.output_cost_per_1k === 0 ? 'var(--accent-green)' : 'var(--text-primary)' }}>
                            {m.output_cost_per_1k === 0 ? 'FREE' : `$${m.output_cost_per_1k.toFixed(5)}`}
                          </span>
                        </td>
                        <td>
                          <div style={{ display: 'flex', alignItems: 'center', gap: 6 }}>
                            <div style={{ width: 50, height: 4, background: 'var(--bg-border)', borderRadius: 2, overflow: 'hidden' }}>
                              <div style={{ width: `${m.max_complexity * 100}%`, height: '100%', background: 'var(--accent-purple)', borderRadius: 2 }} />
                            </div>
                            <span style={{ fontFamily: 'JetBrains Mono, monospace', fontSize: 'var(--text-xs)', color: 'var(--accent-purple)' }}>
                              {(m.max_complexity * 100).toFixed(0)}
                            </span>
                          </div>
                        </td>
                        <td>
                          <span style={{ fontFamily: 'JetBrains Mono, monospace', fontSize: 'var(--text-xs)', color: 'var(--accent-purple)' }}>
                            {m.context_window >= 1000000
                              ? `${(m.context_window / 1000000).toFixed(0)}M`
                              : `${(m.context_window / 1000).toFixed(0)}k`}
                          </span>
                        </td>
                        <td style={{ maxWidth: 200 }}>
                          {/* Full text stays available as a tooltip when truncated. */}
                          <span style={{ fontSize: 'var(--text-xs)', color: 'var(--text-secondary)' }} title={m.notes}>
                            {notes.length > 50 ? notes.slice(0, 50) + '…' : notes}
                          </span>
                        </td>
                      </tr>
                    );
                  })}
                </tbody>
              </table>
            </div>
          </div>
        )}
      </div>
    </div>
  );
}
|
frontend/src/pages/Playground.tsx
ADDED
|
@@ -0,0 +1,606 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { useState, useCallback } from 'react';
|
| 2 |
+
import { motion, AnimatePresence } from 'framer-motion';
|
| 3 |
+
import { Play, Zap, Eye, EyeOff, Copy, Check } from 'lucide-react';
|
| 4 |
+
import { useAppStore } from '../store';
|
| 5 |
+
import { api } from '../api';
|
| 6 |
+
import type { GenerateResponse, ExplainResponse, PipelineStage, PipelineStageStatus, ComplexityTier } from '../types';
|
| 7 |
+
import ReactMarkdown from 'react-markdown';
|
| 8 |
+
|
| 9 |
+
// ─── Tier Badge ──────────────────────────────────────────────────────────────
|
| 10 |
+
|
| 11 |
+
// Accent colour per complexity tier.
const TIER_COLORS: Record<ComplexityTier, string> = {
  trivial: '#00ff94',
  easy: '#00e5ff',
  medium: '#ffc700',
  hard: '#ff6b35',
  expert: '#e040fb',
};

/**
 * Small upper-cased pill showing a complexity tier in its accent colour.
 *
 * The border and text use the tier colour and the background is the same
 * colour with a `15` suffix appended. A tier missing from the table falls
 * back to `var(--text-muted)`.
 */
function TierBadge({ tier }: { tier: ComplexityTier }) {
  const accent = TIER_COLORS[tier] || 'var(--text-muted)';

  const pillStyle = {
    display: 'inline-block',
    padding: '2px 10px',
    borderRadius: 12,
    border: `1px solid ${accent}`,
    color: accent,
    fontSize: 'var(--text-xs)',
    fontFamily: 'JetBrains Mono, monospace',
    fontWeight: 700,
    letterSpacing: 1,
    textTransform: 'uppercase' as const,
    background: `${accent}15`,
  };

  return <span style={pillStyle}>{tier}</span>;
}
|
| 39 |
+
|
| 40 |
+
// ─── Pipeline Visualizer ─────────────────────────────────────────────────────
|
| 41 |
+
|
| 42 |
+
// Static definition of the pipeline steps, in the order they are drawn.
const PIPELINE_STAGES: { id: string; label: string; icon: string; desc: string }[] = [
  { id: 'analyze', label: 'Query Analyzer', icon: 'π', desc: 'Detecting domain, complexity signals, token count' },
  { id: 'estimate', label: 'Complexity Estimator', icon: 'π§ ', desc: 'ML model scoring query complexity C(q) β [0,1]' },
  { id: 'optimize', label: 'Optimization Engine', icon: 'β‘', desc: 'Selecting optimal model via budget constraints' },
  { id: 'compress', label: 'Prompt Optimizer', icon: 'π¦', desc: 'Compressing tokens and injecting system prompt' },
  { id: 'route', label: 'Model Router', icon: 'π', desc: 'Routing request to provider API' },
];

/**
 * Build the detail line for one stage.
 *
 * Without explain data, or for an unknown stage id, the static description
 * is returned; otherwise the line is filled from the matching section of the
 * explain response.
 */
function describePipelineStage(
  stageId: string,
  fallback: string,
  explain?: ExplainResponse | null,
): string {
  if (!explain) return fallback;

  switch (stageId) {
    case 'analyze': {
      const f = explain.features;
      return `Domain: ${f.primary_domain} | Tokens: ${f.token_count} | Output: ${f.estimated_output_length}`;
    }
    case 'estimate': {
      const c = explain.complexity;
      return `Score: ${c.score.toFixed(3)} | Tier: ${c.tier} | Reasoning req: ${c.required_reasoning.toFixed(2)}`;
    }
    case 'optimize': {
      const o = explain.optimization;
      return `Selected: ${o.selected_model} | Budget: ${o.budget_mode} | Compression: ${o.compression_enabled ? 'yes' : 'no'}`;
    }
    case 'compress': {
      const prompt = explain.optimized_prompt;
      const savedTokens = prompt.tokens_saved || 0;
      return `Tokens before: ${prompt.tokens_before} β after: ${prompt.tokens_after} | Saved: ${savedTokens} tokens`;
    }
    case 'route': {
      const o = explain.optimization;
      return `Provider: ${o.provider} | Max tokens: ${o.estimated_output_tokens} | Style: ${o.system_prompt_style}`;
    }
    default:
      return fallback;
  }
}

/** Glyph shown in a stage's icon slot; any status not listed shows the dimmed stage icon. */
function renderPipelineStatus(status: PipelineStageStatus, idleIcon: string) {
  switch (status) {
    case 'active':
      return <span className="spinner" style={{ width: 16, height: 16, borderWidth: 2 }} />;
    case 'complete':
      return <span style={{ color: 'var(--accent-green)', fontSize: 18 }}>β</span>;
    case 'skipped':
      return <span style={{ color: 'var(--text-muted)', fontSize: 14 }}>β</span>;
    case 'error':
      return <span style={{ color: 'var(--accent-red)', fontSize: 18 }}>β</span>;
    default:
      return <span style={{ fontSize: 14, opacity: 0.5 }}>{idleIcon}</span>;
  }
}

/**
 * Vertical list of the pipeline steps.
 *
 * Each step's status comes from the entry in `stages` with the same id
 * (defaulting to `'idle'`), and its detail text comes from `explainData`
 * when that is provided. Every step except the last is followed by a
 * connector, which is drawn with a gradient once the step is complete.
 */
function PipelineVisualizer({
  stages,
  explainData,
}: {
  stages: PipelineStage[];
  explainData?: ExplainResponse | null;
}) {
  const lastIndex = PIPELINE_STAGES.length - 1;

  return (
    <div className="pipeline-wrapper">
      {PIPELINE_STAGES.map((def, i) => {
        const current = stages.find((s) => s.id === def.id);
        const status: PipelineStageStatus = current?.status || 'idle';
        const detail = describePipelineStage(def.id, def.desc, explainData);
        const connectorFill =
          status === 'complete'
            ? 'linear-gradient(180deg, var(--accent-cyan) 0%, var(--bg-border) 100%)'
            : 'var(--bg-border)';

        return (
          <div key={def.id} className={`pipeline-stage ${status}`}>
            <div className="pipeline-stage-icon">
              {renderPipelineStatus(status, def.icon)}
            </div>
            <div className="pipeline-stage-body">
              <div className="pipeline-stage-label">{def.label}</div>
              <div className="pipeline-stage-detail">{detail}</div>
            </div>
            {i < lastIndex && (
              <div className="pipeline-connector" style={{ background: connectorFill }} />
            )}
          </div>
        );
      })}
    </div>
  );
}
|
| 118 |
+
|
| 119 |
+
// ─── Rationale Card ──────────────────────────────────────────────────────────
|
| 120 |
+
|
| 121 |
+
/**
 * Card explaining why the optimizer made its routing decision.
 *
 * Sections: complexity score with per-skill bars, detected feature badges,
 * the routing decision grid, prompt-compression numbers (only when tokens
 * were saved) and the optimizer's rationale bullets (only when present).
 */
function RationaleCard({ explain }: { explain: ExplainResponse }) {
  const { complexity, optimization, optimized_prompt, features } = explain;

  // Feature flags that get a badge. The "nothing detected" fallback below is
  // derived from this same list so the two can never disagree (previously the
  // fallback tested key prefixes and missed `multi_step`).
  const featureFlags = [
    { key: 'domain_code', label: 'Code' },
    { key: 'domain_math', label: 'Math' },
    { key: 'domain_science', label: 'Science' },
    { key: 'domain_reasoning', label: 'Reasoning' },
    { key: 'domain_creative', label: 'Creative' },
    { key: 'multi_step', label: 'Multi-step' },
    { key: 'requires_comparison', label: 'Comparison' },
    { key: 'requires_analysis', label: 'Analysis' },
    { key: 'has_math_notation', label: 'Math notation' },
    { key: 'has_code_block', label: 'Code block' },
  ];
  const detectedFlags = featureFlags.filter(
    (f) => features[f.key as keyof typeof features] === true,
  );

  const requirementBars = [
    { label: 'Reasoning req.', value: complexity.required_reasoning, color: 'var(--accent-cyan)' },
    { label: 'Coding req.', value: complexity.required_coding, color: 'var(--accent-purple)' },
    { label: 'Math req.', value: complexity.required_math, color: 'var(--accent-amber)' },
  ];

  const routingFields = [
    { label: 'Selected Model', value: optimization.selected_model, mono: true },
    { label: 'Provider', value: optimization.provider, mono: true },
    { label: 'Budget Mode', value: optimization.budget_mode, mono: false },
    { label: 'System Prompt', value: optimization.system_prompt_style, mono: false },
    { label: 'Compression', value: optimization.compression_enabled ? 'enabled' : 'disabled', mono: false },
    { label: 'Fallback', value: optimization.fallback_model || 'N/A', mono: true },
  ];

  // Tolerate a missing rationale list instead of comparing `undefined > 0`.
  const rationale = optimization.rationale ?? [];

  return (
    <div className="rationale-card">
      <div className="rationale-card-title">
        <Zap size={14} />
        LLMOpt Decision Rationale
      </div>

      {/* Complexity breakdown */}
      <div className="rationale-section">
        <div className="rationale-label">Complexity Analysis</div>
        <div style={{ display: 'flex', alignItems: 'center', gap: 12, marginBottom: 12 }}>
          <TierBadge tier={complexity.tier} />
          <span style={{ fontFamily: 'JetBrains Mono', fontSize: 'var(--text-sm)', color: 'var(--text-primary)' }}>
            Score: <strong style={{ color: 'var(--accent-cyan)' }}>{complexity.score.toFixed(3)}</strong>
          </span>
        </div>
        <div style={{ display: 'flex', flexDirection: 'column', gap: 8 }}>
          {requirementBars.map(({ label, value, color }) => (
            <div key={label} style={{ display: 'flex', alignItems: 'center', gap: 8 }}>
              <span style={{ fontSize: 'var(--text-xs)', color: 'var(--text-muted)', width: 100, flexShrink: 0 }}>{label}</span>
              <div style={{ flex: 1, height: 4, background: 'var(--bg-border)', borderRadius: 2 }}>
                {/* value is rendered as a percentage of the track width */}
                <div style={{ width: `${value * 100}%`, height: '100%', background: color, borderRadius: 2, transition: 'width 0.6s ease' }} />
              </div>
              <span style={{ fontSize: 'var(--text-xs)', color, fontFamily: 'JetBrains Mono', width: 32, textAlign: 'right' }}>
                {(value * 100).toFixed(0)}
              </span>
            </div>
          ))}
        </div>
      </div>

      {/* Feature flags */}
      <div className="rationale-section">
        <div className="rationale-label">Detected Features</div>
        <div style={{ display: 'flex', flexWrap: 'wrap', gap: 6 }}>
          {detectedFlags.map((f) => (
            <span key={f.key} className="badge badge-cyan" style={{ fontSize: '10px' }}>
              {f.label}
            </span>
          ))}
          {detectedFlags.length === 0 && (
            <span style={{ fontSize: 'var(--text-xs)', color: 'var(--text-muted)' }}>No special features detected</span>
          )}
        </div>
      </div>

      {/* Routing decision */}
      <div className="rationale-section">
        <div className="rationale-label">Routing Decision</div>
        <div style={{ display: 'grid', gridTemplateColumns: '1fr 1fr', gap: 8 }}>
          {routingFields.map(({ label, value, mono }) => (
            <div key={label}>
              <div style={{ fontSize: '10px', color: 'var(--text-muted)', marginBottom: 2 }}>{label}</div>
              <div style={{
                fontFamily: mono ? 'JetBrains Mono, monospace' : 'inherit',
                fontSize: 'var(--text-xs)',
                color: mono ? 'var(--accent-cyan)' : 'var(--text-primary)',
              }}>
                {value}
              </div>
            </div>
          ))}
        </div>
      </div>

      {/* Compression */}
      {optimized_prompt.tokens_saved > 0 && (
        <div className="rationale-section">
          <div className="rationale-label">Prompt Compression</div>
          <div style={{ display: 'flex', alignItems: 'center', gap: 8 }}>
            <span style={{ fontFamily: 'JetBrains Mono', fontSize: 'var(--text-xs)', color: 'var(--text-muted)' }}>
              {optimized_prompt.tokens_before}
            </span>
            <span style={{ color: 'var(--accent-green)', fontSize: 'var(--text-xs)' }}>→</span>
            <span style={{ fontFamily: 'JetBrains Mono', fontSize: 'var(--text-xs)', color: 'var(--accent-green)' }}>
              {optimized_prompt.tokens_after} tokens
            </span>
            <span style={{ marginLeft: 'auto', color: 'var(--accent-amber)', fontFamily: 'JetBrains Mono', fontSize: 'var(--text-xs)' }}>
              -{optimized_prompt.tokens_saved} saved ({(optimized_prompt.compression_ratio * 100).toFixed(1)}%)
            </span>
          </div>
        </div>
      )}

      {/* Rationale bullets */}
      {rationale.length > 0 && (
        <div className="rationale-section">
          <div className="rationale-label">Optimizer Rationale</div>
          <ul style={{ margin: 0, padding: '0 0 0 16px', display: 'flex', flexDirection: 'column', gap: 4 }}>
            {rationale.map((r, i) => (
              <li key={i} style={{ fontSize: 'var(--text-xs)', color: 'var(--text-secondary)' }}>{r}</li>
            ))}
          </ul>
        </div>
      )}
    </div>
  );
}
|
| 246 |
+
|
| 247 |
+
// ─── Metrics Bar ─────────────────────────────────────────────────────────────
|
| 248 |
+
|
| 249 |
+
/**
 * Horizontal strip of headline numbers for one generation result:
 * model, total tokens, cost, cost saved, latency and complexity tier,
 * plus a "Tokens Compressed" cell when compression saved any tokens.
 */
function MetricsBar({ result }: { result: GenerateResponse }) {
  // One entry per cell, in display order; dividers are inserted between cells.
  const cells = [
    {
      label: 'Model',
      style: { fontFamily: 'JetBrains Mono', fontSize: 'var(--text-xs)' },
      content: <>{result.model_used}</>,
    },
    {
      label: 'Tokens',
      style: undefined,
      content: <>{result.total_tokens.toLocaleString()}</>,
    },
    {
      label: 'Cost',
      style: { color: 'var(--accent-green)' },
      content: <>${result.estimated_cost.toFixed(6)}</>,
    },
    {
      label: 'Saved',
      style: { color: 'var(--accent-amber)' },
      content: <>${result.cost_saved.toFixed(6)}</>,
    },
    {
      label: 'Latency',
      style: undefined,
      content: <>{result.latency_ms.toFixed(0)}ms</>,
    },
    {
      label: 'Complexity',
      style: undefined,
      content: <TierBadge tier={result.complexity_tier} />,
    },
    // Only shown when prompt compression actually removed tokens.
    ...(result.tokens_saved > 0
      ? [{
          label: 'Tokens Compressed',
          style: { color: 'var(--accent-purple)' },
          content: <>-{result.tokens_saved}</>,
        }]
      : []),
  ];

  return (
    <div className="metrics-bar">
      {cells.flatMap((cell, idx) => [
        idx > 0 ? <div key={`${cell.label}-divider`} className="metric-divider" /> : null,
        <div key={cell.label} className="metric-item">
          <div className="metric-label">{cell.label}</div>
          <div className="metric-value" style={cell.style}>{cell.content}</div>
        </div>,
      ])}
    </div>
  );
}
|
| 295 |
+
|
| 296 |
+
// ─── Main Playground Page ────────────────────────────────────────────────────
|
| 297 |
+
|
| 298 |
+
/**
 * Playground page: lets the user submit a query, animates the optimization
 * pipeline while the explain request is in flight, then runs the real
 * generation and shows the response, metrics and (optionally) the rationale.
 */
export default function Playground() {
  const {
    budgetMode,
    alphaWeight,
    betaWeight,
    gammaWeight,
    compressionEnabled,
    evaluationEnabled,
    connectedProviders,
  } = useAppStore();
  const [query, setQuery] = useState('');
  const [selectedProviders, setSelectedProviders] = useState<string[]>([]);
  const [loading, setLoading] = useState(false);
  const [error, setError] = useState('');
  const [result, setResult] = useState<GenerateResponse | null>(null);
  const [explain, setExplain] = useState<ExplainResponse | null>(null);
  const [showExplain, setShowExplain] = useState(false);
  const [copied, setCopied] = useState(false);
  const [stages, setStages] = useState<PipelineStage[]>([]);

  /** Set the status (and optionally the detail) of one stage, adding it if unknown. */
  const setStageStatus = useCallback((id: string, status: PipelineStageStatus, detail?: string) => {
    setStages((prev) => {
      const existing = prev.find((s) => s.id === id);
      if (existing) {
        return prev.map((s) => s.id === id ? { ...s, status, detail: detail ?? s.detail } : s);
      }
      return [...prev, { id, label: id, icon: '', status, detail }];
    });
  }, []);

  /** Reset every known pipeline stage to `idle`. */
  const resetStages = useCallback(() => {
    setStages(PIPELINE_STAGES.map((s) => ({ id: s.id, label: s.label, icon: s.icon, status: 'idle' as PipelineStageStatus })));
  }, []);

  // Walk the first four stages with fixed, purely cosmetic delays, then leave
  // `route` active; `route` is completed by handleSubmit once the real API
  // call returns.
  const runPipelineAnimation = useCallback(async () => {
    const timedStages: Array<[string, number]> = [
      ['analyze', 300],
      ['estimate', 600],
      ['optimize', 400],
      ['compress', 300],
    ];

    for (const [id, delayMs] of timedStages) {
      setStageStatus(id, 'active');
      await new Promise((r) => setTimeout(r, delayMs));
      setStageStatus(id, 'complete');
    }
    setStageStatus('route', 'active');
  }, [setStageStatus]);

  const handleSubmit = async () => {
    if (!query.trim() || loading) return;
    setLoading(true);
    setError('');
    setResult(null);
    setExplain(null);
    resetStages();

    // An empty selection means "no constraint" (all connected providers).
    const providerConstraints = selectedProviders.length > 0 ? selectedProviders : undefined;

    try {
      // 1. Kick off explain so it runs in parallel with the animation.
      const explainPromise = api.explain(query, budgetMode, {
        alpha: alphaWeight,
        beta: betaWeight,
        gamma: gammaWeight,
        compression_enabled: compressionEnabled,
        only_providers: providerConstraints,
      });
      // Mark the promise as handled: if explain fails while the animation is
      // still running, the browser would otherwise report an unhandled
      // rejection. The failure is still surfaced by the `await` below.
      explainPromise.catch(() => {});

      // 2. Play the animation (leaves the `route` stage active).
      await runPipelineAnimation();

      // 3. Collect explain data (usually ready by now).
      const explainData = await explainPromise;
      setExplain(explainData);
      setShowExplain(false);

      // 4. Generate with the real API.
      const genResult = await api.generate({
        query,
        budget_mode: budgetMode,
        alpha: alphaWeight,
        beta: betaWeight,
        gamma: gammaWeight,
        compression_enabled: compressionEnabled,
        evaluate: evaluationEnabled,
        only_providers: providerConstraints,
      });

      // 5. Routing finished successfully.
      setStageStatus('route', 'complete');
      setResult(genResult);
    } catch (e: any) {
      const status = e?.status;
      if (status === 401) {
        setError('Please add your API keys in Settings to generate responses.');
      } else if (status === 503) {
        setError('Redis is unavailable — sessions require Redis. Try adding ?session= to the request or run Redis locally.');
      } else {
        setError(e?.message || 'Generation failed');
      }
      setStageStatus('route', 'error');
    } finally {
      setLoading(false);
    }
  };

  // Copy the response text; only show "Copied!" when the write succeeded.
  const handleCopy = async () => {
    if (!result?.response) return;
    try {
      await navigator.clipboard.writeText(result.response);
      setCopied(true);
      setTimeout(() => setCopied(false), 2000);
    } catch (err) {
      // Clipboard access can be denied or unavailable (e.g. insecure context).
      console.error('Failed to copy response to clipboard', err);
    }
  };

  const examplePrompts = [
    'Explain quicksort with Python code and time complexity analysis',
    'What is the derivative of x²·sin(x)?',
    'Write a haiku about machine learning',
    'Design a distributed rate limiter for 1M RPS',
    'Summarize the French Revolution in 3 bullet points',
  ];

  return (
    <div style={{ display: 'flex', flexDirection: 'column', flex: 1, minHeight: 0, overflow: 'hidden' }}>
      <div className="topbar">
        <div className="topbar-breadcrumb">
          <Zap size={14} style={{ color: 'var(--accent-cyan)' }} />
          <strong>Playground</strong>
          <span style={{ color: 'var(--text-muted)' }}>/ Query Optimizer</span>
        </div>
        <div className="topbar-actions">
          {connectedProviders.length > 0 && (
            <div style={{ display: 'flex', alignItems: 'center', gap: '6px', marginRight: '16px', borderRight: '1px solid var(--bg-border)', paddingRight: '16px' }}>
              <span style={{ fontSize: 'var(--text-xs)', color: 'var(--text-muted)' }}>Routing Pool:</span>
              {connectedProviders.map((prov) => {
                const isSelected = selectedProviders.includes(prov);
                // With nothing selected, every provider is in the pool.
                const isActive = selectedProviders.length === 0 || isSelected;
                return (
                  <button
                    key={prov}
                    className="btn btn-xs"
                    style={{
                      textTransform: 'capitalize',
                      fontSize: '10px',
                      padding: '2px 8px',
                      borderColor: isActive ? 'var(--accent-cyan)' : 'var(--bg-border)',
                      color: isActive ? 'var(--accent-cyan)' : 'var(--text-muted)',
                      opacity: isActive ? 1 : 0.4,
                      transition: 'all 0.2s ease',
                      background: 'transparent',
                    }}
                    onClick={() => {
                      setSelectedProviders((prev) =>
                        prev.includes(prov)
                          ? prev.filter((p) => p !== prov)
                          : [...prev, prov]
                      );
                    }}
                  >
                    {prov}
                  </button>
                );
              })}
            </div>
          )}
          <span style={{ fontSize: 'var(--text-xs)', color: 'var(--text-muted)' }}>Budget:</span>
          {(['cheap', 'balanced', 'quality'] as const).map((m) => (
            <button
              key={m}
              className={`btn btn-ghost btn-sm ${budgetMode === m ? 'active' : ''}`}
              style={budgetMode === m ? { borderColor: 'var(--accent-cyan)', color: 'var(--accent-cyan)' } : {}}
              onClick={() => useAppStore.getState().setBudgetMode(m)}
            >
              {m}
            </button>
          ))}
        </div>
      </div>

      <div className="page-content" style={{ display: 'flex', gap: 'var(--sp-4)', alignItems: 'flex-start', overflow: 'auto' }}>
        {/* Left panel: Input + Examples + Result */}
        <div style={{ flex: 1, display: 'flex', flexDirection: 'column', gap: 'var(--sp-4)', minWidth: 0 }}>
          {/* Query input */}
          <div className="card">
            <div className="card-header">Query</div>
            <textarea
              id="playground-query-input"
              value={query}
              onChange={(e) => setQuery(e.target.value)}
              onKeyDown={(e) => {
                if (e.key === 'Enter' && (e.ctrlKey || e.metaKey)) handleSubmit();
              }}
              placeholder="Ask anything — LLMOpt will analyze complexity, route to the optimal model, and compress the prompt to save cost..."
              style={{ minHeight: 120, resize: 'vertical', fontFamily: 'inherit' }}
            />
            <div style={{ display: 'flex', alignItems: 'center', justifyContent: 'space-between', marginTop: 'var(--sp-3)' }}>
              <span style={{ fontSize: 'var(--text-xs)', color: 'var(--text-muted)' }}>
                Ctrl+Enter to run
              </span>
              <button
                id="playground-submit-btn"
                className="btn btn-primary"
                onClick={handleSubmit}
                disabled={loading || !query.trim()}
              >
                {loading ? (
                  <><span className="spinner" /> Optimizing…</>
                ) : (
                  <><Play size={14} /> Run</>
                )}
              </button>
            </div>
          </div>

          {/* Example prompts */}
          {!result && !loading && (
            <div className="card">
              <div className="card-header">Example Prompts</div>
              <div style={{ display: 'flex', flexDirection: 'column', gap: 'var(--sp-2)' }}>
                {examplePrompts.map((p) => (
                  <button
                    key={p}
                    className="example-prompt-btn"
                    onClick={() => setQuery(p)}
                  >
                    <span className="example-prompt-icon">→</span>
                    {p}
                  </button>
                ))}
              </div>
            </div>
          )}

          {/* Error */}
          {error && (
            <motion.div
              className="auth-error"
              initial={{ opacity: 0, y: -8 }}
              animate={{ opacity: 1, y: 0 }}
            >
              ⚠ {error}
            </motion.div>
          )}

          {/* Result */}
          <AnimatePresence>
            {result && (
              <motion.div
                className="card"
                initial={{ opacity: 0, y: 12 }}
                animate={{ opacity: 1, y: 0 }}
                exit={{ opacity: 0, y: -8 }}
              >
                <div className="card-header" style={{ display: 'flex', justifyContent: 'space-between', alignItems: 'center' }}>
                  <span>Response</span>
                  <div style={{ display: 'flex', gap: 'var(--sp-2)' }}>
                    <button className="btn btn-ghost btn-sm" onClick={() => setShowExplain((v) => !v)}>
                      {showExplain ? <EyeOff size={12} /> : <Eye size={12} />}
                      {showExplain ? 'Hide Explain' : 'Explain'}
                    </button>
                    <button className="btn btn-ghost btn-sm" onClick={handleCopy}>
                      {copied ? <Check size={12} /> : <Copy size={12} />}
                      {copied ? 'Copied!' : 'Copy'}
                    </button>
                  </div>
                </div>
                <MetricsBar result={result} />
                <div className="response-content">
                  <ReactMarkdown>{result.response}</ReactMarkdown>
                </div>

                {/* Inline explain panel */}
                <AnimatePresence>
                  {showExplain && explain && (
                    <motion.div
                      initial={{ opacity: 0, height: 0 }}
                      animate={{ opacity: 1, height: 'auto' }}
                      exit={{ opacity: 0, height: 0 }}
                      style={{ overflow: 'hidden' }}
                    >
                      <div style={{ borderTop: '1px solid var(--bg-border)', paddingTop: 'var(--sp-4)', marginTop: 'var(--sp-4)' }}>
                        <RationaleCard explain={explain} />
                      </div>
                    </motion.div>
                  )}
                </AnimatePresence>
              </motion.div>
            )}
          </AnimatePresence>
        </div>

        {/* Right panel: Pipeline */}
        <div style={{ width: 280, flexShrink: 0 }}>
          <div className="card" style={{ position: 'sticky', top: 0 }}>
            <div className="card-header">Optimization Pipeline</div>
            <PipelineVisualizer stages={stages} explainData={explain} />
            {!loading && stages.length === 0 && (
              <div style={{ padding: 'var(--sp-4)', textAlign: 'center', color: 'var(--text-muted)', fontSize: 'var(--text-xs)' }}>
                Run a query to see the pipeline in action
              </div>
            )}
          </div>
        </div>
      </div>
    </div>
  );
}
|
frontend/src/pages/Settings.tsx
ADDED
|
@@ -0,0 +1,349 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { useState, useEffect } from 'react';
|
| 2 |
+
import { Settings, Eye, EyeOff, CheckCircle } from 'lucide-react';
|
| 3 |
+
import { api } from '../api';
|
| 4 |
+
import { useAppStore } from '../store';
|
| 5 |
+
|
| 6 |
+
/** One provider row on the Settings page. */
interface KeyEntry {
  /** Provider id, used as the key when saving or deleting. */
  provider: string;
  /** Human-readable provider name shown in the row. */
  label: string;
  /** Placeholder text for the key input. */
  placeholder: string;
  /** Whether a key for this provider is currently stored. */
  connected: boolean;
}
|
| 12 |
+
|
| 13 |
+
// Static provider catalogue for the Settings page. `connected` is omitted
// here because it is runtime state, not part of the catalogue.
const PROVIDERS: Omit<KeyEntry, 'connected'>[] = [
  {
    provider: 'openai',
    label: 'OpenAI',
    placeholder: 'sk-...',
  },
  {
    provider: 'anthropic',
    label: 'Anthropic',
    placeholder: 'sk-ant-...',
  },
  {
    provider: 'google',
    label: 'Google AI',
    placeholder: 'AI...',
  },
  {
    provider: 'deepseek',
    label: 'DeepSeek',
    placeholder: 'sk-...',
  },
  {
    provider: 'mistral',
    label: 'Mistral',
    placeholder: '...',
  },
  {
    provider: 'cohere',
    label: 'Cohere',
    placeholder: 'co-...',
  },
  {
    provider: 'ollama',
    label: 'Ollama URL',
    placeholder: 'http://localhost:11434',
  },
];
|
| 22 |
+
|
| 23 |
+
/**
 * Settings row for one provider's API key.
 *
 * Shows the connection status, a masked input with a show/hide toggle, a
 * Save button and, when a key is already stored, a two-click "Clear" button
 * (first click arms it for 3 seconds, second click deletes).
 *
 * @param entry    Provider metadata and current connection state.
 * @param onSave   Persists the entered key; a rejection is shown in the row.
 * @param onDelete Removes the stored key; a rejection is shown in the row.
 */
function ApiKeyRow({
  entry,
  onSave,
  onDelete,
}: {
  entry: KeyEntry;
  onSave: (provider: string, key: string) => Promise<void>;
  onDelete: (provider: string) => Promise<void>;
}) {
  const [value, setValue] = useState('');
  const [visible, setVisible] = useState(false);
  const [saving, setSaving] = useState(false);
  const [saved, setSaved] = useState(false);
  const [deleting, setDeleting] = useState(false);
  const [confirmDelete, setConfirmDelete] = useState(false);
  const [error, setError] = useState('');

  const handleSave = async () => {
    // `saving` guard: Enter in the input reaches this handler even while the
    // Save button is disabled, which previously allowed duplicate requests.
    if (!value.trim() || saving) return;
    setSaving(true);
    setError('');
    try {
      await onSave(entry.provider, value);
      setSaved(true);
      setValue('');
      setTimeout(() => setSaved(false), 3000);
    } catch (err) {
      // Surface the failure instead of leaving an unhandled rejection.
      setError(err instanceof Error && err.message ? err.message : 'Failed to save key');
    } finally {
      setSaving(false);
    }
  };

  const handleDelete = async () => {
    if (deleting) return;
    if (!confirmDelete) {
      // First click only arms the button; it disarms itself after 3 seconds.
      setConfirmDelete(true);
      setTimeout(() => setConfirmDelete(false), 3000);
      return;
    }
    setDeleting(true);
    setError('');
    try {
      await onDelete(entry.provider);
      setValue('');
      setConfirmDelete(false);
    } catch (err) {
      setError(err instanceof Error && err.message ? err.message : 'Failed to clear key');
    } finally {
      setDeleting(false);
    }
  };

  return (
    <div className="settings-row">
      <div className="settings-row-info">
        <div className="settings-row-label">{entry.label}</div>
        <div className="settings-row-desc">
          {error ? (
            <span role="alert" style={{ color: 'var(--accent-red)' }}>{error}</span>
          ) : entry.connected ? (
            <span style={{ color: 'var(--accent-green)', display: 'flex', alignItems: 'center', gap: 4 }}>
              <CheckCircle size={12} /> Connected
            </span>
          ) : (
            <span style={{ color: 'var(--text-muted)' }}>No key set</span>
          )}
        </div>
      </div>
      <div className="settings-key-input-wrapper">
        <div style={{ position: 'relative', flex: 1 }}>
          <input
            type={visible ? 'text' : 'password'}
            value={value}
            onChange={(e) => setValue(e.target.value)}
            placeholder={entry.placeholder}
            onKeyDown={(e) => e.key === 'Enter' && handleSave()}
            style={{ paddingRight: '40px' }}
          />
          <button
            type="button"
            aria-label={visible ? 'Hide key' : 'Show key'}
            onClick={() => setVisible((v) => !v)}
            style={{
              position: 'absolute', right: 10, top: '50%', transform: 'translateY(-50%)',
              background: 'none', border: 'none', cursor: 'pointer',
              color: 'var(--text-muted)', display: 'flex',
            }}
          >
            {visible ? <EyeOff size={14} /> : <Eye size={14} />}
          </button>
        </div>
        <button
          className="btn btn-ghost btn-sm"
          onClick={handleSave}
          disabled={saving || !value.trim()}
          style={saved ? { borderColor: 'var(--accent-green)', color: 'var(--accent-green)' } : {}}
        >
          {saving ? <span className="spinner" /> : saved ? '✓ Saved' : 'Save'}
        </button>
        {entry.connected && (
          <button
            className="btn btn-ghost btn-sm"
            onClick={handleDelete}
            disabled={deleting}
            style={{
              borderColor: confirmDelete ? 'var(--accent-red)' : 'var(--bg-border)',
              color: 'var(--accent-red)',
            }}
          >
            {deleting ? <span className="spinner" /> : confirmDelete ? 'Sure?' : 'Clear'}
          </button>
        )}
      </div>
    </div>
  );
}
|
| 128 |
+
|
| 129 |
+
/** Props for {@link SliderRow}. */
interface SliderRowProps {
  /** Row title. */
  label: string;
  /** Secondary description shown under the title. */
  desc: string;
  /** Current slider value. */
  value: number;
  /** Lower bound of the range input (SliderRow defaults this to 0). */
  min?: number;
  /** Upper bound of the range input (SliderRow defaults this to 1). */
  max?: number;
  /** Step of the range input (SliderRow defaults this to 0.01). */
  step?: number;
  /** Called with the parsed numeric value whenever the slider moves. */
  onChange: (v: number) => void;
  /** Suffix appended to the numeric readout (SliderRow defaults this to ''). */
  unit?: string;
}
|
| 139 |
+
|
| 140 |
+
/**
 * Settings row with a title, description, two-decimal numeric readout and a
 * full-width range slider underneath.
 */
function SliderRow(props: SliderRowProps) {
  const {
    label,
    desc,
    value,
    min = 0,
    max = 1,
    step = 0.01,
    onChange,
    unit = '',
  } = props;

  return (
    <div
      className="settings-row"
      style={{ flexDirection: 'column', alignItems: 'flex-start', gap: 'var(--sp-3)' }}
    >
      <div
        style={{ display: 'flex', justifyContent: 'space-between', width: '100%', alignItems: 'center' }}
      >
        <div>
          <div className="settings-row-label">{label}</div>
          <div className="settings-row-desc">{desc}</div>
        </div>
        <span
          style={{
            fontFamily: 'JetBrains Mono, monospace',
            fontSize: 'var(--text-sm)',
            color: 'var(--accent-cyan)',
            minWidth: 48,
            textAlign: 'right',
          }}
        >
          {value.toFixed(2)}{unit}
        </span>
      </div>
      <input
        type="range"
        min={min}
        max={max}
        step={step}
        value={value}
        // Range inputs report strings; hand the caller a number.
        onChange={(event) => onChange(parseFloat(event.target.value))}
        style={{ width: '100%' }}
      />
    </div>
  );
}
|
| 164 |
+
|
| 165 |
+
/**
 * Settings row with a label/description on the left and a checkbox-backed
 * toggle switch on the right.
 *
 * `onChange` receives the checkbox's new `checked` state.
 */
function ToggleRow({
  label, desc, value, onChange,
}: {
  label: string; desc: string; value: boolean; onChange: (v: boolean) => void;
}) {
  return (
    <div className="settings-row">
      <div className="settings-row-info">
        <div className="settings-row-label">{label}</div>
        <div className="settings-row-desc">{desc}</div>
      </div>
      <label className="toggle-switch">
        {/* The wrapping <label> contains no text, so name the checkbox explicitly. */}
        <input
          type="checkbox"
          aria-label={label}
          checked={value}
          onChange={(e) => onChange(e.target.checked)}
        />
        <span className="toggle-slider" />
      </label>
    </div>
  );
}
|
| 183 |
+
|
| 184 |
+
/**
 * Settings page: provider API keys, budget-optimization weights, prompt
 * compression, LLM-as-judge evaluation, and the Redis URL.
 *
 * All values are read from and written to the app store via `useAppStore`;
 * the connected-provider list is re-fetched from the API on mount and after
 * every key save/delete.
 */
export default function SettingsPage() {
  const {
    connectedProviders,
    setConnectedProviders,
    alphaWeight,
    setAlphaWeight,
    betaWeight,
    setBetaWeight,
    gammaWeight,
    setGammaWeight,
    compressionEnabled,
    setCompressionEnabled,
    compressionThreshold,
    setCompressionThreshold,
    evaluationEnabled,
    setEvaluationEnabled,
    redisUrl,
    setRedisUrl,
  } = useAppStore();

  // Fetch the provider list from the API and mirror it into the store.
  const refreshProviders = async () => {
    const data = await api.getKeys();
    setConnectedProviders(data.connected_providers);
  };

  useEffect(() => {
    // Refresh connected providers; best-effort, so failures are ignored here.
    refreshProviders().catch(() => {});
  }, []);

  // Errors from these handlers propagate to the calling row component.
  const handleSaveKey = async (provider: string, key: string) => {
    await api.updateKeys({ [provider]: key });
    await refreshProviders();
  };

  const handleDeleteKey = async (provider: string) => {
    await api.deleteKey(provider);
    await refreshProviders();
  };

  const providerEntries: KeyEntry[] = PROVIDERS.map((p) => ({
    ...p,
    connected: connectedProviders.includes(p.provider),
  }));

  const totalWeight = alphaWeight + betaWeight + gammaWeight;
  const formulaDisplay = `score = ${alphaWeight.toFixed(2)}·cost + ${betaWeight.toFixed(2)}·tokens + ${gammaWeight.toFixed(2)}·quality`;

  return (
    <div style={{ display: 'flex', flexDirection: 'column', flex: 1, minHeight: 0, overflow: 'hidden' }}>
      <div className="topbar">
        <div className="topbar-breadcrumb">
          <Settings size={14} style={{ color: 'var(--accent-cyan)' }} />
          <strong>Settings</strong>
          <span style={{ color: 'var(--text-muted)' }}>/ Configuration</span>
        </div>
      </div>

      <div className="page-content" style={{ display: 'flex', flexDirection: 'column', gap: 'var(--sp-4)', flex: 1, overflowY: 'auto' }}>
        {/* API Keys */}
        <div className="settings-section">
          <div className="settings-section-header">
            <div className="settings-section-title">API Keys</div>
            <div className="settings-section-desc">Provider credentials are encrypted and stored server-side in your session</div>
          </div>
          {providerEntries.map((entry) => (
            <ApiKeyRow key={entry.provider} entry={entry} onSave={handleSaveKey} onDelete={handleDeleteKey} />
          ))}
        </div>

        {/* Budget Weights */}
        <div className="settings-section">
          <div className="settings-section-header">
            <div className="settings-section-title">Budget Optimization Weights</div>
            <div className="settings-section-desc">
              Control how the routing optimizer balances cost, tokens, and quality
            </div>
          </div>
          <div style={{ padding: 'var(--sp-4) var(--sp-5)' }}>
            <div style={{
              background: 'var(--bg-base)',
              border: '1px solid var(--bg-border)',
              borderRadius: 'var(--radius-md)',
              padding: 'var(--sp-3) var(--sp-4)',
              fontFamily: 'Fira Code, monospace',
              fontSize: 'var(--text-sm)',
              color: 'var(--accent-cyan)',
              marginBottom: 'var(--sp-4)',
            }}>
              {formulaDisplay}
              {/* Warn when the three weights drift more than 0.01 away from a sum of 1. */}
              {Math.abs(totalWeight - 1) > 0.01 && (
                <span style={{ color: 'var(--accent-amber)', marginLeft: 12 }}>
                  ⚠ sum = {totalWeight.toFixed(2)} (should be 1.0)
                </span>
              )}
            </div>
          </div>
          <SliderRow label="α — Cost Weight" desc="Penalize expensive routes" value={alphaWeight} onChange={setAlphaWeight} />
          <SliderRow label="β — Token Weight" desc="Penalize high token usage" value={betaWeight} onChange={setBetaWeight} />
          <SliderRow label="γ — Quality Weight" desc="Reward high-capability models" value={gammaWeight} onChange={setGammaWeight} />
        </div>

        {/* Compression */}
        <div className="settings-section">
          <div className="settings-section-header">
            <div className="settings-section-title">Prompt Compression</div>
            <div className="settings-section-desc">Automatically compress prompts to reduce token usage and cost</div>
          </div>
          <ToggleRow
            label="Enable Compression"
            desc="Apply LLM-based prompt compression before routing"
            value={compressionEnabled}
            onChange={setCompressionEnabled}
          />
          {/* The threshold slider is only shown while compression is enabled. */}
          {compressionEnabled && (
            <SliderRow
              label="Compression Threshold"
              desc="Minimum compression ratio to apply (lower = more aggressive)"
              value={compressionThreshold}
              min={0.05}
              max={0.5}
              step={0.01}
              onChange={setCompressionThreshold}
              unit=" ratio"
            />
          )}
        </div>

        {/* Evaluation */}
        <div className="settings-section">
          <div className="settings-section-header">
            <div className="settings-section-title">LLM-as-Judge Evaluation</div>
            <div className="settings-section-desc">Use a secondary LLM to evaluate response quality (adds cost)</div>
          </div>
          <ToggleRow
            label="Enable Evaluation"
            desc="Score each response using an independent judge model"
            value={evaluationEnabled}
            onChange={setEvaluationEnabled}
          />
        </div>

        {/* Infrastructure */}
        <div className="settings-section">
          <div className="settings-section-header">
            <div className="settings-section-title">Infrastructure</div>
            <div className="settings-section-desc">Connection settings for cache and queue</div>
          </div>
          <div className="settings-row">
            <div className="settings-row-info">
              <div className="settings-row-label">Redis URL</div>
              <div className="settings-row-desc">Used for session storage and response caching</div>
            </div>
            <div className="settings-key-input-wrapper">
              <input
                type="text"
                value={redisUrl}
                onChange={(e) => setRedisUrl(e.target.value)}
                placeholder="redis://localhost:6379"
              />
            </div>
          </div>
        </div>

      </div>
    </div>
  );
}
|
frontend/src/store.ts
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { create } from 'zustand';
|
| 2 |
+
import type { BudgetMode, GenerateResponse, HealthStatus } from './types';
|
| 3 |
+
|
| 4 |
+
/**
 * Read a persisted setting from `localStorage`.
 *
 * @param key  Storage key to read.
 * @param def  Value returned when nothing usable is stored; its runtime type
 *             also acts as the expected type of the stored value.
 * @returns    The stored value, or `def` when the key is missing, storage is
 *             inaccessible, or the stored value has the wrong primitive type.
 *
 * Values are expected to be JSON, but plain (non-JSON) strings are accepted
 * too so that string settings written without `JSON.stringify` still load.
 */
const getLocal = <T>(key: string, def: T): T => {
  let raw: string | null;
  try {
    raw = localStorage.getItem(key);
  } catch {
    // Accessing storage can throw (e.g. storage blocked by the browser).
    return def;
  }
  if (raw === null) return def;

  // Fall back to the raw text when the stored value is not valid JSON.
  let value: unknown = raw;
  try {
    value = JSON.parse(raw);
  } catch {
    /* plain string — keep `raw` */
  }

  // No type information to validate against: behave as a plain read.
  if (def === null || def === undefined) return value as T;
  if (typeof value === typeof def) return value as T;

  // Type mismatch. For string settings the raw text is the intended value
  // (e.g. "123" stored unquoted); for anything else the data is unusable.
  return typeof def === 'string' ? (raw as unknown as T) : def;
};
|
| 13 |
+
|
| 14 |
+
/** Authentication slice of the app store. */
interface AuthState {
  /** Session identifier, or `null` when there is none. */
  sessionId: string | null;
  /** Whether the user is currently logged in. */
  isLoggedIn: boolean;
}
|
| 18 |
+
|
| 19 |
+
/** Shape of the global app store: state fields followed by their setters. */
interface AppState {
  // ---- Core state ----
  auth: AuthState;
  health: HealthStatus;
  budgetMode: BudgetMode;
  /** Providers reported as connected by the API. */
  connectedProviders: string[];
  sidebarCollapsed: boolean;
  /** Most recent generation result, or `null` if there is none. */
  lastResult: GenerateResponse | null;

  // ---- Core actions ----
  setBudgetMode: (mode: BudgetMode) => void;
  /** Accepts a partial object. */
  setAuth: (auth: Partial<AuthState>) => void;
  /** Accepts a partial object. */
  setHealth: (health: Partial<HealthStatus>) => void;
  setConnectedProviders: (providers: string[]) => void;
  setSidebarCollapsed: (v: boolean) => void;
  setLastResult: (r: GenerateResponse | null) => void;

  // ---- Settings ----
  alphaWeight: number;
  betaWeight: number;
  gammaWeight: number;
  compressionEnabled: boolean;
  compressionThreshold: number;
  evaluationEnabled: boolean;
  redisUrl: string;

  // ---- Settings actions ----
  setAlphaWeight: (v: number) => void;
  setBetaWeight: (v: number) => void;
  setGammaWeight: (v: number) => void;
  setCompressionEnabled: (v: boolean) => void;
  setCompressionThreshold: (v: number) => void;
  setEvaluationEnabled: (v: boolean) => void;
  setRedisUrl: (v: string) => void;
}
|
| 49 |
+
|
| 50 |
+
/**
 * Best-effort write to `localStorage`.
 *
 * `setItem` can throw (quota exceeded, storage blocked). A failed write must
 * not prevent the in-memory store update that follows it, so errors are
 * swallowed here and the setting simply is not persisted.
 */
const saveLocal = (key: string, serialized: string): void => {
  try {
    localStorage.setItem(key, serialized);
  } catch {
    /* storage unavailable — keep the in-memory value only */
  }
};

/**
 * Global zustand store.
 *
 * Core state (auth, health, budget mode, providers, sidebar, last result) is
 * in-memory only. The settings fields are initialised from `localStorage`
 * through `getLocal` and written back by their setters.
 */
export const useAppStore = create<AppState>((set) => ({
  auth: {
    isLoggedIn: false,
    sessionId: null,
  },
  health: {
    redis: 'unknown',
    ml_deps: 'unknown',
    api: 'ok',
  },
  budgetMode: 'balanced',
  connectedProviders: [],
  sidebarCollapsed: false,
  lastResult: null,
  setBudgetMode: (mode) => set({ budgetMode: mode }),
  // auth/health setters merge the partial update into the existing object.
  setAuth: (auth) => set((s) => ({ auth: { ...s.auth, ...auth } })),
  setHealth: (health) => set((s) => ({ health: { ...s.health, ...health } })),
  setConnectedProviders: (providers) => set({ connectedProviders: providers }),
  setSidebarCollapsed: (v) => set({ sidebarCollapsed: v }),
  setLastResult: (r) => set({ lastResult: r }),

  // Settings initial state from localStorage
  alphaWeight: getLocal('llmopt_alpha', 0.4),
  betaWeight: getLocal('llmopt_beta', 0.3),
  gammaWeight: getLocal('llmopt_gamma', 0.3),
  compressionEnabled: getLocal('llmopt_compression', true),
  compressionThreshold: getLocal('llmopt_compression_threshold', 0.15),
  evaluationEnabled: getLocal('llmopt_evaluation', false),
  redisUrl: getLocal('llmopt_redis_url', 'redis://localhost:6379'),

  // Settings setters: persist (best-effort), then update the store.
  setAlphaWeight: (v) => { saveLocal('llmopt_alpha', JSON.stringify(v)); set({ alphaWeight: v }); },
  setBetaWeight: (v) => { saveLocal('llmopt_beta', JSON.stringify(v)); set({ betaWeight: v }); },
  setGammaWeight: (v) => { saveLocal('llmopt_gamma', JSON.stringify(v)); set({ gammaWeight: v }); },
  setCompressionEnabled: (v) => { saveLocal('llmopt_compression', JSON.stringify(v)); set({ compressionEnabled: v }); },
  setCompressionThreshold: (v) => { saveLocal('llmopt_compression_threshold', JSON.stringify(v)); set({ compressionThreshold: v }); },
  setEvaluationEnabled: (v) => { saveLocal('llmopt_evaluation', JSON.stringify(v)); set({ evaluationEnabled: v }); },
  // The Redis URL is stored as plain text (not JSON) — format kept unchanged.
  setRedisUrl: (v) => { saveLocal('llmopt_redis_url', v); set({ redisUrl: v }); },
}));
|
frontend/src/theme.css
ADDED
|
@@ -0,0 +1,1982 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/* ============================================================
|
| 2 |
+
LLMOpt Design System — CSS Custom Properties
|
| 3 |
+
Aesthetic: Dark Industrial Dashboard ("Bloomberg meets cyberpunk")
|
| 4 |
+
============================================================ */
|
| 5 |
+
|
| 6 |
+
/* ---- Google Fonts (loaded in index.html) ---- */
|
| 7 |
+
|
| 8 |
+
/* Design tokens: every colour, size, spacing step and layer index used by
   the rules below is defined here and consumed through var(). */
:root {
  /* Backgrounds */
  --bg-base:     #0A0B0E;
  --bg-surface:  #111318;
  --bg-elevated: #1A1D26;
  --bg-border:   #252A38;

  /* Accents */
  --accent-cyan:   #00E5FF;
  --accent-green:  #00FF94;
  --accent-amber:  #FFB300;
  --accent-red:    #FF3D57;
  --accent-purple: #7C4DFF;

  /* Text */
  --text-primary:   #E8ECF4;
  --text-secondary: #C5CBE0;
  --text-muted:     #9098B0;

  /* Gradients */
  --gradient-glow: linear-gradient(135deg, #00E5FF22, #7C4DFF11);
  --gradient-card: linear-gradient(145deg, #111318, #1A1D26);

  /* Typography sizes */
  --text-xs:   11px;
  --text-sm:   13px;
  --text-base: 15px;
  --text-lg:   18px;
  --text-xl:   24px;
  --text-2xl:  32px;
  --text-3xl:  48px;

  /* Spacing */
  --sp-1:  4px;
  --sp-2:  8px;
  --sp-3:  12px;
  --sp-4:  16px;
  --sp-5:  20px;
  --sp-6:  24px;
  --sp-8:  32px;
  --sp-10: 40px;
  --sp-12: 48px;

  /* Border radius */
  --radius-sm:   4px;
  --radius-md:   8px;
  --radius-lg:   12px;
  --radius-xl:   16px;
  --radius-full: 9999px;

  /* Transitions */
  --transition-fast:   150ms ease;
  --transition-normal: 250ms ease;
  --transition-slow:   400ms ease;

  /* Z-index layers */
  --z-sidebar: 100;
  --z-modal:   200;
  --z-toast:   300;

  /* Sidebar widths (expanded / collapsed) */
  --sidebar-w:           280px;
  --sidebar-w-collapsed: 56px;
}
|
| 72 |
+
|
| 73 |
+
/* ============================================================
|
| 74 |
+
Reset & Base
|
| 75 |
+
============================================================ */
|
| 76 |
+
|
| 77 |
+
/* Universal reset: border-box sizing, no default margins or padding. */
*,
*::before,
*::after {
  margin: 0;
  padding: 0;
  box-sizing: border-box;
}

html {
  scroll-behavior: smooth;
  font-size: 16px;
}

/* Page defaults: dark base background, primary text colour, DM Sans. */
body {
  overflow-x: hidden;
  color: var(--text-primary);
  background-color: var(--bg-base);
  font-family: 'DM Sans', -apple-system, BlinkMacSystemFont, sans-serif;
  font-size: var(--text-base);
  line-height: 1.6;
  -webkit-font-smoothing: antialiased;
  -moz-osx-font-smoothing: grayscale;
}
|
| 98 |
+
|
| 99 |
+
/* ============================================================
|
| 100 |
+
Typography
|
| 101 |
+
============================================================ */
|
| 102 |
+
|
| 103 |
+
/* Headings use the monospace display face with tight leading/tracking. */
h1,
h2,
h3,
h4,
h5,
h6 {
  letter-spacing: -0.02em;
  line-height: 1.2;
  font-family: 'JetBrains Mono', monospace;
}

/* Code-like text; .mono opts any element into the same face. */
code,
pre,
.mono {
  font-family: 'Fira Code', 'JetBrains Mono', monospace;
}
|
| 112 |
+
|
| 113 |
+
/* ============================================================
|
| 114 |
+
Scrollbar
|
| 115 |
+
============================================================ */
|
| 116 |
+
|
| 117 |
+
/* Slim scrollbars (WebKit-prefixed pseudo-elements only). */
::-webkit-scrollbar {
  width: 6px;
  height: 6px;
}

::-webkit-scrollbar-track {
  background: var(--bg-base);
}

::-webkit-scrollbar-thumb {
  background: var(--bg-border);
  border-radius: 3px;
}

::-webkit-scrollbar-thumb:hover {
  background: var(--text-muted);
}
|
| 121 |
+
|
| 122 |
+
/* ============================================================
|
| 123 |
+
Layout
|
| 124 |
+
============================================================ */
|
| 125 |
+
|
| 126 |
+
/* Full-viewport flex shell; scrolling happens inside .page-content. */
.app-layout {
  height: 100vh;
  overflow: hidden;
  display: flex;
}

/* Column to the right of the fixed sidebar; the left margin tracks the
   sidebar width and animates when it changes. */
.main-content {
  margin-left: var(--sidebar-w);
  flex: 1;
  display: flex;
  flex-direction: column;
  overflow: hidden;
  transition: margin-left var(--transition-normal);
}

.main-content.sidebar-collapsed {
  margin-left: var(--sidebar-w-collapsed);
}

/* Scrollable page body. */
.page-content {
  flex: 1;
  padding: var(--sp-6);
  background: var(--bg-base);
  overflow-y: auto;
}
|
| 151 |
+
|
| 152 |
+
/* ============================================================
|
| 153 |
+
Sidebar
|
| 154 |
+
============================================================ */
|
| 155 |
+
|
| 156 |
+
/* Fixed, full-height navigation column pinned to the left edge. */
.sidebar {
  position: fixed;
  top: 0;
  left: 0;
  z-index: var(--z-sidebar);
  width: var(--sidebar-w);
  min-width: var(--sidebar-w);
  height: 100vh;
  display: flex;
  flex-direction: column;
  overflow: hidden;
  background: var(--bg-surface);
  border-right: 1px solid var(--bg-border);
  transition: width var(--transition-normal), min-width var(--transition-normal);
}

.sidebar.collapsed {
  width: var(--sidebar-w-collapsed);
  min-width: var(--sidebar-w-collapsed);
}

/* Logo strip at the top of the sidebar. */
.sidebar-logo {
  min-height: 64px;
  padding: var(--sp-5);
  display: flex;
  align-items: center;
  gap: var(--sp-3);
  border-bottom: 1px solid var(--bg-border);
}

.sidebar-logo-icon {
  flex-shrink: 0;
  display: flex;
  align-items: center;
  font-size: 22px;
  color: var(--accent-cyan);
}

.sidebar-logo-text {
  white-space: nowrap;
  color: var(--text-primary);
  font-family: 'JetBrains Mono', monospace;
  font-size: var(--text-lg);
  font-weight: 700;
}

.sidebar-logo-text span {
  color: var(--accent-cyan);
}

/* Navigation list; flex: 1 lets it take the space between logo and status. */
.sidebar-nav {
  flex: 1;
  display: flex;
  flex-direction: column;
  gap: var(--sp-1);
  padding: var(--sp-4) var(--sp-3);
}

.sidebar-nav-item {
  width: 100%;
  display: flex;
  align-items: center;
  gap: var(--sp-3);
  padding: var(--sp-3);
  cursor: pointer;
  white-space: nowrap;
  text-align: left;
  text-decoration: none;
  font-size: var(--text-sm);
  font-weight: 500;
  color: var(--text-secondary);
  background: transparent;
  border-radius: var(--radius-md);
  /* Order matters: clear all borders first, then reserve a transparent left
     edge that the .active state colours in. */
  border: none;
  border-left: 2px solid transparent;
  transition: background var(--transition-fast), color var(--transition-fast), border-color var(--transition-fast);
}

.sidebar-nav-item:hover {
  color: var(--text-primary);
  background: var(--bg-elevated);
}

.sidebar-nav-item.active {
  color: var(--accent-cyan);
  background: rgba(0, 229, 255, 0.08);
  border-left-color: var(--accent-cyan);
}

.sidebar-nav-icon {
  width: 20px;
  flex-shrink: 0;
  display: flex;
  align-items: center;
  justify-content: center;
}

/* Small uppercase caption. */
.sidebar-section-label {
  padding: var(--sp-4) var(--sp-3) var(--sp-2);
  white-space: nowrap;
  color: var(--text-muted);
  font-size: var(--text-xs);
  font-weight: 600;
  letter-spacing: 0.1em;
  text-transform: uppercase;
}

/* Status panel, separated from the content above by a top border. */
.sidebar-status {
  padding: var(--sp-4);
  border-top: 1px solid var(--bg-border);
}

.sidebar-status-title {
  margin-bottom: var(--sp-3);
  color: var(--text-muted);
  font-size: var(--text-xs);
  font-weight: 600;
  letter-spacing: 0.1em;
  text-transform: uppercase;
}

.sidebar-status-item {
  display: flex;
  align-items: center;
  gap: var(--sp-2);
  padding: var(--sp-1) 0;
  color: var(--text-secondary);
  font-family: 'JetBrains Mono', monospace;
  font-size: var(--text-xs);
}
|
| 286 |
+
|
| 287 |
+
/* ============================================================
|
| 288 |
+
Status Dot
|
| 289 |
+
============================================================ */
|
| 290 |
+
|
| 291 |
+
/* 8px circular status indicator; combine with one of the colour variants. */
.dot {
  width: 8px;
  height: 8px;
  border-radius: 50%;
  flex-shrink: 0;
}

/* Green, glowing, and pulsing continuously. */
.dot-live {
  background: var(--accent-green);
  box-shadow: 0 0 8px var(--accent-green);
  animation: pulse 2s infinite;
}

.dot-warning {
  background: var(--accent-amber);
  box-shadow: 0 0 8px var(--accent-amber);
}

.dot-error {
  background: var(--accent-red);
  box-shadow: 0 0 8px var(--accent-red);
}

/* Neutral variant without a glow. */
.dot-muted {
  background: var(--text-muted);
}

@keyframes pulse {
  0%, 100% { opacity: 1; transform: scale(1); }
  50% { opacity: 0.7; transform: scale(0.9); }
}

/* The pulse never stops on its own, so switch it off for users who have
   asked the system to reduce motion. The dot keeps its colour and glow. */
@media (prefers-reduced-motion: reduce) {
  .dot-live {
    animation: none;
  }
}
|
| 322 |
+
|
| 323 |
+
/* ============================================================
   Metric Cards
   ============================================================ */

/* Card surface. `position: relative` anchors the ::before strip and
   `overflow: hidden` clips it to the rounded corners. */
.metric-card {
  position: relative;
  overflow: hidden;
  padding: var(--sp-5) var(--sp-6);
  border: 1px solid var(--bg-border);
  border-radius: var(--radius-lg);
  background: var(--bg-surface);
  transition: border-color var(--transition-normal), box-shadow var(--transition-normal);
}

/* Hover: cyan-tinted border and a faint glow. */
.metric-card:hover {
  box-shadow: 0 0 20px rgba(0, 229, 255, 0.05);
  border-color: rgba(0, 229, 255, 0.3);
}

/* 2px strip pinned to the top edge; it has no colour until one of the
   accent modifier classes below supplies a background. */
.metric-card::before {
  content: '';
  position: absolute;
  top: 0;
  right: 0;
  left: 0;
  height: 2px;
}

/* Accent modifiers: colour of the top strip. */
.metric-card.cyan::before {
  background: var(--accent-cyan);
}

.metric-card.green::before {
  background: var(--accent-green);
}

.metric-card.amber::before {
  background: var(--accent-amber);
}

.metric-card.purple::before {
  background: var(--accent-purple);
}

/* Small uppercase caption. */
.metric-card-label {
  margin-bottom: var(--sp-2);
  color: var(--text-secondary);
  font-size: var(--text-xs);
  font-weight: 600;
  letter-spacing: 0.1em;
  text-transform: uppercase;
}

/* Large monospace figure with tight line height. */
.metric-card-value {
  margin-bottom: var(--sp-2);
  color: var(--text-primary);
  font-family: 'JetBrains Mono', monospace;
  font-size: var(--text-2xl);
  font-weight: 700;
  line-height: 1;
}

/* Inline row with a 4px gap between its children. */
.metric-card-delta {
  display: flex;
  align-items: center;
  gap: 4px;
  font-size: var(--text-xs);
  font-weight: 600;
}

/* Delta colour modifiers. */
.delta-up {
  color: var(--accent-green);
}

.delta-down {
  color: var(--accent-red);
}

.delta-neutral {
  color: var(--text-secondary);
}
|
| 383 |
+
|
| 384 |
+
/* ============================================================
   Buttons
   ============================================================ */

/* Base button: centred inline-flex content, monospace uppercase label,
   no border. Variants below add colour; size modifiers override padding
   and font size. */
.btn {
  display: inline-flex;
  align-items: center;
  justify-content: center;
  gap: var(--sp-2);
  padding: var(--sp-2) var(--sp-5);
  border: none;
  border-radius: var(--radius-md);
  font-family: 'JetBrains Mono', monospace;
  font-size: var(--text-sm);
  font-weight: 600;
  letter-spacing: 0.05em;
  text-transform: uppercase;
  white-space: nowrap;
  cursor: pointer;
  transition: all var(--transition-fast);
}

/* Disabled buttons are dimmed and show the not-allowed cursor. */
.btn:disabled {
  cursor: not-allowed;
  opacity: 0.4;
}

/* Primary: solid cyan with black text. */
.btn-primary {
  color: #000;
  background: var(--accent-cyan);
}

/* Hover effects on every variant are gated on :not(:disabled). */
.btn-primary:hover:not(:disabled) {
  transform: scale(1.02);
  background: #33EAFF;
  box-shadow: 0 0 20px rgba(0, 229, 255, 0.4);
}

/* Ghost: transparent with a hairline border. */
.btn-ghost {
  border: 1px solid var(--bg-border);
  color: var(--text-secondary);
  background: transparent;
}

.btn-ghost:hover:not(:disabled) {
  color: var(--accent-cyan);
  border-color: var(--accent-cyan);
}

/* Danger: red text on a translucent red fill; turns solid red on hover. */
.btn-danger {
  border: 1px solid rgba(255, 61, 87, 0.3);
  color: var(--accent-red);
  background: rgba(255, 61, 87, 0.1);
}

.btn-danger:hover:not(:disabled) {
  color: #fff;
  background: var(--accent-red);
}

/* Size modifiers. */
.btn-sm {
  font-size: var(--text-xs);
  padding: 6px 12px;
}

.btn-lg {
  font-size: var(--text-base);
  padding: var(--sp-3) var(--sp-8);
}
|
| 453 |
+
|
| 454 |
+
/* ============================================================
   Budget Pills
   ============================================================ */

/* Horizontal row of pills. */
.budget-pills {
  gap: var(--sp-2);
  display: flex;
}

/* Neutral pill: fully rounded, monospace uppercase label. */
.budget-pill {
  padding: 6px 14px;
  border: 1px solid var(--bg-border);
  border-radius: var(--radius-full);
  background: var(--bg-elevated);
  color: var(--text-secondary);
  font-family: 'JetBrains Mono', monospace;
  font-size: var(--text-xs);
  font-weight: 700;
  letter-spacing: 0.08em;
  text-transform: uppercase;
  cursor: pointer;
  transition: all var(--transition-fast);
}

.budget-pill:hover {
  color: var(--text-primary);
  border-color: var(--text-secondary);
}

/* Active state per tier. The three-class selectors outrank the :hover
   rule above, so an active pill keeps its tier colours while hovered. */
.budget-pill.cheap.active {
  border-color: var(--accent-green);
  background: rgba(0, 255, 148, 0.15);
  color: var(--accent-green);
}

.budget-pill.balanced.active {
  border-color: var(--accent-amber);
  background: rgba(255, 179, 0, 0.15);
  color: var(--accent-amber);
}

.budget-pill.quality.active {
  border-color: var(--accent-cyan);
  background: rgba(0, 229, 255, 0.15);
  color: var(--accent-cyan);
}
|
| 486 |
+
|
| 487 |
+
/* ============================================================
   Form elements
   ============================================================ */

/* Vertical stack with a fixed gap between children. */
.input-group {
  display: flex;
  gap: var(--sp-2);
  flex-direction: column;
}

/* Small uppercase caption. */
.input-label {
  color: var(--text-secondary);
  font-size: var(--text-xs);
  font-weight: 600;
  letter-spacing: 0.08em;
  text-transform: uppercase;
}

/* Shared look for text-like inputs, textareas and selects. The native
   outline is removed here; the :focus rule that follows replaces it with
   a border colour change and a ring drawn via box-shadow. */
input[type="text"],
input[type="email"],
input[type="password"],
input[type="url"],
input[type="number"],
textarea,
select {
  width: 100%;
  padding: var(--sp-3) var(--sp-4);
  border: 1px solid var(--bg-border);
  border-radius: var(--radius-md);
  outline: none;
  background: var(--bg-elevated);
  color: var(--text-primary);
  font-family: 'JetBrains Mono', monospace;
  font-size: var(--text-sm);
  resize: vertical;
  transition: border-color var(--transition-fast), box-shadow var(--transition-fast);
}

/* Focus indicator for the controls above. */
input[type="text"]:focus,
input[type="email"]:focus,
input[type="password"]:focus,
input[type="url"]:focus,
input[type="number"]:focus,
textarea:focus,
select:focus {
  box-shadow: 0 0 0 3px rgba(0, 229, 255, 0.1);
  border-color: var(--accent-cyan);
}

/* Placeholder text uses the muted colour. */
input::placeholder,
textarea::placeholder {
  color: var(--text-muted);
}
|
| 540 |
+
|
| 541 |
+
/* Select: strip the native chrome and paint a custom chevron instead.
   The prefixed `-webkit-appearance` is declared alongside the standard
   property, matching the range-input rules in this file, so engines that
   only honour the prefixed form also drop the native arrow rather than
   drawing it on top of the custom one. */
select {
  cursor: pointer;
  -webkit-appearance: none;
  appearance: none;
  /* Inline SVG chevron, stroked in #7A8299. */
  background-image: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' fill='none' viewBox='0 0 24 24' stroke='%237A8299'%3E%3Cpath stroke-linecap='round' stroke-linejoin='round' stroke-width='2' d='M19 9l-7 7-7-7'/%3E%3C/svg%3E");
  background-repeat: no-repeat;
  background-position: right 12px center;
  background-size: 16px;
  /* Keeps the option text clear of the chevron. */
  padding-right: 40px;
}
|
| 550 |
+
|
| 551 |
+
/* ============================================================
   Toggle Switch
   ============================================================ */

/* 44x24 positioning context for the hidden checkbox and the painted slider. */
.toggle-switch {
  position: relative;
  display: inline-block;
  width: 44px;
  height: 24px;
  flex-shrink: 0;
}

/* The native checkbox is made invisible and zero-sized but is not removed
   from rendering, so it can still receive focus and still drives the
   :checked / :focus-visible sibling selectors below. */
.toggle-switch input {
  opacity: 0;
  width: 0;
  height: 0;
}

/* Track: fills the whole .toggle-switch box. */
.toggle-slider {
  position: absolute;
  cursor: pointer;
  top: 0; left: 0; right: 0; bottom: 0;
  background: var(--bg-border);
  border-radius: var(--radius-full);
  transition: var(--transition-fast);
}

/* Knob: 18px circle inset 3px from the track's left and bottom edges. */
.toggle-slider::before {
  content: '';
  position: absolute;
  height: 18px;
  width: 18px;
  left: 3px;
  bottom: 3px;
  background: var(--text-secondary);
  border-radius: 50%;
  transition: var(--transition-fast);
}

/* Keyboard focus indicator. The focused element is the invisible checkbox,
   so the ring is drawn on the adjacent slider; without this rule there is
   nothing visible to show where focus is. */
.toggle-switch input:focus-visible + .toggle-slider {
  outline: 2px solid var(--accent-cyan);
  outline-offset: 2px;
}

/* Checked track. The 1px cyan edge is an inset box-shadow rather than a
   border: the track is sized by its insets, so a real border would shrink
   its padding box and nudge the absolutely positioned knob by 1px whenever
   the switch is toggled. */
.toggle-switch input:checked + .toggle-slider {
  background: rgba(0, 229, 255, 0.2);
  box-shadow: inset 0 0 0 1px var(--accent-cyan);
}

/* Checked knob: slides 20px right (3 + 20 + 18 = 41, leaving the same 3px
   margin on the right of the 44px track) and turns cyan. */
.toggle-switch input:checked + .toggle-slider::before {
  transform: translateX(20px);
  background: var(--accent-cyan);
}
|
| 599 |
+
|
| 600 |
+
/* ============================================================
   Range Slider
   ============================================================ */

/* Track: native appearance removed, drawn as a 4px rounded bar. */
input[type="range"] {
  -webkit-appearance: none;
  appearance: none;
  width: 100%;
  height: 4px;
  background: var(--bg-border);
  border-radius: 2px;
  outline: none;
  padding: 0;
}

/* The base rule removes the native outline, so restore a visible
   indicator for keyboard focus. The offset keeps the ring clear of the
   16px thumb, which overhangs the 4px track. */
input[type="range"]:focus-visible {
  outline: 2px solid var(--accent-cyan);
  outline-offset: 8px;
}

/* Thumb for WebKit/Blink engines. */
input[type="range"]::-webkit-slider-thumb {
  -webkit-appearance: none;
  appearance: none;
  width: 16px;
  height: 16px;
  border-radius: 50%;
  background: var(--accent-cyan);
  cursor: pointer;
  box-shadow: 0 0 8px rgba(0, 229, 255, 0.5);
}

/* Thumb for Gecko. `border: none` clears the default thumb border; the
   glow matches the WebKit thumb so both engines render the same control. */
input[type="range"]::-moz-range-thumb {
  width: 16px;
  height: 16px;
  border-radius: 50%;
  background: var(--accent-cyan);
  cursor: pointer;
  border: none;
  box-shadow: 0 0 8px rgba(0, 229, 255, 0.5);
}
|
| 634 |
+
|
| 635 |
+
/* ============================================================
   Pipeline Visualizer
   ============================================================ */

/* Outer panel; scrolls horizontally when the stage row is wider than it. */
.pipeline-wrapper {
  overflow-x: auto;
  padding: var(--sp-5);
  border: 1px solid var(--bg-border);
  border-radius: var(--radius-lg);
  background: var(--bg-surface);
}

/* Single row of stages and connectors; `min-width: max-content` stops the
   row from being compressed below its natural width. */
.pipeline-stages {
  display: flex;
  align-items: center;
  gap: 0;
  min-width: max-content;
  padding: var(--sp-2) 0;
}

/* One stage: a centred vertical stack at least 90px wide. */
.pipeline-stage {
  display: flex;
  flex-direction: column;
  align-items: center;
  gap: var(--sp-2);
  min-width: 90px;
}

/* Node box in its default (muted) state. */
.pipeline-node {
  position: relative;
  display: flex;
  flex-direction: column;
  align-items: center;
  justify-content: center;
  gap: 4px;
  width: 80px;
  height: 64px;
  border: 1px solid var(--bg-border);
  border-radius: var(--radius-md);
  background: var(--bg-elevated);
  color: var(--text-muted);
  font-family: 'JetBrains Mono', monospace;
  font-size: var(--text-xs);
  font-weight: 600;
  letter-spacing: 0.05em;
  text-align: center;
  text-transform: uppercase;
  cursor: default;
  transition: all var(--transition-normal);
}

.pipeline-node-icon {
  line-height: 1;
  font-size: 18px;
}

/* State modifiers. `active` also runs the nodePulse glow animation. */
.pipeline-node.active {
  border-color: var(--accent-cyan);
  color: var(--accent-cyan);
  box-shadow: 0 0 20px rgba(0, 229, 255, 0.3);
  animation: nodePulse 1s ease-in-out infinite;
}

.pipeline-node.complete {
  border-color: var(--accent-green);
  color: var(--accent-green);
  background: rgba(0, 255, 148, 0.08);
}

.pipeline-node.skipped {
  border-color: var(--accent-amber);
  color: var(--accent-amber);
  background: rgba(255, 179, 0, 0.08);
}

.pipeline-node.error {
  border-color: var(--accent-red);
  color: var(--accent-red);
  background: rgba(255, 61, 87, 0.08);
}

/* Latency caption; the min-height reserves one line even when empty. */
.pipeline-latency {
  min-height: 16px;
  color: var(--text-muted);
  font-family: 'JetBrains Mono', monospace;
  font-size: var(--text-xs);
}

.pipeline-latency.visible {
  color: var(--accent-green);
}

/* Connector between stages: a 2px bar that grows to fill spare width and
   clips its fill child. */
.pipeline-connector {
  position: relative;
  flex: 1;
  min-width: 20px;
  height: 2px;
  overflow: hidden;
  background: var(--bg-border);
}

/* Cyan fill anchored to the connector's left edge. No width is set here;
   width changes are animated over 0.3s. */
.pipeline-connector-fill {
  position: absolute;
  top: 0;
  bottom: 0;
  left: 0;
  background: var(--accent-cyan);
  box-shadow: 0 0 6px var(--accent-cyan);
  transition: width 0.3s ease;
}

/* Glow that swells at the midpoint and relaxes at both ends. */
@keyframes nodePulse {
  0%,
  100% {
    box-shadow: 0 0 10px rgba(0, 229, 255, 0.3);
  }

  50% {
    box-shadow: 0 0 25px rgba(0, 229, 255, 0.6);
  }
}
|
| 748 |
+
|
| 749 |
+
/* ============================================================
   Query Input Area
   ============================================================ */

/* Frame around the textarea and toolbar; clips children to its radius. */
.query-box-wrapper {
  overflow: hidden;
  border: 1px solid var(--bg-border);
  border-radius: var(--radius-lg);
  background: var(--bg-surface);
  transition: border-color var(--transition-fast), box-shadow var(--transition-fast);
}

/* The frame, not the textarea, shows the focus ring: it lights up
   whenever any descendant has focus. */
.query-box-wrapper:focus-within {
  box-shadow: 0 0 0 3px rgba(0, 229, 255, 0.1);
  border-color: var(--accent-cyan);
}

/* Borderless, transparent, non-resizable textarea filling the frame. */
.query-textarea {
  width: 100%;
  min-height: 140px;
  padding: var(--sp-5);
  border: none;
  outline: none;
  background: transparent;
  color: var(--text-primary);
  font-family: 'JetBrains Mono', monospace;
  font-size: var(--text-base);
  line-height: 1.7;
  resize: none;
}

/* Toolbar strip below the textarea, children pushed to opposite ends. */
.query-toolbar {
  display: flex;
  align-items: center;
  justify-content: space-between;
  padding: var(--sp-3) var(--sp-4);
  border-top: 1px solid var(--bg-border);
  background: var(--bg-elevated);
}

.query-toolbar-left {
  display: flex;
  gap: var(--sp-4);
  align-items: center;
}

.query-token-count {
  color: var(--text-muted);
  font-size: var(--text-xs);
  font-family: 'JetBrains Mono', monospace;
}
|
| 800 |
+
|
| 801 |
+
/* ============================================================
   Response Panel
   ============================================================ */

/* Panel frame; clips header and body to its rounded corners. */
.response-panel {
  overflow: hidden;
  border: 1px solid var(--bg-border);
  border-radius: var(--radius-lg);
  background: var(--bg-surface);
}

/* Header strip with children pushed to opposite ends. */
.response-header {
  display: flex;
  align-items: center;
  justify-content: space-between;
  padding: var(--sp-3) var(--sp-5);
  border-bottom: 1px solid var(--bg-border);
  background: var(--bg-elevated);
}

.response-meta {
  display: flex;
  gap: var(--sp-4);
  align-items: center;
}

.response-meta-item {
  color: var(--text-secondary);
  font-size: var(--text-xs);
  font-family: 'JetBrains Mono', monospace;
}

/* A <span> inside a meta item is highlighted in cyan. */
.response-meta-item span {
  color: var(--accent-cyan);
}

/* Body: capped at 500px tall, scrolls vertically beyond that. */
.response-body {
  max-height: 500px;
  overflow-y: auto;
  padding: var(--sp-5);
  font-size: var(--text-base);
  line-height: 1.8;
}

.response-body p {
  margin-bottom: var(--sp-4);
}

/* Inline code chip. */
.response-body code {
  padding: 2px 6px;
  border-radius: var(--radius-sm);
  background: var(--bg-elevated);
  color: var(--accent-cyan);
  font-family: 'Fira Code', monospace;
  font-size: 0.9em;
}

/* Code block container; scrolls horizontally instead of wrapping. */
.response-body pre {
  overflow-x: auto;
  margin-bottom: var(--sp-4);
  padding: var(--sp-4);
  border: 1px solid var(--bg-border);
  border-radius: var(--radius-md);
  background: var(--bg-elevated);
}

/* Code inside a <pre> drops the chip styling applied to inline code. */
.response-body pre code {
  padding: 0;
  background: transparent;
  color: var(--text-primary);
}
|
| 867 |
+
|
| 868 |
+
/* ============================================================
   Explain Card
   ============================================================ */

.explain-card {
  overflow: hidden;
  border: 1px solid var(--bg-border);
  border-radius: var(--radius-md);
  background: var(--bg-elevated);
}

/* Header row: pointer cursor and non-selectable text. */
.explain-card-header {
  display: flex;
  align-items: center;
  justify-content: space-between;
  padding: var(--sp-3) var(--sp-4);
  border-bottom: 1px solid var(--bg-border);
  user-select: none;
  cursor: pointer;
}

.explain-card-header:hover {
  background: rgba(0, 229, 255, 0.04);
}

.explain-title {
  color: var(--accent-cyan);
  font-family: 'JetBrains Mono', monospace;
  font-size: var(--text-xs);
  font-weight: 700;
  letter-spacing: 0.1em;
  text-transform: uppercase;
}

/* Body text: small monospace with double line height. */
.explain-body {
  padding: var(--sp-4);
  color: var(--text-secondary);
  font-family: 'JetBrains Mono', monospace;
  font-size: var(--text-xs);
  line-height: 2;
}

/* Each line is prefixed with a cyan "> " marker. */
.explain-line::before {
  color: var(--accent-cyan);
  content: '> ';
}

/* Lines are block-level, so each one starts on its own row. */
.explain-line {
  display: block;
}

.explain-highlight {
  color: var(--accent-green);
}
|
| 922 |
+
|
| 923 |
+
/* ============================================================
   Tables
   ============================================================ */

.data-table {
  width: 100%;
  font-size: var(--text-sm);
  border-collapse: collapse;
}

/* Header cells: small uppercase captions that never wrap. */
.data-table th {
  padding: var(--sp-3) var(--sp-4);
  border-bottom: 1px solid var(--bg-border);
  color: var(--text-secondary);
  font-size: var(--text-xs);
  font-weight: 600;
  letter-spacing: 0.08em;
  text-align: left;
  text-transform: uppercase;
  white-space: nowrap;
}

.data-table td {
  padding: var(--sp-3) var(--sp-4);
  border-bottom: 1px solid rgba(37, 42, 56, 0.5);
  vertical-align: middle;
}

/* Zebra striping. The :hover rule has the same specificity as the two
   stripe rules and wins only because it is declared after them, so keep
   it last. */
.data-table tbody tr:nth-child(odd) {
  background: var(--bg-surface);
}

.data-table tbody tr:nth-child(even) {
  background: var(--bg-elevated);
}

.data-table tbody tr:hover {
  cursor: pointer;
  background: rgba(0, 229, 255, 0.04);
}

/* Bar with adjacent content, laid out in a row. */
.complexity-bar {
  display: flex;
  gap: var(--sp-2);
  align-items: center;
}

/* Fixed 60x4 track that clips its fill. */
.complexity-bar-track {
  flex-shrink: 0;
  width: 60px;
  height: 4px;
  overflow: hidden;
  border-radius: 2px;
  background: var(--bg-border);
}

/* Fill: full track height; neither width nor colour is set here. */
.complexity-bar-fill {
  height: 100%;
  border-radius: 2px;
  transition: width var(--transition-slow);
}
|
| 975 |
+
|
| 976 |
+
/* ============================================================
   Badge / Pill
   ============================================================ */

/* Base badge: compact, fully rounded, monospace uppercase label. Colour
   comes from exactly one of the modifier classes below. */
.badge {
  display: inline-flex;
  align-items: center;
  padding: 2px 8px;
  border-radius: var(--radius-full);
  font-family: 'JetBrains Mono', monospace;
  font-size: var(--text-xs);
  font-weight: 600;
  letter-spacing: 0.04em;
  text-transform: uppercase;
}

/* Each colour modifier sets a translucent fill, a text colour and a
   1px border. */
.badge-cyan {
  border: 1px solid rgba(0, 229, 255, 0.3);
  background: rgba(0, 229, 255, 0.12);
  color: var(--accent-cyan);
}

.badge-green {
  border: 1px solid rgba(0, 255, 148, 0.3);
  background: rgba(0, 255, 148, 0.12);
  color: var(--accent-green);
}

.badge-amber {
  border: 1px solid rgba(255, 179, 0, 0.3);
  background: rgba(255, 179, 0, 0.12);
  color: var(--accent-amber);
}

.badge-red {
  border: 1px solid rgba(255, 61, 87, 0.3);
  background: rgba(255, 61, 87, 0.12);
  color: var(--accent-red);
}

.badge-purple {
  border: 1px solid rgba(124, 77, 255, 0.3);
  background: rgba(124, 77, 255, 0.12);
  color: var(--accent-purple);
}

.badge-muted {
  border: 1px solid var(--bg-border);
  background: rgba(61, 67, 87, 0.3);
  color: var(--text-secondary);
}
|
| 998 |
+
|
| 999 |
+
/* ============================================================
   Cards
   ============================================================ */

/* Generic card surface; refuses to shrink when placed in a flex container. */
.card {
  flex-shrink: 0;
  overflow: hidden;
  border: 1px solid var(--bg-border);
  border-radius: var(--radius-lg);
  background: var(--bg-surface);
  transition: border-color var(--transition-normal), box-shadow var(--transition-normal);
}

/* Hover: faint cyan border and a dark drop shadow. */
.card:hover {
  box-shadow: 0 4px 20px rgba(0, 0, 0, 0.3);
  border-color: rgba(0, 229, 255, 0.2);
}

/* Header row with children pushed to opposite ends. */
.card-header {
  display: flex;
  align-items: center;
  justify-content: space-between;
  padding: var(--sp-4) var(--sp-5);
  border-bottom: 1px solid var(--bg-border);
}

.card-title {
  color: var(--text-primary);
  font-family: 'JetBrains Mono', monospace;
  font-size: var(--text-base);
  font-weight: 700;
}

.card-subtitle {
  margin-top: 2px;
  color: var(--text-secondary);
  font-size: var(--text-xs);
}

.card-body {
  padding: var(--sp-5);
}
|
| 1041 |
+
|
| 1042 |
+
/* ============================================================
   Grid layouts
   ============================================================ */

/* Equal-width column grids sharing one gap size. */
.grid-2 {
  display: grid;
  gap: var(--sp-4);
  grid-template-columns: repeat(2, 1fr);
}

.grid-3 {
  display: grid;
  gap: var(--sp-4);
  grid-template-columns: repeat(3, 1fr);
}

.grid-4 {
  display: grid;
  gap: var(--sp-4);
  grid-template-columns: repeat(4, 1fr);
}

/* At 1280px and below the four-column grid drops to two columns. */
@media (max-width: 1280px) {
  .grid-4 {
    grid-template-columns: repeat(2, 1fr);
  }
}

@media (max-width: 1024px) {
  /* Settings rows stack vertically from 1024px down so their contents do
     not overlap while the sidebar is open. */
  .settings-row {
    gap: var(--sp-3);
    flex-direction: column;
    align-items: stretch;
  }

  .settings-key-input-wrapper {
    max-width: none;
    width: 100%;
  }
}

@media (max-width: 768px) {
  /* Every grid becomes a single column. */
  .grid-2,
  .grid-3,
  .grid-4 {
    grid-template-columns: 1fr;
  }

  /* Force the collapsed sidebar width and the matching content offset;
     !important overrides any competing width/margin declarations. */
  .main-content {
    margin-left: var(--sidebar-w-collapsed) !important;
  }

  .sidebar {
    width: var(--sidebar-w-collapsed) !important;
  }

  /* Hide the sidebar's text and status block. */
  .sidebar-logo-text,
  .sidebar-nav-item span,
  .sidebar-section-label,
  .sidebar-status {
    display: none;
  }

  .page-content {
    padding: var(--sp-4);
  }
}
|
| 1081 |
+
|
| 1082 |
+
/* ============================================================
   Page headings
   ============================================================ */

/* Spacing below the heading group. */
.page-header {
  margin-bottom: var(--sp-6);
}

/* Bold monospace title. */
.page-title {
  margin-bottom: var(--sp-1);
  color: var(--text-primary);
  font-family: 'JetBrains Mono', monospace;
  font-size: var(--text-xl);
  font-weight: 700;
}

/* Smaller secondary-colour line. */
.page-subtitle {
  color: var(--text-secondary);
  font-size: var(--text-sm);
}
|
| 1102 |
+
|
| 1103 |
+
/* ============================================================
   Section
   ============================================================ */

.section {
  margin-bottom: var(--sp-6);
}

/* Uppercase heading laid out as a flex row so the ::after rule below can
   stretch across the remaining width. */
.section-title {
  display: flex;
  align-items: center;
  gap: var(--sp-3);
  margin-bottom: var(--sp-4);
  color: var(--text-secondary);
  font-family: 'JetBrains Mono', monospace;
  font-size: var(--text-sm);
  font-weight: 700;
  letter-spacing: 0.1em;
  text-transform: uppercase;
}

/* 1px line that fills the space to the right of the heading text. */
.section-title::after {
  content: '';
  flex: 1;
  height: 1px;
  background: var(--bg-border);
}
|
| 1130 |
+
|
| 1131 |
+
/* ============================================================
   Auth / Login page
   ============================================================ */

/* Full-viewport stage that centres its content. `position: relative`
   anchors the absolutely positioned backgrounds and `overflow: hidden`
   clips the parts of the glows that extend past the edges. */
.auth-page {
  position: relative;
  display: flex;
  align-items: center;
  justify-content: center;
  min-height: 100vh;
  overflow: hidden;
  background: var(--bg-base);
}

/* Faint 40px grid drawn from two 1px linear gradients (horizontal and
   vertical lines); ignores pointer events. */
.auth-bg-grid {
  position: absolute;
  inset: 0;
  pointer-events: none;
  background-image:
    linear-gradient(rgba(0, 229, 255, 0.03) 1px, transparent 1px),
    linear-gradient(90deg, rgba(0, 229, 255, 0.03) 1px, transparent 1px);
  background-size: 40px 40px;
}

/* 600px cyan radial glow offset past the top-left corner. */
.auth-bg-glow {
  position: absolute;
  top: -100px;
  left: -100px;
  width: 600px;
  height: 600px;
  border-radius: 50%;
  background: radial-gradient(circle, rgba(0, 229, 255, 0.06) 0%, transparent 70%);
  pointer-events: none;
}

/* 400px purple radial glow offset past the bottom-right corner. */
.auth-bg-glow-2 {
  position: absolute;
  right: -50px;
  bottom: -50px;
  width: 400px;
  height: 400px;
  border-radius: 50%;
  background: radial-gradient(circle, rgba(124, 77, 255, 0.06) 0%, transparent 70%);
  pointer-events: none;
}

/* Card capped at 440px wide; z-index lifts it above the backgrounds. */
.auth-card {
  position: relative;
  z-index: 1;
  width: 100%;
  max-width: 440px;
  padding: var(--sp-10);
  border: 1px solid var(--bg-border);
  border-radius: var(--radius-xl);
  background: var(--bg-surface);
}

.auth-logo {
  display: flex;
  align-items: center;
  gap: var(--sp-3);
  margin-bottom: var(--sp-8);
}

.auth-logo-icon {
  font-size: 28px;
  color: var(--accent-cyan);
}

.auth-logo-text {
  font-family: 'JetBrains Mono', monospace;
  font-size: var(--text-xl);
  font-weight: 700;
}

/* A <span> inside the logo text is coloured cyan. */
.auth-logo-text span {
  color: var(--accent-cyan);
}
|
| 1209 |
+
|
| 1210 |
+
/* Card heading. */
.auth-title {
  margin-bottom: var(--sp-1);
  font-family: 'JetBrains Mono', monospace;
  font-size: var(--text-lg);
  font-weight: 700;
}

.auth-subtitle {
  margin-bottom: var(--sp-8);
  color: var(--text-secondary);
  font-size: var(--text-sm);
}

/* Vertical stack of form rows. */
.auth-form {
  display: flex;
  gap: var(--sp-4);
  flex-direction: column;
}

/* Divider row: two flexible 1px lines either side of a caption. */
.auth-divider {
  display: flex;
  align-items: center;
  gap: var(--sp-3);
  margin: var(--sp-5) 0;
}

.auth-divider-line {
  flex: 1;
  height: 1px;
  background: var(--bg-border);
}

.auth-divider-text {
  color: var(--text-muted);
  font-size: var(--text-xs);
  letter-spacing: 0.08em;
  text-transform: uppercase;
}

/* Full-width provider button; `text-decoration: none` removes any link
   underline. */
.oauth-btn {
  display: flex;
  align-items: center;
  justify-content: center;
  gap: var(--sp-3);
  width: 100%;
  padding: var(--sp-3);
  border: 1px solid var(--bg-border);
  border-radius: var(--radius-md);
  background: var(--bg-elevated);
  color: var(--text-primary);
  font-size: var(--text-sm);
  font-weight: 500;
  text-decoration: none;
  cursor: pointer;
  transition: all var(--transition-fast);
}

.oauth-btn:hover {
  background: rgba(255, 255, 255, 0.04);
  border-color: var(--text-muted);
}

.auth-footer {
  margin-top: var(--sp-6);
  color: var(--text-secondary);
  font-size: var(--text-sm);
  text-align: center;
}

/* Links: cyan, underlined only on hover. */
.auth-footer a,
.auth-link {
  color: var(--accent-cyan);
  text-decoration: none;
  cursor: pointer;
}

.auth-footer a:hover,
.auth-link:hover {
  text-decoration: underline;
}

/* Error banner: red text on a translucent red fill. */
.auth-error {
  padding: var(--sp-3) var(--sp-4);
  border: 1px solid rgba(255, 61, 87, 0.3);
  border-radius: var(--radius-md);
  background: rgba(255, 61, 87, 0.1);
  color: var(--accent-red);
  font-size: var(--text-sm);
}
|
| 1297 |
+
|
| 1298 |
+
/* ============================================================
|
| 1299 |
+
Model Registry Card
|
| 1300 |
+
============================================================ */
|
| 1301 |
+
|
| 1302 |
+
.model-card {
|
| 1303 |
+
background: var(--bg-surface);
|
| 1304 |
+
border: 1px solid var(--bg-border);
|
| 1305 |
+
border-radius: var(--radius-lg);
|
| 1306 |
+
padding: var(--sp-5);
|
| 1307 |
+
display: flex;
|
| 1308 |
+
flex-direction: column;
|
| 1309 |
+
gap: var(--sp-4);
|
| 1310 |
+
transition: border-color var(--transition-normal), box-shadow var(--transition-normal);
|
| 1311 |
+
}
|
| 1312 |
+
|
| 1313 |
+
.model-card:hover {
|
| 1314 |
+
border-color: rgba(0, 229, 255, 0.3);
|
| 1315 |
+
box-shadow: 0 0 20px rgba(0, 229, 255, 0.06);
|
| 1316 |
+
}
|
| 1317 |
+
|
| 1318 |
+
.model-card-name {
|
| 1319 |
+
font-family: 'JetBrains Mono', monospace;
|
| 1320 |
+
font-size: var(--text-base);
|
| 1321 |
+
font-weight: 700;
|
| 1322 |
+
color: var(--text-primary);
|
| 1323 |
+
}
|
| 1324 |
+
|
| 1325 |
+
.model-card-pricing {
|
| 1326 |
+
display: flex;
|
| 1327 |
+
gap: var(--sp-6);
|
| 1328 |
+
}
|
| 1329 |
+
|
| 1330 |
+
.model-card-price-item {
|
| 1331 |
+
display: flex;
|
| 1332 |
+
flex-direction: column;
|
| 1333 |
+
gap: 2px;
|
| 1334 |
+
}
|
| 1335 |
+
|
| 1336 |
+
.model-card-price-label {
|
| 1337 |
+
font-size: var(--text-xs);
|
| 1338 |
+
color: var(--text-muted);
|
| 1339 |
+
text-transform: uppercase;
|
| 1340 |
+
letter-spacing: 0.08em;
|
| 1341 |
+
}
|
| 1342 |
+
|
| 1343 |
+
.model-card-price-value {
|
| 1344 |
+
font-family: 'JetBrains Mono', monospace;
|
| 1345 |
+
font-size: var(--text-sm);
|
| 1346 |
+
font-weight: 600;
|
| 1347 |
+
color: var(--accent-green);
|
| 1348 |
+
}
|
| 1349 |
+
|
| 1350 |
+
.capability-gauge {
|
| 1351 |
+
position: relative;
|
| 1352 |
+
width: 60px;
|
| 1353 |
+
height: 60px;
|
| 1354 |
+
flex-shrink: 0;
|
| 1355 |
+
}
|
| 1356 |
+
|
| 1357 |
+
/* ============================================================
|
| 1358 |
+
Settings
|
| 1359 |
+
============================================================ */
|
| 1360 |
+
|
| 1361 |
+
.settings-section {
|
| 1362 |
+
background: var(--bg-surface);
|
| 1363 |
+
border: 1px solid var(--bg-border);
|
| 1364 |
+
border-radius: var(--radius-lg);
|
| 1365 |
+
overflow: hidden;
|
| 1366 |
+
margin-bottom: var(--sp-5);
|
| 1367 |
+
flex-shrink: 0;
|
| 1368 |
+
}
|
| 1369 |
+
|
| 1370 |
+
.settings-section-header {
|
| 1371 |
+
padding: var(--sp-4) var(--sp-5);
|
| 1372 |
+
border-bottom: 1px solid var(--bg-border);
|
| 1373 |
+
background: var(--bg-elevated);
|
| 1374 |
+
}
|
| 1375 |
+
|
| 1376 |
+
.settings-section-title {
|
| 1377 |
+
font-family: 'JetBrains Mono', monospace;
|
| 1378 |
+
font-size: var(--text-sm);
|
| 1379 |
+
font-weight: 700;
|
| 1380 |
+
text-transform: uppercase;
|
| 1381 |
+
letter-spacing: 0.1em;
|
| 1382 |
+
color: var(--text-primary);
|
| 1383 |
+
}
|
| 1384 |
+
|
| 1385 |
+
.settings-section-desc {
|
| 1386 |
+
font-size: var(--text-xs);
|
| 1387 |
+
color: var(--text-secondary);
|
| 1388 |
+
margin-top: 2px;
|
| 1389 |
+
}
|
| 1390 |
+
|
| 1391 |
+
.settings-row {
|
| 1392 |
+
display: flex;
|
| 1393 |
+
align-items: center;
|
| 1394 |
+
justify-content: space-between;
|
| 1395 |
+
gap: var(--sp-4);
|
| 1396 |
+
padding: var(--sp-4) var(--sp-5);
|
| 1397 |
+
border-bottom: 1px solid rgba(37, 42, 56, 0.5);
|
| 1398 |
+
}
|
| 1399 |
+
|
| 1400 |
+
.settings-row:last-child {
|
| 1401 |
+
border-bottom: none;
|
| 1402 |
+
}
|
| 1403 |
+
|
| 1404 |
+
.settings-row-info {
|
| 1405 |
+
flex: 1;
|
| 1406 |
+
}
|
| 1407 |
+
|
| 1408 |
+
.settings-row-label {
|
| 1409 |
+
font-size: var(--text-sm);
|
| 1410 |
+
font-weight: 500;
|
| 1411 |
+
color: var(--text-primary);
|
| 1412 |
+
margin-bottom: 2px;
|
| 1413 |
+
}
|
| 1414 |
+
|
| 1415 |
+
.settings-row-desc {
|
| 1416 |
+
font-size: var(--text-xs);
|
| 1417 |
+
color: var(--text-secondary);
|
| 1418 |
+
}
|
| 1419 |
+
|
| 1420 |
+
.settings-key-input-wrapper {
|
| 1421 |
+
position: relative;
|
| 1422 |
+
display: flex;
|
| 1423 |
+
gap: var(--sp-2);
|
| 1424 |
+
align-items: center;
|
| 1425 |
+
width: 100%;
|
| 1426 |
+
max-width: 450px;
|
| 1427 |
+
}
|
| 1428 |
+
|
| 1429 |
+
/* ============================================================
|
| 1430 |
+
Loading spinner
|
| 1431 |
+
============================================================ */
|
| 1432 |
+
|
| 1433 |
+
.spinner {
|
| 1434 |
+
width: 16px;
|
| 1435 |
+
height: 16px;
|
| 1436 |
+
border: 2px solid var(--bg-border);
|
| 1437 |
+
border-top-color: var(--accent-cyan);
|
| 1438 |
+
border-radius: 50%;
|
| 1439 |
+
animation: spin 0.7s linear infinite;
|
| 1440 |
+
}
|
| 1441 |
+
|
| 1442 |
+
@keyframes spin {
|
| 1443 |
+
to { transform: rotate(360deg); }
|
| 1444 |
+
}
|
| 1445 |
+
|
| 1446 |
+
/* ============================================================
|
| 1447 |
+
Empty states
|
| 1448 |
+
============================================================ */
|
| 1449 |
+
|
| 1450 |
+
.empty-state {
|
| 1451 |
+
display: flex;
|
| 1452 |
+
flex-direction: column;
|
| 1453 |
+
align-items: center;
|
| 1454 |
+
justify-content: center;
|
| 1455 |
+
padding: var(--sp-12) var(--sp-8);
|
| 1456 |
+
text-align: center;
|
| 1457 |
+
color: var(--text-secondary);
|
| 1458 |
+
gap: var(--sp-4);
|
| 1459 |
+
}
|
| 1460 |
+
|
| 1461 |
+
.empty-state-icon {
|
| 1462 |
+
font-size: 48px;
|
| 1463 |
+
opacity: 0.3;
|
| 1464 |
+
}
|
| 1465 |
+
|
| 1466 |
+
.empty-state-title {
|
| 1467 |
+
font-family: 'JetBrains Mono', monospace;
|
| 1468 |
+
font-size: var(--text-lg);
|
| 1469 |
+
font-weight: 700;
|
| 1470 |
+
color: var(--text-muted);
|
| 1471 |
+
}
|
| 1472 |
+
|
| 1473 |
+
.empty-state-desc {
|
| 1474 |
+
font-size: var(--text-sm);
|
| 1475 |
+
max-width: 320px;
|
| 1476 |
+
}
|
| 1477 |
+
|
| 1478 |
+
/* ============================================================
|
| 1479 |
+
Topbar / Header strip
|
| 1480 |
+
============================================================ */
|
| 1481 |
+
|
| 1482 |
+
.topbar {
|
| 1483 |
+
height: 52px;
|
| 1484 |
+
background: var(--bg-surface);
|
| 1485 |
+
border-bottom: 1px solid var(--bg-border);
|
| 1486 |
+
display: flex;
|
| 1487 |
+
align-items: center;
|
| 1488 |
+
justify-content: space-between;
|
| 1489 |
+
padding: 0 var(--sp-6);
|
| 1490 |
+
flex-shrink: 0;
|
| 1491 |
+
}
|
| 1492 |
+
|
| 1493 |
+
.topbar-breadcrumb {
|
| 1494 |
+
font-family: 'JetBrains Mono', monospace;
|
| 1495 |
+
font-size: var(--text-xs);
|
| 1496 |
+
color: var(--text-secondary);
|
| 1497 |
+
display: flex;
|
| 1498 |
+
align-items: center;
|
| 1499 |
+
gap: var(--sp-2);
|
| 1500 |
+
}
|
| 1501 |
+
|
| 1502 |
+
.topbar-breadcrumb strong {
|
| 1503 |
+
color: var(--text-primary);
|
| 1504 |
+
font-weight: 700;
|
| 1505 |
+
}
|
| 1506 |
+
|
| 1507 |
+
.topbar-actions {
|
| 1508 |
+
display: flex;
|
| 1509 |
+
align-items: center;
|
| 1510 |
+
gap: var(--sp-3);
|
| 1511 |
+
}
|
| 1512 |
+
|
| 1513 |
+
.topbar-health {
|
| 1514 |
+
display: flex;
|
| 1515 |
+
align-items: center;
|
| 1516 |
+
gap: var(--sp-2);
|
| 1517 |
+
font-family: 'JetBrains Mono', monospace;
|
| 1518 |
+
font-size: var(--text-xs);
|
| 1519 |
+
color: var(--text-secondary);
|
| 1520 |
+
}
|
| 1521 |
+
|
| 1522 |
+
/* ============================================================
|
| 1523 |
+
Tooltip
|
| 1524 |
+
============================================================ */
|
| 1525 |
+
|
| 1526 |
+
[data-tooltip] {
|
| 1527 |
+
position: relative;
|
| 1528 |
+
}
|
| 1529 |
+
|
| 1530 |
+
[data-tooltip]::after {
|
| 1531 |
+
content: attr(data-tooltip);
|
| 1532 |
+
position: absolute;
|
| 1533 |
+
bottom: calc(100% + 8px);
|
| 1534 |
+
left: 50%;
|
| 1535 |
+
transform: translateX(-50%);
|
| 1536 |
+
background: var(--bg-elevated);
|
| 1537 |
+
border: 1px solid var(--bg-border);
|
| 1538 |
+
border-radius: var(--radius-sm);
|
| 1539 |
+
padding: 4px 8px;
|
| 1540 |
+
font-size: var(--text-xs);
|
| 1541 |
+
white-space: nowrap;
|
| 1542 |
+
pointer-events: none;
|
| 1543 |
+
opacity: 0;
|
| 1544 |
+
transition: opacity var(--transition-fast);
|
| 1545 |
+
z-index: 100;
|
| 1546 |
+
}
|
| 1547 |
+
|
| 1548 |
+
[data-tooltip]:hover::after {
|
| 1549 |
+
opacity: 1;
|
| 1550 |
+
}
|
| 1551 |
+
|
| 1552 |
+
/* ============================================================
|
| 1553 |
+
Misc utilities
|
| 1554 |
+
============================================================ */
|
| 1555 |
+
|
| 1556 |
+
.flex { display: flex; }
|
| 1557 |
+
.flex-col { display: flex; flex-direction: column; }
|
| 1558 |
+
.items-center { align-items: center; }
|
| 1559 |
+
.justify-between { justify-content: space-between; }
|
| 1560 |
+
.gap-2 { gap: var(--sp-2); }
|
| 1561 |
+
.gap-3 { gap: var(--sp-3); }
|
| 1562 |
+
.gap-4 { gap: var(--sp-4); }
|
| 1563 |
+
.gap-6 { gap: var(--sp-6); }
|
| 1564 |
+
.flex-1 { flex: 1; }
|
| 1565 |
+
.w-full { width: 100%; }
|
| 1566 |
+
.text-cyan { color: var(--accent-cyan); }
|
| 1567 |
+
.text-green { color: var(--accent-green); }
|
| 1568 |
+
.text-amber { color: var(--accent-amber); }
|
| 1569 |
+
.text-red { color: var(--accent-red); }
|
| 1570 |
+
.text-muted { color: var(--text-muted); }
|
| 1571 |
+
.text-secondary { color: var(--text-secondary); }
|
| 1572 |
+
.font-mono { font-family: 'JetBrains Mono', monospace; }
|
| 1573 |
+
.font-sm { font-size: var(--text-sm); }
|
| 1574 |
+
.font-xs { font-size: var(--text-xs); }
|
| 1575 |
+
.truncate { overflow: hidden; text-overflow: ellipsis; white-space: nowrap; }
|
| 1576 |
+
.mb-4 { margin-bottom: var(--sp-4); }
|
| 1577 |
+
.mb-6 { margin-bottom: var(--sp-6); }
|
| 1578 |
+
.mt-4 { margin-top: var(--sp-4); }
|
| 1579 |
+
|
| 1580 |
+
/* ============================================================
|
| 1581 |
+
Badge Variants
|
| 1582 |
+
============================================================ */
|
| 1583 |
+
|
| 1584 |
+
.badge-red {
|
| 1585 |
+
color: var(--accent-red);
|
| 1586 |
+
border-color: rgba(255, 61, 87, 0.4);
|
| 1587 |
+
background: rgba(255, 61, 87, 0.08);
|
| 1588 |
+
}
|
| 1589 |
+
|
| 1590 |
+
/* ============================================================
|
| 1591 |
+
Pipeline Visualizer — Vertical Layout (sidebar)
|
| 1592 |
+
============================================================ */
|
| 1593 |
+
|
| 1594 |
+
.pipeline-wrapper {
|
| 1595 |
+
display: flex;
|
| 1596 |
+
flex-direction: column;
|
| 1597 |
+
gap: 0;
|
| 1598 |
+
position: relative;
|
| 1599 |
+
padding: var(--sp-3) 0;
|
| 1600 |
+
}
|
| 1601 |
+
|
| 1602 |
+
.pipeline-stage {
|
| 1603 |
+
display: flex;
|
| 1604 |
+
align-items: flex-start;
|
| 1605 |
+
gap: var(--sp-3);
|
| 1606 |
+
padding: var(--sp-3) var(--sp-4);
|
| 1607 |
+
position: relative;
|
| 1608 |
+
transition: background var(--transition-fast);
|
| 1609 |
+
border-radius: var(--radius-md);
|
| 1610 |
+
}
|
| 1611 |
+
|
| 1612 |
+
.pipeline-stage.active {
|
| 1613 |
+
background: rgba(0, 229, 255, 0.05);
|
| 1614 |
+
}
|
| 1615 |
+
|
| 1616 |
+
.pipeline-stage.complete .pipeline-stage-label {
|
| 1617 |
+
color: var(--accent-green);
|
| 1618 |
+
}
|
| 1619 |
+
|
| 1620 |
+
.pipeline-stage.error .pipeline-stage-label {
|
| 1621 |
+
color: var(--accent-red);
|
| 1622 |
+
}
|
| 1623 |
+
|
| 1624 |
+
.pipeline-stage-icon {
|
| 1625 |
+
width: 24px;
|
| 1626 |
+
height: 24px;
|
| 1627 |
+
display: flex;
|
| 1628 |
+
align-items: center;
|
| 1629 |
+
justify-content: center;
|
| 1630 |
+
flex-shrink: 0;
|
| 1631 |
+
font-size: 14px;
|
| 1632 |
+
}
|
| 1633 |
+
|
| 1634 |
+
.pipeline-stage-body {
|
| 1635 |
+
flex: 1;
|
| 1636 |
+
min-width: 0;
|
| 1637 |
+
}
|
| 1638 |
+
|
| 1639 |
+
.pipeline-stage-label {
|
| 1640 |
+
font-size: var(--text-xs);
|
| 1641 |
+
font-weight: 600;
|
| 1642 |
+
font-family: 'JetBrains Mono', monospace;
|
| 1643 |
+
color: var(--text-secondary);
|
| 1644 |
+
transition: color var(--transition-fast);
|
| 1645 |
+
margin-bottom: 3px;
|
| 1646 |
+
}
|
| 1647 |
+
|
| 1648 |
+
.pipeline-stage.active .pipeline-stage-label {
|
| 1649 |
+
color: var(--accent-cyan);
|
| 1650 |
+
}
|
| 1651 |
+
|
| 1652 |
+
.pipeline-stage-detail {
|
| 1653 |
+
font-size: 10px;
|
| 1654 |
+
color: var(--text-muted);
|
| 1655 |
+
line-height: 1.4;
|
| 1656 |
+
word-break: break-word;
|
| 1657 |
+
}
|
| 1658 |
+
|
| 1659 |
+
.pipeline-stage.complete .pipeline-stage-detail {
|
| 1660 |
+
color: var(--text-secondary);
|
| 1661 |
+
}
|
| 1662 |
+
|
| 1663 |
+
.pipeline-connector {
|
| 1664 |
+
position: absolute;
|
| 1665 |
+
left: calc(var(--sp-4) + 11px);
|
| 1666 |
+
top: calc(100% - var(--sp-3));
|
| 1667 |
+
width: 2px;
|
| 1668 |
+
height: var(--sp-3);
|
| 1669 |
+
background: var(--bg-border);
|
| 1670 |
+
z-index: 0;
|
| 1671 |
+
}
|
| 1672 |
+
|
| 1673 |
+
/* ============================================================
|
| 1674 |
+
Metrics Bar (Response)
|
| 1675 |
+
============================================================ */
|
| 1676 |
+
|
| 1677 |
+
.metrics-bar {
|
| 1678 |
+
display: flex;
|
| 1679 |
+
align-items: center;
|
| 1680 |
+
flex-wrap: wrap;
|
| 1681 |
+
gap: 0;
|
| 1682 |
+
background: var(--bg-base);
|
| 1683 |
+
border: 1px solid var(--bg-border);
|
| 1684 |
+
border-radius: var(--radius-md);
|
| 1685 |
+
overflow: hidden;
|
| 1686 |
+
margin-bottom: var(--sp-4);
|
| 1687 |
+
}
|
| 1688 |
+
|
| 1689 |
+
.metric-item {
|
| 1690 |
+
flex: 1;
|
| 1691 |
+
min-width: 80px;
|
| 1692 |
+
padding: var(--sp-3) var(--sp-4);
|
| 1693 |
+
text-align: center;
|
| 1694 |
+
}
|
| 1695 |
+
|
| 1696 |
+
.metric-label {
|
| 1697 |
+
font-size: 10px;
|
| 1698 |
+
text-transform: uppercase;
|
| 1699 |
+
letter-spacing: 0.08em;
|
| 1700 |
+
color: var(--text-muted);
|
| 1701 |
+
margin-bottom: 4px;
|
| 1702 |
+
}
|
| 1703 |
+
|
| 1704 |
+
.metric-value {
|
| 1705 |
+
font-family: 'JetBrains Mono', monospace;
|
| 1706 |
+
font-size: var(--text-xs);
|
| 1707 |
+
font-weight: 700;
|
| 1708 |
+
color: var(--text-primary);
|
| 1709 |
+
}
|
| 1710 |
+
|
| 1711 |
+
.metric-divider {
|
| 1712 |
+
width: 1px;
|
| 1713 |
+
height: 40px;
|
| 1714 |
+
background: var(--bg-border);
|
| 1715 |
+
flex-shrink: 0;
|
| 1716 |
+
}
|
| 1717 |
+
|
| 1718 |
+
/* ============================================================
|
| 1719 |
+
Response Content (markdown rendering)
|
| 1720 |
+
============================================================ */
|
| 1721 |
+
|
| 1722 |
+
.response-content {
|
| 1723 |
+
font-size: var(--text-sm);
|
| 1724 |
+
line-height: 1.8;
|
| 1725 |
+
color: var(--text-primary);
|
| 1726 |
+
}
|
| 1727 |
+
|
| 1728 |
+
.response-content p { margin-bottom: var(--sp-4); }
|
| 1729 |
+
.response-content p:last-child { margin-bottom: 0; }
|
| 1730 |
+
|
| 1731 |
+
.response-content h1,
|
| 1732 |
+
.response-content h2,
|
| 1733 |
+
.response-content h3 {
|
| 1734 |
+
font-family: 'JetBrains Mono', monospace;
|
| 1735 |
+
color: var(--accent-cyan);
|
| 1736 |
+
margin: var(--sp-5) 0 var(--sp-3);
|
| 1737 |
+
font-size: var(--text-base);
|
| 1738 |
+
}
|
| 1739 |
+
|
| 1740 |
+
.response-content code {
|
| 1741 |
+
font-family: 'Fira Code', 'JetBrains Mono', monospace;
|
| 1742 |
+
font-size: 12px;
|
| 1743 |
+
background: var(--bg-base);
|
| 1744 |
+
border: 1px solid var(--bg-border);
|
| 1745 |
+
border-radius: var(--radius-sm);
|
| 1746 |
+
padding: 2px 6px;
|
| 1747 |
+
color: var(--accent-cyan);
|
| 1748 |
+
}
|
| 1749 |
+
|
| 1750 |
+
.response-content pre {
|
| 1751 |
+
background: var(--bg-base);
|
| 1752 |
+
border: 1px solid var(--bg-border);
|
| 1753 |
+
border-radius: var(--radius-md);
|
| 1754 |
+
padding: var(--sp-4);
|
| 1755 |
+
overflow-x: auto;
|
| 1756 |
+
margin: var(--sp-4) 0;
|
| 1757 |
+
}
|
| 1758 |
+
|
| 1759 |
+
.response-content pre code {
|
| 1760 |
+
background: none;
|
| 1761 |
+
border: none;
|
| 1762 |
+
padding: 0;
|
| 1763 |
+
color: var(--text-primary);
|
| 1764 |
+
font-size: 13px;
|
| 1765 |
+
line-height: 1.6;
|
| 1766 |
+
}
|
| 1767 |
+
|
| 1768 |
+
.response-content ul,
|
| 1769 |
+
.response-content ol {
|
| 1770 |
+
padding-left: var(--sp-6);
|
| 1771 |
+
margin-bottom: var(--sp-4);
|
| 1772 |
+
}
|
| 1773 |
+
|
| 1774 |
+
.response-content li {
|
| 1775 |
+
margin-bottom: var(--sp-2);
|
| 1776 |
+
}
|
| 1777 |
+
|
| 1778 |
+
.response-content blockquote {
|
| 1779 |
+
border-left: 3px solid var(--accent-cyan);
|
| 1780 |
+
padding-left: var(--sp-4);
|
| 1781 |
+
color: var(--text-secondary);
|
| 1782 |
+
margin: var(--sp-4) 0;
|
| 1783 |
+
}
|
| 1784 |
+
|
| 1785 |
+
.response-content table {
|
| 1786 |
+
width: 100%;
|
| 1787 |
+
border-collapse: collapse;
|
| 1788 |
+
margin: var(--sp-4) 0;
|
| 1789 |
+
font-size: var(--text-xs);
|
| 1790 |
+
}
|
| 1791 |
+
|
| 1792 |
+
.response-content th,
|
| 1793 |
+
.response-content td {
|
| 1794 |
+
padding: var(--sp-2) var(--sp-3);
|
| 1795 |
+
border: 1px solid var(--bg-border);
|
| 1796 |
+
text-align: left;
|
| 1797 |
+
}
|
| 1798 |
+
|
| 1799 |
+
.response-content th {
|
| 1800 |
+
background: var(--bg-elevated);
|
| 1801 |
+
color: var(--accent-cyan);
|
| 1802 |
+
font-weight: 700;
|
| 1803 |
+
}
|
| 1804 |
+
|
| 1805 |
+
/* ============================================================
|
| 1806 |
+
Example Prompt Buttons
|
| 1807 |
+
============================================================ */
|
| 1808 |
+
|
| 1809 |
+
.example-prompt-btn {
|
| 1810 |
+
display: flex;
|
| 1811 |
+
align-items: flex-start;
|
| 1812 |
+
gap: var(--sp-3);
|
| 1813 |
+
padding: var(--sp-3) var(--sp-4);
|
| 1814 |
+
background: var(--bg-elevated);
|
| 1815 |
+
border: 1px solid var(--bg-border);
|
| 1816 |
+
border-radius: var(--radius-md);
|
| 1817 |
+
color: var(--text-secondary);
|
| 1818 |
+
font-size: var(--text-sm);
|
| 1819 |
+
text-align: left;
|
| 1820 |
+
cursor: pointer;
|
| 1821 |
+
transition: all var(--transition-fast);
|
| 1822 |
+
width: 100%;
|
| 1823 |
+
font-family: inherit;
|
| 1824 |
+
}
|
| 1825 |
+
|
| 1826 |
+
.example-prompt-btn:hover {
|
| 1827 |
+
border-color: var(--accent-cyan);
|
| 1828 |
+
color: var(--text-primary);
|
| 1829 |
+
background: rgba(0, 229, 255, 0.04);
|
| 1830 |
+
}
|
| 1831 |
+
|
| 1832 |
+
.example-prompt-icon {
|
| 1833 |
+
color: var(--accent-cyan);
|
| 1834 |
+
font-weight: 700;
|
| 1835 |
+
flex-shrink: 0;
|
| 1836 |
+
}
|
| 1837 |
+
|
| 1838 |
+
/* ============================================================
|
| 1839 |
+
Rationale Card
|
| 1840 |
+
============================================================ */
|
| 1841 |
+
|
| 1842 |
+
.rationale-card {
|
| 1843 |
+
background: var(--bg-surface);
|
| 1844 |
+
border: 1px solid var(--bg-border);
|
| 1845 |
+
border-radius: var(--radius-lg);
|
| 1846 |
+
overflow: hidden;
|
| 1847 |
+
}
|
| 1848 |
+
|
| 1849 |
+
.rationale-card-title {
|
| 1850 |
+
display: flex;
|
| 1851 |
+
align-items: center;
|
| 1852 |
+
gap: var(--sp-2);
|
| 1853 |
+
padding: var(--sp-4) var(--sp-5);
|
| 1854 |
+
border-bottom: 1px solid var(--bg-border);
|
| 1855 |
+
font-size: var(--text-xs);
|
| 1856 |
+
font-weight: 700;
|
| 1857 |
+
font-family: 'JetBrains Mono', monospace;
|
| 1858 |
+
text-transform: uppercase;
|
| 1859 |
+
letter-spacing: 0.08em;
|
| 1860 |
+
color: var(--accent-cyan);
|
| 1861 |
+
}
|
| 1862 |
+
|
| 1863 |
+
.rationale-section {
|
| 1864 |
+
padding: var(--sp-4) var(--sp-5);
|
| 1865 |
+
border-bottom: 1px solid var(--bg-border);
|
| 1866 |
+
}
|
| 1867 |
+
|
| 1868 |
+
.rationale-section:last-child {
|
| 1869 |
+
border-bottom: none;
|
| 1870 |
+
}
|
| 1871 |
+
|
| 1872 |
+
.rationale-label {
|
| 1873 |
+
font-size: 10px;
|
| 1874 |
+
font-weight: 700;
|
| 1875 |
+
text-transform: uppercase;
|
| 1876 |
+
letter-spacing: 0.1em;
|
| 1877 |
+
color: var(--text-muted);
|
| 1878 |
+
margin-bottom: var(--sp-3);
|
| 1879 |
+
}
|
| 1880 |
+
|
| 1881 |
+
/* ============================================================
|
| 1882 |
+
Card component
|
| 1883 |
+
============================================================ */
|
| 1884 |
+
|
| 1885 |
+
.card {
|
| 1886 |
+
background: var(--bg-surface);
|
| 1887 |
+
border: 1px solid var(--bg-border);
|
| 1888 |
+
border-radius: var(--radius-lg);
|
| 1889 |
+
padding: var(--sp-5);
|
| 1890 |
+
transition: border-color var(--transition-normal);
|
| 1891 |
+
flex-shrink: 0;
|
| 1892 |
+
}
|
| 1893 |
+
|
| 1894 |
+
.card:hover {
|
| 1895 |
+
border-color: rgba(0, 229, 255, 0.15);
|
| 1896 |
+
}
|
| 1897 |
+
|
| 1898 |
+
.card-header {
|
| 1899 |
+
font-family: 'JetBrains Mono', monospace;
|
| 1900 |
+
font-size: var(--text-xs);
|
| 1901 |
+
font-weight: 700;
|
| 1902 |
+
text-transform: uppercase;
|
| 1903 |
+
letter-spacing: 0.08em;
|
| 1904 |
+
color: var(--text-muted);
|
| 1905 |
+
margin-bottom: var(--sp-4);
|
| 1906 |
+
}
|
| 1907 |
+
|
| 1908 |
+
/* Grid layout helpers */
|
| 1909 |
+
.grid-2 {
|
| 1910 |
+
display: grid;
|
| 1911 |
+
grid-template-columns: repeat(auto-fill, minmax(340px, 1fr));
|
| 1912 |
+
gap: var(--sp-4);
|
| 1913 |
+
}
|
| 1914 |
+
|
| 1915 |
+
/* Model card */
|
| 1916 |
+
.model-card {
|
| 1917 |
+
background: var(--bg-surface);
|
| 1918 |
+
border: 1px solid var(--bg-border);
|
| 1919 |
+
border-radius: var(--radius-lg);
|
| 1920 |
+
padding: var(--sp-5);
|
| 1921 |
+
display: flex;
|
| 1922 |
+
flex-direction: column;
|
| 1923 |
+
gap: var(--sp-4);
|
| 1924 |
+
transition: border-color var(--transition-normal), box-shadow var(--transition-normal);
|
| 1925 |
+
}
|
| 1926 |
+
|
| 1927 |
+
.model-card:hover {
|
| 1928 |
+
border-color: rgba(0, 229, 255, 0.2);
|
| 1929 |
+
}
|
| 1930 |
+
|
| 1931 |
+
.model-card-name {
|
| 1932 |
+
font-family: 'JetBrains Mono', monospace;
|
| 1933 |
+
font-size: var(--text-sm);
|
| 1934 |
+
font-weight: 700;
|
| 1935 |
+
color: var(--text-primary);
|
| 1936 |
+
word-break: break-all;
|
| 1937 |
+
}
|
| 1938 |
+
|
| 1939 |
+
.model-card-pricing {
|
| 1940 |
+
display: flex;
|
| 1941 |
+
gap: var(--sp-4);
|
| 1942 |
+
padding: var(--sp-3) 0;
|
| 1943 |
+
border-top: 1px solid var(--bg-border);
|
| 1944 |
+
border-bottom: 1px solid var(--bg-border);
|
| 1945 |
+
}
|
| 1946 |
+
|
| 1947 |
+
.model-card-price-item {
|
| 1948 |
+
flex: 1;
|
| 1949 |
+
text-align: center;
|
| 1950 |
+
}
|
| 1951 |
+
|
| 1952 |
+
.model-card-price-label {
|
| 1953 |
+
font-size: 10px;
|
| 1954 |
+
text-transform: uppercase;
|
| 1955 |
+
letter-spacing: 0.08em;
|
| 1956 |
+
color: var(--text-muted);
|
| 1957 |
+
margin-bottom: 4px;
|
| 1958 |
+
}
|
| 1959 |
+
|
| 1960 |
+
.model-card-price-value {
|
| 1961 |
+
font-family: 'JetBrains Mono', monospace;
|
| 1962 |
+
font-size: var(--text-xs);
|
| 1963 |
+
font-weight: 700;
|
| 1964 |
+
color: var(--text-primary);
|
| 1965 |
+
}
|
| 1966 |
+
|
| 1967 |
+
/* Prevent browser autofill style overrides */
|
| 1968 |
+
input:-webkit-autofill,
|
| 1969 |
+
input:-webkit-autofill:hover,
|
| 1970 |
+
input:-webkit-autofill:focus,
|
| 1971 |
+
textarea:-webkit-autofill,
|
| 1972 |
+
textarea:-webkit-autofill:hover,
|
| 1973 |
+
textarea:-webkit-autofill:focus,
|
| 1974 |
+
select:-webkit-autofill,
|
| 1975 |
+
select:-webkit-autofill:hover,
|
| 1976 |
+
select:-webkit-autofill:focus {
|
| 1977 |
+
border: 1px solid var(--accent-cyan) !important;
|
| 1978 |
+
-webkit-text-fill-color: var(--text-primary) !important;
|
| 1979 |
+
-webkit-box-shadow: 0 0 0px 1000px var(--bg-elevated) inset !important;
|
| 1980 |
+
transition: background-color 5000s ease-in-out 0s;
|
| 1981 |
+
}
|
| 1982 |
+
|
frontend/src/types.ts
ADDED
|
@@ -0,0 +1,170 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// Global types for LLMOpt frontend — aligned with backend schemas
|
| 2 |
+
|
| 3 |
+
export type BudgetMode = 'cheap' | 'balanced' | 'quality';
|
| 4 |
+
|
| 5 |
+
export type PipelineStageStatus = 'idle' | 'active' | 'complete' | 'skipped' | 'error';
|
| 6 |
+
|
| 7 |
+
export type ComplexityTier = 'trivial' | 'easy' | 'medium' | 'hard' | 'expert';
|
| 8 |
+
|
| 9 |
+
export interface PipelineStage {
|
| 10 |
+
id: string;
|
| 11 |
+
label: string;
|
| 12 |
+
icon: string;
|
| 13 |
+
status: PipelineStageStatus;
|
| 14 |
+
latencyMs?: number;
|
| 15 |
+
detail?: string;
|
| 16 |
+
}
|
| 17 |
+
|
| 18 |
+
export interface GenerateRequest {
|
| 19 |
+
query: string;
|
| 20 |
+
budget_mode: BudgetMode;
|
| 21 |
+
max_cost_per_request?: number;
|
| 22 |
+
quality_threshold?: number;
|
| 23 |
+
exclude_providers?: string[];
|
| 24 |
+
only_providers?: string[];
|
| 25 |
+
prefer_local?: boolean;
|
| 26 |
+
conversation_history?: { role: string; content: string }[];
|
| 27 |
+
temperature?: number;
|
| 28 |
+
api_keys?: Record<string, string>;
|
| 29 |
+
alpha?: number;
|
| 30 |
+
beta?: number;
|
| 31 |
+
gamma?: number;
|
| 32 |
+
compression_enabled?: boolean;
|
| 33 |
+
evaluate?: boolean;
|
| 34 |
+
}
|
| 35 |
+
|
| 36 |
+
export interface GenerateResponse {
|
| 37 |
+
response: string;
|
| 38 |
+
model_used: string;
|
| 39 |
+
provider: string;
|
| 40 |
+
input_tokens: number;
|
| 41 |
+
output_tokens: number;
|
| 42 |
+
total_tokens: number;
|
| 43 |
+
estimated_cost: number;
|
| 44 |
+
tokens_saved: number;
|
| 45 |
+
cost_saved: number;
|
| 46 |
+
compression_ratio: number;
|
| 47 |
+
complexity_score: number;
|
| 48 |
+
complexity_tier: ComplexityTier;
|
| 49 |
+
latency_ms: number;
|
| 50 |
+
}
|
| 51 |
+
|
| 52 |
+
// The /explain endpoint returns nested objects from core.py
|
| 53 |
+
export interface ExplainResponse {
|
| 54 |
+
query: string;
|
| 55 |
+
features: {
|
| 56 |
+
token_count: number;
|
| 57 |
+
sentence_count: number;
|
| 58 |
+
primary_domain: string;
|
| 59 |
+
estimated_output_length: string;
|
| 60 |
+
domain_code: boolean;
|
| 61 |
+
domain_math: boolean;
|
| 62 |
+
domain_science: boolean;
|
| 63 |
+
domain_reasoning: boolean;
|
| 64 |
+
domain_creative: boolean;
|
| 65 |
+
multi_step: boolean;
|
| 66 |
+
requires_comparison: boolean;
|
| 67 |
+
requires_generation: boolean;
|
| 68 |
+
requires_analysis: boolean;
|
| 69 |
+
requires_debate: boolean;
|
| 70 |
+
has_math_notation: boolean;
|
| 71 |
+
has_code_block: boolean;
|
| 72 |
+
[key: string]: unknown;
|
| 73 |
+
};
|
| 74 |
+
complexity: {
|
| 75 |
+
score: number;
|
| 76 |
+
tier: ComplexityTier;
|
| 77 |
+
required_reasoning: number;
|
| 78 |
+
required_coding: number;
|
| 79 |
+
required_math: number;
|
| 80 |
+
rationale: string[];
|
| 81 |
+
estimated_input_tokens: number;
|
| 82 |
+
estimated_output_tokens: number;
|
| 83 |
+
[key: string]: unknown;
|
| 84 |
+
};
|
| 85 |
+
optimization: {
|
| 86 |
+
selected_model: string;
|
| 87 |
+
provider: string;
|
| 88 |
+
fallback_model: string | null;
|
| 89 |
+
compression_enabled: boolean;
|
| 90 |
+
system_prompt_style: string;
|
| 91 |
+
estimated_input_tokens: number;
|
| 92 |
+
estimated_output_tokens: number;
|
| 93 |
+
estimated_cost: number;
|
| 94 |
+
rationale: string[];
|
| 95 |
+
budget_mode: string;
|
| 96 |
+
[key: string]: unknown;
|
| 97 |
+
};
|
| 98 |
+
optimized_prompt: {
|
| 99 |
+
tokens_before: number;
|
| 100 |
+
tokens_after: number;
|
| 101 |
+
tokens_saved: number;
|
| 102 |
+
compression_ratio: number;
|
| 103 |
+
[key: string]: unknown;
|
| 104 |
+
};
|
| 105 |
+
}
|
| 106 |
+
|
| 107 |
+
export interface HistoryItem {
|
| 108 |
+
id: number;
|
| 109 |
+
query: string;
|
| 110 |
+
response: string;
|
| 111 |
+
model_used: string;
|
| 112 |
+
provider: string;
|
| 113 |
+
input_tokens: number;
|
| 114 |
+
output_tokens: number;
|
| 115 |
+
total_tokens: number;
|
| 116 |
+
estimated_cost: number;
|
| 117 |
+
tokens_saved: number;
|
| 118 |
+
cost_saved: number;
|
| 119 |
+
latency_ms: number;
|
| 120 |
+
complexity_score: number;
|
| 121 |
+
complexity_tier: ComplexityTier;
|
| 122 |
+
time_ago: string;
|
| 123 |
+
}
|
| 124 |
+
|
| 125 |
+
export interface DashboardStats {
|
| 126 |
+
tokens_saved: string;
|
| 127 |
+
prompts_improved: number;
|
| 128 |
+
routing_savings: string;
|
| 129 |
+
avg_boost: string;
|
| 130 |
+
distribution: Record<string, number>;
|
| 131 |
+
recent_decisions: {
|
| 132 |
+
id: string;
|
| 133 |
+
time_ago: string;
|
| 134 |
+
model: string;
|
| 135 |
+
provider: string;
|
| 136 |
+
tier: string;
|
| 137 |
+
score: number;
|
| 138 |
+
reason: string;
|
| 139 |
+
}[];
|
| 140 |
+
recent_optimizations: {
|
| 141 |
+
name: string;
|
| 142 |
+
model_used: string;
|
| 143 |
+
time_ago: string;
|
| 144 |
+
score: string;
|
| 145 |
+
tokens_saved: string;
|
| 146 |
+
}[];
|
| 147 |
+
}
|
| 148 |
+
|
| 149 |
+
// ModelSpec from registry — uses model_name not id
|
| 150 |
+
export interface ModelSpec {
|
| 151 |
+
model_name: string;
|
| 152 |
+
provider: string;
|
| 153 |
+
input_cost_per_1k: number;
|
| 154 |
+
output_cost_per_1k: number;
|
| 155 |
+
context_window: number;
|
| 156 |
+
reasoning_score: number;
|
| 157 |
+
coding_score: number;
|
| 158 |
+
math_score: number;
|
| 159 |
+
instruction_following_score: number;
|
| 160 |
+
latency_score: number;
|
| 161 |
+
max_complexity: number;
|
| 162 |
+
capability_score: number;
|
| 163 |
+
notes: string;
|
| 164 |
+
}
|
| 165 |
+
|
| 166 |
+
export interface HealthStatus {
|
| 167 |
+
redis: 'ok' | 'error' | 'unknown';
|
| 168 |
+
ml_deps: 'ok' | 'error' | 'unknown';
|
| 169 |
+
api: 'ok' | 'error';
|
| 170 |
+
}
|
frontend/src/vite-env.d.ts
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
/// <reference types="vite/client" />
|
frontend/tsconfig.app.json
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"compilerOptions": {
|
| 3 |
+
"tsBuildInfoFile": "./node_modules/.tmp/tsconfig.app.tsbuildinfo",
|
| 4 |
+
"target": "ES2020",
|
| 5 |
+
"useDefineForClassFields": true,
|
| 6 |
+
"lib": ["ES2020", "DOM", "DOM.Iterable"],
|
| 7 |
+
"module": "ESNext",
|
| 8 |
+
"skipLibCheck": true,
|
| 9 |
+
|
| 10 |
+
/* Bundler mode */
|
| 11 |
+
"moduleResolution": "Bundler",
|
| 12 |
+
"allowImportingTsExtensions": true,
|
| 13 |
+
"isolatedModules": true,
|
| 14 |
+
"moduleDetection": "force",
|
| 15 |
+
"noEmit": true,
|
| 16 |
+
"jsx": "react-jsx",
|
| 17 |
+
|
| 18 |
+
/* Linting */
|
| 19 |
+
"strict": true,
|
| 20 |
+
"noUnusedLocals": true,
|
| 21 |
+
"noUnusedParameters": true,
|
| 22 |
+
"noFallthroughCasesInSwitch": true,
|
| 23 |
+
"noUncheckedSideEffectImports": true
|
| 24 |
+
},
|
| 25 |
+
"include": ["src"]
|
| 26 |
+
}
|
frontend/tsconfig.json
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"files": [],
|
| 3 |
+
"references": [
|
| 4 |
+
{ "path": "./tsconfig.app.json" },
|
| 5 |
+
{ "path": "./tsconfig.node.json" }
|
| 6 |
+
]
|
| 7 |
+
}
|
frontend/tsconfig.node.json
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"compilerOptions": {
|
| 3 |
+
"tsBuildInfoFile": "./node_modules/.tmp/tsconfig.node.tsbuildinfo",
|
| 4 |
+
"target": "ES2022",
|
| 5 |
+
"lib": ["ES2023"],
|
| 6 |
+
"module": "ESNext",
|
| 7 |
+
"skipLibCheck": true,
|
| 8 |
+
|
| 9 |
+
/* Bundler mode */
|
| 10 |
+
"moduleResolution": "Bundler",
|
| 11 |
+
"allowImportingTsExtensions": true,
|
| 12 |
+
"isolatedModules": true,
|
| 13 |
+
"moduleDetection": "force",
|
| 14 |
+
"noEmit": true,
|
| 15 |
+
|
| 16 |
+
/* Linting */
|
| 17 |
+
"strict": true,
|
| 18 |
+
"noUnusedLocals": true,
|
| 19 |
+
"noUnusedParameters": true,
|
| 20 |
+
"noFallthroughCasesInSwitch": true,
|
| 21 |
+
"noUncheckedSideEffectImports": true
|
| 22 |
+
},
|
| 23 |
+
"include": ["vite.config.ts"]
|
| 24 |
+
}
|
frontend/vite.config.ts
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { defineConfig } from 'vite'
|
| 2 |
+
import react from '@vitejs/plugin-react'
|
| 3 |
+
import path from 'path'
|
| 4 |
+
|
| 5 |
+
// https://vite.dev/config/
|
| 6 |
+
// Backend that the dev server forwards API calls to.
const BACKEND_URL = 'http://localhost:8000'

// Path prefixes that are proxied to the backend instead of being served by Vite.
const PROXIED_ROUTES = ['/generate', '/explain', '/models', '/health', '/auth', '/stream']

export default defineConfig({
  plugins: [react()],
  // The built app is served under /ui/ and emitted into ../static.
  base: '/ui/',
  build: {
    outDir: '../static',
    emptyOutDir: true,
    chunkSizeWarningLimit: 1600,
    rollupOptions: {
      output: {
        // Split heavy dependencies into their own chunks.
        manualChunks: {
          vendor: ['react', 'react-dom', 'react-router-dom'],
          charts: ['recharts'],
          motion: ['framer-motion'],
          markdown: ['react-markdown', 'react-syntax-highlighter'],
        },
      },
    },
  },
  resolve: {
    alias: {
      '@': path.resolve(__dirname, './src'),
    },
  },
  server: {
    proxy: Object.fromEntries(PROXIED_ROUTES.map((route) => [route, BACKEND_URL])),
  },
})
|
| 40 |
+
|
llmopt/analyzer/query_analyzer.py
CHANGED
|
@@ -161,18 +161,21 @@ class QueryAnalyzer:
|
|
| 161 |
"code", "math", "science", "creative",
|
| 162 |
"reasoning", "summarization", "translation", "factual"
|
| 163 |
]
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
"
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
|
|
|
|
|
|
|
|
|
| 176 |
|
| 177 |
def analyze(self, query: str) -> QueryFeatures:
|
| 178 |
q = query.strip()
|
|
|
|
| 161 |
"code", "math", "science", "creative",
|
| 162 |
"reasoning", "summarization", "translation", "factual"
|
| 163 |
]
|
| 164 |
+
import os
|
| 165 |
+
if os.getenv("USE_ML_ANALYZER", "false").lower() == "true":
|
| 166 |
+
try:
|
| 167 |
+
from transformers import pipeline # type: ignore
|
| 168 |
+
logger.info("Loading ML Zero-Shot Classifier for Query Analyzer...")
|
| 169 |
+
self.ml_classifier = pipeline(
|
| 170 |
+
"zero-shot-classification",
|
| 171 |
+
model="cross-encoder/nli-distilroberta-base",
|
| 172 |
+
device=-1,
|
| 173 |
+
local_files_only=True
|
| 174 |
+
)
|
| 175 |
+
except ImportError:
|
| 176 |
+
logger.info("transformers not found, using V1 heuristic Query Analyzer.")
|
| 177 |
+
except Exception as e:
|
| 178 |
+
logger.warning(f"Failed to load ML classifier: {e}. Falling back to V1.")
|
| 179 |
|
| 180 |
def analyze(self, query: str) -> QueryFeatures:
|
| 181 |
q = query.strip()
|
llmopt/api/app.py
CHANGED
|
@@ -11,15 +11,31 @@ Endpoints:
|
|
| 11 |
from __future__ import annotations
|
| 12 |
|
| 13 |
import os
|
|
|
|
|
|
|
|
|
|
| 14 |
import logging
|
| 15 |
from typing import Optional, Dict
|
| 16 |
|
| 17 |
-
from fastapi import FastAPI, HTTPException
|
| 18 |
-
from fastapi.responses import StreamingResponse
|
|
|
|
|
|
|
| 19 |
from pydantic import BaseModel, Field
|
| 20 |
|
| 21 |
from llmopt.core import LLMOpt
|
| 22 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
logger = logging.getLogger(__name__)
|
| 24 |
|
| 25 |
# ---------------------------------------------------------------------------
|
|
@@ -32,9 +48,39 @@ app = FastAPI(
|
|
| 32 |
version="0.1.0",
|
| 33 |
)
|
| 34 |
|
| 35 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
_client = LLMOpt(log_level=os.getenv("LOG_LEVEL", "WARNING"))
|
| 37 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
|
| 39 |
# ---------------------------------------------------------------------------
|
| 40 |
# Request / Response schemas
|
|
@@ -57,6 +103,11 @@ class GenerateRequest(BaseModel):
|
|
| 57 |
None,
|
| 58 |
description="Optional provider API keys (e.g. {'openai': 'sk-...', 'anthropic': '...' })"
|
| 59 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
|
| 61 |
|
| 62 |
class GenerateResponse(BaseModel):
|
|
@@ -78,20 +129,469 @@ class GenerateResponse(BaseModel):
|
|
| 78 |
class ExplainRequest(BaseModel):
|
| 79 |
query: str = Field(..., min_length=1, max_length=32000)
|
| 80 |
budget_mode: str = Field("balanced")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 81 |
|
|
|
|
|
|
|
| 82 |
|
| 83 |
# ---------------------------------------------------------------------------
|
| 84 |
# Endpoints
|
| 85 |
# ---------------------------------------------------------------------------
|
| 86 |
|
| 87 |
-
@app.
|
| 88 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 89 |
return {
|
| 90 |
-
"
|
| 91 |
-
"
|
| 92 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 93 |
}
|
| 94 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 95 |
|
| 96 |
@app.get("/health")
|
| 97 |
def health():
|
|
@@ -105,11 +605,24 @@ def list_models():
|
|
| 105 |
|
| 106 |
|
| 107 |
@app.post("/generate", response_model=GenerateResponse)
|
| 108 |
-
def generate(req: GenerateRequest):
|
| 109 |
"""
|
| 110 |
Full pipeline: analyze β optimize β route β return response + metrics.
|
| 111 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
try:
|
|
|
|
|
|
|
|
|
|
| 113 |
result = _client.generate(
|
| 114 |
query=req.query,
|
| 115 |
budget_mode=req.budget_mode,
|
|
@@ -122,31 +635,85 @@ def generate(req: GenerateRequest):
|
|
| 122 |
temperature=req.temperature,
|
| 123 |
dry_run=req.dry_run,
|
| 124 |
api_keys=req.api_keys, # Pass BYOK keys
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 125 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 126 |
return GenerateResponse(**result.to_dict())
|
| 127 |
except KeyError as e:
|
| 128 |
raise HTTPException(status_code=400, detail=f"Model not found: {e}")
|
| 129 |
except Exception as e:
|
| 130 |
logger.exception("generate() failed")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 131 |
raise HTTPException(status_code=500, detail=str(e))
|
| 132 |
|
| 133 |
|
| 134 |
@app.post("/explain")
|
| 135 |
-
def explain(req: ExplainRequest):
|
| 136 |
"""
|
| 137 |
Returns the full routing decision for a query WITHOUT making an LLM API call.
|
| 138 |
Useful for debugging, testing, and understanding optimization decisions.
|
| 139 |
"""
|
|
|
|
|
|
|
| 140 |
try:
|
| 141 |
-
return _client.explain(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 142 |
except Exception as e:
|
| 143 |
logger.exception("explain() failed")
|
| 144 |
raise HTTPException(status_code=500, detail=str(e))
|
| 145 |
|
| 146 |
|
|
|
|
| 147 |
@app.post("/stream")
|
| 148 |
-
def stream_generate(req: GenerateRequest):
|
| 149 |
"""Server-sent stream of response tokens."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 150 |
def token_generator():
|
| 151 |
try:
|
| 152 |
for chunk in _client.stream(
|
|
|
|
| 11 |
from __future__ import annotations
|
| 12 |
|
| 13 |
import os
|
| 14 |
+
from dotenv import load_dotenv
|
| 15 |
+
load_dotenv(os.path.join(os.path.dirname(__file__), "..", "..", "config", ".env"))
|
| 16 |
+
|
| 17 |
import logging
|
| 18 |
from typing import Optional, Dict
|
| 19 |
|
| 20 |
+
from fastapi import FastAPI, HTTPException, Depends, Request, Response, status
|
| 21 |
+
from fastapi.responses import StreamingResponse, RedirectResponse
|
| 22 |
+
from fastapi.staticfiles import StaticFiles
|
| 23 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 24 |
from pydantic import BaseModel, Field
|
| 25 |
|
| 26 |
from llmopt.core import LLMOpt
|
| 27 |
+
from llmopt.cache.redis_client import redis_manager
|
| 28 |
+
from llmopt.api.security import (
|
| 29 |
+
create_session,
|
| 30 |
+
delete_session,
|
| 31 |
+
get_session_payload,
|
| 32 |
+
check_rate_limit,
|
| 33 |
+
get_session_id_from_request
|
| 34 |
+
)
|
| 35 |
+
from sqlalchemy.orm import Session
|
| 36 |
+
from llmopt.db.session import engine, get_db
|
| 37 |
+
from llmopt.db import models
|
| 38 |
+
from llmopt.api import crud
|
| 39 |
logger = logging.getLogger(__name__)
|
| 40 |
|
| 41 |
# ---------------------------------------------------------------------------
|
|
|
|
| 48 |
version="0.1.0",
|
| 49 |
)
|
| 50 |
|
| 51 |
+
# Detect dev mode — secure cookies are disabled for local (plain HTTP) development.
IS_DEV = os.getenv("ENVIRONMENT", "development").lower() in ("development", "dev", "local")
COOKIE_SECURE = not IS_DEV  # True only outside dev, where HTTPS is expected
COOKIE_SAMESITE = "lax" if IS_DEV else "none"  # lax works on HTTP localhost

# Configure CORS — a fixed list of localhost origins in dev, an origin regex otherwise.
_cors_options = {
    "allow_credentials": True,
    "allow_methods": ["*"],
    "allow_headers": ["*"],
}
if IS_DEV:
    _cors_options["allow_origins"] = [
        "http://localhost:5173",
        "http://localhost:8000",
        "http://127.0.0.1:8000",
    ]
else:
    # NOTE(security): the default regex matches every http(s) origin. Together
    # with allow_credentials=True and SameSite=None cookies this lets any site
    # issue credentialed requests. Set CORS_ALLOW_ORIGIN_REGEX to the real
    # frontend origin(s) in production; the default is kept only so existing
    # deployments keep working.
    _cors_options["allow_origin_regex"] = os.getenv("CORS_ALLOW_ORIGIN_REGEX", r"https?://.*")

app.add_middleware(CORSMiddleware, **_cors_options)
|
| 73 |
_client = LLMOpt(log_level=os.getenv("LOG_LEVEL", "WARNING"))
|
| 74 |
|
| 75 |
+
@app.on_event("startup")
async def startup_event() -> None:
    """Run ``create_all`` on the ORM metadata, then connect the Redis manager."""
    # Table creation is bound to the module-level SQLAlchemy engine.
    models.Base.metadata.create_all(bind=engine)
    await redis_manager.connect()
|
| 79 |
+
|
| 80 |
+
@app.on_event("shutdown")
async def shutdown_event() -> None:
    """Close the Redis manager when the application shuts down."""
    await redis_manager.close()
|
| 83 |
+
|
| 84 |
|
| 85 |
# ---------------------------------------------------------------------------
|
| 86 |
# Request / Response schemas
|
|
|
|
| 103 |
None,
|
| 104 |
description="Optional provider API keys (e.g. {'openai': 'sk-...', 'anthropic': '...' })"
|
| 105 |
)
|
| 106 |
+
alpha: Optional[float] = Field(None, description="Custom cost weight")
|
| 107 |
+
beta: Optional[float] = Field(None, description="Custom token weight")
|
| 108 |
+
gamma: Optional[float] = Field(None, description="Custom quality weight")
|
| 109 |
+
compression_enabled: Optional[bool] = Field(None, description="Force enable/disable prompt compression")
|
| 110 |
+
evaluate: bool = Field(False, description="Enable LLM-as-judge evaluation")
|
| 111 |
|
| 112 |
|
| 113 |
class GenerateResponse(BaseModel):
|
|
|
|
| 129 |
class ExplainRequest(BaseModel):
    """Request body for ``/explain``: a query plus optional routing overrides."""

    # The query text; must be 1..32000 characters long.
    query: str = Field(..., min_length=1, max_length=32000)
    budget_mode: str = "balanced"

    # Optional weight overrides; None means "not supplied".
    alpha: Optional[float] = None
    beta: Optional[float] = None
    gamma: Optional[float] = None
    compression_enabled: Optional[bool] = None

    # Provider filters; both default to a fresh empty list per instance.
    exclude_providers: list[str] = Field(default_factory=list)
    only_providers: list[str] = Field(default_factory=list)
|
| 138 |
+
|
| 139 |
+
|
| 140 |
|
| 141 |
+
class AuthRequest(BaseModel):
    """Request body carrying provider API keys, mapped provider name -> key."""

    api_keys: Dict[str, str] = Field(
        ...,
        description="Provider API keys",
    )
|
| 143 |
|
| 144 |
# ---------------------------------------------------------------------------
|
| 145 |
# Endpoints
|
| 146 |
# ---------------------------------------------------------------------------
|
| 147 |
|
| 148 |
+
@app.post("/auth/register")
def register(user: crud.UserCreate, db: Session = Depends(get_db)):
    """Create a new user account.

    Declared as a plain ``def`` on purpose: the body only performs blocking
    database calls and never awaits, so FastAPI runs it in its threadpool
    instead of stalling the event loop.

    Raises:
        HTTPException: 400 when the email address is already registered.
    """
    if crud.get_user_by_email(db, email=user.email):
        raise HTTPException(status_code=400, detail="Email already registered")
    crud.create_user(db=db, user=user)
    return {"message": "User created successfully"}
|
| 155 |
+
|
| 156 |
+
@app.post("/auth/login")
async def login_user(user: crud.UserLogin, response: Response, db: Session = Depends(get_db)):
    """Authenticate by email/password and start a session.

    The user's stored API keys are decrypted and placed in the new session.
    The session id is set as an HttpOnly cookie and also returned in the body.

    Raises:
        HTTPException: 400 when the email is unknown or the password is wrong.
    """
    from llmopt.api.security import decrypt_string

    db_user = crud.get_user_by_email(db, email=user.email)
    if not db_user or not crud.verify_password(user.password, db_user.hashed_password):
        raise HTTPException(status_code=400, detail="Incorrect email or password")

    api_keys = {}
    user_keys_encrypted = crud.get_user_api_keys(db, db_user.id)
    if user_keys_encrypted:
        try:
            api_keys = {p: decrypt_string(k) for p, k in user_keys_encrypted.items()}
        except Exception as exc:
            # Best effort: login still succeeds without stored keys, but the
            # failure is recorded instead of being dropped silently. Only the
            # exception type is logged so no key material reaches the logs.
            logger.warning(
                "Could not decrypt stored API keys for user %s (%s); continuing without them",
                db_user.id,
                type(exc).__name__,
            )

    session_id = await create_session(api_keys, user_id=db_user.id)
    response.set_cookie(
        key="session_id",
        value=session_id,
        httponly=True,
        secure=COOKIE_SECURE,
        samesite=COOKIE_SAMESITE,
        max_age=int(os.getenv("SESSION_TTL", 7200)),
    )
    return {"message": "Logged in successfully", "session_id": session_id}
|
| 181 |
+
|
| 182 |
+
@app.post("/auth/keys")
async def update_keys(req: AuthRequest, session_id: str = Depends(get_session_id_from_request), db: Session = Depends(get_db)):
    """
    Securely store API keys in Redis and the persistent database.

    Provider names are lower-cased before storage so they match the
    lower-cased provider that the DELETE /auth/keys/{provider} endpoint looks
    up; otherwise a key stored under a mixed-case name could never be deleted.

    Raises:
        HTTPException: 400 when the request carries no keys.
    """
    from llmopt.api.security import encrypt_string, update_session_payload

    if not req.api_keys:
        raise HTTPException(status_code=400, detail="No API keys provided.")

    new_keys = {provider.lower(): key for provider, key in req.api_keys.items()}

    payload = await get_session_payload(session_id)
    user_id = payload.get("user_id")

    # Merge with the keys already held in the Redis session; new values win.
    payload["api_keys"] = {**payload.get("api_keys", {}), **new_keys}
    await update_session_payload(session_id, payload)

    # Persist (encrypted) only when the session belongs to a known user.
    if user_id:
        encrypted_keys = {p: encrypt_string(k) for p, k in new_keys.items()}
        crud.update_user_api_keys(db, user_id, encrypted_keys)

    return {"message": "Keys updated securely"}
|
| 209 |
+
|
| 210 |
+
@app.delete("/auth/keys/{provider}")
async def delete_key(provider: str, session_id: str = Depends(get_session_id_from_request), db: Session = Depends(get_db)):
    """
    Remove one provider's API key from the session and, for logged-in users,
    from the database. The provider name is matched in lower case.
    """
    from llmopt.api.security import update_session_payload

    provider = provider.lower()
    session_data = await get_session_payload(session_id)
    owner_id = session_data.get("user_id")

    # Drop the key if present; a missing key is not an error.
    stored_keys = session_data.get("api_keys", {})
    stored_keys.pop(provider, None)
    session_data["api_keys"] = stored_keys
    await update_session_payload(session_id, session_data)

    if owner_id:
        crud.delete_user_api_key(db, owner_id, provider)

    return {"message": f"Key for {provider} deleted successfully"}
|
| 231 |
+
|
| 232 |
+
|
| 233 |
+
@app.get("/auth/keys")
async def get_keys(session_id: str = Depends(get_session_id_from_request)):
    """
    List the providers with a non-empty API key in the current session.
    Only provider names are returned, never the key values.
    """
    session_data = await get_session_payload(session_id)
    connected = []
    for name, secret in session_data.get("api_keys", {}).items():
        if secret:
            connected.append(name)
    return {"connected_providers": connected}
|
| 242 |
+
|
| 243 |
+
# OAuth configuration (client credentials are None when the env vars are unset)
GOOGLE_CLIENT_ID = os.getenv("GOOGLE_CLIENT_ID")
GOOGLE_CLIENT_SECRET = os.getenv("GOOGLE_CLIENT_SECRET")
GITHUB_CLIENT_ID = os.getenv("GITHUB_CLIENT_ID")
GITHUB_CLIENT_SECRET = os.getenv("GITHUB_CLIENT_SECRET")
# Trailing slashes are stripped so f"{REDIRECT_URI_HOST}/auth/..." never yields
# a "//auth/..." URL, which would not match the redirect URI registered with
# the OAuth provider.
REDIRECT_URI_HOST = os.getenv("REDIRECT_URI_HOST", "http://localhost:8000").rstrip("/")
|
| 249 |
+
|
| 250 |
+
@app.get("/auth/login/google")
def login_google():
    """Redirect the browser to Google's OAuth 2.0 consent screen.

    Raises:
        HTTPException: 400 when GOOGLE_CLIENT_ID is not configured.
    """
    import urllib.parse

    if not GOOGLE_CLIENT_ID:
        raise HTTPException(status_code=400, detail="Google Auth is not configured. Please set GOOGLE_CLIENT_ID env variable.")

    # Build the query with urlencode so the redirect URI and client id are
    # percent-encoded; quote_via=quote keeps spaces as %20.
    # NOTE(security): "state" is a fixed string and is not verified by the
    # callback, so it gives no CSRF protection. A per-request random value
    # checked in the callback is needed to close that gap.
    query = urllib.parse.urlencode(
        {
            "response_type": "code",
            "client_id": GOOGLE_CLIENT_ID,
            "redirect_uri": f"{REDIRECT_URI_HOST}/auth/callback/google",
            "scope": "openid email profile",
            "state": "google_auth_state",
        },
        quote_via=urllib.parse.quote,
    )
    return RedirectResponse(url=f"https://accounts.google.com/o/oauth2/v2/auth?{query}")
|
| 264 |
+
|
| 265 |
+
@app.get("/auth/callback/google")
async def callback_google(code: str, response: Response, db: Session = Depends(get_db)):
    """Finish the Google OAuth flow and start a session.

    Exchanges ``code`` for an access token, reads the account email from the
    userinfo endpoint, creates the user on first login (with a random
    password), then redirects to the UI with the session cookie set.

    ``response`` is unused; the cookie is set on the returned redirect.

    Raises:
        HTTPException: 400 for missing configuration, a missing access token
            or a missing email; 500 for any other failure.
    """
    import asyncio
    import json
    import secrets
    import urllib.parse
    import urllib.request

    from llmopt.api.security import decrypt_string

    if not GOOGLE_CLIENT_ID or not GOOGLE_CLIENT_SECRET:
        raise HTTPException(status_code=400, detail="Google Auth credentials missing.")

    def fetch_json(request):
        # Blocking HTTP call; the timeout keeps a stalled provider from
        # hanging the worker thread indefinitely.
        with urllib.request.urlopen(request, timeout=10) as resp:
            return json.loads(resp.read().decode("utf-8"))

    token_payload = {
        "code": code,
        "client_id": GOOGLE_CLIENT_ID,
        "client_secret": GOOGLE_CLIENT_SECRET,
        "redirect_uri": f"{REDIRECT_URI_HOST}/auth/callback/google",
        "grant_type": "authorization_code",
    }

    try:
        token_request = urllib.request.Request(
            "https://oauth2.googleapis.com/token",
            data=urllib.parse.urlencode(token_payload).encode("utf-8"),
            method="POST",
        )
        # urlopen blocks, so it runs in a worker thread to keep the event loop free.
        token_res = await asyncio.to_thread(fetch_json, token_request)

        access_token = token_res.get("access_token")
        if not access_token:
            raise HTTPException(status_code=400, detail="Failed to retrieve access token from Google.")

        userinfo_request = urllib.request.Request(
            "https://www.googleapis.com/oauth2/v3/userinfo",
            headers={"Authorization": f"Bearer {access_token}"},
        )
        user_info = await asyncio.to_thread(fetch_json, userinfo_request)

        email = user_info.get("email")
        if not email:
            raise HTTPException(status_code=400, detail="Google account has no email associated.")

        db_user = crud.get_user_by_email(db, email=email)
        if not db_user:
            # First OAuth login: create the account with an unguessable password.
            user_in = crud.UserCreate(email=email, password=secrets.token_hex(16))
            db_user = crud.create_user(db, user_in)

        api_keys = {}
        user_keys_encrypted = crud.get_user_api_keys(db, db_user.id)
        if user_keys_encrypted:
            try:
                api_keys = {p: decrypt_string(k) for p, k in user_keys_encrypted.items()}
            except Exception as exc:
                # Best effort: continue without stored keys, but leave a trace.
                logger.warning(
                    "Could not decrypt stored API keys for user %s (%s); continuing without them",
                    db_user.id,
                    type(exc).__name__,
                )

        session_id = await create_session(api_keys, user_id=db_user.id)
        res = RedirectResponse(url=f"{REDIRECT_URI_HOST}/ui/workspace.html#api")
        res.set_cookie(
            key="session_id",
            value=session_id,
            httponly=True,
            secure=COOKIE_SECURE,
            samesite=COOKIE_SAMESITE,
            max_age=int(os.getenv("SESSION_TTL", 7200)),
        )
        return res

    except HTTPException:
        # Deliberate 4xx responses raised above must not be rewritten as 500s.
        raise
    except Exception as e:
        logger.exception("Google OAuth failed: %s", e)
        raise HTTPException(status_code=500, detail=f"Google OAuth failed: {str(e)}")
|
| 338 |
+
|
| 339 |
+
@app.get("/auth/login/github")
def login_github():
    """Redirect the browser to GitHub's OAuth authorization page.

    Raises:
        HTTPException: 400 when GITHUB_CLIENT_ID is not configured.
    """
    import urllib.parse

    if not GITHUB_CLIENT_ID:
        raise HTTPException(status_code=400, detail="GitHub Auth is not configured. Please set GITHUB_CLIENT_ID env variable.")

    # Build the query with urlencode so the redirect URI and client id are
    # percent-encoded instead of being interpolated raw.
    # NOTE(security): "state" is a fixed string and is not verified by the
    # callback, so it gives no CSRF protection. A per-request random value
    # checked in the callback is needed to close that gap.
    query = urllib.parse.urlencode(
        {
            "client_id": GITHUB_CLIENT_ID,
            "redirect_uri": f"{REDIRECT_URI_HOST}/auth/callback/github",
            "scope": "user:email",
            "state": "github_auth_state",
        },
        quote_via=urllib.parse.quote,
    )
    return RedirectResponse(url=f"https://github.com/login/oauth/authorize?{query}")
|
| 352 |
+
|
| 353 |
+
@app.get("/auth/callback/github")
async def callback_github(code: str, response: Response, db: Session = Depends(get_db)):
    """Finish the GitHub OAuth flow and start a session.

    Exchanges ``code`` for an access token, picks the account's primary email
    (falling back to the first listed one), creates the user on first login
    (with a random password), then redirects to the UI with the session
    cookie set.

    ``response`` is unused; the cookie is set on the returned redirect.

    Raises:
        HTTPException: 400 for missing configuration, a missing access token
            or a missing email; 500 for any other failure.
    """
    import asyncio
    import json
    import secrets
    import urllib.parse
    import urllib.request

    from llmopt.api.security import decrypt_string

    if not GITHUB_CLIENT_ID or not GITHUB_CLIENT_SECRET:
        raise HTTPException(status_code=400, detail="GitHub Auth credentials missing.")

    def fetch_json(request):
        # Blocking HTTP call; the timeout keeps a stalled provider from
        # hanging the worker thread indefinitely.
        with urllib.request.urlopen(request, timeout=10) as resp:
            return json.loads(resp.read().decode("utf-8"))

    token_payload = {
        "code": code,
        "client_id": GITHUB_CLIENT_ID,
        "client_secret": GITHUB_CLIENT_SECRET,
        "redirect_uri": f"{REDIRECT_URI_HOST}/auth/callback/github",
    }

    try:
        token_request = urllib.request.Request(
            "https://github.com/login/oauth/access_token",
            data=urllib.parse.urlencode(token_payload).encode("utf-8"),
            headers={"Accept": "application/json"},
            method="POST",
        )
        # urlopen blocks, so it runs in a worker thread to keep the event loop free.
        token_res = await asyncio.to_thread(fetch_json, token_request)

        access_token = token_res.get("access_token")
        if not access_token:
            raise HTTPException(status_code=400, detail="Failed to retrieve access token from GitHub.")

        email_request = urllib.request.Request(
            "https://api.github.com/user/emails",
            headers={
                "Authorization": f"token {access_token}",
                "User-Agent": "LLMOpt-Server",
            },
        )
        emails = await asyncio.to_thread(fetch_json, email_request)

        # Prefer the entry flagged primary; otherwise fall back to the first one.
        # NOTE(review): the entry's verification status is not checked here.
        email = next((info.get("email") for info in emails if info.get("primary")), None)
        if not email and emails:
            email = emails[0].get("email")

        if not email:
            raise HTTPException(status_code=400, detail="GitHub account has no email associated.")

        db_user = crud.get_user_by_email(db, email=email)
        if not db_user:
            # First OAuth login: create the account with an unguessable password.
            user_in = crud.UserCreate(email=email, password=secrets.token_hex(16))
            db_user = crud.create_user(db, user_in)

        api_keys = {}
        user_keys_encrypted = crud.get_user_api_keys(db, db_user.id)
        if user_keys_encrypted:
            try:
                api_keys = {p: decrypt_string(k) for p, k in user_keys_encrypted.items()}
            except Exception as exc:
                # Best effort: continue without stored keys, but leave a trace.
                logger.warning(
                    "Could not decrypt stored API keys for user %s (%s); continuing without them",
                    db_user.id,
                    type(exc).__name__,
                )

        session_id = await create_session(api_keys, user_id=db_user.id)
        res = RedirectResponse(url=f"{REDIRECT_URI_HOST}/ui/workspace.html#api")
        res.set_cookie(
            key="session_id",
            value=session_id,
            httponly=True,
            secure=COOKIE_SECURE,
            samesite=COOKIE_SAMESITE,
            max_age=int(os.getenv("SESSION_TTL", 7200)),
        )
        return res

    except HTTPException:
        # Deliberate 4xx responses raised above must not be rewritten as 500s.
        raise
    except Exception as e:
        logger.exception("GitHub OAuth failed: %s", e)
        raise HTTPException(status_code=500, detail=f"GitHub OAuth failed: {str(e)}")
|
| 440 |
+
|
| 441 |
+
def _dashboard_time_ago(delta):
    """Render a timedelta as 'Nd ago', 'Nh ago', 'Nm ago' or 'just now'."""
    if delta.days > 0:
        return f"{delta.days}d ago"
    hours = delta.seconds // 3600
    if hours > 0:
        return f"{hours}h ago"
    minutes = (delta.seconds % 3600) // 60
    return f"{minutes}m ago" if minutes else "just now"


def _dashboard_score(log):
    """Return the log's complexity score, or 0.72 only when none was recorded."""
    # An explicit None check keeps a genuine score of 0.0 from being replaced.
    return 0.72 if log.complexity_score is None else log.complexity_score


@app.get("/auth/dashboard-stats")
async def get_dashboard_stats(session_id: str = Depends(get_session_id_from_request), db: Session = Depends(get_db)):
    """Calculate dashboard statistics dynamically from the generation logs in the database.

    Sessions without a user id get empty statistics: filtering on a missing
    id would otherwise match every log row stored without a user.
    """
    from datetime import datetime

    session_payload = await get_session_payload(session_id)
    user_id = session_payload.get("user_id")

    logs = []
    if user_id:
        logs = (
            db.query(models.GenerationLog)
            .filter(models.GenerationLog.user_id == user_id)
            .order_by(models.GenerationLog.created_at.desc())
            .all()
        )

    prompts_improved = len(logs)
    total_tokens_saved = sum(log.tokens_saved for log in logs if log.tokens_saved)
    total_cost_saved = sum(log.cost_saved for log in logs if log.cost_saved)

    # Share of requests per provider, in percent, over logs that name a provider.
    distribution = {}
    for log in logs:
        if log.provider:
            provider = log.provider.lower()
            distribution[provider] = distribution.get(provider, 0) + 1
    total_with_provider = sum(distribution.values())
    distribution_percentages = {
        provider: round((count / total_with_provider) * 100, 1)
        for provider, count in distribution.items()
    }

    now = datetime.utcnow()

    # Logs are ordered newest first, so the slices below are the latest entries.
    recent_decisions = []
    for log in logs[:5]:
        tier = log.complexity_tier or "standard"
        score = _dashboard_score(log)
        recent_decisions.append({
            "id": f"PROMPT_{log.id}",
            "time_ago": _dashboard_time_ago(now - log.created_at),
            "model": log.model_used,
            "provider": log.provider,
            "tier": tier,
            "score": round(score * 100, 1),
            "reason": f"Routed based on {tier} tier (complexity score: {round(score*100)}/100).",
        })

    recent_optimizations = [
        {
            "name": log.query[:40] + ("..." if len(log.query) > 40 else ""),
            "model_used": log.model_used,
            "time_ago": _dashboard_time_ago(now - log.created_at),
            "score": f"{round(_dashboard_score(log) * 100, 1)}%",
            "tokens_saved": f"-{log.tokens_saved or 0} tokens/avg",
        }
        for log in logs[:3]
    ]

    valid_scores = [log.complexity_score for log in logs if log.complexity_score is not None]
    avg_complexity = sum(valid_scores) / len(valid_scores) if valid_scores else 0.0
    avg_boost = f"+{round(avg_complexity * 30, 1)}%" if avg_complexity > 0 else "0%"

    return {
        "tokens_saved": f"{total_tokens_saved:,}" if total_tokens_saved > 0 else "0",
        "prompts_improved": prompts_improved,
        "routing_savings": f"${total_cost_saved:,.2f}" if total_cost_saved > 0 else "$0.00",
        "avg_boost": avg_boost,
        "distribution": distribution_percentages,
        "recent_decisions": recent_decisions,
        "recent_optimizations": recent_optimizations,
        "running_workflows": 0,
        "queued_workflows": 0,
    }
|
| 527 |
|
| 528 |
+
def _format_time_ago(created_at, now=None):
    """Render the age of ``created_at`` as a short relative-time string.

    Returns ``"<N>d ago"``, ``"<N>h ago"``, ``"<N>m ago"``, or ``"just now"``
    for anything younger than one minute.

    Args:
        created_at: Timestamp of the event.
            NOTE(review): assumed to be a naive UTC ``datetime`` (it is
            subtracted from ``datetime.utcnow()``) — confirm against the model.
        now: Reference time; defaults to ``datetime.utcnow()``.
    """
    from datetime import datetime  # local import keeps this block self-contained

    if now is None:
        now = datetime.utcnow()

    # Clamp at zero: with clock skew the row can be slightly "in the future",
    # and a negative timedelta has days == -1 / seconds ~= 86399, which the
    # previous days/seconds arithmetic rendered as "23h ago".
    elapsed = max(0, int((now - created_at).total_seconds()))

    days, remainder = divmod(elapsed, 86400)
    if days:
        return f"{days}d ago"
    hours, remainder = divmod(remainder, 3600)
    if hours:
        return f"{hours}h ago"
    minutes = remainder // 60
    return f"{minutes}m ago" if minutes else "just now"


@app.get("/auth/history")
async def get_history(session_id: str = Depends(get_session_id_from_request), db: Session = Depends(get_db)):
    """Fetch the list of recent generation logs for the authenticated user.

    Returns up to 20 of the caller's ``GenerationLog`` rows, newest first,
    each as a plain dict with an extra ``time_ago`` display string.

    Raises:
        HTTPException: 401 when the session carries no ``user_id``.
    """
    session_payload = await get_session_payload(session_id)
    user_id = session_payload.get("user_id")
    if not user_id:
        raise HTTPException(status_code=401, detail="Not authenticated")

    logs = (
        db.query(models.GenerationLog)
        .filter(models.GenerationLog.user_id == user_id)
        .order_by(models.GenerationLog.created_at.desc())
        .limit(20)
        .all()
    )

    # Calculate time-ago strings for frontend against a single reference time
    # so every row in one response is measured consistently.
    from datetime import datetime
    now = datetime.utcnow()

    return [
        {
            "id": log.id,
            "query": log.query,
            "response": log.response,
            "model_used": log.model_used,
            "provider": log.provider,
            "input_tokens": log.input_tokens,
            "output_tokens": log.output_tokens,
            "total_tokens": log.total_tokens,
            "estimated_cost": log.estimated_cost,
            "tokens_saved": log.tokens_saved,
            "cost_saved": log.cost_saved,
            "latency_ms": log.latency_ms,
            "complexity_score": log.complexity_score,
            "complexity_tier": log.complexity_tier,
            "time_ago": _format_time_ago(log.created_at, now),
        }
        for log in logs
    ]
|
| 572 |
+
|
| 573 |
+
@app.post("/auth/logout")
async def logout(response: Response, session_id: str = Depends(get_session_id_from_request)):
    """Log the caller out.

    Deletes the server-side session via ``delete_session`` and removes the
    ``session_id`` cookie, passing the ``COOKIE_SAMESITE`` / ``COOKIE_SECURE``
    settings to ``delete_cookie``.
    """
    cookie_options = {"samesite": COOKIE_SAMESITE, "secure": COOKIE_SECURE}
    await delete_session(session_id)
    response.delete_cookie("session_id", **cookie_options)
    return dict(message="Logged out")
|
| 579 |
+
|
| 580 |
+
|
| 581 |
+
@app.get("/")
def root():
    """Redirect the bare root URL to the UI.

    When ``IS_DEV`` is set the target is the dev server on localhost:5173,
    otherwise it is the ``/ui/`` path on this app.
    """
    destination = "http://localhost:5173/ui/" if IS_DEV else "/ui/"
    return RedirectResponse(url=destination)
|
| 586 |
+
|
| 587 |
+
# UI wiring: with IS_DEV set, /ui requests are bounced to the dev server on
# localhost:5173; otherwise the files in ./static are served under /ui.
if IS_DEV:
    @app.get("/ui")
    @app.get("/ui/{path:path}")
    def redirect_to_vite(path: str = ""):
        """Redirect a ``/ui`` request to the same path on localhost:5173."""
        dev_server_url = f"http://localhost:5173/ui/{path}"
        return RedirectResponse(url=dev_server_url)
else:
    app.mount("/ui", StaticFiles(directory="static", html=True), name="static")
|
| 594 |
+
|
| 595 |
|
| 596 |
@app.get("/health")
|
| 597 |
def health():
|
|
|
|
| 605 |
|
| 606 |
|
| 607 |
@app.post("/generate", response_model=GenerateResponse)
|
| 608 |
+
async def generate(req: GenerateRequest, session_id: str = Depends(get_session_id_from_request), db: Session = Depends(get_db)):
|
| 609 |
"""
|
| 610 |
Full pipeline: analyze → optimize → route → return response + metrics.
|
| 611 |
"""
|
| 612 |
+
await check_rate_limit(session_id)
|
| 613 |
+
|
| 614 |
+
# Override req.api_keys with the ones securely stored in the session
|
| 615 |
+
session_payload = await get_session_payload(session_id)
|
| 616 |
+
session_keys = session_payload.get("api_keys", {})
|
| 617 |
+
user_id = session_payload.get("user_id")
|
| 618 |
+
if not req.api_keys:
|
| 619 |
+
req.api_keys = {}
|
| 620 |
+
req.api_keys.update(session_keys)
|
| 621 |
+
|
| 622 |
try:
|
| 623 |
+
# LLMOpt core relies on synchronous execution right now (litellm async is separate)
|
| 624 |
+
# Assuming _client.generate is synchronous, we run it normally
|
| 625 |
+
# In a high-concurrency async app, we might want run_in_threadpool
|
| 626 |
result = _client.generate(
|
| 627 |
query=req.query,
|
| 628 |
budget_mode=req.budget_mode,
|
|
|
|
| 635 |
temperature=req.temperature,
|
| 636 |
dry_run=req.dry_run,
|
| 637 |
api_keys=req.api_keys, # Pass BYOK keys
|
| 638 |
+
alpha=req.alpha,
|
| 639 |
+
beta=req.beta,
|
| 640 |
+
gamma=req.gamma,
|
| 641 |
+
compression_enabled=req.compression_enabled,
|
| 642 |
+
evaluate=req.evaluate,
|
| 643 |
)
|
| 644 |
+
|
| 645 |
+
# Save generation log to database
|
| 646 |
+
try:
|
| 647 |
+
log_entry = models.GenerationLog(
|
| 648 |
+
user_id=user_id,
|
| 649 |
+
query=req.query,
|
| 650 |
+
response=result.response,
|
| 651 |
+
model_used=result.model_used,
|
| 652 |
+
provider=result.provider,
|
| 653 |
+
input_tokens=result.input_tokens,
|
| 654 |
+
output_tokens=result.output_tokens,
|
| 655 |
+
total_tokens=result.total_tokens,
|
| 656 |
+
estimated_cost=result.estimated_cost,
|
| 657 |
+
tokens_saved=result.tokens_saved,
|
| 658 |
+
cost_saved=result.cost_saved,
|
| 659 |
+
latency_ms=result.latency_ms,
|
| 660 |
+
complexity_score=result.complexity.score,
|
| 661 |
+
complexity_tier=result.complexity.tier
|
| 662 |
+
)
|
| 663 |
+
db.add(log_entry)
|
| 664 |
+
db.commit()
|
| 665 |
+
except Exception as log_err:
|
| 666 |
+
logger.error(f"Failed to save generation log: {log_err}")
|
| 667 |
+
|
| 668 |
return GenerateResponse(**result.to_dict())
|
| 669 |
except KeyError as e:
|
| 670 |
raise HTTPException(status_code=400, detail=f"Model not found: {e}")
|
| 671 |
except Exception as e:
|
| 672 |
logger.exception("generate() failed")
|
| 673 |
+
error_msg = str(e).lower()
|
| 674 |
+
if "authentication" in error_msg or "unauthorized" in error_msg or "invalid api key" in error_msg or "401" in error_msg:
|
| 675 |
+
raise HTTPException(status_code=401, detail="API is expired or token limit ended")
|
| 676 |
+
elif "rate limit" in error_msg or "429" in error_msg:
|
| 677 |
+
raise HTTPException(status_code=429, detail="API is expired or token limit ended")
|
| 678 |
raise HTTPException(status_code=500, detail=str(e))
|
| 679 |
|
| 680 |
|
| 681 |
@app.post("/explain")
async def explain(req: ExplainRequest, session_id: str = Depends(get_session_id_from_request)):
    """
    Returns the full routing decision for a query WITHOUT making an LLM API call.
    Useful for debugging, testing, and understanding optimization decisions.

    The API keys passed to the client are the ones stored in the caller's
    session. Any exception raised by ``_client.explain`` is logged and
    reported as HTTP 500 with the exception text as detail.
    """
    payload = await get_session_payload(session_id)
    stored_keys = payload.get("api_keys", {})
    try:
        explain_kwargs = dict(
            query=req.query,
            budget_mode=req.budget_mode,
            alpha=req.alpha,
            beta=req.beta,
            gamma=req.gamma,
            compression_enabled=req.compression_enabled,
            exclude_providers=req.exclude_providers,
            only_providers=req.only_providers,
            api_keys=stored_keys,
        )
        return _client.explain(**explain_kwargs)
    except Exception as exc:
        logger.exception("explain() failed")
        raise HTTPException(status_code=500, detail=str(exc))
|
| 704 |
|
| 705 |
|
| 706 |
+
|
| 707 |
@app.post("/stream")
|
| 708 |
+
async def stream_generate(req: GenerateRequest, session_id: str = Depends(get_session_id_from_request)):
|
| 709 |
"""Server-sent stream of response tokens."""
|
| 710 |
+
await check_rate_limit(session_id)
|
| 711 |
+
session_payload = await get_session_payload(session_id)
|
| 712 |
+
session_keys = session_payload.get("api_keys", {})
|
| 713 |
+
if not req.api_keys:
|
| 714 |
+
req.api_keys = {}
|
| 715 |
+
req.api_keys.update(session_keys)
|
| 716 |
+
|
| 717 |
def token_generator():
|
| 718 |
try:
|
| 719 |
for chunk in _client.stream(
|
llmopt/api/crud.py
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from sqlalchemy.orm import Session
|
| 2 |
+
from passlib.context import CryptContext
|
| 3 |
+
from llmopt.db import models
|
| 4 |
+
from pydantic import BaseModel
|
| 5 |
+
|
| 6 |
+
pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")
|
| 7 |
+
|
| 8 |
+
class UserCreate(BaseModel):
    """Credentials used by ``create_user`` to build a new ``User`` row."""

    # Stored on the new row as ``User.email``.
    email: str
    # Plain-text password; ``create_user`` hashes it before it is stored.
    password: str
|
| 11 |
+
|
| 12 |
+
class UserLogin(BaseModel):
    """Email/password pair; presumably the login request body — verify against the caller."""

    # Email identifying the account.
    email: str
    # Plain-text password as submitted.
    password: str
|
| 15 |
+
|
| 16 |
+
def get_password_hash(password):
    """Return the hash of ``password`` produced by the module-level ``pwd_context``."""
    hashed = pwd_context.hash(password)
    return hashed
|
| 18 |
+
|
| 19 |
+
def verify_password(plain_password, hashed_password):
    """Return the result of checking ``plain_password`` against ``hashed_password`` with ``pwd_context``."""
    is_match = pwd_context.verify(plain_password, hashed_password)
    return is_match
|
| 21 |
+
|
| 22 |
+
def get_user_by_email(db: Session, email: str):
    """Return the first ``User`` row whose ``email`` column equals ``email``."""
    email_matches = models.User.email == email
    user_query = db.query(models.User).filter(email_matches)
    return user_query.first()
|
| 24 |
+
|
| 25 |
+
def create_user(db: Session, user: UserCreate):
    """Persist a new ``User`` built from ``user`` and return the refreshed row.

    The password is hashed with ``get_password_hash`` before the row is
    created; the row is added, committed, and refreshed before being returned.
    """
    new_user = models.User(
        email=user.email,
        hashed_password=get_password_hash(user.password),
    )
    db.add(new_user)
    db.commit()
    db.refresh(new_user)
    return new_user
|
| 32 |
+
|
| 33 |
+
def get_user_by_id(db: Session, user_id: int):
    """Return the first ``User`` row whose ``id`` column equals ``user_id``."""
    id_matches = models.User.id == user_id
    user_query = db.query(models.User).filter(id_matches)
    return user_query.first()
|
| 35 |
+
|
| 36 |
+
def update_user_api_keys(db: Session, user_id: int, provider_keys: dict):
    """Insert or overwrite one ``UserAPIKey`` row per provider for ``user_id``.

    ``provider_keys`` maps provider name to the value stored in
    ``encrypted_key``. Existing rows are updated in place, missing ones are
    added, and everything is committed once at the end.
    """
    for provider_name in provider_keys:
        new_value = provider_keys[provider_name]
        record = (
            db.query(models.UserAPIKey)
            .filter(
                models.UserAPIKey.user_id == user_id,
                models.UserAPIKey.provider == provider_name,
            )
            .first()
        )
        if not record:
            # No row for this provider yet: create one.
            db.add(models.UserAPIKey(user_id=user_id, provider=provider_name, encrypted_key=new_value))
            continue
        record.encrypted_key = new_value
    db.commit()
|
| 48 |
+
|
| 49 |
+
def get_user_api_keys(db: Session, user_id: int) -> dict:
    """Return ``{provider: encrypted_key}`` for every ``UserAPIKey`` row of ``user_id``."""
    keys_by_provider = {}
    rows = db.query(models.UserAPIKey).filter(models.UserAPIKey.user_id == user_id).all()
    for row in rows:
        keys_by_provider[row.provider] = row.encrypted_key
    return keys_by_provider
|
| 52 |
+
|
| 53 |
+
def delete_user_api_key(db: Session, user_id: int, provider: str) -> None:
    """Delete the ``UserAPIKey`` rows matching ``user_id`` and ``provider``, then commit."""
    matching_rows = db.query(models.UserAPIKey).filter(
        models.UserAPIKey.user_id == user_id,
        models.UserAPIKey.provider == provider,
    )
    matching_rows.delete()
    db.commit()
|
| 59 |
+
|
llmopt/api/security.py
ADDED
|
@@ -0,0 +1,186 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import json
|
| 3 |
+
import uuid
|
| 4 |
+
import logging
|
| 5 |
+
from typing import Dict, Optional
|
| 6 |
+
from cryptography.fernet import Fernet
|
| 7 |
+
from fastapi import Request, HTTPException, status
|
| 8 |
+
from llmopt.cache.redis_client import get_redis
|
| 9 |
+
import jwt
|
| 10 |
+
import datetime
|
| 11 |
+
from llmopt.db.session import SessionLocal
|
| 12 |
+
from llmopt.api import crud
|
| 13 |
+
|
| 14 |
+
logger = logging.getLogger(__name__)
|
| 15 |
+
|
| 16 |
+
# Master secret for this module. It keys the Fernet cipher below and is also
# the HS256 signing key for the session JWTs.
# In production, this MUST be set via environment variable.
# NOTE(review): the fallback key exists only for the lifetime of this process,
# so anything encrypted with it cannot be decrypted after a restart.
_SECRET_KEY = os.environ.get("SESSION_SECRET_KEY")
if not _SECRET_KEY:
    logger.warning("SESSION_SECRET_KEY not set. Generating a temporary one for this process.")
    _SECRET_KEY = Fernet.generate_key().decode("utf-8")

# Cipher shared by the encrypt_*/decrypt_* helpers.
fernet = Fernet(_SECRET_KEY.encode("utf-8"))

# Session lifetime in seconds; defaults to 2 hours, overridable via SESSION_TTL.
SESSION_TTL = int(os.environ.get("SESSION_TTL", 7200))
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
def encrypt_payload(payload: dict) -> str:
    """Serialize ``payload`` to JSON and return it encrypted as a Fernet token string."""
    plaintext = json.dumps(payload).encode("utf-8")
    token = fernet.encrypt(plaintext)
    return token.decode("utf-8")
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
def decrypt_payload(encrypted_data: str) -> dict:
    """Decrypt a Fernet token string and parse the resulting JSON text."""
    token = encrypted_data.encode("utf-8")
    plaintext = fernet.decrypt(token)
    return json.loads(plaintext.decode("utf-8"))
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
def encrypt_string(data: str) -> str:
    """Encrypt ``data`` with the module cipher and return the token as text."""
    token = fernet.encrypt(data.encode("utf-8"))
    return token.decode("utf-8")
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
def decrypt_string(encrypted_data: str) -> str:
    """Decrypt a token produced by ``encrypt_string`` and return the original text."""
    plaintext = fernet.decrypt(encrypted_data.encode("utf-8"))
    return plaintext.decode("utf-8")
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
async def create_session(api_keys: Dict[str, str], user_id: Optional[int] = None) -> str:
    """Create a session and return its ID (an HS256-signed JWT).

    The JWT carries a random ``jti`` and, when given, the ``user_id``. The
    API keys and user id are encrypted and stored in Redis under
    ``session:<session_id>`` with a lifetime of ``SESSION_TTL`` seconds.

    Raises:
        HTTPException: 503 when no Redis client is available.
    """
    redis = await get_redis()
    if not redis:
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            detail="Redis cache is unavailable. Cannot create session."
        )

    # Build the token claims.
    # NOTE(review): no "exp" claim is set, so the token itself never expires.
    claims = {"jti": str(uuid.uuid4())}
    if user_id is not None:
        claims["user_id"] = user_id
    session_id = jwt.encode(claims, _SECRET_KEY, algorithm="HS256")

    # Store the encrypted session contents with a TTL.
    session_blob = encrypt_payload({"api_keys": api_keys, "user_id": user_id})
    await redis.setex(f"session:{session_id}", SESSION_TTL, session_blob)
    return session_id
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
async def update_session_payload(session_id: str, payload: dict) -> None:
    """Overwrite the stored payload of an existing session.

    Encrypts ``payload`` and writes it to ``session:<session_id>`` in Redis,
    resetting the key's lifetime to ``SESSION_TTL`` seconds.

    Raises:
        HTTPException: 503 when no Redis client is available.
    """
    redis = await get_redis()
    if not redis:
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            detail="Redis cache is unavailable. Cannot update session."
        )
    redis_key = f"session:{session_id}"
    await redis.setex(redis_key, SESSION_TTL, encrypt_payload(payload))
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
async def get_session_payload(session_id: str) -> dict:
    """Retrieves and decrypts the payload from Redis. On cache miss, restores from DB using JWT.

    Cache hit: the stored payload is decrypted and, only once that has
    succeeded, the key's TTL is pushed out by ``SESSION_TTL`` seconds.

    Cache miss: ``session_id`` is verified as an HS256 JWT; when it carries a
    ``user_id`` the user's stored API keys are loaded from the database,
    decrypted, written back to Redis, and returned.

    Returns:
        ``{"api_keys": {...}, "user_id": ...}``.

    Raises:
        HTTPException: 503 when Redis is unavailable; 401 when the token is
            invalid or carries no ``user_id``; 500 when the cached payload or
            the stored keys cannot be decrypted.
    """
    redis = await get_redis()
    if not redis:
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            detail="Redis cache is unavailable."
        )

    cache_key = f"session:{session_id}"
    encrypted_keys = await redis.get(cache_key)

    # Cache hit
    if encrypted_keys:
        try:
            payload = decrypt_payload(encrypted_keys)
        except Exception as e:
            logger.error(f"Failed to decrypt session keys: {e}")
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail="Failed to decrypt session."
            ) from e
        # Refresh the TTL only after a successful decrypt, so an unreadable
        # entry is not kept alive by the very requests that fail on it.
        await redis.expire(cache_key, SESSION_TTL)
        return payload

    # Cache miss: attempt to decode JWT and recover from database.
    # Only the decode is guarded; nothing below raises InvalidTokenError.
    try:
        jwt_payload = jwt.decode(session_id, _SECRET_KEY, algorithms=["HS256"])
    except jwt.InvalidTokenError:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Session expired or invalid."
        )

    user_id = jwt_payload.get("user_id")
    if not user_id:
        raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="Session expired. No user context.")

    # Fetch from database
    db = SessionLocal()
    try:
        user_keys_encrypted = crud.get_user_api_keys(db, user_id)
    finally:
        db.close()

    # Decrypt keys from DB. Report a failure the same way as the cache-hit
    # path instead of letting the cipher's exception escape unhandled.
    try:
        api_keys = {p: decrypt_string(k) for p, k in user_keys_encrypted.items()}
    except Exception as e:
        logger.error(f"Failed to decrypt stored API keys for user {user_id}: {e}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Failed to decrypt session."
        ) from e

    payload = {"api_keys": api_keys, "user_id": user_id}

    # Repopulate Redis
    await redis.setex(cache_key, SESSION_TTL, encrypt_payload(payload))

    return payload
|
| 140 |
+
|
| 141 |
+
|
| 142 |
+
async def delete_session(session_id: str) -> bool:
    """Delete ``session:<session_id>`` from Redis.

    Returns ``False`` when no Redis client is available, ``True`` otherwise.
    """
    redis = await get_redis()
    if redis:
        await redis.delete(f"session:{session_id}")
        return True
    return False
|
| 149 |
+
|
| 150 |
+
|
| 151 |
+
async def check_rate_limit(session_id: str, max_requests: int = 20, window_seconds: int = 60) -> None:
    """
    Basic rate limiting: max 20 requests per minute per session (by default).

    Counts requests in the Redis key ``ratelimit:<session_id>``; the window
    starts when the key is created. When no Redis client is available the
    check is skipped.

    Args:
        session_id: Session whose requests are being counted.
        max_requests: Requests allowed per window.
        window_seconds: Length of the window in seconds.

    Raises:
        HTTPException: 429 once the counter exceeds ``max_requests``.
    """
    redis = await get_redis()
    if not redis:
        return

    key = f"ratelimit:{session_id}"
    requests = await redis.incr(key)

    if requests == 1:
        # First request of a new window: start the countdown.
        await redis.expire(key, window_seconds)
    elif requests > max_requests and await redis.ttl(key) == -1:
        # INCR and EXPIRE are separate commands. If the EXPIRE after the first
        # INCR never ran, the counter has no TTL (TTL reports -1) and the
        # session would stay locked out forever. Re-arm the expiry.
        await redis.expire(key, window_seconds)

    if requests > max_requests:
        raise HTTPException(
            status_code=status.HTTP_429_TOO_MANY_REQUESTS,
            detail="Rate limit exceeded. Please try again later."
        )
|
| 170 |
+
|
| 171 |
+
def get_session_id_from_request(request: Request) -> str:
    """Extracts session ID from cookies or Authorization header.

    The ``session_id`` cookie takes precedence. Otherwise an
    ``Authorization: Bearer <token>`` header is accepted; the scheme is
    matched case-insensitively and a missing or blank token is rejected.

    Raises:
        HTTPException: 401 when neither source yields a session ID.
    """
    # First try cookie
    session_id = request.cookies.get("session_id")
    if session_id:
        return session_id

    # Then try Authorization header (Bearer token).
    # split() collapses repeated whitespace, so "Bearer " or "Bearer   " has no
    # second element and falls through to the 401 instead of returning "".
    auth_header = request.headers.get("Authorization")
    if auth_header:
        parts = auth_header.split()
        if len(parts) >= 2 and parts[0].lower() == "bearer":
            return parts[1]

    raise HTTPException(
        status_code=status.HTTP_401_UNAUTHORIZED,
        detail="Missing session_id cookie or Bearer token."
    )
|
llmopt/cache/redis_client.py
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import logging
|
| 3 |
+
from typing import Optional
|
| 4 |
+
from redis import asyncio as aioredis
|
| 5 |
+
|
| 6 |
+
logger = logging.getLogger(__name__)
|
| 7 |
+
|
| 8 |
+
class RedisManager:
    """Holds the process-wide async Redis client.

    ``redis`` is ``None`` until ``connect()`` succeeds, and again after
    ``close()`` or a failed connection attempt.
    """

    def __init__(self):
        self.redis: Optional[aioredis.Redis] = None

    @staticmethod
    async def _shutdown(client) -> None:
        """Close ``client`` via ``aclose()`` when it has one, else ``close()``."""
        # Newer redis-py deprecates close() on the asyncio client in favour of
        # aclose(); fall back to close() for versions without it.
        closer = getattr(client, "aclose", None) or client.close
        await closer()

    async def connect(self):
        """Create the client from ``REDIS_URL`` and verify it with a PING.

        When ``REDIS_URL`` is unset, a warning is logged and nothing is
        created. When creating or pinging the client fails, the error is
        logged, the client is closed, and ``redis`` is left as ``None``.
        """
        redis_url = os.getenv("REDIS_URL")
        if not redis_url:
            logger.warning("REDIS_URL environment variable is not set. Redis features will be disabled.")
            return

        client = None
        try:
            client = aioredis.from_url(
                redis_url,
                encoding="utf-8",
                decode_responses=True,
                socket_timeout=5.0,
                socket_connect_timeout=5.0,
                retry_on_timeout=True,
                max_connections=10
            )
            await client.ping()
        except Exception as e:
            logger.error(f"Failed to connect to Redis: {e}")
            self.redis = None
            if client is not None:
                # Don't leave the half-initialized client's connections open.
                try:
                    await self._shutdown(client)
                except Exception as close_err:
                    logger.error(f"Failed to close Redis client after connection error: {close_err}")
            return

        self.redis = client
        logger.info("Successfully connected to Redis.")

    async def close(self):
        """Close the client, if any, and reset ``redis`` to ``None``."""
        # Detach first so no caller is handed a client that is being closed.
        client, self.redis = self.redis, None
        if client is not None:
            await self._shutdown(client)
|
| 37 |
+
|
| 38 |
+
redis_manager = RedisManager()
|
| 39 |
+
|
| 40 |
+
async def get_redis():
    """Return the client currently held by ``redis_manager`` (``None`` when there is none)."""
    client = redis_manager.redis
    return client
|
llmopt/core.py
CHANGED
|
@@ -18,17 +18,18 @@ import time
|
|
| 18 |
import os
|
| 19 |
from dataclasses import dataclass
|
| 20 |
from pathlib import Path
|
| 21 |
-
from typing import Optional
|
| 22 |
|
| 23 |
from llmopt.analyzer.query_analyzer import QueryAnalyzer, QueryFeatures
|
| 24 |
from llmopt.estimator.complexity_estimator import ComplexityEstimator, ComplexityResult
|
| 25 |
from llmopt.engine.optimization_engine import OptimizationEngine, OptimizationResult, UserConstraints
|
|
|
|
|
|
|
| 26 |
from llmopt.optimizer.prompt_optimizer import PromptOptimizer, OptimizedPrompt
|
| 27 |
from llmopt.router.model_router import ModelRouter, RoutedResponse
|
| 28 |
from llmopt.registry.model_registry import ModelRegistry
|
| 29 |
from llmopt.cache.semantic_cache import SemanticCache
|
| 30 |
from llmopt.evaluation.evaluator import LLMJudge, EvaluationResult
|
| 31 |
-
import os
|
| 32 |
|
| 33 |
logger = logging.getLogger(__name__)
|
| 34 |
|
|
@@ -136,30 +137,40 @@ class LLMOpt:
|
|
| 136 |
registry_path: Optional[Path] = None,
|
| 137 |
ollama_base_url: Optional[str] = None,
|
| 138 |
log_level: str = "WARNING",
|
|
|
|
| 139 |
):
|
| 140 |
logging.basicConfig(level=getattr(logging, log_level.upper(), logging.WARNING))
|
| 141 |
|
| 142 |
self.registry = ModelRegistry(registry_path)
|
| 143 |
self.analyzer = QueryAnalyzer()
|
| 144 |
self.estimator = ComplexityEstimator()
|
| 145 |
-
self.engine = OptimizationEngine(self.registry)
|
| 146 |
self.optimizer = PromptOptimizer()
|
| 147 |
self.router = ModelRouter(ollama_base_url=ollama_base_url)
|
| 148 |
-
|
| 149 |
# Initialize Semantic Cache (reads REDIS_URL from env if available)
|
| 150 |
-
# Using python-dotenv to ensure .env is loaded
|
| 151 |
try:
|
| 152 |
from dotenv import load_dotenv # type: ignore
|
| 153 |
-
# Attempt to load from both the root and config/.env
|
| 154 |
load_dotenv()
|
| 155 |
load_dotenv("config/.env")
|
| 156 |
except ImportError:
|
| 157 |
pass
|
| 158 |
-
|
| 159 |
redis_url = os.environ.get("REDIS_URL")
|
| 160 |
self.cache = SemanticCache(redis_url=redis_url)
|
| 161 |
self.judge = LLMJudge(judge_model="gpt-4o-mini")
|
| 162 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 163 |
# ------------------------------------------------------------------
|
| 164 |
# Primary API
|
| 165 |
# ------------------------------------------------------------------
|
|
@@ -178,6 +189,10 @@ class LLMOpt:
|
|
| 178 |
dry_run: bool = False,
|
| 179 |
evaluate: bool = False,
|
| 180 |
api_keys: Optional[Dict[str, str]] = None,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 181 |
) -> GenerateResult:
|
| 182 |
"""
|
| 183 |
Full pipeline: analyze → estimate → optimize → compress → route → return.
|
|
@@ -212,11 +227,17 @@ class LLMOpt:
|
|
| 212 |
latency_ms = (time.perf_counter() - t0) * 1000
|
| 213 |
logger.info("Returning cached response directly.")
|
| 214 |
|
| 215 |
-
constraints = UserConstraints(
|
|
|
|
|
|
|
|
|
|
| 216 |
optimization = self.engine.optimize(
|
| 217 |
complexity=complexity,
|
| 218 |
output_length_bucket=features.estimated_output_length,
|
| 219 |
constraints=constraints,
|
|
|
|
|
|
|
|
|
|
| 220 |
)
|
| 221 |
optimized_prompt = self.optimizer.optimize(
|
| 222 |
query=query,
|
|
@@ -257,16 +278,40 @@ class LLMOpt:
|
|
| 257 |
exclude_providers=exclude_providers or [],
|
| 258 |
only_providers=only_providers or [],
|
| 259 |
prefer_local=prefer_local,
|
|
|
|
| 260 |
)
|
| 261 |
if prefer_local:
|
| 262 |
constraints.only_providers = ["ollama"]
|
| 263 |
|
| 264 |
# 4. Optimize (select model + config)
|
| 265 |
-
|
| 266 |
-
|
| 267 |
-
|
| 268 |
-
|
| 269 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 270 |
logger.debug(f"Selected: {optimization.selected_model}")
|
| 271 |
|
| 272 |
# 5. Optimize prompt
|
|
@@ -284,16 +329,26 @@ class LLMOpt:
|
|
| 284 |
if dry_run:
|
| 285 |
routed = self._mock_response(optimization)
|
| 286 |
else:
|
| 287 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 288 |
routed = self.router.route(
|
| 289 |
model_name=optimization.selected_model,
|
| 290 |
provider=optimization.provider,
|
| 291 |
messages=messages,
|
| 292 |
max_tokens=optimization.max_tokens,
|
| 293 |
temperature=temperature,
|
| 294 |
-
input_cost_per_1k=
|
| 295 |
-
output_cost_per_1k=
|
| 296 |
-
api_keys=api_keys,
|
| 297 |
)
|
| 298 |
latency_ms = (time.perf_counter() - t0) * 1000
|
| 299 |
|
|
@@ -308,11 +363,21 @@ class LLMOpt:
|
|
| 308 |
)
|
| 309 |
cost_saved = max(0.0, baseline_cost - routed.estimated_cost)
|
| 310 |
|
| 311 |
-
# 9. Evaluate (if requested) and feed
|
| 312 |
evaluation = None
|
| 313 |
if evaluate and not dry_run:
|
| 314 |
evaluation = self.judge.evaluate(query, routed.content)
|
| 315 |
-
if
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 316 |
Ξ±, Ξ², Ξ³ = self.engine.bayes.get_weights(constraints.budget_mode)
|
| 317 |
self.engine.bayes.record_outcome(
|
| 318 |
budget_mode=constraints.budget_mode,
|
|
@@ -351,15 +416,26 @@ class LLMOpt:
|
|
| 351 |
api_keys: Optional[Dict[str, str]] = None,
|
| 352 |
**kwargs,
|
| 353 |
):
|
| 354 |
-
"""Yields text chunks.
|
| 355 |
-
features
|
| 356 |
-
complexity
|
| 357 |
-
|
| 358 |
-
|
| 359 |
-
|
| 360 |
-
|
| 361 |
-
|
| 362 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 363 |
optimized_prompt = self.optimizer.optimize(
|
| 364 |
query=query,
|
| 365 |
system_prompt_style=optimization.system_prompt_style,
|
|
@@ -370,27 +446,62 @@ class LLMOpt:
|
|
| 370 |
model_name=optimization.selected_model,
|
| 371 |
messages=messages,
|
| 372 |
max_tokens=optimization.max_tokens,
|
| 373 |
-
provider=optimization.provider,
|
| 374 |
-
api_keys=api_keys,
|
| 375 |
)
|
| 376 |
|
| 377 |
# ------------------------------------------------------------------
|
| 378 |
# Explainability (standalone)
|
| 379 |
# ------------------------------------------------------------------
|
| 380 |
|
| 381 |
-
def explain(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 382 |
"""
|
| 383 |
Returns a structured explanation of what LLMOpt would do for a query,
|
| 384 |
without making an actual API call.
|
| 385 |
"""
|
| 386 |
features = self.analyzer.analyze(query)
|
| 387 |
complexity = self.estimator.estimate(features)
|
| 388 |
-
|
| 389 |
-
|
| 390 |
-
|
| 391 |
-
|
| 392 |
-
|
| 393 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 394 |
optimized_prompt = self.optimizer.optimize(
|
| 395 |
query=query,
|
| 396 |
system_prompt_style=optimization.system_prompt_style,
|
|
@@ -404,6 +515,7 @@ class LLMOpt:
|
|
| 404 |
"optimized_prompt": optimized_prompt.to_dict(),
|
| 405 |
}
|
| 406 |
|
|
|
|
| 407 |
# ------------------------------------------------------------------
|
| 408 |
# Helpers
|
| 409 |
# ------------------------------------------------------------------
|
|
@@ -433,3 +545,62 @@ class LLMOpt:
|
|
| 433 |
latency_ms=0.0,
|
| 434 |
estimated_cost=optimization.estimated_cost,
|
| 435 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
import os
|
| 19 |
from dataclasses import dataclass
|
| 20 |
from pathlib import Path
|
| 21 |
+
from typing import Optional, Dict, List
|
| 22 |
|
| 23 |
from llmopt.analyzer.query_analyzer import QueryAnalyzer, QueryFeatures
|
| 24 |
from llmopt.estimator.complexity_estimator import ComplexityEstimator, ComplexityResult
|
| 25 |
from llmopt.engine.optimization_engine import OptimizationEngine, OptimizationResult, UserConstraints
|
| 26 |
+
from llmopt.engine.llmopt_engine import LLMOptEngine
|
| 27 |
+
from llmopt.engine.utility_engine import RoutingDecision
|
| 28 |
from llmopt.optimizer.prompt_optimizer import PromptOptimizer, OptimizedPrompt
|
| 29 |
from llmopt.router.model_router import ModelRouter, RoutedResponse
|
| 30 |
from llmopt.registry.model_registry import ModelRegistry
|
| 31 |
from llmopt.cache.semantic_cache import SemanticCache
|
| 32 |
from llmopt.evaluation.evaluator import LLMJudge, EvaluationResult
|
|
|
|
| 33 |
|
| 34 |
logger = logging.getLogger(__name__)
|
| 35 |
|
|
|
|
| 137 |
registry_path: Optional[Path] = None,
|
| 138 |
ollama_base_url: Optional[str] = None,
|
| 139 |
log_level: str = "WARNING",
|
| 140 |
+
use_v2_engine: bool = True,
|
| 141 |
):
|
| 142 |
logging.basicConfig(level=getattr(logging, log_level.upper(), logging.WARNING))
|
| 143 |
|
| 144 |
self.registry = ModelRegistry(registry_path)
|
| 145 |
self.analyzer = QueryAnalyzer()
|
| 146 |
self.estimator = ComplexityEstimator()
|
| 147 |
+
self.engine = OptimizationEngine(self.registry) # V1 — kept for fallback
|
| 148 |
self.optimizer = PromptOptimizer()
|
| 149 |
self.router = ModelRouter(ollama_base_url=ollama_base_url)
|
| 150 |
+
|
| 151 |
# Initialize Semantic Cache (reads REDIS_URL from env if available)
|
|
|
|
| 152 |
try:
|
| 153 |
from dotenv import load_dotenv # type: ignore
|
|
|
|
| 154 |
load_dotenv()
|
| 155 |
load_dotenv("config/.env")
|
| 156 |
except ImportError:
|
| 157 |
pass
|
| 158 |
+
|
| 159 |
redis_url = os.environ.get("REDIS_URL")
|
| 160 |
self.cache = SemanticCache(redis_url=redis_url)
|
| 161 |
self.judge = LLMJudge(judge_model="gpt-4o-mini")
|
| 162 |
|
| 163 |
+
# V2 Utility Engine — default active
|
| 164 |
+
self._use_v2 = use_v2_engine
|
| 165 |
+
self._v2_engine: Optional[LLMOptEngine] = None
|
| 166 |
+
if use_v2_engine:
|
| 167 |
+
self._v2_engine = LLMOptEngine(
|
| 168 |
+
available_keys={}, # populated per-request via update_keys()
|
| 169 |
+
include_ollama=True,
|
| 170 |
+
log_level=logging.WARNING,
|
| 171 |
+
)
|
| 172 |
+
logger.info("[LLMOpt] V2 utility engine active.")
|
| 173 |
+
|
| 174 |
# ------------------------------------------------------------------
|
| 175 |
# Primary API
|
| 176 |
# ------------------------------------------------------------------
|
|
|
|
| 189 |
dry_run: bool = False,
|
| 190 |
evaluate: bool = False,
|
| 191 |
api_keys: Optional[Dict[str, str]] = None,
|
| 192 |
+
alpha: Optional[float] = None,
|
| 193 |
+
beta: Optional[float] = None,
|
| 194 |
+
gamma: Optional[float] = None,
|
| 195 |
+
compression_enabled: Optional[bool] = None,
|
| 196 |
) -> GenerateResult:
|
| 197 |
"""
|
| 198 |
Full pipeline: analyze → estimate → optimize → compress → route → return.
|
|
|
|
| 227 |
latency_ms = (time.perf_counter() - t0) * 1000
|
| 228 |
logger.info("Returning cached response directly.")
|
| 229 |
|
| 230 |
+
constraints = UserConstraints(
|
| 231 |
+
budget_mode=budget_mode,
|
| 232 |
+
compression_enabled=compression_enabled,
|
| 233 |
+
)
|
| 234 |
optimization = self.engine.optimize(
|
| 235 |
complexity=complexity,
|
| 236 |
output_length_bucket=features.estimated_output_length,
|
| 237 |
constraints=constraints,
|
| 238 |
+
alpha=alpha,
|
| 239 |
+
beta=beta,
|
| 240 |
+
gamma=gamma,
|
| 241 |
)
|
| 242 |
optimized_prompt = self.optimizer.optimize(
|
| 243 |
query=query,
|
|
|
|
| 278 |
exclude_providers=exclude_providers or [],
|
| 279 |
only_providers=only_providers or [],
|
| 280 |
prefer_local=prefer_local,
|
| 281 |
+
compression_enabled=compression_enabled,
|
| 282 |
)
|
| 283 |
if prefer_local:
|
| 284 |
constraints.only_providers = ["ollama"]
|
| 285 |
|
| 286 |
# 4. Optimize (select model + config)
|
| 287 |
+
if self._use_v2 and self._v2_engine is not None:
|
| 288 |
+
# Update BYOK keys for this request
|
| 289 |
+
if api_keys:
|
| 290 |
+
self._v2_engine.update_keys(api_keys)
|
| 291 |
+
# Build constraints dict for V2 engine
|
| 292 |
+
v2_constraints = {
|
| 293 |
+
"exclude_providers": exclude_providers or [],
|
| 294 |
+
"only_providers": only_providers or [],
|
| 295 |
+
}
|
| 296 |
+
if max_cost_per_request is not None:
|
| 297 |
+
v2_constraints["max_cost_per_request"] = max_cost_per_request
|
| 298 |
+
if prefer_local:
|
| 299 |
+
v2_constraints["only_providers"] = ["ollama"]
|
| 300 |
+
decision = self._v2_engine.route(
|
| 301 |
+
query_features=features,
|
| 302 |
+
budget_mode=budget_mode,
|
| 303 |
+
constraints=v2_constraints,
|
| 304 |
+
)
|
| 305 |
+
optimization = self._v2_to_optimization_result(decision, complexity, features)
|
| 306 |
+
else:
|
| 307 |
+
optimization = self.engine.optimize(
|
| 308 |
+
complexity=complexity,
|
| 309 |
+
output_length_bucket=features.estimated_output_length,
|
| 310 |
+
constraints=constraints,
|
| 311 |
+
alpha=alpha,
|
| 312 |
+
beta=beta,
|
| 313 |
+
gamma=gamma,
|
| 314 |
+
)
|
| 315 |
logger.debug(f"Selected: {optimization.selected_model}")
|
| 316 |
|
| 317 |
# 5. Optimize prompt
|
|
|
|
| 329 |
if dry_run:
|
| 330 |
routed = self._mock_response(optimization)
|
| 331 |
else:
|
| 332 |
+
# Fetch model spec from appropriate registry
|
| 333 |
+
if self._use_v2 and self._v2_engine is not None:
|
| 334 |
+
# V2: look up from the merged V2 registry (knows all new model IDs)
|
| 335 |
+
v2_spec = self._v2_engine._registry.get_model(optimization.selected_model)
|
| 336 |
+
in_cost = v2_spec["input_cost_per_1k"] if v2_spec else optimization.estimated_cost / 2
|
| 337 |
+
out_cost = v2_spec["output_cost_per_1k"] if v2_spec else optimization.estimated_cost / 2
|
| 338 |
+
else:
|
| 339 |
+
# V1: look up from the old ModelRegistry
|
| 340 |
+
model_spec = self.registry.get(optimization.selected_model)
|
| 341 |
+
in_cost = model_spec.input_cost_per_1k
|
| 342 |
+
out_cost = model_spec.output_cost_per_1k
|
| 343 |
routed = self.router.route(
|
| 344 |
model_name=optimization.selected_model,
|
| 345 |
provider=optimization.provider,
|
| 346 |
messages=messages,
|
| 347 |
max_tokens=optimization.max_tokens,
|
| 348 |
temperature=temperature,
|
| 349 |
+
input_cost_per_1k=in_cost,
|
| 350 |
+
output_cost_per_1k=out_cost,
|
| 351 |
+
api_keys=api_keys,
|
| 352 |
)
|
| 353 |
latency_ms = (time.perf_counter() - t0) * 1000
|
| 354 |
|
|
|
|
| 363 |
)
|
| 364 |
cost_saved = max(0.0, baseline_cost - routed.estimated_cost)
|
| 365 |
|
| 366 |
+
# 9. Evaluate (if requested) and feed optimizer
|
| 367 |
evaluation = None
|
| 368 |
if evaluate and not dry_run:
|
| 369 |
evaluation = self.judge.evaluate(query, routed.content)
|
| 370 |
+
if self._use_v2 and self._v2_engine is not None:
|
| 371 |
+
# Feed outcome back into adaptive EMA updater
|
| 372 |
+
self._v2_engine.record_outcome(
|
| 373 |
+
model_id=routed.model_used,
|
| 374 |
+
latency_ms=routed.latency_ms,
|
| 375 |
+
success=True,
|
| 376 |
+
quality_score=evaluation.overall if evaluation else None,
|
| 377 |
+
cost_usd=routed.estimated_cost,
|
| 378 |
+
)
|
| 379 |
+
elif evaluation:
|
| 380 |
+
# V1 path: feed Bayesian optimizer
|
| 381 |
Ξ±, Ξ², Ξ³ = self.engine.bayes.get_weights(constraints.budget_mode)
|
| 382 |
self.engine.bayes.record_outcome(
|
| 383 |
budget_mode=constraints.budget_mode,
|
|
|
|
| 416 |
api_keys: Optional[Dict[str, str]] = None,
|
| 417 |
**kwargs,
|
| 418 |
):
|
| 419 |
+
"""Yields text chunks. Pipeline still runs fully before streaming."""
|
| 420 |
+
features = self.analyzer.analyze(query)
|
| 421 |
+
complexity = self.estimator.estimate(features)
|
| 422 |
+
|
| 423 |
+
if self._use_v2 and self._v2_engine is not None:
|
| 424 |
+
if api_keys:
|
| 425 |
+
self._v2_engine.update_keys(api_keys)
|
| 426 |
+
decision = self._v2_engine.route(
|
| 427 |
+
query_features=features,
|
| 428 |
+
budget_mode=budget_mode,
|
| 429 |
+
)
|
| 430 |
+
optimization = self._v2_to_optimization_result(decision, complexity, features)
|
| 431 |
+
else:
|
| 432 |
+
constraints = UserConstraints(budget_mode=budget_mode)
|
| 433 |
+
optimization = self.engine.optimize(
|
| 434 |
+
complexity=complexity,
|
| 435 |
+
output_length_bucket=features.estimated_output_length,
|
| 436 |
+
constraints=constraints,
|
| 437 |
+
)
|
| 438 |
+
|
| 439 |
optimized_prompt = self.optimizer.optimize(
|
| 440 |
query=query,
|
| 441 |
system_prompt_style=optimization.system_prompt_style,
|
|
|
|
| 446 |
model_name=optimization.selected_model,
|
| 447 |
messages=messages,
|
| 448 |
max_tokens=optimization.max_tokens,
|
| 449 |
+
provider=optimization.provider,
|
| 450 |
+
api_keys=api_keys,
|
| 451 |
)
|
| 452 |
|
| 453 |
# ------------------------------------------------------------------
|
| 454 |
# Explainability (standalone)
|
| 455 |
# ------------------------------------------------------------------
|
| 456 |
|
| 457 |
+
def explain(
|
| 458 |
+
self,
|
| 459 |
+
query: str,
|
| 460 |
+
budget_mode: str = "balanced",
|
| 461 |
+
alpha: Optional[float] = None,
|
| 462 |
+
beta: Optional[float] = None,
|
| 463 |
+
gamma: Optional[float] = None,
|
| 464 |
+
compression_enabled: Optional[bool] = None,
|
| 465 |
+
exclude_providers: Optional[list[str]] = None,
|
| 466 |
+
only_providers: Optional[list[str]] = None,
|
| 467 |
+
api_keys: Optional[Dict[str, str]] = None,
|
| 468 |
+
) -> dict:
|
| 469 |
"""
|
| 470 |
Returns a structured explanation of what LLMOpt would do for a query,
|
| 471 |
without making an actual API call.
|
| 472 |
"""
|
| 473 |
features = self.analyzer.analyze(query)
|
| 474 |
complexity = self.estimator.estimate(features)
|
| 475 |
+
|
| 476 |
+
if self._use_v2 and self._v2_engine is not None:
|
| 477 |
+
if api_keys:
|
| 478 |
+
self._v2_engine.update_keys(api_keys)
|
| 479 |
+
v2_constraints = {
|
| 480 |
+
"exclude_providers": exclude_providers or [],
|
| 481 |
+
"only_providers": only_providers or [],
|
| 482 |
+
}
|
| 483 |
+
decision = self._v2_engine.route(
|
| 484 |
+
query_features=features,
|
| 485 |
+
budget_mode=budget_mode,
|
| 486 |
+
constraints=v2_constraints,
|
| 487 |
+
)
|
| 488 |
+
optimization = self._v2_to_optimization_result(decision, complexity, features)
|
| 489 |
+
else:
|
| 490 |
+
constraints = UserConstraints(
|
| 491 |
+
budget_mode=budget_mode,
|
| 492 |
+
compression_enabled=compression_enabled,
|
| 493 |
+
exclude_providers=exclude_providers or [],
|
| 494 |
+
only_providers=only_providers or [],
|
| 495 |
+
)
|
| 496 |
+
optimization = self.engine.optimize(
|
| 497 |
+
complexity=complexity,
|
| 498 |
+
output_length_bucket=features.estimated_output_length,
|
| 499 |
+
constraints=constraints,
|
| 500 |
+
alpha=alpha,
|
| 501 |
+
beta=beta,
|
| 502 |
+
gamma=gamma,
|
| 503 |
+
)
|
| 504 |
+
|
| 505 |
optimized_prompt = self.optimizer.optimize(
|
| 506 |
query=query,
|
| 507 |
system_prompt_style=optimization.system_prompt_style,
|
|
|
|
| 515 |
"optimized_prompt": optimized_prompt.to_dict(),
|
| 516 |
}
|
| 517 |
|
| 518 |
+
|
| 519 |
# ------------------------------------------------------------------
|
| 520 |
# Helpers
|
| 521 |
# ------------------------------------------------------------------
|
|
|
|
| 545 |
latency_ms=0.0,
|
| 546 |
estimated_cost=optimization.estimated_cost,
|
| 547 |
)
|
| 548 |
+
|
| 549 |
+
@staticmethod
def _v2_to_optimization_result(
    decision: RoutingDecision,
    complexity: ComplexityResult,
    features: QueryFeatures,
) -> OptimizationResult:
    """
    Adapt a V2 ``RoutingDecision`` to the V1 ``OptimizationResult`` shape.

    Downstream stages keep consuming ``OptimizationResult`` while model
    selection is delegated to the utility engine, so this shim fills every
    V1 field from the decision and from the analyzed query features.

    Args:
        decision: Routing outcome from the V2 engine. Its ``explanation``
            mapping supplies the budget mode and the rationale details.
        complexity: Accepted for signature parity with the V1 path; this
            function does not read it.
        features: Analyzed query. ``estimated_output_length`` and
            ``token_count`` are read with ``getattr`` defaults.

    Returns:
        An ``OptimizationResult`` whose ``objective_score`` is
        ``1.0 - utility_score`` (V1 treats lower scores as better).
    """
    details = decision.explanation

    # Human-readable trace of why the V2 engine picked this model.
    notes = [
        f"engine=utility_v2 domain={details.get('primary_domain', 'general')}",
        f"utility_score={decision.utility_score:.4f} budget_lambda={details.get('lambda', '?')}",
        f"top_dims={list(details.get('query_dimensions', {}).keys())[:3]}",
        f"candidates_evaluated={details.get('candidates_evaluated', '?')}",
        f"registry_source={details.get('registry_source', 'baseline')}",
    ]
    if decision.fallback_model_id:
        notes.append(f"fallback={decision.fallback_model_id} ({decision.fallback_provider})")

    # Output-length bucket -> expected completion tokens; unknown buckets use 700.
    bucket = str(getattr(features, 'estimated_output_length', 'medium')).lower()
    tokens_by_bucket = {"short": 300, "medium": 700, "long": 1500, "very_long": 3000}
    expected_output = tokens_by_bucket.get(bucket, 700)
    # Never report fewer than 100 input tokens.
    expected_input = max(getattr(features, 'token_count', 100), 100)

    # Only the "cheap" budget mode turns on compression and the minimal prompt style.
    is_cheap = details.get("budget_mode", "balanced") == "cheap"
    if is_cheap:
        prompt_style = "minimal"
    else:
        prompt_style = "standard"

    # Leave 200 tokens of headroom over the estimate, capped at 4096.
    token_cap = min(expected_output + 200, 4096)

    return OptimizationResult(
        selected_model=decision.model_id,
        provider=decision.provider,
        estimated_cost=decision.estimated_cost,
        estimated_input_tokens=expected_input,
        estimated_output_tokens=expected_output,
        max_tokens=token_cap,
        compression_enabled=is_cheap,
        system_prompt_style=prompt_style,
        rationale=notes,
        fallback_model=decision.fallback_model_id,
        objective_score=1.0 - decision.utility_score,  # invert: lower is better (V1 convention)
    )
|
| 606 |
+
|
llmopt/db/models.py
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from sqlalchemy import Column, Integer, String, DateTime, ForeignKey, Float
|
| 2 |
+
from sqlalchemy.orm import relationship
|
| 3 |
+
from datetime import datetime
|
| 4 |
+
from llmopt.db.session import Base
|
| 5 |
+
|
| 6 |
+
class User(Base):
    """ORM model for an account row in the ``users`` table.

    Linked to ``UserAPIKey`` and ``GenerationLog`` through the two
    relationships declared at the bottom of the class.
    """
    __tablename__ = "users"

    id = Column(Integer, primary_key=True, index=True)
    # Unique, indexed e-mail address.
    email = Column(String, unique=True, index=True)
    # Presumably a password hash computed elsewhere (no hashing happens here) — confirm.
    hashed_password = Column(String)
    # The column default is the ``datetime.utcnow`` callable, so the stored value is a naive UTC timestamp.
    created_at = Column(DateTime, default=datetime.utcnow)

    # Both collections use the "all, delete-orphan" cascade, so child rows
    # follow the lifecycle of their owning user.
    api_keys = relationship("UserAPIKey", back_populates="user", cascade="all, delete-orphan")
    generation_logs = relationship("GenerationLog", back_populates="user", cascade="all, delete-orphan")
|
| 16 |
+
|
| 17 |
+
class UserAPIKey(Base):
    """ORM model for one stored provider API key (``user_api_keys`` table)."""
    __tablename__ = "user_api_keys"

    id = Column(Integer, primary_key=True, index=True)
    # Owning user; no ``nullable=False`` is declared, so the column accepts NULL.
    user_id = Column(Integer, ForeignKey("users.id"))
    # Provider name, indexed for lookup.
    provider = Column(String, index=True)
    # Presumably ciphertext produced before insert; the encryption itself is not visible here — confirm.
    encrypted_key = Column(String)

    # Back-reference paired with ``User.api_keys``.
    user = relationship("User", back_populates="api_keys")
|
| 26 |
+
|
| 27 |
+
class GenerationLog(Base):
    """ORM model for one logged generation request (``generation_logs`` table).

    Each row stores the query, the optional response, the model and provider
    used, token and cost figures, latency, and the complexity estimate.
    """
    __tablename__ = "generation_logs"

    id = Column(Integer, primary_key=True, index=True)
    # Explicitly nullable: a log row may exist without an owning user.
    user_id = Column(Integer, ForeignKey("users.id"), nullable=True)
    query = Column(String)
    # Explicitly nullable: a row may be stored without response text.
    response = Column(String, nullable=True)
    model_used = Column(String)
    provider = Column(String)
    # Token accounting for the request.
    input_tokens = Column(Integer)
    output_tokens = Column(Integer)
    total_tokens = Column(Integer)
    # Cost and savings figures; the unit is presumably USD — confirm against the writer.
    estimated_cost = Column(Float)
    tokens_saved = Column(Integer)
    cost_saved = Column(Float)
    latency_ms = Column(Float)
    # Complexity estimate recorded for the query (numeric score plus tier label).
    complexity_score = Column(Float)
    complexity_tier = Column(String)
    # The column default is the ``datetime.utcnow`` callable, so the stored value is a naive UTC timestamp.
    created_at = Column(DateTime, default=datetime.utcnow)

    # Back-reference paired with ``User.generation_logs``.
    user = relationship("User", back_populates="generation_logs")
|
llmopt/db/session.py
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from sqlalchemy import create_engine
|
| 3 |
+
from sqlalchemy.orm import sessionmaker, declarative_base
|
| 4 |
+
|
| 5 |
+
# Default to local SQLite when DATABASE_URL is unset *or* blank.
# os.getenv() only applies its default when the variable is missing; a
# variable that is set but empty (e.g. a bare `DATABASE_URL=` line) would
# otherwise reach create_engine() as an empty, unusable URL.
SQLALCHEMY_DATABASE_URL = (os.getenv("DATABASE_URL") or "").strip() or "sqlite:///./llmopt.db"

if SQLALCHEMY_DATABASE_URL.startswith("sqlite"):
    # For SQLite, we need connect_args={"check_same_thread": False}
    engine = create_engine(
        SQLALCHEMY_DATABASE_URL, connect_args={"check_same_thread": False}
    )
else:
    # For Postgres (e.g. Neon, Supabase)
    # SQLAlchemy requires `postgresql://` instead of `postgres://`
    if SQLALCHEMY_DATABASE_URL.startswith("postgres://"):
        SQLALCHEMY_DATABASE_URL = SQLALCHEMY_DATABASE_URL.replace("postgres://", "postgresql://", 1)
    engine = create_engine(
        SQLALCHEMY_DATABASE_URL,
        # Test each pooled connection before use and recycle connections
        # older than 300 seconds.
        pool_pre_ping=True,
        pool_recycle=300,
    )

# Session factory bound to the engine; commits and flushes are explicit.
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)

# Declarative base class that the ORM models inherit from.
Base = declarative_base()
|
| 27 |
+
|
| 28 |
+
# Dependency for FastAPI
|
| 29 |
+
def get_db():
    """Yield one database session and close it when the consumer is done.

    Written as a generator so it can be used as a dependency: the code
    after ``yield`` runs once the consumer finishes with the session.
    """
    session = SessionLocal()
    try:
        yield session
    finally:
        # Runs on normal completion, on an exception, and on early generator close.
        session.close()
|
llmopt/engine/__init__.py
CHANGED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""LLMOpt engine package."""
|
| 2 |
+
# V1 (legacy, kept for compatibility)
|
| 3 |
+
from llmopt.engine.optimization_engine import OptimizationEngine, OptimizationResult, UserConstraints
|
| 4 |
+
|
| 5 |
+
# V2 — utility-based routing
|
| 6 |
+
from llmopt.engine.utility_engine import UtilityOptimizationEngine, RoutingDecision, QueryUtilityProfile
|
| 7 |
+
from llmopt.engine.llmopt_engine import LLMOptEngine
|
| 8 |
+
|
| 9 |
+
__all__ = [
|
| 10 |
+
# V1
|
| 11 |
+
"OptimizationEngine", "OptimizationResult", "UserConstraints",
|
| 12 |
+
# V2
|
| 13 |
+
"UtilityOptimizationEngine", "RoutingDecision", "QueryUtilityProfile",
|
| 14 |
+
"LLMOptEngine",
|
| 15 |
+
]
|
llmopt/engine/llmopt_engine.py
ADDED
|
@@ -0,0 +1,275 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
LLMOpt — Main Facade (V2 Engine Entry Point)
|
| 3 |
+
=============================================
|
| 4 |
+
Wires the utility engine into the existing LLMOpt pipeline.
|
| 5 |
+
|
| 6 |
+
Replaces: OptimizationEngine
|
| 7 |
+
Keeps intact: QueryAnalyzer, ComplexityEstimator, SemanticCache,
|
| 8 |
+
PromptOptimizer, ModelRouter, LLMJudge
|
| 9 |
+
|
| 10 |
+
Usage (identical to old OptimizationEngine interface):
|
| 11 |
+
------------------------------------------------------
|
| 12 |
+
from llmopt.engine.llmopt_engine import LLMOptEngine
|
| 13 |
+
|
| 14 |
+
engine = LLMOptEngine(
|
| 15 |
+
available_keys={
|
| 16 |
+
"openai": "sk-...",
|
| 17 |
+
"anthropic": "sk-ant-...",
|
| 18 |
+
}
|
| 19 |
+
)
|
| 20 |
+
|
| 21 |
+
decision = engine.route(query_features, budget_mode="balanced")
|
| 22 |
+
# decision.model_id → "claude-sonnet-4-5"
|
| 23 |
+
# decision.provider → "anthropic"
|
| 24 |
+
# decision.utility_score → 0.8241
|
| 25 |
+
# decision.estimated_cost → 0.00312
|
| 26 |
+
# decision.explanation → {...full reasoning...}
|
| 27 |
+
# decision.fallback_model_id → "gpt-4.1-mini"
|
| 28 |
+
|
| 29 |
+
# After getting a response, record outcome for adaptive updating:
|
| 30 |
+
engine.record_outcome(
|
| 31 |
+
model_id="claude-sonnet-4-5",
|
| 32 |
+
latency_ms=1340,
|
| 33 |
+
success=True,
|
| 34 |
+
quality_score=8.5,
|
| 35 |
+
cost_usd=0.00312,
|
| 36 |
+
)
|
| 37 |
+
"""
|
| 38 |
+
|
| 39 |
+
import logging
|
| 40 |
+
import os
|
| 41 |
+
from typing import Optional
|
| 42 |
+
|
| 43 |
+
from llmopt.registry.hybrid_updater import HybridRegistryUpdater
|
| 44 |
+
from llmopt.engine.utility_engine import UtilityOptimizationEngine, RoutingDecision
|
| 45 |
+
from llmopt.updater.adaptive_updater import AdaptiveRuntimeUpdater
|
| 46 |
+
|
| 47 |
+
logger = logging.getLogger(__name__)
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
class LLMOptEngine:
    """
    Main entry point for the utility-based routing engine.

    Pipeline position: slots in between QueryAnalyzer/ComplexityEstimator
    and PromptOptimizer/ModelRouter.

    Internally wires three layers together: a ``HybridRegistryUpdater``
    (model registry), a ``UtilityOptimizationEngine`` (routing) and an
    ``AdaptiveRuntimeUpdater`` (observed-outcome statistics).

    Args:
        available_keys: Dict of provider -> API key. Missing providers fall
            back to their environment variable (see ``_resolve_keys``).
        openrouter_api_key: Optional OpenRouter key; falls back to the
            ``OPENROUTER_API_KEY`` environment variable.
        include_ollama: Whether to include local Ollama as a routing option.
        log_level: Logging verbosity passed to ``logging.basicConfig``.
    """

    def __init__(
        self,
        available_keys: Optional[dict] = None,
        openrouter_api_key: Optional[str] = None,
        include_ollama: bool = True,
        log_level: int = logging.WARNING,
    ):
        logging.basicConfig(level=log_level)

        # Resolve API keys: constructor args > environment variables
        resolved_keys = self._resolve_keys(available_keys or {})

        # Layer 1: Registry (baseline JSON + live OpenRouter patch)
        or_key = openrouter_api_key or os.getenv("OPENROUTER_API_KEY", "")
        self._registry = HybridRegistryUpdater(openrouter_api_key=or_key)

        # Layer 2: Utility Engine (BYOK-aware routing)
        self._engine = UtilityOptimizationEngine(
            registry_updater=self._registry,
            available_keys=resolved_keys,
            include_ollama=include_ollama,
        )

        # Layer 3: Adaptive Runtime Stats (EMA-based, no RL)
        self._runtime = AdaptiveRuntimeUpdater()

        # Only provider names are logged, never the key values.
        logger.info(
            "[LLMOptEngine] Initialized. Providers: %s",
            list(resolved_keys.keys()),
        )

    # -- Main routing method -------------------------------------------------

    def route(
        self,
        query_features,
        budget_mode: str = "balanced",
        constraints: Optional[dict] = None,
        force_refresh_registry: bool = False,
    ) -> "RoutingDecision":
        """
        Route a query to the best available model.

        Args:
            query_features: QueryFeatures dataclass (from QueryAnalyzer) or dict.
            budget_mode: "cheap" | "balanced" | "quality"
            constraints: Optional hard overrides. See UtilityOptimizationEngine.route().
                ``None`` is forwarded as an empty dict.
            force_refresh_registry: Force live registry refresh from OpenRouter.

        Returns:
            The RoutingDecision from the utility engine. When runtime data
            exists for the selected model, its ``utility_score`` is shifted by
            the runtime adjustment and ``explanation`` gains
            ``runtime_adjustment`` / ``observed_latency_ms`` entries.
        """
        decision = self._engine.route(
            query_features=query_features,
            budget_mode=budget_mode,
            constraints=constraints or {},
            force_refresh_registry=force_refresh_registry,
        )

        # Inject runtime adjustment into utility score. This happens after
        # selection, so it annotates the winner rather than re-ranking.
        adj = self._runtime.get_utility_adjustment(decision.model_id)
        if adj != 0.0:
            decision.utility_score = round(decision.utility_score + adj, 4)
            decision.explanation["runtime_adjustment"] = adj
            logger.debug(
                "[LLMOptEngine] Runtime adj for %s: %+.4f",
                decision.model_id,
                adj,
            )

        # Surface the observed latency if we have runtime data
        runtime_lat = self._runtime.get_latency_estimate(decision.model_id)
        if runtime_lat:
            decision.explanation["observed_latency_ms"] = round(runtime_lat, 0)

        return decision

    # -- Outcome recording: call after each LLM API response ------------------

    def record_outcome(
        self,
        model_id: str,
        latency_ms: Optional[float] = None,
        success: bool = True,
        quality_score: Optional[float] = None,
        cost_usd: Optional[float] = None,
    ):
        """
        Record the outcome of a routing decision for adaptive updating.

        Call this after the LLM API call completes (in ModelRouter or the main
        generate() method). quality_score comes from LLMJudge if evaluate=True.

        Args:
            model_id: The model that was used.
            latency_ms: Actual end-to-end latency.
            success: Whether the API call succeeded.
            quality_score: Optional 1-10 quality score from LLMJudge.
            cost_usd: Actual cost of the request.
        """
        self._runtime.record_outcome(
            model_id=model_id,
            latency_ms=latency_ms,
            success=success,
            quality_score=quality_score,
            cost_usd=cost_usd,
        )

    # -- Key management -------------------------------------------------------

    def update_keys(self, keys: dict):
        """
        Update available API keys mid-session.
        Use this when keys are passed per-request (BYOK REST API mode).

        NOTE(review): the resolved keys are handed to the shared utility
        engine; presumably they stay in effect for later route() calls until
        update_keys() is called again — confirm this is safe when one engine
        instance serves several users.
        """
        resolved = self._resolve_keys(keys)
        self._engine.update_available_keys(resolved)

    # -- Observability --------------------------------------------------------

    def explain(
        self,
        query_features,
        budget_mode: str = "balanced",
        constraints: Optional[dict] = None,
    ) -> str:
        """
        Dry-run routing: run route() and return the formatted explanation
        string without making an LLM API call.
        """
        decision = self.route(query_features, budget_mode, constraints)
        return self._format_explanation(decision)

    def get_registry_info(self) -> dict:
        """Returns registry runtime metadata (the ``_runtime_meta`` entry, or ``{}``)."""
        return self._registry.get_registry().get("_runtime_meta", {})

    def get_model_stats(self, model_id: str) -> dict:
        """Returns runtime stats for a specific model."""
        return self._runtime.get_stats_summary(model_id)

    def save_runtime_stats(self):
        """Persist runtime stats to disk (call on shutdown)."""
        self._runtime.save()

    # -- Internal helpers -----------------------------------------------------

    @staticmethod
    def _resolve_keys(keys: dict) -> dict:
        """
        Merge provided keys with environment variables.

        For each known provider the explicit key is preferred; if it is
        missing or looks like a template placeholder (starts with ``your_``
        or equals the environment variable's own name), the environment
        variable is tried instead. Providers not in the known list are passed
        through when their value is non-empty and not a ``your_`` placeholder.

        Args:
            keys: Provider -> API key mapping; ``None`` is treated as empty.

        Returns:
            Provider -> key mapping containing only usable keys, known
            providers first.
        """
        env_map = {
            "openai": "OPENAI_API_KEY",
            "anthropic": "ANTHROPIC_API_KEY",
            "google": "GEMINI_API_KEY",
            "mistral": "MISTRAL_API_KEY",
            "deepseek": "DEEPSEEK_API_KEY",
        }
        keys = keys or {}

        def _usable(value, placeholder=None) -> bool:
            # str() keeps a non-string value from crashing the prefix check.
            if not value:
                return False
            text = str(value)
            return not text.startswith("your_") and text != placeholder

        resolved = {}
        for provider, env_var in env_map.items():
            # Explicit key takes priority; fall back to env
            for candidate in (keys.get(provider), os.getenv(env_var, "")):
                if _usable(candidate, env_var):
                    resolved[provider] = candidate
                    break

        # Pass through any extra keys provided (custom providers). Known
        # providers are skipped so a value rejected above cannot be re-added.
        for provider, value in keys.items():
            if provider not in env_map and _usable(value):
                resolved[provider] = value

        return resolved

    @staticmethod
    def _format_explanation(decision: "RoutingDecision") -> str:
        """Formats a RoutingDecision as a human-readable explanation string."""
        ex = decision.explanation
        dims = ex.get("query_dimensions", {})
        shortlist = ex.get("shortlist", [])

        lines = [
            "=" * 55,
            "LLMOpt — Routing Decision",
            "=" * 55,
            f"Selected model : {decision.model_id} ({decision.provider})",
            f"Utility score : {decision.utility_score:.4f}",
            f"Estimated cost : ${decision.estimated_cost:.6f}",
            f"Budget mode : {ex.get('budget_mode')} (λ={ex.get('lambda')})",
            f"Primary domain : {ex.get('primary_domain')}",
            "",
            "Query dimensions (active weights):",
        ]
        lines.extend(f" {dim:<25} weight={weight:.2f}" for dim, weight in dims.items())

        if shortlist:
            lines += ["", "Top candidates:"]
            lines.extend(
                f" {item['model_id']:<35} U={item['utility_score']:.4f} "
                f"cap={item['capability']:.3f} ${item['est_cost_usd']:.6f}"
                for item in shortlist
            )

        if decision.fallback_model_id:
            lines.append(f"\nFallback model : {decision.fallback_model_id}")

        # Present only when route() applied a non-zero runtime adjustment.
        adj = ex.get("runtime_adjustment")
        if adj:
            lines.append(f"Runtime adj : {adj:+.4f} (from observed outcomes)")

        lines.append("=" * 55)
        return "\n".join(lines)
|
llmopt/engine/optimization_engine.py
CHANGED
|
@@ -240,12 +240,19 @@ class OptimizationEngine:
|
|
| 240 |
complexity: ComplexityResult,
|
| 241 |
output_length_bucket: str,
|
| 242 |
constraints: Optional[UserConstraints] = None,
|
|
|
|
|
|
|
|
|
|
| 243 |
) -> OptimizationResult:
|
| 244 |
if constraints is None:
|
| 245 |
constraints = UserConstraints()
|
| 246 |
|
| 247 |
-
|
| 248 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 249 |
|
| 250 |
# --- 1. Build candidate set ---
|
| 251 |
candidates = self.registry.capable_of(
|
|
|
|
| 240 |
complexity: ComplexityResult,
|
| 241 |
output_length_bucket: str,
|
| 242 |
constraints: Optional[UserConstraints] = None,
|
| 243 |
+
alpha: Optional[float] = None,
|
| 244 |
+
beta: Optional[float] = None,
|
| 245 |
+
gamma: Optional[float] = None,
|
| 246 |
) -> OptimizationResult:
|
| 247 |
if constraints is None:
|
| 248 |
constraints = UserConstraints()
|
| 249 |
|
| 250 |
+
if alpha is not None and beta is not None and gamma is not None:
|
| 251 |
+
Ξ±, Ξ², Ξ³ = alpha, beta, gamma
|
| 252 |
+
logger.debug(f"Using custom weights Ξ±={Ξ±:.3f} Ξ²={Ξ²:.3f} Ξ³={Ξ³:.3f}")
|
| 253 |
+
else:
|
| 254 |
+
Ξ±, Ξ², Ξ³ = self.bayes.get_weights(constraints.budget_mode)
|
| 255 |
+
logger.debug(f"Using weights Ξ±={Ξ±:.3f} Ξ²={Ξ²:.3f} Ξ³={Ξ³:.3f} for mode '{constraints.budget_mode}'")
|
| 256 |
|
| 257 |
# --- 1. Build candidate set ---
|
| 258 |
candidates = self.registry.capable_of(
|
llmopt/engine/utility_engine.py
ADDED
|
@@ -0,0 +1,665 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
LLMOpt — Utility-Based Optimization Engine (V2 Drop-in Replacement)
|
| 3 |
+
====================================================================
|
| 4 |
+
|
| 5 |
+
Replaces the old J(x) = α·Cost + β·Tokens − γ·Quality complexity-routing engine.
|
| 6 |
+
|
| 7 |
+
NEW APPROACH — Utility-Constrained Routing:
|
| 8 |
+
--------------------------------------------
|
| 9 |
+
Instead of routing on query complexity alone, the engine:
|
| 10 |
+
|
| 11 |
+
1. Resolves available models from the user's actual API keys (BYOK)
|
| 12 |
+
2. Applies hard constraints (context window, required features, cost cap)
|
| 13 |
+
3. Builds a query utility profile — what dimensions matter FOR THIS QUERY
|
| 14 |
+
4. Scores each candidate: U(m, q) = Σ wᵢ · capabilityᵢ(m) − λ · cost_norm(m)
|
| 15 |
+
where weights wᵢ come from the query profile, not global defaults
|
| 16 |
+
5. Returns the best model + fallback + full explanation
|
| 17 |
+
|
| 18 |
+
Key differences from old engine:
|
| 19 |
+
- Routing is driven by WHAT THE QUERY NEEDS, not a global complexity score
|
| 20 |
+
- Only models with available API keys are considered (BYOK)
|
| 21 |
+
- Weights are query-derived, not budget-mode static
|
| 22 |
+
- Budget mode adjusts λ (cost penalty), not the capability weights
|
| 23 |
+
- No Bayesian/Optuna dependency — deterministic, debuggable, stable
|
| 24 |
+
|
| 25 |
+
Drop-in interface:
|
| 26 |
+
engine = UtilityOptimizationEngine(available_keys={"openai": "sk-...", ...})
|
| 27 |
+
result = engine.route(query_features, budget_mode="balanced", constraints={})
|
| 28 |
+
# result.model_id, result.score, result.explanation, result.fallback_model_id
|
| 29 |
+
"""
|
| 30 |
+
|
| 31 |
+
import math
|
| 32 |
+
import logging
|
| 33 |
+
from dataclasses import dataclass, field
|
| 34 |
+
from typing import Optional
|
| 35 |
+
|
| 36 |
+
logger = logging.getLogger(__name__)
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
# ── Data contracts (same shape as old engine output) ─────────────────────────
|
| 40 |
+
|
| 41 |
+
@dataclass
class QueryUtilityProfile:
    """
    Per-query statement of which model capabilities matter, and how much.

    Every capability weight is an importance signal in [0.0, 1.0]. The weights
    are independent of each other and are not expected to sum to 1.
    """

    # --- Capability importance weights (0.0 = irrelevant for this query) ---
    reasoning: float = 0.0
    coding: float = 0.0
    math: float = 0.0
    creativity: float = 0.0
    factuality: float = 0.0
    # Non-zero by default: following instructions always matters a little.
    instruction_following: float = 0.5
    long_context: float = 0.0
    multilingual: float = 0.0
    tool_use: float = 0.0
    summarization: float = 0.0
    conversation: float = 0.0

    # --- Hard requirements ---
    requires_tool_calling: bool = False
    requires_image_input: bool = False
    requires_json_mode: bool = False
    min_context_tokens: int = 0

    # --- Token budget estimate for this query ---
    estimated_input_tokens: int = 500
    estimated_output_tokens: int = 500

    # --- Human-readable domain label (logging / explainability) ---
    primary_domain: str = "general"
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
@dataclass
class RoutingDecision:
    """
    Result of a routing call: the chosen model, its pricing and context
    window, an optional runner-up, and a structured explanation.

    Also carries a few legacy fields so existing pipeline code that consumed
    the old OptimizationEngine output keeps working.
    """

    # --- Selected model ---
    model_id: str
    provider: str
    utility_score: float        # U(m, q); higher is better
    estimated_cost: float       # USD for this request
    input_cost_per_1k: float
    output_cost_per_1k: float
    context_window: int

    # --- Runner-up, when more than one model was scored ---
    fallback_model_id: Optional[str] = None
    fallback_provider: Optional[str] = None

    # --- Structured explanation (supersedes the old "rationale" string) ---
    explanation: dict = field(default_factory=dict)

    # --- Legacy fields kept for pipeline compatibility ---
    capability_score: float = 0.0   # overall capability of the selected model
    complexity_score: float = 0.0   # pass-through from QueryFeatures if available
    tokens_saved: int = 0
    compression_ratio: float = 0.0
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
# ── Budget mode → cost penalty λ ─────────────────────────────────────────────
|
| 102 |
+
|
| 103 |
+
# Cost-penalty multiplier λ for each budget mode.
# A larger λ makes cost weigh more heavily against the utility score, so
# cheaper models win more often.
BUDGET_LAMBDA = dict(
    cheap=3.0,
    balanced=1.2,
    quality=0.3,
)

# Lowest utility score a model may have and still be kept by the scorer.
# Intended to weed out models that are a very poor fit for the query.
MIN_UTILITY_THRESHOLD = 0.25
|
| 114 |
+
|
| 115 |
+
|
| 116 |
+
# ── Core Engine ───────────────────────────────────────────────────────────────
|
| 117 |
+
|
| 118 |
+
class UtilityOptimizationEngine:
|
| 119 |
+
"""
|
| 120 |
+
Drop-in replacement for the old OptimizationEngine.
|
| 121 |
+
|
| 122 |
+
Instantiate once per request session (or per server lifecycle if keys are static).
|
| 123 |
+
Keys can be updated between requests via update_available_keys().
|
| 124 |
+
"""
|
| 125 |
+
|
| 126 |
+
def __init__(
    self,
    registry_updater,              # HybridRegistryUpdater instance
    available_keys: dict,          # {"openai": "sk-...", "anthropic": "sk-ant-..."}
    include_ollama: bool = True    # whether local Ollama counts as available
):
    """
    Store the registry source and resolve which providers are usable.

    Args:
        registry_updater: Object whose ``get_registry()`` supplies the model
            registry used during routing.
        available_keys: Mapping of provider name to API key.
        include_ollama: Whether local Ollama may be added to the usable
            providers (see ``update_available_keys``).
    """
    self._include_ollama = include_ollama
    self._updater = registry_updater
    # Starts empty; update_available_keys() replaces it with the usable keys.
    self._available_keys = {}
    self.update_available_keys(available_keys)
|
| 136 |
+
|
| 137 |
+
def update_available_keys(self, keys: dict):
    """
    Replace the set of usable providers; call whenever the user's keys change.

    ``keys`` maps provider name to API key, e.g.
    ``{"openai": "sk-...", "anthropic": "...", "ollama": "local"}``.
    Provider names are lower-cased. Entries whose value is empty, None, or
    whitespace-only are dropped.

    When ``include_ollama`` is enabled, Ollama is additionally registered if
    either:
      - an "ollama" entry appears in ``keys`` (any value), or
      - none of the known cloud providers has a usable key.
    This keeps Ollama from silently dominating routing when the user only
    supplied cloud API keys.
    """
    usable = {}
    for name, secret in keys.items():
        if secret and str(secret).strip():
            usable[name.lower()] = secret
    self._available_keys = usable

    # Ollama is opt-in (named explicitly) or a last-resort fallback.
    requested_names = {k.lower() for k in keys}
    ollama_named = "ollama" in requested_names
    cloud_providers = ("openai", "anthropic", "google", "mistral", "deepseek")
    has_cloud_key = any(name in self._available_keys for name in cloud_providers)

    if self._include_ollama and (ollama_named or not has_cloud_key):
        self._available_keys.setdefault("ollama", "__local__")

    logger.info(f"[Engine] Available providers: {list(self._available_keys.keys())}")
|
| 166 |
+
|
| 167 |
+
# ── Main routing method ───────────────────────────────────────────────────
|
| 168 |
+
|
| 169 |
+
def route(
    self,
    query_features,                   # QueryFeatures dataclass from QueryAnalyzer
    budget_mode: str = "balanced",
    constraints: Optional[dict] = None,
    force_refresh_registry: bool = False,
) -> RoutingDecision:
    """
    Route a query to the best available model.

    Args:
        query_features: Output of QueryAnalyzer (QueryFeatures dataclass or dict)
        budget_mode: "cheap" | "balanced" | "quality"; any other value is
            treated as "balanced"
        constraints: Optional hard overrides:
            max_cost_per_request: float (USD)
            max_latency_ms: int
            min_context_tokens: int
            exclude_providers: list[str]
            only_providers: list[str]
            require_tool_calling: bool
            require_image_input: bool
            require_json_mode: bool
        force_refresh_registry: Force live registry refresh

    Returns:
        RoutingDecision (drop-in compatible with old engine output). Its
        ``explanation["hard_constraints_applied"]`` lists only the caps that
        were actually enforced, and ``explanation["constraints_relaxed"]``
        is "none", "cost_latency" or "all" depending on how far the engine
        had to back off to find a model.

    Raises:
        RuntimeError: If no model is available for the configured providers,
            or if scoring yields no candidates.
    """
    constraints = constraints or {}
    budget_mode = budget_mode if budget_mode in BUDGET_LAMBDA else "balanced"

    # 1. Get merged registry (baseline + live patch)
    registry = self._updater.get_registry(force_refresh=force_refresh_registry)
    all_models = registry.get("models", {})

    # 2. Build query utility profile from query features
    profile = self._build_utility_profile(query_features, constraints)

    # 3. Resolve available candidate pool (BYOK filter)
    candidates = self._filter_by_availability(all_models, constraints)

    if not candidates:
        raise RuntimeError(
            "No models available. Please provide at least one valid API key "
            "(OpenAI, Anthropic, Google, Mistral, DeepSeek) or run Ollama locally."
        )

    # 4. Apply hard constraints (context window, features, cost cap).
    #    Track which constraints end up enforced so the explanation does not
    #    claim a cap was applied after it was relaxed.
    enforced_constraints = constraints
    relaxation = "none"
    viable = self._apply_hard_constraints(candidates, profile, constraints)

    if not viable:
        # Relax hard constraints partially — fall back to best available
        logger.warning(
            "[Engine] No models passed hard constraints. "
            "Relaxing cost/latency caps and retrying."
        )
        enforced_constraints = {
            k: v for k, v in constraints.items()
            if k not in ("max_cost_per_request", "max_latency_ms")
        }
        relaxation = "cost_latency"
        viable = self._apply_hard_constraints(candidates, profile, enforced_constraints)

    if not viable:
        # Last resort: use all available candidates
        logger.warning("[Engine] Using all available candidates (no constraints).")
        enforced_constraints = {}
        relaxation = "all"
        viable = candidates

    # 5. Score each viable model by utility
    scored = self._score_candidates(viable, profile, budget_mode)

    if not scored:
        raise RuntimeError("Scoring produced no results. Check model registry integrity.")

    # Sort: highest utility first
    scored.sort(key=lambda x: x[1], reverse=True)

    best_id, best_score = scored[0]
    best_spec = viable[best_id]

    # The runner-up (if any) becomes the fallback model.
    fallback_id = None
    fallback_prov = None
    if len(scored) > 1:
        fallback_id = scored[1][0]
        fallback_prov = viable[fallback_id]["provider"]

    # 6. Estimate request cost
    est_cost = self._estimate_cost(best_spec, profile)

    # 7. Build explanation from the constraints that were really enforced
    explanation = self._build_explanation(
        scored, viable, profile, budget_mode, enforced_constraints, best_id
    )
    explanation["constraints_relaxed"] = relaxation

    return RoutingDecision(
        model_id=best_id,
        provider=best_spec["provider"],
        utility_score=round(best_score, 4),
        estimated_cost=round(est_cost, 8),
        input_cost_per_1k=best_spec["input_cost_per_1k"],
        output_cost_per_1k=best_spec["output_cost_per_1k"],
        context_window=best_spec["context_window"],
        fallback_model_id=fallback_id,
        fallback_provider=fallback_prov,
        explanation=explanation,
        capability_score=self._overall_capability(best_spec),
    )
|
| 274 |
+
|
| 275 |
+
# ── Step 2: Build Query Utility Profile ───────────────────────────────────
|
| 276 |
+
|
| 277 |
+
def _build_utility_profile(self, qf, constraints: dict) -> QueryUtilityProfile:
    """
    Convert QueryFeatures → QueryUtilityProfile.

    Works with both a QueryFeatures-style object (attribute access) and a
    plain dict. Domain flags on the input are translated into per-dimension
    importance weights; several domains may be active at once, in which case
    each dimension keeps the highest weight any active domain asked for.

    Args:
        qf: Query features, either an object with attributes or a dict.
        constraints: Caller constraints; the ``require_*`` flags and
            ``min_context_tokens`` are read here.

    Returns:
        A populated QueryUtilityProfile.
    """

    # Uniform accessor so the rest of the method ignores the input's type.
    def g(attr, default=False):
        if isinstance(qf, dict):
            return qf.get(attr, default)
        return getattr(qf, attr, default)

    profile = QueryUtilityProfile()

    # ── Dimension weights from domain flags ──────────────────────────────
    # These are NOT boolean — they express HOW IMPORTANT each dim is.
    # Multiple domains can be active simultaneously.

    if g("domain_reasoning") or g("requires_analysis") or g("requires_debate"):
        profile.reasoning = 0.85
        profile.factuality = 0.70

    if g("domain_code") or g("domain_coding"):
        profile.coding = 0.90
        profile.reasoning = max(profile.reasoning, 0.60)
        profile.instruction_following = max(profile.instruction_following, 0.70)

    if g("domain_math"):
        profile.math = 0.90
        profile.reasoning = max(profile.reasoning, 0.70)

    if g("domain_creative") or g("domain_creative_writing"):
        profile.creativity = 0.88
        profile.instruction_following = max(profile.instruction_following, 0.60)

    if g("domain_factual") or g("domain_science"):
        profile.factuality = max(profile.factuality, 0.80)
        profile.reasoning = max(profile.reasoning, 0.55)

    if g("domain_summarization"):
        profile.summarization = 0.85
        profile.long_context = 0.60

    if g("domain_translation") or g("domain_multilingual"):
        profile.multilingual = 0.90
        profile.factuality = max(profile.factuality, 0.60)

    # NOTE(review): domain_factual also raises the conversation weight here;
    # confirm that this is intended and not a copy-paste of the flag name.
    if g("domain_conversational") or g("domain_factual"):
        profile.conversation = 0.70

    # Multi-step / complex reasoning boost
    if g("multi_step") or g("requires_comparison"):
        profile.reasoning = min(1.0, profile.reasoning + 0.15)

    # Expert-level signal — raise the bar on reasoning, coding and math,
    # but only where that dimension is already active (weight > 0).
    if g("_expert_signal") or g("expert_signal"):
        for dim in ["reasoning", "coding", "math"]:
            val = getattr(profile, dim)
            if val > 0:
                setattr(profile, dim, min(1.0, val + 0.10))

    # Tool use requirement
    if g("requires_tool_use") or g("has_tool_calls") or constraints.get("require_tool_calling"):
        profile.tool_use = 0.80
        profile.requires_tool_calling = True

    # Image input requirement
    if g("has_image") or constraints.get("require_image_input"):
        profile.requires_image_input = True

    # JSON mode requirement
    if g("requires_json") or constraints.get("require_json_mode"):
        profile.requires_json_mode = True

    # Context window requirement.
    # `or 0` tolerates an explicit None for either value instead of raising
    # TypeError in the arithmetic below.
    token_count = g("token_count", 0) or 0
    min_ctx = constraints.get("min_context_tokens", 0) or 0
    profile.min_context_tokens = max(
        int(min_ctx),
        int(token_count * 3)   # conservative: input tokens × 3 headroom
    )
    profile.estimated_input_tokens = max(int(token_count), 100)

    # Estimate output length from the coarse bucket; unknown buckets are
    # treated as "medium".
    output_len_map = {
        "short": 300,
        "medium": 700,
        "long": 1500,
        "very_long": 3000,
    }
    est_output = g("estimated_output_length", "medium")
    profile.estimated_output_tokens = output_len_map.get(
        str(est_output).lower(), 700
    )

    # Primary domain label: first matching flag wins. Every alias flag that
    # sets weights above also appears here, so a query flagged only with an
    # alias is not labelled "general".
    domain_priority = [
        ("domain_code", "coding"),
        ("domain_coding", "coding"),
        ("domain_math", "math"),
        ("domain_reasoning", "reasoning"),
        ("domain_creative", "creative"),
        ("domain_creative_writing", "creative"),
        ("domain_science", "science"),
        ("domain_summarization", "summarization"),
        ("domain_translation", "translation"),
        ("domain_multilingual", "translation"),
        ("domain_factual", "factual"),
        ("domain_conversational", "conversational"),
    ]
    for flag, label in domain_priority:
        if g(flag):
            profile.primary_domain = label
            break

    return profile
|
| 391 |
+
|
| 392 |
+
# ── Step 3: BYOK Provider Filter ─────────────────────────────────────────
|
| 393 |
+
|
| 394 |
+
def _filter_by_availability(self, all_models: dict, constraints: dict) -> dict:
    """
    Filter models to only those whose provider has an available API key.

    Respects:
      - available_keys (BYOK)
      - constraints["exclude_providers"]
      - constraints["only_providers"]

    Provider names are compared case-insensitively. A missing, None or
    empty ``exclude_providers`` / ``only_providers`` means "no restriction".

    Args:
        all_models: Mapping of model id to its registry spec.
        constraints: Caller constraints dict.

    Returns:
        The subset of ``all_models`` that may be used, keyed by model id.
    """
    # `or []` makes an explicit None behave like an absent key, matching how
    # only_providers is treated below.
    exclude = {p.lower() for p in (constraints.get("exclude_providers") or [])}
    only_requested = constraints.get("only_providers")
    only = {p.lower() for p in only_requested} if only_requested else None

    available = {}
    for mid, spec in all_models.items():
        # A spec with provider missing or None is treated as provider "".
        provider = (spec.get("provider") or "").lower()

        # Must have a key for this provider
        if provider not in self._available_keys:
            continue

        # Respect exclude list
        if provider in exclude:
            continue

        # Respect only list
        if only and provider not in only:
            continue

        available[mid] = spec

    logger.debug(
        f"[Engine] Available candidate pool: {len(available)} models "
        f"from providers: {set(s['provider'] for s in available.values())}"
    )
    return available
|
| 430 |
+
|
| 431 |
+
# ── Step 4: Hard Constraints Filter ──────────────────────────────────────
|
| 432 |
+
|
| 433 |
+
def _apply_hard_constraints(
    self, candidates: dict, profile: QueryUtilityProfile, constraints: dict
) -> dict:
    """
    Filter candidates by hard constraints.

    A model is dropped when any of these holds:
      - its context window is smaller than ``profile.min_context_tokens``
        (a missing ``context_window`` counts as 0);
      - the profile requires tool calling / image input / JSON mode and the
        model's ``features`` do not advertise it;
      - ``constraints["max_cost_per_request"]`` is set and the estimated
        request cost exceeds it;
      - ``constraints["max_latency_ms"]`` is set and the model's
        ``avg_latency_ms`` exceeds it (a missing latency counts as 99999).

    Returns a potentially empty dict — caller handles the empty case.
    """
    viable = {}
    max_cost = constraints.get("max_cost_per_request")    # USD
    max_latency = constraints.get("max_latency_ms")       # ms

    for mid, spec in candidates.items():
        # Context window check. Read the value once so the log line cannot
        # raise KeyError for a spec that lacks "context_window".
        context_window = spec.get("context_window", 0)
        if context_window < profile.min_context_tokens:
            logger.debug(f"[Filter] {mid}: context too small "
                         f"({context_window} < {profile.min_context_tokens})")
            continue

        # A spec with features missing or None advertises nothing.
        features = spec.get("features") or {}

        # Feature: tool calling
        if profile.requires_tool_calling and not features.get("tool_calling"):
            logger.debug(f"[Filter] {mid}: no tool_calling support")
            continue

        # Feature: image input
        if profile.requires_image_input and not features.get("image_input"):
            logger.debug(f"[Filter] {mid}: no image_input support")
            continue

        # Feature: json mode
        if profile.requires_json_mode and not features.get("json_mode"):
            logger.debug(f"[Filter] {mid}: no json_mode support")
            continue

        # Cost cap
        if max_cost is not None:
            est = self._estimate_cost(spec, profile)
            if est > max_cost:
                logger.debug(f"[Filter] {mid}: cost {est:.6f} > cap {max_cost}")
                continue

        # Latency cap
        if max_latency is not None:
            if spec.get("avg_latency_ms", 99999) > max_latency:
                logger.debug(f"[Filter] {mid}: latency too high")
                continue

        viable[mid] = spec

    return viable
|
| 482 |
+
|
| 483 |
+
# ── Step 5: Utility Scoring ───────────────────────────────────────────────
|
| 484 |
+
|
| 485 |
+
def _score_candidates(
    self, candidates: dict, profile: QueryUtilityProfile, budget_mode: str
) -> list:
    """
    Score each candidate model with:

        U(m, q) = (Σ wᵢ · capᵢ(m)) / (Σ wᵢ) − (λ · cost_norm(m)) / (1 + λ)

    where:
        wᵢ        = importance weight for capability dimension i (from profile)
        capᵢ(m)   = model m's score on dimension i (from the registry;
                    a missing dimension counts as 0.0)
        λ         = budget penalty (from BUDGET_LAMBDA)
        cost_norm = model's estimated request cost, log-scaled and normalized
                    across the candidates (0 = cheapest, 1 = most expensive)

    Ollama models additionally lose 0.12 / (1 + λ).

    Models scoring below MIN_UTILITY_THRESHOLD are dropped unless the pool
    holds two or fewer candidates.

    Args:
        candidates: Mapping of model id to registry spec.
        profile: Utility profile for the query.
        budget_mode: Key into BUDGET_LAMBDA.

    Returns:
        List of (model_id, utility_score) tuples, unsorted. Empty when
        ``candidates`` is empty.
    """
    # Nothing to score; also keeps max()/min() below from raising on an
    # empty pool.
    if not candidates:
        return []

    lam = BUDGET_LAMBDA[budget_mode]

    # Dimension weights from profile
    dimension_weights = {
        "reasoning": profile.reasoning,
        "coding": profile.coding,
        "math": profile.math,
        "creativity": profile.creativity,
        "factuality": profile.factuality,
        "instruction_following": profile.instruction_following,
        "long_context": profile.long_context,
        "multilingual": profile.multilingual,
        "tool_use": profile.tool_use,
        "summarization": profile.summarization,
        "conversation": profile.conversation,
    }

    # Only keep dimensions with non-zero weight
    active_dims = {k: w for k, w in dimension_weights.items() if w > 0}
    total_weight = sum(active_dims.values())
    if total_weight == 0:
        # Pathological case: no signals → use instruction_following as baseline
        active_dims = {"instruction_following": 1.0, "conversation": 0.5}
        total_weight = 1.5

    # Compute raw costs for normalization
    costs = {
        mid: self._estimate_cost(spec, profile)
        for mid, spec in candidates.items()
    }

    # Log-scale normalization: separates $0.0001 from $0.003 from $0.020
    # meaningfully — linear scale collapses these differences when one
    # expensive model anchors the range.
    # Zero-cost models land at the bottom of the range (cost_norm = 0).
    LOG_EPS = 1e-7  # prevents log(0)
    log_costs = {mid: math.log(c + LOG_EPS) for mid, c in costs.items()}
    log_max = max(log_costs.values())
    log_min = min(log_costs.values())
    # Floor the range so identical costs do not divide by zero.
    log_range = max(log_max - log_min, 1e-9)

    scored = []
    for mid, spec in candidates.items():
        caps = spec.get("capabilities", {})

        # Weighted capability sum
        cap_sum = sum(
            w * caps.get(dim, 0.0)
            for dim, w in active_dims.items()
        )
        # Weighted mean; within [0, 1] assuming registry scores are in [0, 1].
        cap_score = cap_sum / total_weight

        # Cost normalization on log scale (0 = cheapest, 1 = most expensive)
        cost_norm = (log_costs[mid] - log_min) / log_range

        # Final utility. Dividing by (1 + λ) caps the cost penalty at
        # λ / (1 + λ), i.e. strictly below 1, however large λ is.
        utility = cap_score - (lam * cost_norm) / (1 + lam)

        # Provider-tier adjustment:
        # Ollama (free, local) is great for "cheap" mode but should not
        # dominate "balanced" or "quality" modes.
        # Compared case-insensitively, like the availability filter.
        provider = (spec.get("provider") or "").lower()
        if provider == "ollama":
            # cheap (λ=3): ≈0.030 | balanced (λ=1.2): ≈0.055 | quality (λ=0.3): ≈0.092
            ollama_penalty = 0.12 / (1 + lam)
            utility -= ollama_penalty

        if utility >= MIN_UTILITY_THRESHOLD or len(candidates) <= 2:
            scored.append((mid, utility))

        logger.debug(
            f"[Score] {mid}: cap={cap_score:.3f} cost_norm={cost_norm:.3f} "
            f"U={utility:.4f} (λ={lam})"
        )

    return scored
|
| 580 |
+
|
| 581 |
+
# ── Helpers ───────────────────────────────────────────────────────────────
|
| 582 |
+
|
| 583 |
+
def _estimate_cost(self, spec: dict, profile: QueryUtilityProfile) -> float:
|
| 584 |
+
"""Estimate USD cost for one request with this model."""
|
| 585 |
+
in_cost = spec.get("input_cost_per_1k", 0) * profile.estimated_input_tokens / 1000
|
| 586 |
+
out_cost = spec.get("output_cost_per_1k", 0) * profile.estimated_output_tokens / 1000
|
| 587 |
+
return in_cost + out_cost
|
| 588 |
+
|
| 589 |
+
def _overall_capability(self, spec: dict) -> float:
|
| 590 |
+
"""Single overall capability score for a model (for legacy field compatibility)."""
|
| 591 |
+
caps = spec.get("capabilities", {})
|
| 592 |
+
weights = {"reasoning": 0.30, "coding": 0.25, "math": 0.15,
|
| 593 |
+
"instruction_following": 0.15, "factuality": 0.15}
|
| 594 |
+
return round(
|
| 595 |
+
sum(caps.get(k, 0) * w for k, w in weights.items()), 4
|
| 596 |
+
)
|
| 597 |
+
|
| 598 |
+
def _build_explanation(
    self,
    scored: list,
    viable: dict,
    profile: QueryUtilityProfile,
    budget_mode: str,
    constraints: dict,
    winner_id: str,
) -> dict:
    """
    Assemble the explainability dict for a routing decision (this replaces
    the old rationale string).

    Args:
        scored: (model_id, utility) pairs; the first five form the shortlist,
            so the caller is expected to pass them best-first.
        viable: Mapping of model id to registry spec for every scored model.
        profile: Utility profile the query was scored against.
        budget_mode: Key into BUDGET_LAMBDA.
        constraints: Constraints dict; only the cost, latency and
            minimum-context entries are echoed back.
        winner_id: Id of the selected model; must be a key of ``viable``.

    Returns:
        A dict describing the selection, the query's top dimensions, the
        requirements, token estimates and the shortlist.
    """
    chosen_spec = viable[winner_id]

    # The (up to) four highest-weighted capability dimensions of the query.
    dimension_names = (
        "reasoning",
        "coding",
        "math",
        "creativity",
        "factuality",
        "instruction_following",
        "long_context",
        "multilingual",
        "tool_use",
        "summarization",
        "conversation",
    )
    weighted_dims = [(name, getattr(profile, name)) for name in dimension_names]
    positive_dims = [pair for pair in weighted_dims if pair[1] > 0]
    positive_dims.sort(key=lambda pair: pair[1], reverse=True)
    leading_dims = positive_dims[:4]

    # Up to five scored models with their headline numbers.
    shortlist = []
    for candidate_id, utility in scored[:5]:
        candidate_spec = viable[candidate_id]
        shortlist.append({
            "model_id": candidate_id,
            "provider": candidate_spec["provider"],
            "utility_score": round(utility, 4),
            "capability": self._overall_capability(candidate_spec),
            "est_cost_usd": round(self._estimate_cost(candidate_spec, profile), 8),
        })

    reported_caps = ("max_cost_per_request", "max_latency_ms", "min_context_tokens")
    applied_caps = {
        name: value for name, value in constraints.items()
        if name in reported_caps
    }

    feature_requirements = {
        "tool_calling": profile.requires_tool_calling,
        "image_input": profile.requires_image_input,
        "json_mode": profile.requires_json_mode,
        "min_context": profile.min_context_tokens,
    }
    token_estimates = {
        "input": profile.estimated_input_tokens,
        "output": profile.estimated_output_tokens,
    }

    return {
        "selected_model": winner_id,
        "provider": chosen_spec["provider"],
        "budget_mode": budget_mode,
        "lambda": BUDGET_LAMBDA[budget_mode],
        "primary_domain": profile.primary_domain,
        "query_dimensions": {name: round(weight, 2) for name, weight in leading_dims},
        "hard_constraints_applied": applied_caps,
        "feature_requirements": feature_requirements,
        "estimated_tokens": token_estimates,
        "shortlist": shortlist,
        "candidates_evaluated": len(scored),
        "registry_source": chosen_spec.get("live_patch", {}).get("source", "baseline"),
    }
|
llmopt/registry/__init__.py
CHANGED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""LLMOpt registry package — hybrid model registry."""
|
| 2 |
+
from llmopt.registry.hybrid_updater import HybridRegistryUpdater
|
| 3 |
+
|
| 4 |
+
__all__ = ["HybridRegistryUpdater"]
|
llmopt/registry/hybrid_updater.py
ADDED
|
@@ -0,0 +1,267 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
LLMOpt — Hybrid Registry Updater
|
| 3 |
+
=================================
|
| 4 |
+
Strategy:
|
| 5 |
+
1. Load data/model_registry_v2.json as the authoritative capability baseline
|
| 6 |
+
(benchmark scores, context windows, feature support)
|
| 7 |
+
2. Fetch live data from OpenRouter API to patch:
|
| 8 |
+
- Current pricing (input/output cost per 1k)
|
| 9 |
+
- Model availability (is it still listed?)
|
| 10 |
+
- Any new models to flag for manual addition
|
| 11 |
+
3. Merge: registry baseline + live patch → runtime model pool
|
| 12 |
+
4. Cache the merged result for TTL minutes to avoid hammering the API
|
| 13 |
+
|
| 14 |
+
This runs at startup and on a background refresh cycle.
|
| 15 |
+
"""
|
| 16 |
+
|
| 17 |
+
import json
|
| 18 |
+
import time
|
| 19 |
+
import logging
|
| 20 |
+
import os
|
| 21 |
+
import copy
|
| 22 |
+
from datetime import datetime, timezone
|
| 23 |
+
from pathlib import Path
|
| 24 |
+
from typing import Optional
|
| 25 |
+
|
| 26 |
+
try:
|
| 27 |
+
import requests
|
| 28 |
+
REQUESTS_AVAILABLE = True
|
| 29 |
+
except ImportError:
|
| 30 |
+
REQUESTS_AVAILABLE = False
|
| 31 |
+
|
| 32 |
+
logger = logging.getLogger(__name__)
|
| 33 |
+
|
| 34 |
+
# ββ Constants ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 35 |
+
|
| 36 |
+
# V2 registry β benchmark-derived capability vectors
|
| 37 |
+
REGISTRY_PATH = Path(__file__).parent.parent.parent / "data" / "model_registry_v2.json"
|
| 38 |
+
|
| 39 |
+
OPENROUTER_MODELS_URL = "https://openrouter.ai/api/v1/models"
|
| 40 |
+
CACHE_TTL_SECONDS = 1800 # 30 minutes β pricing changes infrequently
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
# ββ Provider β OpenRouter prefix map βββββββββββββββββββββββββββββββββββββββββ
|
| 44 |
+
|
| 45 |
+
# OpenRouter model IDs follow the pattern: "provider/model-name"
|
| 46 |
+
# This maps our registry provider names to OpenRouter's prefix scheme
|
| 47 |
+
# Registry provider name -> namespace prefix used in OpenRouter model IDs.
# OpenRouter publishes Mistral models under "mistralai/" (not "mistral/"),
# and Meta's Llama models under "meta-llama/".
PROVIDER_PREFIX_MAP = {
    "openai": "openai/",
    "anthropic": "anthropic/",
    "google": "google/",
    "mistral": "mistralai/",
    "deepseek": "deepseek/",
    "meta": "meta-llama/",
}
|
| 55 |
+
|
| 56 |
+
# Maps our registry model_id → OpenRouter model id (where they differ)
|
| 57 |
+
# Registry model_id -> OpenRouter model id.
# A registry model is only live-patched when its alias matches an id returned
# by the OpenRouter models endpoint exactly, so the slugs below must follow
# OpenRouter's naming: dotted version numbers for Anthropic models and the
# "mistralai/" namespace for Mistral models.
MODEL_ID_ALIASES = {
    "gpt-4o": "openai/gpt-4o",
    "gpt-4o-mini": "openai/gpt-4o-mini",
    "gpt-4.1": "openai/gpt-4.1",
    "gpt-4.1-mini": "openai/gpt-4.1-mini",
    "claude-opus-4-5": "anthropic/claude-opus-4.5",
    "claude-sonnet-4-5": "anthropic/claude-sonnet-4.5",
    "claude-haiku-3-5": "anthropic/claude-3.5-haiku",
    "gemini-2.5-pro": "google/gemini-2.5-pro",
    "gemini-2.5-flash": "google/gemini-2.5-flash",
    # NOTE(review): the 1.5 registry entry is patched with 2.5-flash pricing;
    # presumably deliberate because 1.5 is no longer listed - confirm.
    "gemini-1.5-flash": "google/gemini-2.5-flash",
    "mistral-large-latest": "mistralai/mistral-large",
    "mistral-small-latest": "mistralai/mistral-small",
    "deepseek-chat": "deepseek/deepseek-chat",
    "deepseek-reasoner": "deepseek/deepseek-r1",
    # Ollama is local — no OpenRouter equivalent
}
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
# ββ Main Updater Class ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 77 |
+
|
| 78 |
+
class HybridRegistryUpdater:
    """
    Loads the registry JSON baseline and patches it with live OpenRouter data.

    The baseline file supplies capability scores and feature support; the
    live patch only overrides pricing and context window for models that
    have an entry in MODEL_ID_ALIASES. If the live fetch fails, the merged
    registry is built from the baseline (plus any earlier successful patch).

    Usage:
        updater = HybridRegistryUpdater()
        registry = updater.get_registry()  # always returns a valid registry
    """

    def __init__(self, openrouter_api_key: Optional[str] = None):
        """
        Args:
            openrouter_api_key: Optional API key; falls back to the
                OPENROUTER_API_KEY environment variable.

        Raises:
            FileNotFoundError: if the baseline registry file is missing.
        """
        self._baseline: dict = {}
        self._live_patch: dict = {}    # openrouter_model_id -> pricing dict
        self._merged: dict = {}        # final merged runtime registry
        self._cache_timestamp: float = 0.0
        # ISO timestamp of the last *successful* live fetch (None = never).
        self._last_live_fetch: Optional[str] = None
        self._openrouter_key = openrouter_api_key or os.getenv("OPENROUTER_API_KEY", "")

        # Load baseline immediately (synchronous — always available)
        self._load_baseline()

    # ── Public API ────────────────────────────────────────────────────────────

    def get_registry(self, force_refresh: bool = False) -> dict:
        """
        Returns the merged registry dict.

        Refreshes the live patch if the cache is older than CACHE_TTL_SECONDS,
        if nothing has been merged yet, or if force_refresh=True.
        Falls back gracefully to baseline if live fetch fails.

        The cache timestamp is advanced even when the fetch fails, so a
        failing endpoint is retried at most once per TTL window.

        The returned dict is the internal cached object, not a copy.
        """
        now = time.time()
        cache_expired = (now - self._cache_timestamp) > CACHE_TTL_SECONDS

        if force_refresh or cache_expired or not self._merged:
            self._refresh_live_patch()
            self._build_merged()
            self._cache_timestamp = now

        return self._merged

    def get_model(self, model_id: str) -> Optional[dict]:
        """Returns a single model's merged spec, or None if not found."""
        registry = self.get_registry()
        return registry.get("models", {}).get(model_id)

    def list_available_for_providers(self, available_providers: set) -> dict:
        """
        Returns only models whose provider is in available_providers.

        No provider is special-cased: local providers such as 'ollama' are
        returned only when the caller includes them in available_providers.
        """
        registry = self.get_registry()
        return {
            mid: spec
            for mid, spec in registry.get("models", {}).items()
            if spec.get("provider") in available_providers
        }

    def get_last_updated(self) -> str:
        """Returns the last cache refresh time as ISO-8601 UTC, or "never"."""
        if not self._cache_timestamp:
            return "never"
        return datetime.fromtimestamp(self._cache_timestamp, tz=timezone.utc).isoformat()

    # ── Internal: Load Baseline ───────────────────────────────────────────────

    def _load_baseline(self):
        """Load registry JSON from disk. Dies loudly if missing — it's required."""
        if not REGISTRY_PATH.exists():
            raise FileNotFoundError(
                f"Model registry not found at {REGISTRY_PATH}. "
                "This file is required for LLMOpt to function."
            )
        # Explicit encoding: the platform default is not UTF-8 everywhere.
        with open(REGISTRY_PATH, "r", encoding="utf-8") as f:
            self._baseline = json.load(f)
        logger.info(
            f"[Registry] Loaded baseline: {len(self._baseline.get('models', {}))} models"
        )

    # ── Internal: Live Patch from OpenRouter ──────────────────────────────────

    @staticmethod
    def _parse_price(value) -> Optional[float]:
        """
        Convert an OpenRouter per-token price into a per-1k-token float.

        Returns None when the price is absent, unparseable or negative, so
        the caller keeps the baseline price instead of overwriting it with a
        bogus value. An explicit zero ("0") is preserved as 0.0.
        """
        if value is None or value == "":
            return None
        try:
            per_token = float(value)
        except (ValueError, TypeError):
            return None
        # Rejects negative sentinels; NaN also fails this comparison.
        if not per_token >= 0:
            return None
        return round(per_token * 1000, 8)

    @staticmethod
    def _parse_live_payload(data, fetched_at: str) -> Optional[dict]:
        """
        Turn the decoded models response into {openrouter_id: patch_dict}.

        Returns None when the payload does not have the expected
        {"data": [...]} shape, so a malformed response never replaces a
        previously fetched patch. Malformed individual entries are skipped.
        """
        if not isinstance(data, dict):
            return None
        entries = data.get("data")
        if not isinstance(entries, list):
            return None

        patch = {}
        for model in entries:
            if not isinstance(model, dict):
                continue
            model_id = model.get("id")
            if not model_id:
                continue
            pricing = model.get("pricing")
            if not isinstance(pricing, dict):
                pricing = {}  # "pricing": null or missing -> prices unknown

            # OpenRouter returns pricing as strings like "0.000002" per token
            # We normalize to per-1k-token cost (float)
            patch[model_id] = {
                "input_cost_per_1k": HybridRegistryUpdater._parse_price(pricing.get("prompt")),
                "output_cost_per_1k": HybridRegistryUpdater._parse_price(pricing.get("completion")),
                "context_window": model.get("context_length"),
                "available_on_openrouter": True,
                "fetched_at": fetched_at,
            }
        return patch

    def _refresh_live_patch(self):
        """
        Fetch current model list + pricing from OpenRouter.

        Stores results in self._live_patch keyed by openrouter model id.
        Never raises: on any fetch or parse problem it logs a warning and
        leaves the previous patch untouched — baseline is always the fallback.
        """
        if not REQUESTS_AVAILABLE:
            logger.warning("[Registry] 'requests' not installed. Skipping live patch.")
            return

        headers = {"Content-Type": "application/json"}
        if self._openrouter_key:
            headers["Authorization"] = f"Bearer {self._openrouter_key}"

        try:
            resp = requests.get(
                OPENROUTER_MODELS_URL,
                headers=headers,
                timeout=8,
            )
            resp.raise_for_status()
            data = resp.json()
        except Exception as e:  # deliberate best-effort boundary
            logger.warning(f"[Registry] Live fetch failed: {e}. Using baseline only.")
            return

        fetched_at = datetime.now(timezone.utc).isoformat()
        patch = self._parse_live_payload(data, fetched_at)
        if patch is None:
            logger.warning(
                "[Registry] Live fetch returned an unexpected payload. Using baseline only."
            )
            return

        self._live_patch = patch
        self._last_live_fetch = fetched_at
        logger.info(f"[Registry] Live patch: {len(patch)} models from OpenRouter")

        # Flag new models we don't have in registry (for manual review)
        known_or_ids = set(MODEL_ID_ALIASES.values())
        for or_id in patch:
            if or_id not in known_or_ids:
                logger.debug(f"[Registry] Unknown OpenRouter model (not in registry): {or_id}")

    # ── Internal: Build Merged Registry ───────────────────────────────────────

    def _build_merged(self):
        """
        Merge baseline + live_patch into self._merged.

        Merge rules:
        - Capability scores: always from baseline (benchmark-sourced, stable)
        - Feature support: always from baseline
        - Pricing (cost/1k): live_patch wins if it has a value, else baseline
        - Context window: live_patch wins if non-null/non-zero, else baseline
        - live_patch metadata: stored in model["live_patch"] for observability

        The baseline is deep-copied, so it is never mutated by a merge.
        """
        merged = copy.deepcopy(self._baseline)
        models = merged.get("models", {})

        for our_model_id, spec in models.items():
            # Find the OpenRouter ID for this model
            or_id = MODEL_ID_ALIASES.get(our_model_id)
            patch = self._live_patch.get(or_id) if or_id else None
            if patch is None:
                spec["live_patch"] = {"source": "baseline_only"}
                continue

            # Price override (None means "unknown upstream": keep baseline)
            for price_key in ("input_cost_per_1k", "output_cost_per_1k"):
                if patch.get(price_key) is not None:
                    spec[price_key] = patch[price_key]

            # Context window override (OpenRouter may have more accurate values)
            if patch.get("context_window"):
                spec["context_window"] = patch["context_window"]

            # Store patch metadata for explainability
            spec["live_patch"] = {
                "source": "openrouter",
                "fetched_at": patch.get("fetched_at"),
                "input_cost_per_1k": patch.get("input_cost_per_1k"),
                "output_cost_per_1k": patch.get("output_cost_per_1k"),
            }

        merged["_runtime_meta"] = {
            # Time of the last successful fetch, not of this merge; None if
            # no live fetch has succeeded yet.
            "last_live_fetch": self._last_live_fetch,
            "last_merge": datetime.now(timezone.utc).isoformat(),
            "live_models_patched": sum(
                1 for s in models.values()
                if s.get("live_patch", {}).get("source") == "openrouter"
            ),
            "total_models": len(models),
        }

        self._merged = merged
        logger.info(
            f"[Registry] Merged registry ready: "
            f"{merged['_runtime_meta']['live_models_patched']} live-patched, "
            f"{merged['_runtime_meta']['total_models']} total"
        )
|
llmopt/router/model_router.py
CHANGED
|
@@ -53,26 +53,44 @@ class RoutedResponse:
|
|
| 53 |
|
| 54 |
# LiteLLM uses "provider/model" strings for non-OpenAI providers
|
| 55 |
_LITELLM_MODEL_MAP = {
|
| 56 |
-
# OpenAI
|
| 57 |
-
|
| 58 |
-
"gpt-4o
|
| 59 |
-
"gpt-
|
| 60 |
-
|
| 61 |
-
"
|
| 62 |
-
"
|
| 63 |
-
|
| 64 |
-
#
|
| 65 |
-
"
|
| 66 |
-
"
|
| 67 |
-
|
| 68 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
"mistral-large-latest": "mistral/mistral-large-latest",
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
#
|
| 73 |
-
"
|
| 74 |
-
"
|
| 75 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
}
|
| 77 |
|
| 78 |
_OLLAMA_PROVIDER = "ollama"
|
|
|
|
| 53 |
|
| 54 |
# LiteLLM uses "provider/model" strings for non-OpenAI providers
|
| 55 |
# Registry model_id -> model string passed to LiteLLM.
_LITELLM_MODEL_MAP = {
    # ── OpenAI ───────────────────────────────────────────────────────────────
    # no prefix needed for OpenAI models
    "gpt-4o": "gpt-4o",
    "gpt-4o-mini": "gpt-4o-mini",
    "gpt-4.1": "gpt-4.1",
    "gpt-4.1-mini": "gpt-4.1-mini",
    "gpt-3.5-turbo": "gpt-3.5-turbo",

    # ── Anthropic ────────────────────────────────────────────────────────────
    "claude-opus-4-5": "anthropic/claude-opus-4-5",
    "claude-sonnet-4-5": "anthropic/claude-sonnet-4-5",
    "claude-haiku-3-5": "anthropic/claude-3-5-haiku-20241022",
    # Legacy Anthropic IDs (V1 registry) — mapped to themselves, without the
    # "anthropic/" prefix used by the entries above.
    "claude-3-5-haiku-20241022": "claude-3-5-haiku-20241022",
    "claude-3-5-sonnet-20241022": "claude-3-5-sonnet-20241022",
    "claude-3-haiku-20240307": "claude-3-haiku-20240307",

    # ── Google ───────────────────────────────────────────────────────────────
    "gemini-2.5-pro": "gemini/gemini-2.5-pro",
    "gemini-2.5-flash": "gemini/gemini-2.5-flash",
    # NOTE(review): the 1.5 IDs below are routed to the 2.5 models —
    # presumably a deliberate upgrade path for V1 registry entries; confirm.
    "gemini-1.5-flash": "gemini/gemini-2.5-flash",
    "gemini-1.5-pro": "gemini/gemini-2.5-pro",

    # ── Mistral ──────────────────────────────────────────────────────────────
    "mistral-large-latest": "mistral/mistral-large-latest",
    "mistral-small-latest": "mistral/mistral-small-latest",

    # ── DeepSeek ─────────────────────────────────────────────────────────────
    "deepseek-chat": "deepseek/deepseek-chat",
    "deepseek-reasoner": "deepseek/deepseek-reasoner",

    # ── Ollama (local) ───────────────────────────────────────────────────────
    "llama3.3-70b": "ollama/llama3.3:70b",
    "llama3.2-vision": "ollama/llama3.2-vision",
    # Legacy Ollama IDs (V1 registry)
    "llama3.2:3b": "ollama/llama3.2:3b",
    "llama3.1:8b": "ollama/llama3.1:8b",
    "llama3.1:70b": "ollama/llama3.1:70b",
}
|
| 95 |
|
| 96 |
_OLLAMA_PROVIDER = "ollama"
|
llmopt/updater/__init__.py
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""LLMOpt updater package — adaptive runtime statistics."""
|
| 2 |
+
from llmopt.updater.adaptive_updater import AdaptiveRuntimeUpdater
|
| 3 |
+
|
| 4 |
+
__all__ = ["AdaptiveRuntimeUpdater"]
|
llmopt/updater/adaptive_updater.py
ADDED
|
@@ -0,0 +1,268 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
LLMOpt — Adaptive Runtime Statistics (EMA-Based)
|
| 3 |
+
=================================================
|
| 4 |
+
Lightweight online learning — NO RL, NO Optuna, NO GPU.
|
| 5 |
+
|
| 6 |
+
What this updates at runtime:
|
| 7 |
+
- avg_latency_ms (per model, exponential moving average)
|
| 8 |
+
- provider_reliability (rolling success rate)
|
| 9 |
+
- observed_utility (quality × cost-efficiency product, EMA)
|
| 10 |
+
|
| 11 |
+
These stats are combined with registry capability scores at routing time
|
| 12 |
+
to produce small dynamic adjustments. They do NOT overwrite benchmark scores.
|
| 13 |
+
|
| 14 |
+
Formula:
|
| 15 |
+
EMA update: s_new = α · s_old + (1 − α) · x_observed
|
| 16 |
+
where α = momentum (0.85–0.95 for stability)
|
| 17 |
+
|
| 18 |
+
Confidence decay:
|
| 19 |
+
If a model hasn't been observed recently, its runtime adjustment
|
| 20 |
+
fades back toward 0 (no adjustment), so baseline registry scores take over.
|
| 21 |
+
|
| 22 |
+
Storage: Simple JSON file (no DB needed for MVP).
|
| 23 |
+
Can be swapped for Redis or SQLite later.
|
| 24 |
+
"""
|
| 25 |
+
|
| 26 |
+
import json
|
| 27 |
+
import math
|
| 28 |
+
import logging
|
| 29 |
+
import os
|
| 30 |
+
import time
|
| 31 |
+
from dataclasses import dataclass, field, asdict
|
| 32 |
+
from datetime import datetime, timezone
|
| 33 |
+
from pathlib import Path
|
| 34 |
+
from typing import Optional
|
| 35 |
+
|
| 36 |
+
logger = logging.getLogger(__name__)
|
| 37 |
+
|
| 38 |
+
# Runtime stats persisted to data/ at project root
|
| 39 |
+
STATS_PATH = Path(__file__).parent.parent.parent / "data" / "runtime_stats.json"
|
| 40 |
+
|
| 41 |
+
# EMA momentum β higher = slower to update (more stable)
|
| 42 |
+
# 0.90 means new obs counts for 10% of the new value
|
| 43 |
+
LATENCY_ALPHA = 0.90
|
| 44 |
+
RELIABILITY_ALPHA = 0.92
|
| 45 |
+
UTILITY_ALPHA = 0.88
|
| 46 |
+
|
| 47 |
+
# Half-life of the confidence decay: after this many seconds without an observation, confidence is halved
|
| 48 |
+
CONFIDENCE_DECAY_HALF_LIFE_SECONDS = 3 * 24 * 3600 # 3 days
|
| 49 |
+
|
| 50 |
+
# Minimum observations before runtime stats influence routing
|
| 51 |
+
MIN_OBS_FOR_INFLUENCE = 5
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
@dataclass
class ModelRuntimeStats:
    """Per-model runtime statistics tracked by AdaptiveRuntimeUpdater.

    Instances are serialized with dataclasses.asdict() and rebuilt with
    ModelRuntimeStats(**data), so field names form the on-disk schema.
    """

    model_id: str
    obs_count: int = 0               # number of recorded outcomes
    ema_latency_ms: float = 0.0      # 0 = no data yet
    ema_reliability: float = 1.0     # starts optimistic
    ema_utility: float = 0.0         # 0 = no feedback yet
    last_observed_ts: float = 0.0    # unix timestamp
    confidence: float = 0.0          # 0–1, grows with observations

    # Raw accumulators for logging
    total_successes: int = 0
    total_failures: int = 0
    total_requests: int = 0
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
class AdaptiveRuntimeUpdater:
|
| 71 |
+
"""
|
| 72 |
+
Tracks per-model runtime statistics and provides small adjustments
|
| 73 |
+
to utility scores during routing.
|
| 74 |
+
|
| 75 |
+
Usage:
|
| 76 |
+
updater = AdaptiveRuntimeUpdater()
|
| 77 |
+
updater.record_outcome(model_id, latency_ms=1200, success=True, quality_score=8.5)
|
| 78 |
+
adjustment = updater.get_utility_adjustment(model_id)
|
| 79 |
+
"""
|
| 80 |
+
|
| 81 |
+
def __init__(self, stats_path: Optional[Path] = None):
|
| 82 |
+
self._path = stats_path or STATS_PATH
|
| 83 |
+
self._stats: dict[str, ModelRuntimeStats] = {}
|
| 84 |
+
self._load()
|
| 85 |
+
|
| 86 |
+
# ββ Public API ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 87 |
+
|
| 88 |
+
def record_outcome(
|
| 89 |
+
self,
|
| 90 |
+
model_id: str,
|
| 91 |
+
latency_ms: Optional[float] = None,
|
| 92 |
+
success: bool = True,
|
| 93 |
+
quality_score: Optional[float] = None, # 1β10 from LLMJudge, or None
|
| 94 |
+
cost_usd: Optional[float] = None,
|
| 95 |
+
):
|
| 96 |
+
"""
|
| 97 |
+
Record a single routing outcome for a model.
|
| 98 |
+
Called after each LLM API response.
|
| 99 |
+
|
| 100 |
+
quality_score: optional 1β10 score (from LLMJudge or user feedback)
|
| 101 |
+
"""
|
| 102 |
+
stats = self._get_or_create(model_id)
|
| 103 |
+
now = time.time()
|
| 104 |
+
|
| 105 |
+
stats.obs_count += 1
|
| 106 |
+
stats.total_requests += 1
|
| 107 |
+
stats.last_observed_ts = now
|
| 108 |
+
|
| 109 |
+
# ββ Latency EMA ββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 110 |
+
if latency_ms is not None and latency_ms > 0:
|
| 111 |
+
if stats.ema_latency_ms == 0.0:
|
| 112 |
+
# Cold start: initialize to first observation
|
| 113 |
+
stats.ema_latency_ms = latency_ms
|
| 114 |
+
else:
|
| 115 |
+
stats.ema_latency_ms = (
|
| 116 |
+
LATENCY_ALPHA * stats.ema_latency_ms +
|
| 117 |
+
(1 - LATENCY_ALPHA) * latency_ms
|
| 118 |
+
)
|
| 119 |
+
|
| 120 |
+
# ββ Reliability EMA ββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 121 |
+
outcome_val = 1.0 if success else 0.0
|
| 122 |
+
if success:
|
| 123 |
+
stats.total_successes += 1
|
| 124 |
+
else:
|
| 125 |
+
stats.total_failures += 1
|
| 126 |
+
|
| 127 |
+
stats.ema_reliability = (
|
| 128 |
+
RELIABILITY_ALPHA * stats.ema_reliability +
|
| 129 |
+
(1 - RELIABILITY_ALPHA) * outcome_val
|
| 130 |
+
)
|
| 131 |
+
|
| 132 |
+
# ββ Utility EMA (from quality + cost efficiency) ββββββββββββββββββ
|
| 133 |
+
if quality_score is not None and cost_usd is not None and cost_usd > 0:
|
| 134 |
+
# Observed utility = quality (normalized to 0β1) Γ cost-efficiency
|
| 135 |
+
# cost_efficiency: higher means cheaper relative to quality delivered
|
| 136 |
+
q_norm = quality_score / 10.0
|
| 137 |
+
cost_eff = 1.0 / (1.0 + cost_usd * 100) # sigmoid-like penalty
|
| 138 |
+
obs_util = q_norm * (0.7 + 0.3 * cost_eff) # quality-dominant
|
| 139 |
+
|
| 140 |
+
if stats.ema_utility == 0.0:
|
| 141 |
+
stats.ema_utility = obs_util
|
| 142 |
+
else:
|
| 143 |
+
stats.ema_utility = (
|
| 144 |
+
UTILITY_ALPHA * stats.ema_utility +
|
| 145 |
+
(1 - UTILITY_ALPHA) * obs_util
|
| 146 |
+
)
|
| 147 |
+
elif quality_score is not None:
|
| 148 |
+
obs_util = quality_score / 10.0
|
| 149 |
+
if stats.ema_utility == 0.0:
|
| 150 |
+
stats.ema_utility = obs_util
|
| 151 |
+
else:
|
| 152 |
+
stats.ema_utility = (
|
| 153 |
+
UTILITY_ALPHA * stats.ema_utility +
|
| 154 |
+
(1 - UTILITY_ALPHA) * obs_util
|
| 155 |
+
)
|
| 156 |
+
|
| 157 |
+
# ββ Confidence βββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 158 |
+
# Grows with observations (saturates at 1.0 after ~50 obs)
|
| 159 |
+
stats.confidence = min(1.0, stats.obs_count / MIN_OBS_FOR_INFLUENCE) * \
|
| 160 |
+
self._time_decay_factor(stats.last_observed_ts)
|
| 161 |
+
|
| 162 |
+
logger.debug(
|
| 163 |
+
f"[Runtime] {model_id}: lat={stats.ema_latency_ms:.0f}ms "
|
| 164 |
+
f"rel={stats.ema_reliability:.3f} util={stats.ema_utility:.3f} "
|
| 165 |
+
f"conf={stats.confidence:.3f} n={stats.obs_count}"
|
| 166 |
+
)
|
| 167 |
+
|
| 168 |
+
# Persist every 10 observations to avoid too many writes
|
| 169 |
+
if stats.obs_count % 10 == 0:
|
| 170 |
+
self._save()
|
| 171 |
+
|
| 172 |
+
def get_utility_adjustment(self, model_id: str) -> float:
|
| 173 |
+
"""
|
| 174 |
+
Returns a small adjustment β [-0.15, +0.15] to add to the
|
| 175 |
+
utility score during routing.
|
| 176 |
+
|
| 177 |
+
Returns 0.0 if we don't have enough observations yet
|
| 178 |
+
(< MIN_OBS_FOR_INFLUENCE), ensuring cold start doesn't distort routing.
|
| 179 |
+
|
| 180 |
+
The adjustment is intentionally small β runtime observations refine
|
| 181 |
+
the routing, they don't override benchmark-based capability scores.
|
| 182 |
+
"""
|
| 183 |
+
stats = self._stats.get(model_id)
|
| 184 |
+
if not stats or stats.obs_count < MIN_OBS_FOR_INFLUENCE:
|
| 185 |
+
return 0.0
|
| 186 |
+
|
| 187 |
+
conf = stats.confidence
|
| 188 |
+
if conf < 0.1:
|
| 189 |
+
return 0.0
|
| 190 |
+
|
| 191 |
+
# Reliability penalty (poor reliability β negative adjustment)
|
| 192 |
+
reliability_adj = (stats.ema_reliability - 0.95) * 0.5
|
| 193 |
+
# e.g. 90% reliability β (0.90 - 0.95) * 0.5 = -0.025
|
| 194 |
+
|
| 195 |
+
# Utility signal (if we have quality feedback)
|
| 196 |
+
utility_adj = 0.0
|
| 197 |
+
if stats.ema_utility > 0:
|
| 198 |
+
utility_adj = (stats.ema_utility - 0.7) * 0.2
|
| 199 |
+
# e.g. avg quality 8/10 = 0.8 β (0.8 - 0.7) * 0.2 = +0.02
|
| 200 |
+
|
| 201 |
+
total_adj = (reliability_adj + utility_adj) * conf
|
| 202 |
+
return max(-0.15, min(0.15, total_adj))
|
| 203 |
+
|
| 204 |
+
def get_latency_estimate(self, model_id: str) -> Optional[float]:
|
| 205 |
+
"""Returns EMA latency estimate if available, else None."""
|
| 206 |
+
stats = self._stats.get(model_id)
|
| 207 |
+
if stats and stats.ema_latency_ms > 0 and stats.obs_count >= 3:
|
| 208 |
+
return stats.ema_latency_ms
|
| 209 |
+
return None
|
| 210 |
+
|
| 211 |
+
def get_stats_summary(self, model_id: str) -> dict:
|
| 212 |
+
"""Returns full stats dict for observability / logging."""
|
| 213 |
+
stats = self._stats.get(model_id)
|
| 214 |
+
if not stats:
|
| 215 |
+
return {"model_id": model_id, "obs_count": 0, "status": "no_data"}
|
| 216 |
+
return {
|
| 217 |
+
**asdict(stats),
|
| 218 |
+
"success_rate": (
|
| 219 |
+
stats.total_successes / stats.total_requests
|
| 220 |
+
if stats.total_requests > 0 else None
|
| 221 |
+
),
|
| 222 |
+
}
|
| 223 |
+
|
| 224 |
+
def save(self):
|
| 225 |
+
"""Explicitly save stats to disk."""
|
| 226 |
+
self._save()
|
| 227 |
+
|
| 228 |
+
# ββ Internal ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 229 |
+
|
| 230 |
+
def _get_or_create(self, model_id: str) -> ModelRuntimeStats:
|
| 231 |
+
if model_id not in self._stats:
|
| 232 |
+
self._stats[model_id] = ModelRuntimeStats(model_id=model_id)
|
| 233 |
+
return self._stats[model_id]
|
| 234 |
+
|
| 235 |
+
def _time_decay_factor(self, last_ts: float) -> float:
|
| 236 |
+
"""
|
| 237 |
+
Returns 1.0 if recently observed, decays toward 0 if stale.
|
| 238 |
+
Uses exponential decay with CONFIDENCE_DECAY_HALF_LIFE_SECONDS.
|
| 239 |
+
"""
|
| 240 |
+
if last_ts == 0:
|
| 241 |
+
return 0.0
|
| 242 |
+
elapsed = time.time() - last_ts
|
| 243 |
+
half_life = CONFIDENCE_DECAY_HALF_LIFE_SECONDS
|
| 244 |
+
return math.exp(-math.log(2) * elapsed / half_life)
|
| 245 |
+
|
| 246 |
+
def _load(self):
|
| 247 |
+
if not self._path.exists():
|
| 248 |
+
logger.info("[Runtime] No existing stats file. Starting fresh.")
|
| 249 |
+
return
|
| 250 |
+
try:
|
| 251 |
+
with open(self._path, "r") as f:
|
| 252 |
+
raw = json.load(f)
|
| 253 |
+
for mid, data in raw.items():
|
| 254 |
+
self._stats[mid] = ModelRuntimeStats(**data)
|
| 255 |
+
logger.info(f"[Runtime] Loaded stats for {len(self._stats)} models.")
|
| 256 |
+
except Exception as e:
|
| 257 |
+
logger.warning(f"[Runtime] Failed to load stats: {e}. Starting fresh.")
|
| 258 |
+
|
| 259 |
+
def _save(self):
|
| 260 |
+
self._path.parent.mkdir(parents=True, exist_ok=True)
|
| 261 |
+
try:
|
| 262 |
+
with open(self._path, "w") as f:
|
| 263 |
+
json.dump(
|
| 264 |
+
{mid: asdict(s) for mid, s in self._stats.items()},
|
| 265 |
+
f, indent=2
|
| 266 |
+
)
|
| 267 |
+
except Exception as e:
|
| 268 |
+
logger.warning(f"[Runtime] Failed to save stats: {e}")
|