Shrot101 committed on
Commit
bd238e9
·
1 Parent(s): 2c126c1

feat: initialize core LLMOpt framework including model routing, optimization engines, and frontend dashboard infrastructure.

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. .dockerignore +9 -0
  2. .env.example +38 -8
  3. .gitignore +8 -0
  4. Dockerfile +0 -2
  5. data/model_registry.json +20 -20
  6. data/model_registry_v2.json +507 -0
  7. docs/design.md +361 -0
  8. frontend/.gitignore +24 -0
  9. frontend/README.md +50 -0
  10. frontend/eslint.config.js +28 -0
  11. frontend/index.html +16 -0
  12. frontend/package-lock.json +0 -0
  13. frontend/package.json +38 -0
  14. frontend/public/vite.svg +1 -0
  15. frontend/src/App.css +42 -0
  16. frontend/src/App.tsx +217 -0
  17. frontend/src/api.ts +153 -0
  18. frontend/src/assets/react.svg +1 -0
  19. frontend/src/index.css +68 -0
  20. frontend/src/main.tsx +10 -0
  21. frontend/src/pages/Analytics.tsx +385 -0
  22. frontend/src/pages/Login.tsx +209 -0
  23. frontend/src/pages/ModelRegistry.tsx +354 -0
  24. frontend/src/pages/Playground.tsx +606 -0
  25. frontend/src/pages/Settings.tsx +349 -0
  26. frontend/src/store.ts +88 -0
  27. frontend/src/theme.css +1982 -0
  28. frontend/src/types.ts +170 -0
  29. frontend/src/vite-env.d.ts +1 -0
  30. frontend/tsconfig.app.json +26 -0
  31. frontend/tsconfig.json +7 -0
  32. frontend/tsconfig.node.json +24 -0
  33. frontend/vite.config.ts +40 -0
  34. llmopt/analyzer/query_analyzer.py +15 -12
  35. llmopt/api/app.py +580 -13
  36. llmopt/api/crud.py +59 -0
  37. llmopt/api/security.py +186 -0
  38. llmopt/cache/redis_client.py +41 -0
  39. llmopt/core.py +208 -37
  40. llmopt/db/models.py +47 -0
  41. llmopt/db/session.py +34 -0
  42. llmopt/engine/__init__.py +15 -0
  43. llmopt/engine/llmopt_engine.py +275 -0
  44. llmopt/engine/optimization_engine.py +9 -2
  45. llmopt/engine/utility_engine.py +665 -0
  46. llmopt/registry/__init__.py +4 -0
  47. llmopt/registry/hybrid_updater.py +267 -0
  48. llmopt/router/model_router.py +37 -19
  49. llmopt/updater/__init__.py +4 -0
  50. llmopt/updater/adaptive_updater.py +268 -0
.dockerignore ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ .git
2
+ .github
3
+ __pycache__/
4
+ *.py[cod]
5
+ .pytest_cache/
6
+ llmopt.db
7
+ config/.env
8
+ tests/
9
+ .env
.env.example CHANGED
@@ -1,12 +1,42 @@
1
  # LLMOpt Environment Variables
2
 
3
- # OpenAI
4
- OPENAI_API_KEY=your_openai_api_key_here
 
 
 
 
5
 
6
- # Anthropic
7
- ANTHROPIC_API_KEY=your_anthropic_api_key_here
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
- # Redis Semantic Cache (V2)
10
- # Option 1: Local Docker -> redis://localhost:6379
11
- # Option 2: Redis Cloud -> redis://default:password@endpoint.redis-cloud.com:12345
12
- REDIS_URL=redis://localhost:6379
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  # LLMOpt Environment Variables
2
 
3
+ # ==========================================
4
+ # 1. Database & Redis Session Cache (Production)
5
+ # ==========================================
6
+ # PostgreSQL Database URL (e.g. Neon, Supabase, etc.)
7
+ # If not set, LLMOpt defaults to local SQLite.
8
+ DATABASE_URL=postgresql://user:password@ep-cool-fog-12345.aws.neon.tech/neondb?sslmode=require
9
 
10
+ # Upstash Redis or Redis Cloud connection string (Mandatory for sessions)
11
+ REDIS_URL=redis://default:password@endpoint.upstash.io:30000
12
+
13
+ # 32-byte base64-encoded session key for encrypting user API keys in transit/at rest.
14
+ # Generate in python with: cryptography.fernet.Fernet.generate_key().decode()
15
+ SESSION_SECRET_KEY=generate_your_own_32_byte_base64_key_here
16
+
17
+ # Session Time-to-Live (TTL) in seconds (default: 7200 seconds / 2 hours)
18
+ SESSION_TTL=7200
19
+
20
+ # ==========================================
21
+ # 2. Third-Party OAuth Sign-In (Optional)
22
+ # ==========================================
23
+ # Google OAuth
24
+ GOOGLE_CLIENT_ID=your_google_client_id.apps.googleusercontent.com
25
+ GOOGLE_CLIENT_SECRET=GOCSPX-your_google_client_secret_here
26
 
27
+ # GitHub OAuth
28
+ GITHUB_CLIENT_ID=your_github_client_id_here
29
+ GITHUB_CLIENT_SECRET=your_github_client_secret_here
30
+
31
+ # The base URL of the frontend for OAuth redirect callbacks (e.g. your Vercel URL)
32
+ REDIRECT_URI_HOST=https://your-frontend.vercel.app
33
+
34
+ # ==========================================
35
+ # 3. Direct LLM Provider Keys (Fallback / Local run only)
36
+ # ==========================================
37
+ # In Bring Your Own Key (BYOK) mode, these are not stored on the server.
38
+ # Provide them here only if running locally or using server-wide default keys.
39
+ OPENAI_API_KEY=your_openai_api_key_here
40
+ ANTHROPIC_API_KEY=your_anthropic_api_key_here
41
+ GEMINI_API_KEY=your_gemini_api_key_here
42
+ OLLAMA_API_BASE=http://localhost:11434
.gitignore CHANGED
@@ -51,3 +51,11 @@ coverage.xml
51
  .idea/
52
  *.swp
53
  *.swo
 
 
 
 
 
 
 
 
 
51
  .idea/
52
  *.swp
53
  *.swo
54
+
55
+ # Runtime/Database files
56
+ llmopt.db
57
+ data/runtime_stats.json
58
+
59
+ # Local Environment secrets
60
+ /config/.env
61
+
Dockerfile CHANGED
@@ -2,10 +2,8 @@ FROM python:3.10-slim
2
 
3
  # Install system dependencies
4
  # build-essential is needed for some ML package wheels
5
- # redis-server is needed for the local Semantic Caching layer
6
  RUN apt-get update && apt-get install -y \
7
  build-essential \
8
- redis-server \
9
  && rm -rf /var/lib/apt/lists/*
10
 
11
  # Set up a new user named "user" with user ID 1000 (Mandatory for Hugging Face Spaces)
 
2
 
3
  # Install system dependencies
4
  # build-essential is needed for some ML package wheels
 
5
  RUN apt-get update && apt-get install -y \
6
  build-essential \
 
7
  && rm -rf /var/lib/apt/lists/*
8
 
9
  # Set up a new user named "user" with user ID 1000 (Mandatory for Hugging Face Spaces)
data/model_registry.json CHANGED
@@ -84,32 +84,32 @@
84
  "notes": "Cheapest Anthropic model. Good for classification, summarization."
85
  },
86
  {
87
- "model_name": "gemini-1.5-flash",
88
  "provider": "google",
89
- "input_cost_per_1k": 0.000075,
90
- "output_cost_per_1k": 0.000300,
91
- "context_window": 1000000,
92
- "reasoning_score": 0.74,
93
- "coding_score": 0.74,
94
- "math_score": 0.70,
95
- "instruction_following_score": 0.78,
96
- "latency_score": 0.88,
97
- "max_complexity": 0.72,
98
- "notes": "Extremely cheap and fast. Long context support."
99
  },
100
  {
101
- "model_name": "gemini-1.5-pro",
102
  "provider": "google",
103
  "input_cost_per_1k": 0.00125,
104
- "output_cost_per_1k": 0.005,
105
- "context_window": 2000000,
106
- "reasoning_score": 0.88,
107
- "coding_score": 0.87,
108
- "math_score": 0.85,
109
- "instruction_following_score": 0.90,
110
- "latency_score": 0.72,
111
  "max_complexity": 0.95,
112
- "notes": "Massive context window. Great for long-doc analysis."
113
  },
114
  {
115
  "model_name": "mistral-small-latest",
 
84
  "notes": "Cheapest Anthropic model. Good for classification, summarization."
85
  },
86
  {
87
+ "model_name": "gemini-2.5-flash",
88
  "provider": "google",
89
+ "input_cost_per_1k": 0.00015,
90
+ "output_cost_per_1k": 0.0006,
91
+ "context_window": 1048576,
92
+ "reasoning_score": 0.83,
93
+ "coding_score": 0.82,
94
+ "math_score": 0.84,
95
+ "instruction_following_score": 0.85,
96
+ "latency_score": 0.90,
97
+ "max_complexity": 0.83,
98
+ "notes": "Very cheap and fast Gemini 2.5 model."
99
  },
100
  {
101
+ "model_name": "gemini-2.5-pro",
102
  "provider": "google",
103
  "input_cost_per_1k": 0.00125,
104
+ "output_cost_per_1k": 0.010,
105
+ "context_window": 1048576,
106
+ "reasoning_score": 0.94,
107
+ "coding_score": 0.92,
108
+ "math_score": 0.93,
109
+ "instruction_following_score": 0.92,
110
+ "latency_score": 0.75,
111
  "max_complexity": 0.95,
112
+ "notes": "Powerful Gemini 2.5 model with massive context window."
113
  },
114
  {
115
  "model_name": "mistral-small-latest",
data/model_registry_v2.json ADDED
@@ -0,0 +1,507 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_meta": {
3
+ "version": "2.0.0",
4
+ "description": "LLMOpt Utility-Based Model Registry. Scores sourced from LMSYS Arena, Artificial Analysis, HumanEval, MMLU-Pro, MATH, IFEval benchmarks. Pricing from provider docs + OpenRouter. Updated via hybrid fetcher.",
5
+ "last_updated": "2025-01-01T00:00:00Z",
6
+ "score_range": "All capability scores normalized 0.0–1.0",
7
+ "pricing_unit": "USD per 1000 tokens"
8
+ },
9
+ "models": {
10
+ "gpt-4o": {
11
+ "provider": "openai",
12
+ "model_family": "gpt-4o",
13
+ "context_window": 128000,
14
+ "max_output_tokens": 16384,
15
+ "input_cost_per_1k": 0.0025,
16
+ "output_cost_per_1k": 0.010,
17
+ "avg_latency_ms": 1800,
18
+ "tokens_per_second": 80,
19
+ "capabilities": {
20
+ "reasoning": 0.92,
21
+ "coding": 0.91,
22
+ "math": 0.87,
23
+ "creativity": 0.88,
24
+ "factuality": 0.89,
25
+ "instruction_following": 0.94,
26
+ "long_context": 0.85,
27
+ "multilingual": 0.84,
28
+ "tool_use": 0.93,
29
+ "summarization": 0.90,
30
+ "conversation": 0.91
31
+ },
32
+ "features": {
33
+ "tool_calling": true,
34
+ "json_mode": true,
35
+ "streaming": true,
36
+ "image_input": true,
37
+ "function_calling": true
38
+ },
39
+ "benchmark_sources": ["lmsys_arena", "mmlu_pro", "humaneval", "math_benchmark", "ifeval"],
40
+ "pricing_last_updated": "2025-01-01T00:00:00Z",
41
+ "live_patch": {}
42
+ },
43
+ "gpt-4o-mini": {
44
+ "provider": "openai",
45
+ "model_family": "gpt-4o",
46
+ "context_window": 128000,
47
+ "max_output_tokens": 16384,
48
+ "input_cost_per_1k": 0.00015,
49
+ "output_cost_per_1k": 0.0006,
50
+ "avg_latency_ms": 900,
51
+ "tokens_per_second": 120,
52
+ "capabilities": {
53
+ "reasoning": 0.78,
54
+ "coding": 0.76,
55
+ "math": 0.72,
56
+ "creativity": 0.74,
57
+ "factuality": 0.75,
58
+ "instruction_following": 0.82,
59
+ "long_context": 0.76,
60
+ "multilingual": 0.72,
61
+ "tool_use": 0.80,
62
+ "summarization": 0.78,
63
+ "conversation": 0.82
64
+ },
65
+ "features": {
66
+ "tool_calling": true,
67
+ "json_mode": true,
68
+ "streaming": true,
69
+ "image_input": true,
70
+ "function_calling": true
71
+ },
72
+ "benchmark_sources": ["lmsys_arena", "mmlu_pro", "humaneval"],
73
+ "pricing_last_updated": "2025-01-01T00:00:00Z",
74
+ "live_patch": {}
75
+ },
76
+ "gpt-4.1": {
77
+ "provider": "openai",
78
+ "model_family": "gpt-4.1",
79
+ "context_window": 1047576,
80
+ "max_output_tokens": 32768,
81
+ "input_cost_per_1k": 0.002,
82
+ "output_cost_per_1k": 0.008,
83
+ "avg_latency_ms": 1600,
84
+ "tokens_per_second": 85,
85
+ "capabilities": {
86
+ "reasoning": 0.93,
87
+ "coding": 0.95,
88
+ "math": 0.88,
89
+ "creativity": 0.87,
90
+ "factuality": 0.90,
91
+ "instruction_following": 0.95,
92
+ "long_context": 0.97,
93
+ "multilingual": 0.85,
94
+ "tool_use": 0.95,
95
+ "summarization": 0.92,
96
+ "conversation": 0.90
97
+ },
98
+ "features": {
99
+ "tool_calling": true,
100
+ "json_mode": true,
101
+ "streaming": true,
102
+ "image_input": true,
103
+ "function_calling": true
104
+ },
105
+ "benchmark_sources": ["openai_evals", "swe_bench", "humaneval"],
106
+ "pricing_last_updated": "2025-01-01T00:00:00Z",
107
+ "live_patch": {}
108
+ },
109
+ "gpt-4.1-mini": {
110
+ "provider": "openai",
111
+ "model_family": "gpt-4.1",
112
+ "context_window": 1047576,
113
+ "max_output_tokens": 32768,
114
+ "input_cost_per_1k": 0.0004,
115
+ "output_cost_per_1k": 0.0016,
116
+ "avg_latency_ms": 750,
117
+ "tokens_per_second": 140,
118
+ "capabilities": {
119
+ "reasoning": 0.80,
120
+ "coding": 0.82,
121
+ "math": 0.75,
122
+ "creativity": 0.76,
123
+ "factuality": 0.78,
124
+ "instruction_following": 0.85,
125
+ "long_context": 0.92,
126
+ "multilingual": 0.74,
127
+ "tool_use": 0.83,
128
+ "summarization": 0.80,
129
+ "conversation": 0.83
130
+ },
131
+ "features": {
132
+ "tool_calling": true,
133
+ "json_mode": true,
134
+ "streaming": true,
135
+ "image_input": true,
136
+ "function_calling": true
137
+ },
138
+ "benchmark_sources": ["openai_evals"],
139
+ "pricing_last_updated": "2025-01-01T00:00:00Z",
140
+ "live_patch": {}
141
+ },
142
+ "claude-opus-4-5": {
143
+ "provider": "anthropic",
144
+ "model_family": "claude-4",
145
+ "context_window": 200000,
146
+ "max_output_tokens": 32000,
147
+ "input_cost_per_1k": 0.015,
148
+ "output_cost_per_1k": 0.075,
149
+ "avg_latency_ms": 2500,
150
+ "tokens_per_second": 65,
151
+ "capabilities": {
152
+ "reasoning": 0.96,
153
+ "coding": 0.95,
154
+ "math": 0.91,
155
+ "creativity": 0.95,
156
+ "factuality": 0.93,
157
+ "instruction_following": 0.96,
158
+ "long_context": 0.94,
159
+ "multilingual": 0.87,
160
+ "tool_use": 0.94,
161
+ "summarization": 0.95,
162
+ "conversation": 0.96
163
+ },
164
+ "features": {
165
+ "tool_calling": true,
166
+ "json_mode": true,
167
+ "streaming": true,
168
+ "image_input": true,
169
+ "function_calling": true
170
+ },
171
+ "benchmark_sources": ["lmsys_arena", "mmlu_pro", "swe_bench", "humaneval", "math_benchmark"],
172
+ "pricing_last_updated": "2025-01-01T00:00:00Z",
173
+ "live_patch": {}
174
+ },
175
+ "claude-sonnet-4-5": {
176
+ "provider": "anthropic",
177
+ "model_family": "claude-4",
178
+ "context_window": 200000,
179
+ "max_output_tokens": 16000,
180
+ "input_cost_per_1k": 0.003,
181
+ "output_cost_per_1k": 0.015,
182
+ "avg_latency_ms": 1400,
183
+ "tokens_per_second": 90,
184
+ "capabilities": {
185
+ "reasoning": 0.91,
186
+ "coding": 0.93,
187
+ "math": 0.86,
188
+ "creativity": 0.90,
189
+ "factuality": 0.90,
190
+ "instruction_following": 0.93,
191
+ "long_context": 0.91,
192
+ "multilingual": 0.84,
193
+ "tool_use": 0.92,
194
+ "summarization": 0.91,
195
+ "conversation": 0.92
196
+ },
197
+ "features": {
198
+ "tool_calling": true,
199
+ "json_mode": true,
200
+ "streaming": true,
201
+ "image_input": true,
202
+ "function_calling": true
203
+ },
204
+ "benchmark_sources": ["lmsys_arena", "mmlu_pro", "swe_bench", "humaneval"],
205
+ "pricing_last_updated": "2025-01-01T00:00:00Z",
206
+ "live_patch": {}
207
+ },
208
+ "claude-haiku-3-5": {
209
+ "provider": "anthropic",
210
+ "model_family": "claude-3.5",
211
+ "context_window": 200000,
212
+ "max_output_tokens": 8192,
213
+ "input_cost_per_1k": 0.0008,
214
+ "output_cost_per_1k": 0.004,
215
+ "avg_latency_ms": 700,
216
+ "tokens_per_second": 150,
217
+ "capabilities": {
218
+ "reasoning": 0.74,
219
+ "coding": 0.77,
220
+ "math": 0.68,
221
+ "creativity": 0.72,
222
+ "factuality": 0.73,
223
+ "instruction_following": 0.80,
224
+ "long_context": 0.78,
225
+ "multilingual": 0.72,
226
+ "tool_use": 0.78,
227
+ "summarization": 0.76,
228
+ "conversation": 0.80
229
+ },
230
+ "features": {
231
+ "tool_calling": true,
232
+ "json_mode": true,
233
+ "streaming": true,
234
+ "image_input": true,
235
+ "function_calling": true
236
+ },
237
+ "benchmark_sources": ["lmsys_arena", "humaneval"],
238
+ "pricing_last_updated": "2025-01-01T00:00:00Z",
239
+ "live_patch": {}
240
+ },
241
+ "gemini-2.5-pro": {
242
+ "provider": "google",
243
+ "model_family": "gemini-2.5",
244
+ "context_window": 1048576,
245
+ "max_output_tokens": 65536,
246
+ "input_cost_per_1k": 0.00125,
247
+ "output_cost_per_1k": 0.010,
248
+ "avg_latency_ms": 2000,
249
+ "tokens_per_second": 75,
250
+ "capabilities": {
251
+ "reasoning": 0.94,
252
+ "coding": 0.92,
253
+ "math": 0.93,
254
+ "creativity": 0.88,
255
+ "factuality": 0.91,
256
+ "instruction_following": 0.92,
257
+ "long_context": 0.98,
258
+ "multilingual": 0.90,
259
+ "tool_use": 0.89,
260
+ "summarization": 0.92,
261
+ "conversation": 0.89
262
+ },
263
+ "features": {
264
+ "tool_calling": true,
265
+ "json_mode": true,
266
+ "streaming": true,
267
+ "image_input": true,
268
+ "function_calling": true
269
+ },
270
+ "benchmark_sources": ["lmsys_arena", "mmlu_pro", "math_benchmark", "humaneval"],
271
+ "pricing_last_updated": "2025-01-01T00:00:00Z",
272
+ "live_patch": {}
273
+ },
274
+ "gemini-2.5-flash": {
275
+ "provider": "google",
276
+ "model_family": "gemini-2.5",
277
+ "context_window": 1048576,
278
+ "max_output_tokens": 65536,
279
+ "input_cost_per_1k": 0.00015,
280
+ "output_cost_per_1k": 0.0006,
281
+ "avg_latency_ms": 800,
282
+ "tokens_per_second": 130,
283
+ "capabilities": {
284
+ "reasoning": 0.83,
285
+ "coding": 0.82,
286
+ "math": 0.84,
287
+ "creativity": 0.80,
288
+ "factuality": 0.82,
289
+ "instruction_following": 0.85,
290
+ "long_context": 0.95,
291
+ "multilingual": 0.84,
292
+ "tool_use": 0.82,
293
+ "summarization": 0.83,
294
+ "conversation": 0.84
295
+ },
296
+ "features": {
297
+ "tool_calling": true,
298
+ "json_mode": true,
299
+ "streaming": true,
300
+ "image_input": true,
301
+ "function_calling": true
302
+ },
303
+ "benchmark_sources": ["lmsys_arena", "mmlu_pro", "humaneval"],
304
+ "pricing_last_updated": "2025-01-01T00:00:00Z",
305
+ "live_patch": {}
306
+ },
307
+
308
+ "mistral-large-latest": {
309
+ "provider": "mistral",
310
+ "model_family": "mistral-large",
311
+ "context_window": 128000,
312
+ "max_output_tokens": 8192,
313
+ "input_cost_per_1k": 0.003,
314
+ "output_cost_per_1k": 0.009,
315
+ "avg_latency_ms": 1600,
316
+ "tokens_per_second": 75,
317
+ "capabilities": {
318
+ "reasoning": 0.82,
319
+ "coding": 0.82,
320
+ "math": 0.78,
321
+ "creativity": 0.78,
322
+ "factuality": 0.80,
323
+ "instruction_following": 0.84,
324
+ "long_context": 0.78,
325
+ "multilingual": 0.88,
326
+ "tool_use": 0.82,
327
+ "summarization": 0.82,
328
+ "conversation": 0.82
329
+ },
330
+ "features": {
331
+ "tool_calling": true,
332
+ "json_mode": true,
333
+ "streaming": true,
334
+ "image_input": false,
335
+ "function_calling": true
336
+ },
337
+ "benchmark_sources": ["lmsys_arena", "mmlu_pro", "humaneval"],
338
+ "pricing_last_updated": "2025-01-01T00:00:00Z",
339
+ "live_patch": {}
340
+ },
341
+ "mistral-small-latest": {
342
+ "provider": "mistral",
343
+ "model_family": "mistral-small",
344
+ "context_window": 32000,
345
+ "max_output_tokens": 8192,
346
+ "input_cost_per_1k": 0.0001,
347
+ "output_cost_per_1k": 0.0003,
348
+ "avg_latency_ms": 700,
349
+ "tokens_per_second": 140,
350
+ "capabilities": {
351
+ "reasoning": 0.68,
352
+ "coding": 0.68,
353
+ "math": 0.62,
354
+ "creativity": 0.66,
355
+ "factuality": 0.65,
356
+ "instruction_following": 0.72,
357
+ "long_context": 0.60,
358
+ "multilingual": 0.80,
359
+ "tool_use": 0.68,
360
+ "summarization": 0.70,
361
+ "conversation": 0.72
362
+ },
363
+ "features": {
364
+ "tool_calling": true,
365
+ "json_mode": true,
366
+ "streaming": true,
367
+ "image_input": false,
368
+ "function_calling": true
369
+ },
370
+ "benchmark_sources": ["lmsys_arena"],
371
+ "pricing_last_updated": "2025-01-01T00:00:00Z",
372
+ "live_patch": {}
373
+ },
374
+ "deepseek-chat": {
375
+ "provider": "deepseek",
376
+ "model_family": "deepseek-v3",
377
+ "context_window": 64000,
378
+ "max_output_tokens": 8192,
379
+ "input_cost_per_1k": 0.00014,
380
+ "output_cost_per_1k": 0.00028,
381
+ "avg_latency_ms": 1200,
382
+ "tokens_per_second": 95,
383
+ "capabilities": {
384
+ "reasoning": 0.87,
385
+ "coding": 0.90,
386
+ "math": 0.91,
387
+ "creativity": 0.78,
388
+ "factuality": 0.82,
389
+ "instruction_following": 0.85,
390
+ "long_context": 0.72,
391
+ "multilingual": 0.75,
392
+ "tool_use": 0.82,
393
+ "summarization": 0.82,
394
+ "conversation": 0.82
395
+ },
396
+ "features": {
397
+ "tool_calling": true,
398
+ "json_mode": true,
399
+ "streaming": true,
400
+ "image_input": false,
401
+ "function_calling": true
402
+ },
403
+ "benchmark_sources": ["lmsys_arena", "humaneval", "math_benchmark", "mmlu_pro"],
404
+ "pricing_last_updated": "2025-01-01T00:00:00Z",
405
+ "live_patch": {}
406
+ },
407
+ "deepseek-reasoner": {
408
+ "provider": "deepseek",
409
+ "model_family": "deepseek-r1",
410
+ "context_window": 64000,
411
+ "max_output_tokens": 8192,
412
+ "input_cost_per_1k": 0.00055,
413
+ "output_cost_per_1k": 0.00219,
414
+ "avg_latency_ms": 3500,
415
+ "tokens_per_second": 40,
416
+ "capabilities": {
417
+ "reasoning": 0.95,
418
+ "coding": 0.91,
419
+ "math": 0.96,
420
+ "creativity": 0.72,
421
+ "factuality": 0.88,
422
+ "instruction_following": 0.83,
423
+ "long_context": 0.70,
424
+ "multilingual": 0.72,
425
+ "tool_use": 0.75,
426
+ "summarization": 0.78,
427
+ "conversation": 0.72
428
+ },
429
+ "features": {
430
+ "tool_calling": false,
431
+ "json_mode": true,
432
+ "streaming": true,
433
+ "image_input": false,
434
+ "function_calling": false
435
+ },
436
+ "benchmark_sources": ["aime", "math_benchmark", "humaneval", "mmlu_pro"],
437
+ "pricing_last_updated": "2025-01-01T00:00:00Z",
438
+ "live_patch": {}
439
+ },
440
+ "llama3.3-70b": {
441
+ "provider": "ollama",
442
+ "model_family": "llama3",
443
+ "context_window": 128000,
444
+ "max_output_tokens": 8192,
445
+ "input_cost_per_1k": 0.0,
446
+ "output_cost_per_1k": 0.0,
447
+ "avg_latency_ms": 2000,
448
+ "tokens_per_second": 50,
449
+ "capabilities": {
450
+ "reasoning": 0.80,
451
+ "coding": 0.79,
452
+ "math": 0.74,
453
+ "creativity": 0.78,
454
+ "factuality": 0.76,
455
+ "instruction_following": 0.82,
456
+ "long_context": 0.76,
457
+ "multilingual": 0.72,
458
+ "tool_use": 0.76,
459
+ "summarization": 0.80,
460
+ "conversation": 0.82
461
+ },
462
+ "features": {
463
+ "tool_calling": true,
464
+ "json_mode": true,
465
+ "streaming": true,
466
+ "image_input": false,
467
+ "function_calling": true
468
+ },
469
+ "benchmark_sources": ["lmsys_arena", "mmlu_pro", "humaneval"],
470
+ "pricing_last_updated": "2025-01-01T00:00:00Z",
471
+ "live_patch": {}
472
+ },
473
+ "llama3.2-vision": {
474
+ "provider": "ollama",
475
+ "model_family": "llama3",
476
+ "context_window": 128000,
477
+ "max_output_tokens": 8192,
478
+ "input_cost_per_1k": 0.0,
479
+ "output_cost_per_1k": 0.0,
480
+ "avg_latency_ms": 2500,
481
+ "tokens_per_second": 40,
482
+ "capabilities": {
483
+ "reasoning": 0.70,
484
+ "coding": 0.66,
485
+ "math": 0.62,
486
+ "creativity": 0.70,
487
+ "factuality": 0.68,
488
+ "instruction_following": 0.74,
489
+ "long_context": 0.70,
490
+ "multilingual": 0.65,
491
+ "tool_use": 0.65,
492
+ "summarization": 0.72,
493
+ "conversation": 0.75
494
+ },
495
+ "features": {
496
+ "tool_calling": false,
497
+ "json_mode": true,
498
+ "streaming": true,
499
+ "image_input": true,
500
+ "function_calling": false
501
+ },
502
+ "benchmark_sources": ["lmsys_arena"],
503
+ "pricing_last_updated": "2025-01-01T00:00:00Z",
504
+ "live_patch": {}
505
+ }
506
+ }
507
+ }
docs/design.md ADDED
@@ -0,0 +1,361 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # LLMOpt UI Design Specification
2
+
3
+ ## Project Overview
4
+ **LLMOpt** is an enterprise-grade LLM middleware that intelligently routes queries to the most cost-effective model. The UI must communicate: intelligence, efficiency, cost savings, and observability — all in real time.
5
+
6
+ ---
7
+
8
+ ## Aesthetic Direction: "Dark Industrial Dashboard"
9
+
10
+ **Concept**: Think Bloomberg Terminal meets cyberpunk command center. Utilitarian precision with electric accents. Every pixel earns its place. Data-dense but crystal clear.
11
+
12
+ **Mood**: Authoritative. Efficient. Technical. Like a cockpit for LLM operations.
13
+
14
+ **One unforgettable thing**: A real-time animated pipeline that lights up as a query flows through each stage — users *watch* the optimization happen.
15
+
16
+ ---
17
+
18
+ ## Color Palette
19
+
20
+ ```
21
+ --bg-base: #0A0B0E /* Near-black base */
22
+ --bg-surface: #111318 /* Card/panel surface */
23
+ --bg-elevated: #1A1D26 /* Elevated panels */
24
+ --bg-border: #252A38 /* Borders */
25
+
26
+ --accent-cyan: #00E5FF /* Primary accent — pipeline glow */
27
+ --accent-green: #00FF94 /* Success, savings, cache hits */
28
+ --accent-amber: #FFB300 /* Warnings, "balanced" tier */
29
+ --accent-red: #FF3D57 /* Errors, expensive routes */
30
+ --accent-purple: #7C4DFF /* ML / AI stage indicators */
31
+
32
+ --text-primary: #E8ECF4 /* Main text */
33
+ --text-secondary: #7A8299 /* Labels, metadata */
34
+ --text-muted: #3D4357 /* Disabled / placeholder */
35
+
36
+ --gradient-glow: linear-gradient(135deg, #00E5FF22, #7C4DFF11)
37
+ ```
38
+
39
+ ---
40
+
41
+ ## Typography
42
+
43
+ ```
44
+ Display / Headers : "JetBrains Mono" (monospace — fits the terminal DNA)
45
+ Body / UI Labels : "DM Sans" (clean, readable, modern)
46
+ Data / Numbers : "JetBrains Mono" (monospace alignment for metrics)
47
+ Code Blocks : "Fira Code" with ligatures
48
+
49
+ Sizes:
50
+ --text-xs: 11px
51
+ --text-sm: 13px
52
+ --text-base: 15px
53
+ --text-lg: 18px
54
+ --text-xl: 24px
55
+ --text-2xl: 32px
56
+ --text-3xl: 48px
57
+ ```
58
+
59
+ ---
60
+
61
+ ## Layout Structure
62
+
63
+ ```
64
+ ┌─────────────────────────────────────────────────────┐
65
+ │ TOPBAR: Logo | Nav Tabs | Status Indicators         │
66
+ ├──────────────┬──────────────────────────────────────┤
67
+ │              │                                      │
68
+ │ LEFT PANEL   │ MAIN CONTENT AREA                    │
69
+ │ (280px)      │                                      │
70
+ │              │ [Query Input + Pipeline Visualizer]  │
71
+ │ • Config     │ [Response Output]                    │
72
+ │ • Budget     │ [Explainability Card]                │
73
+ │ • Providers  │                                      │
74
+ │ • History    │                                      │
75
+ │              ├──────────────────────────────────────┤
76
+ │              │ METRICS STRIP (bottom)               │
77
+ │              │ Cost | Tokens | Latency | Savings    │
78
+ └──────────────┴──────────────────────────────────────┘
79
+ ```
80
+
81
+ ---
82
+
83
+ ## Page / View Breakdown
84
+
85
+ ### 1. `/` — Playground (Main View)
86
+
87
+ The core query interface. This is what users interact with daily.
88
+
89
+ **Components:**
90
+
91
+ #### Query Input Box
92
+ - Large dark textarea with subtle cyan border-glow on focus
93
+ - Font: JetBrains Mono
94
+ - Placeholder: `// Enter your query...`
95
+ - Right side: Budget Mode selector (3 pills: `CHEAP` / `BALANCED` / `QUALITY`)
96
+ - Bottom bar inside textarea: token count estimate, `[RUN]` button (cyan, full-right)
97
+
98
+ #### Pipeline Visualizer (HERO COMPONENT)
99
+ A horizontal animated flow diagram that activates on query submission:
100
+
101
+ ```
102
+ [CACHE] ──► [NLI ANALYZE] ──► [GBR ESTIMATE] ──► [BAYESIAN OPT] ──► [COMPRESS] ──► [ROUTE] ──► [LLM]
103
+ ```
104
+
105
+ - Each stage is a pill/node with icon + label
106
+ - Inactive: `--bg-elevated` fill, `--text-muted` text
107
+ - Active (processing): Cyan pulsing border + glow, animated spinner inside
108
+ - Complete: Green fill, checkmark icon, latency badge underneath (e.g., `12ms`)
109
+ - Skipped (cache hit): Amber fill with "CACHED" label — flow skips to end
110
+ - Connecting lines animate left-to-right as each stage completes
111
+
112
+ **Stage Icons:**
113
+ | Stage | Icon |
114
+ |-------|------|
115
+ | Cache | ⚡ (lightning) |
116
+ | NLI Analyze | 🔍 |
117
+ | GBR Estimate | 📊 |
118
+ | Bayesian Opt | ⚙️ |
119
+ | Compress | 🗜️ |
120
+ | Route | 🔀 |
121
+ | LLM | 🤖 |
122
+
123
+ #### Response Panel
124
+ - Appears below pipeline after completion
125
+ - Markdown rendering with syntax highlighting (dark theme)
126
+ - Header strip: `Model: claude-3-5-haiku` | `Provider: Anthropic` | copy button
127
+ - Subtle fade-in animation on arrival
128
+
129
+ #### Explainability Card (collapsible)
130
+ - Monospace font block styled like a terminal output
131
+ - Cyan `>` prefix on each line
132
+ - Shows: complexity score, domain, selected model, scoring rationale, cost saved
133
+ - Toggle with `[EXPLAIN]` button next to Run
134
+
135
+ ---
136
+
137
+ ### 2. `/analytics` — Observability Dashboard
138
+
139
+ **Components:**
140
+
141
+ #### KPI Row (top 4 cards)
142
+ ```
143
+ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐
144
+ │ Total Saved  │ │ Avg Latency  │ │ Cache Hit %  │ │ Total Queries│
145
+ │ $12.48       │ │ 840ms        │ │ 34%          │ │ 1,204        │
146
+ │ ↑ 18% today  │ │ ↓ 12%        │ │ ↑ 5%         │ │              │
147
+ └──────────────┘ └──────────────┘ └──────────────┘ └──────────────┘
148
+ ```
149
+ - Micro sparklines inside each card (7-day trend)
150
+ - Green arrows = good, Red = bad
151
+
152
+ #### Model Usage Breakdown
153
+ - Horizontal stacked bar chart
154
+ - Each provider has a distinct color segment
155
+ - Hover shows: model name, % of queries, avg cost
156
+
157
+ #### Cost Over Time
158
+ - Area chart, cyan fill with glow
159
+ - X-axis: time (last 7 days / 30 days toggle)
160
+ - Y-axis: USD
161
+ - Dotted line showing "cost if all GPT-4o" — dramatic visual of savings
162
+
163
+ #### Query Log Table
164
+ ```
165
+ Timestamp | Query Preview | Complexity | Model Used | Cost | Latency | Score
166
+ ```
167
+ - Zebra striping with `--bg-surface` / `--bg-elevated`
168
+ - Complexity shown as colored bar (green → amber → red)
169
+ - Clickable rows expand to show full explainability output
170
+
171
+ ---
172
+
173
+ ### 3. `/models` — Model Registry
174
+
175
+ **Components:**
176
+
177
+ #### Model Cards Grid (2-col)
178
+ Each card:
179
+ - Model name (large, monospace)
180
+ - Provider badge (colored pill)
181
+ - Capability score as radial gauge (0–1)
182
+ - Pricing: Input / Output per 1k tokens
183
+ - "Best For" tag
184
+ - Toggle: Enable / Disable this model
185
+
186
+ #### Comparison Table
187
+ - Sortable columns: Capability, Input Cost, Output Cost, Best For
188
+ - Highlight the "Best Value" row with cyan left border
189
+
190
+ ---
191
+
192
+ ### 4. `/settings` — Configuration
193
+
194
+ **Components:**
195
+ - API Key inputs per provider (masked, with test button)
196
+ - Redis URL config
197
+ - Budget weight sliders (α Cost / β Tokens / γ Quality) with live formula display
198
+ - Compression toggle + threshold slider
199
+ - Evaluation (LLM-as-Judge) toggle
200
+
201
+ ---
202
+
203
+ ## Component Design Details
204
+
205
+ ### Sidebar Navigation
206
+ ```
207
+ ┌─────────────────┐
208
+ │ ⚡ LLMOpt        │ ← Logo: monospace, cyan accent
209
+ ├─────────────────┤
210
+ │ ▸ Playground    │ ← Active: cyan left border + bg highlight
211
+ │ ▸ Analytics     │
212
+ │ ▸ Models        │
213
+ │ ▸ Settings      │
214
+ ├─────────────────┤
215
+ │ SYSTEM STATUS   │
216
+ │ ● Redis OK      │ ← Green dot
217
+ │ ● ML Deps OK    │
218
+ │ ● Cache 34%     │
219
+ └─────────────────┘
220
+ ```
221
+
222
+ ### Budget Mode Pills
223
+ ```
224
+ [ CHEAP ] [ BALANCED ] [ QUALITY ]
225
+ ```
226
+ - Inactive: `--bg-elevated` + `--text-secondary`
227
+ - Active CHEAP: Green fill
228
+ - Active BALANCED: Amber fill
229
+ - Active QUALITY: Cyan fill
230
+
231
+ ### Metric Cards
232
+ ```css
233
+ .metric-card {
234
+ background: var(--bg-surface);
235
+ border: 1px solid var(--bg-border);
236
+ border-radius: 8px;
237
+ padding: 20px 24px;
238
+ position: relative;
239
+ overflow: hidden;
240
+ }
241
+ .metric-card::before {
242
+ content: '';
243
+ position: absolute;
244
+ top: 0; left: 0; right: 0;
245
+ height: 2px;
246
+ background: var(--accent-cyan); /* or green/amber/purple per card */
247
+ }
248
+ ```
249
+
250
+ ### Status Dots
251
+ ```css
252
+ .dot-live {
253
+ width: 8px; height: 8px;
254
+ border-radius: 50%;
255
+ background: var(--accent-green);
256
+ box-shadow: 0 0 8px var(--accent-green);
257
+ animation: pulse 2s infinite;
258
+ }
259
+ ```
260
+
261
+ ---
262
+
263
+ ## Animation Spec
264
+
265
+ ### Pipeline Stage Activation
266
+ ```
267
+ Trigger: query submitted
268
+ Sequence:
269
+ t=0ms → CACHE node: border glows cyan, spinner starts
270
+ t=~200ms → CACHE completes (hit/miss), NLI node activates
271
+ t=~400ms → NLI completes, GBR node activates
272
+ ...and so on until ROUTE
273
+ Final → Response panel fades in (opacity 0→1, translateY 8px→0, 300ms ease)
274
+ ```
275
+
276
+ ### Page Load
277
+ - Sidebar slides in from left (translateX -100% → 0, 400ms ease-out)
278
+ - KPI cards stagger in with 80ms delay each (opacity 0→1, translateY 16px→0)
279
+ - Chart areas draw from left (width 0→100%, 600ms ease-in-out)
280
+
281
+ ### Hover States
282
+ - Cards: `border-color` transitions to `--accent-cyan` at 30% opacity
283
+ - Buttons: subtle scale(1.02) + glow intensification
284
+ - Table rows: `--bg-elevated` background fill
285
+
286
+ ---
287
+
288
+ ## Responsive Breakpoints
289
+
290
+ ```
291
+ Desktop (≥1280px) : Full 2-panel layout as described
292
+ Tablet (≥768px) : Sidebar collapses to icon rail (48px)
293
+ Mobile (<768px) : Full-screen single column, bottom tab nav
294
+ ```
295
+
296
+ ---
297
+
298
+ ## Tech Stack Recommendation
299
+
300
+ ```
301
+ Framework : React 18 + TypeScript
302
+ Styling : Tailwind CSS + CSS custom properties for theming
303
+ Charts : Recharts (area, bar, sparklines)
304
+ Animation : Framer Motion (pipeline, page transitions)
305
+ Markdown : react-markdown + react-syntax-highlighter
306
+ Icons : Lucide React
307
+ API Client : axios / fetch with React Query for caching
308
+ State : Zustand (lightweight global state)
309
+ ```
310
+
311
+ ---
312
+
313
+ ## Key UX Principles
314
+
315
+ 1. **Show, don't tell** — the pipeline animation IS the explainability
315
+ 2. **Every number has context** — cost shown alongside "vs GPT-4o baseline"
316
+ 3. **Progressive disclosure** — simple by default, deep data on demand
317
+ 4. **Zero loading skeletons** — use optimistic UI and instant local feedback
318
+ 5. **Error states are designed** — not afterthoughts. Red glow on failed stages, clear recovery path.
320
+
321
+ ---
322
+
323
+ ## Sample Data / Placeholders
324
+
325
+ Use these for mockups:
326
+
327
+ ```json
328
+ {
329
+ "query": "Write a recursive Fibonacci function in Rust",
330
+ "model_used": "claude-3-5-haiku-20241022",
331
+ "provider": "anthropic",
332
+ "complexity_score": 0.62,
333
+ "complexity_tier": "hard",
334
+ "estimated_cost": 0.001452,
335
+ "tokens_saved": 28,
336
+ "compression_ratio": 0.21,
337
+ "latency_ms": 1140,
338
+ "evaluation": {
339
+ "overall": 9.5,
340
+ "accuracy": 10.0,
341
+ "feedback": "The code is idiomatic and correctly implements recursion."
342
+ }
343
+ }
344
+ ```
345
+
346
+ ---
347
+
348
+ ## Deliverables Checklist for Agent
349
+
350
+ - [ ] `App.tsx` — root layout with sidebar + router
350
+ - [ ] `Playground.tsx` — main query interface
351
+ - [ ] `PipelineVisualizer.tsx` — animated stage flow
352
+ - [ ] `ResponsePanel.tsx` — markdown response display
353
+ - [ ] `ExplainCard.tsx` — monospace terminal-style explanation
354
+ - [ ] `Analytics.tsx` — dashboard with charts
355
+ - [ ] `ModelRegistry.tsx` — model cards + table
356
+ - [ ] `Settings.tsx` — config form
357
+ - [ ] `theme.css` — all CSS variables
359
+ - [ ] `components/MetricCard.tsx`
360
+ - [ ] `components/BudgetPills.tsx`
361
+ - [ ] `components/StatusDot.tsx`
frontend/.gitignore ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Logs
2
+ logs
3
+ *.log
4
+ npm-debug.log*
5
+ yarn-debug.log*
6
+ yarn-error.log*
7
+ pnpm-debug.log*
8
+ lerna-debug.log*
9
+
10
+ node_modules
11
+ dist
12
+ dist-ssr
13
+ *.local
14
+
15
+ # Editor directories and files
16
+ .vscode/*
17
+ !.vscode/extensions.json
18
+ .idea
19
+ .DS_Store
20
+ *.suo
21
+ *.ntvs*
22
+ *.njsproj
23
+ *.sln
24
+ *.sw?
frontend/README.md ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # React + TypeScript + Vite
2
+
3
+ This template provides a minimal setup to get React working in Vite with HMR and some ESLint rules.
4
+
5
+ Currently, two official plugins are available:
6
+
7
+ - [@vitejs/plugin-react](https://github.com/vitejs/vite-plugin-react/blob/main/packages/plugin-react/README.md) uses [Babel](https://babeljs.io/) for Fast Refresh
8
+ - [@vitejs/plugin-react-swc](https://github.com/vitejs/vite-plugin-react-swc) uses [SWC](https://swc.rs/) for Fast Refresh
9
+
10
+ ## Expanding the ESLint configuration
11
+
12
+ If you are developing a production application, we recommend updating the configuration to enable type aware lint rules:
13
+
14
+ - Configure the top-level `parserOptions` property like this:
15
+
16
+ ```js
17
+ export default tseslint.config({
18
+ languageOptions: {
19
+ // other options...
20
+ parserOptions: {
21
+ project: ['./tsconfig.node.json', './tsconfig.app.json'],
22
+ tsconfigRootDir: import.meta.dirname,
23
+ },
24
+ },
25
+ })
26
+ ```
27
+
28
+ - Replace `tseslint.configs.recommended` with `tseslint.configs.recommendedTypeChecked` or `tseslint.configs.strictTypeChecked`
29
+ - Optionally add `...tseslint.configs.stylisticTypeChecked`
30
+ - Install [eslint-plugin-react](https://github.com/jsx-eslint/eslint-plugin-react) and update the config:
31
+
32
+ ```js
33
+ // eslint.config.js
34
+ import react from 'eslint-plugin-react'
35
+
36
+ export default tseslint.config({
37
+ // Set the react version
38
+ settings: { react: { version: '18.3' } },
39
+ plugins: {
40
+ // Add the react plugin
41
+ react,
42
+ },
43
+ rules: {
44
+ // other rules...
45
+ // Enable its recommended rules
46
+ ...react.configs.recommended.rules,
47
+ ...react.configs['jsx-runtime'].rules,
48
+ },
49
+ })
50
+ ```
frontend/eslint.config.js ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import js from '@eslint/js'
2
+ import globals from 'globals'
3
+ import reactHooks from 'eslint-plugin-react-hooks'
4
+ import reactRefresh from 'eslint-plugin-react-refresh'
5
+ import tseslint from 'typescript-eslint'
6
+
7
+ export default tseslint.config(
8
+ { ignores: ['dist'] },
9
+ {
10
+ extends: [js.configs.recommended, ...tseslint.configs.recommended],
11
+ files: ['**/*.{ts,tsx}'],
12
+ languageOptions: {
13
+ ecmaVersion: 2020,
14
+ globals: globals.browser,
15
+ },
16
+ plugins: {
17
+ 'react-hooks': reactHooks,
18
+ 'react-refresh': reactRefresh,
19
+ },
20
+ rules: {
21
+ ...reactHooks.configs.recommended.rules,
22
+ 'react-refresh/only-export-components': [
23
+ 'warn',
24
+ { allowConstantExport: true },
25
+ ],
26
+ },
27
+ },
28
+ )
frontend/index.html ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!doctype html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8" />
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0" />
6
+ <title>LLMOpt — Adaptive LLM Inference Optimization</title>
7
+ <meta name="description" content="LLMOpt is an enterprise-grade LLM gateway that intelligently routes queries to the most cost-effective model, saving costs while maintaining quality." />
8
+ <link rel="preconnect" href="https://fonts.googleapis.com" />
9
+ <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin />
10
+ <link href="https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@400;500;600;700&family=DM+Sans:wght@300;400;500;600;700&family=Fira+Code:wght@400;500&display=swap" rel="stylesheet" />
11
+ </head>
12
+ <body>
13
+ <div id="root"></div>
14
+ <script type="module" src="/src/main.tsx"></script>
15
+ </body>
16
+ </html>
frontend/package-lock.json ADDED
The diff for this file is too large to render. See raw diff
 
frontend/package.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "frontend",
3
+ "private": true,
4
+ "version": "0.0.0",
5
+ "type": "module",
6
+ "scripts": {
7
+ "dev": "vite",
8
+ "build": "tsc -b && vite build",
9
+ "lint": "eslint .",
10
+ "preview": "vite preview"
11
+ },
12
+ "dependencies": {
13
+ "@types/react-syntax-highlighter": "^15.5.13",
14
+ "framer-motion": "^12.40.0",
15
+ "lucide-react": "^1.16.0",
16
+ "react": "^18.3.1",
17
+ "react-dom": "^18.3.1",
18
+ "react-markdown": "^10.1.0",
19
+ "react-router-dom": "^7.15.1",
20
+ "react-syntax-highlighter": "^16.1.1",
21
+ "recharts": "^3.8.1",
22
+ "zustand": "^5.0.13"
23
+ },
24
+ "devDependencies": {
25
+ "@eslint/js": "^9.13.0",
26
+ "@types/node": "^25.9.1",
27
+ "@types/react": "^18.3.12",
28
+ "@types/react-dom": "^18.3.1",
29
+ "@vitejs/plugin-react": "^4.3.3",
30
+ "eslint": "^9.13.0",
31
+ "eslint-plugin-react-hooks": "^5.0.0",
32
+ "eslint-plugin-react-refresh": "^0.4.14",
33
+ "globals": "^15.11.0",
34
+ "typescript": "~5.6.2",
35
+ "typescript-eslint": "^8.11.0",
36
+ "vite": "^5.4.10"
37
+ }
38
+ }
frontend/public/vite.svg ADDED
frontend/src/App.css ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #root {
2
+ max-width: 1280px;
3
+ margin: 0 auto;
4
+ padding: 2rem;
5
+ text-align: center;
6
+ }
7
+
8
+ .logo {
9
+ height: 6em;
10
+ padding: 1.5em;
11
+ will-change: filter;
12
+ transition: filter 300ms;
13
+ }
14
+ .logo:hover {
15
+ filter: drop-shadow(0 0 2em #646cffaa);
16
+ }
17
+ .logo.react:hover {
18
+ filter: drop-shadow(0 0 2em #61dafbaa);
19
+ }
20
+
21
+ @keyframes logo-spin {
22
+ from {
23
+ transform: rotate(0deg);
24
+ }
25
+ to {
26
+ transform: rotate(360deg);
27
+ }
28
+ }
29
+
30
+ @media (prefers-reduced-motion: no-preference) {
31
+ a:nth-of-type(2) .logo {
32
+ animation: logo-spin infinite 20s linear;
33
+ }
34
+ }
35
+
36
+ .card {
37
+ padding: 2em;
38
+ }
39
+
40
+ .read-the-docs {
41
+ color: #888;
42
+ }
frontend/src/App.tsx ADDED
@@ -0,0 +1,217 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { BrowserRouter, Routes, Route, NavLink, useLocation } from 'react-router-dom';
2
+ import { motion, AnimatePresence } from 'framer-motion';
3
+ import { useEffect } from 'react';
4
+ import { Zap, LayoutDashboard, BarChart3, Database, Settings, LogOut, ChevronLeft, ChevronRight } from 'lucide-react';
5
+ import { useAppStore } from './store';
6
+ import { api, getStoredSession, setStoredSession } from './api';
7
+
8
+ import Playground from './pages/Playground';
9
+ import Analytics from './pages/Analytics';
10
+ import ModelRegistry from './pages/ModelRegistry';
11
+ import SettingsPage from './pages/Settings';
12
+ import LoginPage from './pages/Login';
13
+
14
/**
 * Collapsible application sidebar.
 *
 * Renders the logo with the collapse toggle, the primary navigation links,
 * a sign-out action (only while logged in) and, when expanded, the
 * system-status panel driven by `health` from the app store.
 *
 * @param collapsed     Whether the sidebar is currently shown as the narrow rail.
 * @param setCollapsed  Called by the toggle button with the new collapsed state.
 */
function Sidebar({ collapsed, setCollapsed }: { collapsed: boolean; setCollapsed: (v: boolean) => void }) {
  const { health, auth, setAuth } = useAppStore();
  const location = useLocation();

  const navItems = [
    { to: '/', icon: <LayoutDashboard size={18} />, label: 'Playground', exact: true },
    { to: '/analytics', icon: <BarChart3 size={18} />, label: 'Analytics' },
    { to: '/models', icon: <Database size={18} />, label: 'Models' },
    { to: '/settings', icon: <Settings size={18} />, label: 'Settings' },
  ];

  // One entry per status row. A non-OK API is highlighted in red, while the
  // other two services fall back to the muted text colour.
  const statusItems = [
    { label: 'API', state: health.api, offColor: 'var(--accent-red)' },
    { label: 'Redis', state: health.redis, offColor: 'var(--text-muted)' },
    { label: 'ML Deps', state: health.ml_deps, offColor: 'var(--text-muted)' },
  ];

  const handleLogout = async () => {
    try {
      await api.logout();
    } catch {
      // Best effort: the local session is cleared even if the server call fails.
    }
    setStoredSession(null);
    setAuth({ isLoggedIn: false, sessionId: null });
  };

  const dotClass = (s: string) =>
    s === 'ok' ? 'dot dot-live' : s === 'error' ? 'dot dot-error' : 'dot dot-muted';

  return (
    <motion.aside
      className={`sidebar${collapsed ? ' collapsed' : ''}`}
      initial={false}
      animate={{ width: collapsed ? 56 : 280 }}
      transition={{ duration: 0.25, ease: 'easeInOut' }}
    >
      {/* Logo */}
      <div className="sidebar-logo">
        <div className="sidebar-logo-icon"><Zap size={22} fill="currentColor" /></div>
        {!collapsed && (
          <div className="sidebar-logo-text">LLM<span>Opt</span></div>
        )}
        {/* Icon-only control: needs an explicit accessible name and button type. */}
        <button
          type="button"
          aria-label={collapsed ? 'Expand sidebar' : 'Collapse sidebar'}
          aria-expanded={!collapsed}
          onClick={() => setCollapsed(!collapsed)}
          style={{
            marginLeft: 'auto',
            background: 'transparent',
            border: 'none',
            color: 'var(--text-muted)',
            cursor: 'pointer',
            display: 'flex',
            alignItems: 'center',
            padding: '4px',
            borderRadius: '4px',
            flexShrink: 0,
          }}
        >
          {collapsed ? <ChevronRight size={16} /> : <ChevronLeft size={16} />}
        </button>
      </div>

      {/* Nav */}
      <nav className="sidebar-nav">
        {!collapsed && <div className="sidebar-section-label">Navigation</div>}
        {navItems.map((item) => {
          // The root link matches exactly; every other link matches by prefix.
          const isActive = item.exact
            ? location.pathname === item.to
            : location.pathname.startsWith(item.to) && item.to !== '/';
          return (
            <NavLink
              key={item.to}
              to={item.to}
              className={`sidebar-nav-item${isActive ? ' active' : ''}`}
              data-tooltip={collapsed ? item.label : undefined}
              aria-label={collapsed ? item.label : undefined}
            >
              <span className="sidebar-nav-icon">{item.icon}</span>
              {!collapsed && <span>{item.label}</span>}
            </NavLink>
          );
        })}

        {/* Spacer pushes the sign-out action to the bottom of the nav column. */}
        <div style={{ flex: 1 }} />

        {auth.isLoggedIn && (
          <button
            type="button"
            className="sidebar-nav-item"
            onClick={handleLogout}
            data-tooltip={collapsed ? 'Sign Out' : undefined}
            aria-label={collapsed ? 'Sign Out' : undefined}
          >
            <span className="sidebar-nav-icon"><LogOut size={18} /></span>
            {!collapsed && <span>Sign Out</span>}
          </button>
        )}
      </nav>

      {/* Status */}
      {!collapsed && (
        <div className="sidebar-status">
          <div className="sidebar-status-title">System Status</div>
          {statusItems.map((row) => (
            <div className="sidebar-status-item" key={row.label}>
              <span className={dotClass(row.state)} />
              <span>{row.label}</span>
              <span style={{ marginLeft: 'auto', color: row.state === 'ok' ? 'var(--accent-green)' : row.offColor }}>
                {row.state.toUpperCase()}
              </span>
            </div>
          ))}
        </div>
      )}
    </motion.aside>
  );
}
132
+
133
/**
 * Authenticated application shell.
 *
 * Restores a stored session on mount, pings the backend health endpoint every
 * 30 seconds, loads the connected providers once the user is logged in, and
 * renders either the login page or the sidebar + routed page layout.
 */
function AppShell() {
  const { auth, setAuth, sidebarCollapsed, setSidebarCollapsed, setConnectedProviders } = useAppStore();
  const location = useLocation();

  // Restore session from localStorage on startup
  useEffect(() => {
    const stored = getStoredSession();
    if (stored) {
      setAuth({ isLoggedIn: true, sessionId: stored });
    }
  }, [setAuth]);

  // Ping the health endpoint now and then every 30 seconds.
  // NOTE(review): the response is discarded here, while the sidebar reads
  // `health` from the store — presumably it is updated elsewhere; confirm.
  useEffect(() => {
    const check = async () => {
      try {
        await api.health();
      } catch { /* ignore */ }
    };
    check();
    const t = setInterval(check, 30000);
    return () => clearInterval(t);
  }, []);

  // Load connected providers once per login (one-shot, not a poll).
  useEffect(() => {
    if (!auth.isLoggedIn) return;
    // Guards against writing to the store after logout or unmount.
    let cancelled = false;
    const check = async () => {
      try {
        const data = await api.getKeys();
        if (!cancelled) setConnectedProviders(data.connected_providers);
      } catch { /* ignore: the provider list simply stays as it was */ }
    };
    check();
    return () => {
      cancelled = true;
    };
  }, [auth.isLoggedIn, setConnectedProviders]);

  if (!auth.isLoggedIn) {
    return (
      <AnimatePresence mode="wait">
        <motion.div
          key="login"
          initial={{ opacity: 0 }}
          animate={{ opacity: 1 }}
          exit={{ opacity: 0 }}
          transition={{ duration: 0.3 }}
        >
          <LoginPage />
        </motion.div>
      </AnimatePresence>
    );
  }

  return (
    <div className="app-layout">
      <Sidebar collapsed={sidebarCollapsed} setCollapsed={setSidebarCollapsed} />
      <main className={`main-content${sidebarCollapsed ? ' sidebar-collapsed' : ''}`}>
        <AnimatePresence mode="wait">
          {/* Keyed by pathname so each route change replays the enter/exit transition. */}
          <motion.div
            key={location.pathname}
            initial={{ opacity: 0, y: 8 }}
            animate={{ opacity: 1, y: 0 }}
            exit={{ opacity: 0, y: -8 }}
            transition={{ duration: 0.2 }}
            style={{ flex: 1, overflow: 'hidden', display: 'flex', flexDirection: 'column', minHeight: 0 }}
          >
            <Routes>
              <Route path="/" element={<Playground />} />
              <Route path="/analytics" element={<Analytics />} />
              <Route path="/models" element={<ModelRegistry />} />
              <Route path="/settings" element={<SettingsPage />} />
            </Routes>
          </motion.div>
        </AnimatePresence>
      </main>
    </div>
  );
}
210
+
211
/**
 * Application root: mounts the router under the `/ui` base path and renders
 * the shell inside it.
 */
export default function App() {
  const shell = <AppShell />;
  return <BrowserRouter basename="/ui">{shell}</BrowserRouter>;
}
frontend/src/api.ts ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // API client for LLMOpt backend
2
+ // Session ID is stored in localStorage and sent as Authorization: Bearer <token>
3
+ // This avoids httponly cookie issues on localhost
4
+ import type {
5
+ GenerateRequest,
6
+ GenerateResponse,
7
+ ExplainResponse,
8
+ HistoryItem,
9
+ DashboardStats,
10
+ ModelSpec,
11
+ } from './types';
12
+
13
+ const BASE = ''; // same-origin (served by FastAPI or proxied by Vite)
14
+
15
+ // Get session token from localStorage
16
/**
 * Read the persisted session token.
 *
 * @returns The value stored under `llmopt_session` in localStorage, or `null`
 *          when no entry exists.
 */
export function getStoredSession(): string | null {
  const token = localStorage.getItem('llmopt_session');
  return token;
}
19
+
20
/**
 * Persist or clear the session token.
 *
 * @param id  Token to store under `llmopt_session`; a falsy value (`null` or
 *            the empty string) removes the entry instead.
 */
export function setStoredSession(id: string | null) {
  if (!id) {
    localStorage.removeItem('llmopt_session');
    return;
  }
  localStorage.setItem('llmopt_session', id);
}
24
+
25
/**
 * Perform a JSON request against the backend.
 *
 * @param path          Path appended to `BASE`.
 * @param options       Extra `fetch` options; caller-supplied headers win over
 *                      the default `Content-Type`.
 * @param requiresAuth  When true (default) the stored session token, if any,
 *                      is sent as `Authorization: Bearer <token>`.
 * @returns The parsed JSON body, or `undefined` for 204/205 responses, which
 *          carry no body.
 * @throws Error whose message is the response's `detail` (or `HTTP <status>`)
 *         and whose `status` property holds the HTTP status code.
 */
async function request<T>(
  path: string,
  options: RequestInit = {},
  requiresAuth = true,
): Promise<T> {
  // `Headers` normalises every HeadersInit shape (plain object, tuple list or
  // Headers instance), so nothing the caller passes is silently dropped.
  const headers = new Headers(options.headers);
  if (!headers.has('Content-Type')) {
    headers.set('Content-Type', 'application/json');
  }

  // Inject session token as Bearer header
  if (requiresAuth) {
    const session = getStoredSession();
    if (session) {
      headers.set('Authorization', `Bearer ${session}`);
    }
  }

  const res = await fetch(`${BASE}${path}`, {
    credentials: 'include',
    ...options,
    headers,
  });

  if (!res.ok) {
    const body = await res.json().catch(() => ({ detail: res.statusText }));
    const detail = body?.detail;
    // `detail` may be a structured value (e.g. a list of validation errors);
    // serialise it rather than letting Error() print "[object Object]".
    const message =
      typeof detail === 'string' && detail
        ? detail
        : detail
          ? JSON.stringify(detail)
          : `HTTP ${res.status}`;
    const err: Error & { status?: number } = new Error(message);
    err.status = res.status;
    throw err;
  }

  // No-content responses have an empty body; parsing it as JSON would throw.
  if (res.status === 204 || res.status === 205) {
    return undefined as T;
  }

  return res.json();
}
58
+
59
/**
 * Typed client for the LLMOpt backend endpoints.
 *
 * Every method except `stream` delegates to `request`; methods that pass
 * `false` as the third argument are sent without the session token.
 */
export const api = {
  health: () => request<{ status: string; version: string }>('/health', {}, false),

  generate: (req: GenerateRequest) =>
    request<GenerateResponse>('/generate', {
      method: 'POST',
      body: JSON.stringify(req),
    }),

  explain: (
    query: string,
    budget_mode: string,
    params?: {
      alpha?: number;
      beta?: number;
      gamma?: number;
      compression_enabled?: boolean;
      exclude_providers?: string[];
      only_providers?: string[];
    }
  ) =>
    request<ExplainResponse>('/explain', {
      method: 'POST',
      body: JSON.stringify({ query, budget_mode, ...params }),
    }), // explain requires auth to access session keys

  models: () =>
    request<{ models: ModelSpec[] }>('/models', {}, false),

  // Auth
  register: (email: string, password: string) =>
    request<{ message: string }>('/auth/register', {
      method: 'POST',
      body: JSON.stringify({ email, password }),
    }, false),

  login: (email: string, password: string) =>
    request<{ message: string; session_id: string }>('/auth/login', {
      method: 'POST',
      body: JSON.stringify({ email, password }),
    }, false),

  logout: () =>
    request<{ message: string }>('/auth/logout', { method: 'POST' }),

  getKeys: () =>
    request<{ connected_providers: string[] }>('/auth/keys'),

  updateKeys: (api_keys: Record<string, string>) =>
    request<{ message: string }>('/auth/keys', {
      method: 'POST',
      body: JSON.stringify({ api_keys }),
    }),

  // The provider name becomes a path segment, so it is percent-encoded.
  deleteKey: (provider: string) =>
    request<{ message: string }>(`/auth/keys/${encodeURIComponent(provider)}`, {
      method: 'DELETE',
    }),

  getDashboardStats: () =>
    request<DashboardStats>('/auth/dashboard-stats'),

  getHistory: () =>
    request<HistoryItem[]>('/auth/history'),

  /**
   * Streaming generation: POSTs `req` to `/stream` and forwards each decoded
   * text chunk to `onChunk` as it arrives.
   *
   * @throws Error carrying the response `detail` and a `status` property when
   *         the server answers with a non-2xx status.
   */
  stream: async (req: GenerateRequest, onChunk: (chunk: string) => void) => {
    const session = getStoredSession();
    const headers: Record<string, string> = { 'Content-Type': 'application/json' };
    if (session) headers['Authorization'] = `Bearer ${session}`;

    // Uses BASE like every other endpoint so a non-empty base URL applies here too.
    const res = await fetch(`${BASE}/stream`, {
      method: 'POST',
      credentials: 'include',
      headers,
      body: JSON.stringify(req),
    });

    if (!res.ok) {
      const body = await res.json().catch(() => ({ detail: res.statusText }));
      const err: Error & { status?: number } = new Error(body?.detail || `HTTP ${res.status}`);
      err.status = res.status;
      throw err;
    }

    const reader = res.body?.getReader();
    const decoder = new TextDecoder();
    if (!reader) return;

    while (true) {
      const { done, value } = await reader.read();
      if (done) break;
      // `stream: true` keeps an incomplete multi-byte sequence buffered until
      // the next chunk completes it.
      onChunk(decoder.decode(value, { stream: true }));
    }

    // Flush whatever the decoder is still holding at end of stream.
    const tail = decoder.decode();
    if (tail) onChunk(tail);
  },
};
frontend/src/assets/react.svg ADDED
frontend/src/index.css ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ :root {
2
+ font-family: Inter, system-ui, Avenir, Helvetica, Arial, sans-serif;
3
+ line-height: 1.5;
4
+ font-weight: 400;
5
+
6
+ color-scheme: light dark;
7
+ color: rgba(255, 255, 255, 0.87);
8
+ background-color: #242424;
9
+
10
+ font-synthesis: none;
11
+ text-rendering: optimizeLegibility;
12
+ -webkit-font-smoothing: antialiased;
13
+ -moz-osx-font-smoothing: grayscale;
14
+ }
15
+
16
+ a {
17
+ font-weight: 500;
18
+ color: #646cff;
19
+ text-decoration: inherit;
20
+ }
21
+ a:hover {
22
+ color: #535bf2;
23
+ }
24
+
25
+ body {
26
+ margin: 0;
27
+ display: flex;
28
+ place-items: center;
29
+ min-width: 320px;
30
+ min-height: 100vh;
31
+ }
32
+
33
+ h1 {
34
+ font-size: 3.2em;
35
+ line-height: 1.1;
36
+ }
37
+
38
+ button {
39
+ border-radius: 8px;
40
+ border: 1px solid transparent;
41
+ padding: 0.6em 1.2em;
42
+ font-size: 1em;
43
+ font-weight: 500;
44
+ font-family: inherit;
45
+ background-color: #1a1a1a;
46
+ cursor: pointer;
47
+ transition: border-color 0.25s;
48
+ }
49
+ button:hover {
50
+ border-color: #646cff;
51
+ }
52
+ button:focus,
53
+ button:focus-visible {
54
+ outline: 4px auto -webkit-focus-ring-color;
55
+ }
56
+
57
+ @media (prefers-color-scheme: light) {
58
+ :root {
59
+ color: #213547;
60
+ background-color: #ffffff;
61
+ }
62
+ a:hover {
63
+ color: #747bff;
64
+ }
65
+ button {
66
+ background-color: #f9f9f9;
67
+ }
68
+ }
frontend/src/main.tsx ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ import { StrictMode } from 'react'
2
+ import { createRoot } from 'react-dom/client'
3
+ import './theme.css'
4
+ import App from './App.tsx'
5
+
6
+ createRoot(document.getElementById('root')!).render(
7
+ <StrictMode>
8
+ <App />
9
+ </StrictMode>,
10
+ )
frontend/src/pages/Analytics.tsx ADDED
@@ -0,0 +1,385 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { useEffect, useState } from 'react';
2
+ import { motion } from 'framer-motion';
3
+ import { BarChart3, TrendingUp, TrendingDown } from 'lucide-react';
4
+ import {
5
+ AreaChart,
6
+ Area,
7
+ BarChart,
8
+ Bar,
9
+ XAxis,
10
+ YAxis,
11
+ CartesianGrid,
12
+ Tooltip,
13
+ ResponsiveContainer,
14
+ Cell,
15
+ Legend,
16
+ } from 'recharts';
17
+ import { api } from '../api';
18
+ import type { DashboardStats, HistoryItem } from '../types';
19
+
20
+ // ─── Mock time-series data (since backend doesn't expose it yet) ──────────────
21
+
22
/**
 * Build a synthetic per-day series ending today, oldest entry first.
 *
 * @param days      Number of consecutive days to generate.
 * @param base      Centre value for `actual`; `baseline` is always `base * 8`.
 * @param variance  Width of the uniform random jitter applied around `base`
 *                  (`actual` is clamped so it never drops below 0).
 * @returns One `{ date, actual, baseline }` record per day, with `date`
 *          formatted as an en-US short month and day.
 */
function generateDailyData(days: number, base: number, variance: number) {
  const today = new Date();
  const series: { date: string; actual: number; baseline: number }[] = [];

  // Count the offset down so the oldest day is emitted first.
  for (let offset = days - 1; offset >= 0; offset--) {
    const day = new Date(today);
    day.setDate(day.getDate() - offset);

    const jitter = (Math.random() - 0.5) * variance;
    series.push({
      date: day.toLocaleDateString('en-US', { month: 'short', day: 'numeric' }),
      actual: Math.max(0, base + jitter),
      baseline: base * 8,
    });
  }

  return series;
}
34
+
35
// Hex colour assigned to each provider key; `other` is the neutral grey entry.
// NOTE(review): no consumer is visible in this part of the file — presumably
// used by the charts further down; confirm.
const PROVIDER_COLORS: Record<string, string> = {
  openai: '#00E5FF',
  anthropic: '#7C4DFF',
  google: '#00FF94',
  ollama: '#FFB300',
  cohere: '#FF3D57',
  other: '#7A8299',
};
43
+
44
+ // ─── Custom Tooltip ───────────────────────────────────────────────────────────
45
+
46
/**
 * Tooltip body for the charts: shows the hovered label followed by one line
 * per payload entry in that entry's colour. Numeric values are printed as
 * dollars with four decimals; any other value is printed as-is after the `$`.
 * Renders nothing while the tooltip is inactive or has no payload.
 */
const CustomTooltip = ({ active, payload, label }: any) => {
  const hasEntries = active && payload?.length;
  if (!hasEntries) return null;

  const boxStyle = {
    background: 'var(--bg-elevated)',
    border: '1px solid var(--bg-border)',
    borderRadius: '8px',
    padding: '12px 16px',
    fontSize: 'var(--text-xs)',
    fontFamily: 'JetBrains Mono, monospace',
  };

  const rows = payload.map((entry: any) => {
    const shown = typeof entry.value === 'number' ? entry.value.toFixed(4) : entry.value;
    return (
      <div key={entry.name} style={{ color: entry.color, marginBottom: 4 }}>
        {entry.name}: ${shown}
      </div>
    );
  });

  return (
    <div style={boxStyle}>
      <div style={{ color: 'var(--text-secondary)', marginBottom: 8 }}>{label}</div>
      {rows}
    </div>
  );
};
66
+
67
+ // ─── KPI Card ────────────────────────────────────────────────────────────────
68
+
69
/** Props accepted by {@link KPICard}. */
interface KPICardProps {
  label: string;
  value: string;
  delta?: string;
  deltaPositive?: boolean;
  color: string;
  delay: number;
}

/**
 * Animated KPI tile: a label, a headline value and an optional trend line.
 *
 * `color` is appended to the `metric-card` class name, `delay` staggers the
 * fade-in, and `deltaPositive` selects the up or down icon and class for `delta`.
 */
function KPICard(props: KPICardProps) {
  const { label, value, delta, deltaPositive, color, delay } = props;

  const TrendIcon = deltaPositive ? TrendingUp : TrendingDown;
  const trendClass = deltaPositive ? 'delta-up' : 'delta-down';

  // The trend row is only rendered when a delta string was supplied.
  const trendRow = delta ? (
    <div className={`metric-card-delta ${trendClass}`}>
      <TrendIcon size={12} />
      {delta}
    </div>
  ) : null;

  return (
    <motion.div
      className={`metric-card ${color}`}
      initial={{ opacity: 0, y: 16 }}
      animate={{ opacity: 1, y: 0 }}
      transition={{ delay, duration: 0.4 }}
    >
      <div className="metric-card-label">{label}</div>
      <div className="metric-card-value">{value}</div>
      {trendRow}
    </motion.div>
  );
}
102
+
103
+ // ─── Query Log Table ──────────────────────────────────────────────────────────
104
+
105
/**
 * Inline bar plus number for a complexity score.
 *
 * The fill width and the printed number are `score * 100`; the colour is
 * green below 0.4, amber below 0.7 and red otherwise.
 */
function ComplexityBar({ score }: { score: number }) {
  let tone = 'var(--accent-red)';
  if (score < 0.4) {
    tone = 'var(--accent-green)';
  } else if (score < 0.7) {
    tone = 'var(--accent-amber)';
  }

  const percent = score * 100;
  const numberStyle = {
    fontFamily: 'JetBrains Mono, monospace',
    fontSize: 'var(--text-xs)',
    color: tone,
  };

  return (
    <div className="complexity-bar">
      <div className="complexity-bar-track">
        <div
          className="complexity-bar-fill"
          style={{ width: `${percent}%`, background: tone }}
        />
      </div>
      <span style={numberStyle}>{percent.toFixed(0)}</span>
    </div>
  );
}
125
+
126
+
127
+
128
/**
 * Table of past queries; clicking a row toggles a detail row beneath it.
 *
 * @param items  History entries to list. An empty list renders the
 *               "No Query History" empty state instead of a table.
 */
function QueryLogTable({ items }: { items: HistoryItem[] }) {
  // Id of the currently expanded entry, or null when every row is collapsed.
  const [expanded, setExpanded] = useState<number | null>(null);

  if (items.length === 0) {
    return (
      <div className="empty-state">
        <div className="empty-state-icon">📋</div>
        <div className="empty-state-title">No Query History</div>
        <div className="empty-state-desc">Run queries in the Playground to see them here.</div>
      </div>
    );
  }

  const mono = { fontFamily: 'JetBrains Mono, monospace', fontSize: 'var(--text-xs)' };

  return (
    <div style={{ overflowX: 'auto' }}>
      <table className="data-table">
        <thead>
          <tr>
            <th>Time</th>
            <th>Query</th>
            <th>Complexity</th>
            <th>Model</th>
            <th>Cost</th>
            <th>Latency</th>
            <th>Tier</th>
          </tr>
        </thead>
        <tbody>
          {/* flatMap yields a flat list of keyed <tr>s, so every list child
              carries its own key (a keyless fragment per item would not). */}
          {items.flatMap((item) => {
            const response = item.response || '';
            const snippet = response.length > 400 ? `${response.slice(0, 400)}...` : response;
            const tierBadge =
              item.complexity_tier === 'easy' ? 'green' : item.complexity_tier === 'hard' ? 'red' : 'amber';

            const rows = [
              <tr key={item.id} onClick={() => setExpanded(expanded === item.id ? null : item.id)}>
                <td>
                  <span style={{ ...mono, color: 'var(--text-muted)' }}>
                    {item.time_ago}
                  </span>
                </td>
                <td>
                  <div className="truncate" style={{ maxWidth: 240, fontSize: 'var(--text-sm)' }}>
                    {item.query}
                  </div>
                </td>
                <td><ComplexityBar score={item.complexity_score || 0} /></td>
                <td>
                  {/* Only the last two dash-separated parts of the model id are shown. */}
                  <div style={{ ...mono, color: 'var(--accent-cyan)' }}>
                    {item.model_used?.split('-').slice(-2).join('-') || '—'}
                  </div>
                </td>
                <td>
                  <span style={{ ...mono, color: 'var(--accent-green)' }}>
                    ${(item.estimated_cost || 0).toFixed(6)}
                  </span>
                </td>
                <td>
                  {/* A missing latency shows a bare dash rather than "—ms". */}
                  <span style={mono}>
                    {item.latency_ms != null ? `${item.latency_ms.toFixed(0)}ms` : '—'}
                  </span>
                </td>
                <td>
                  <span className={`badge badge-${tierBadge}`}>
                    {item.complexity_tier || 'std'}
                  </span>
                </td>
              </tr>,
            ];

            if (expanded === item.id) {
              rows.push(
                <tr key={`${item.id}-exp`}>
                  <td colSpan={7} style={{ padding: 0 }}>
                    <div style={{
                      padding: 'var(--sp-4) var(--sp-5)',
                      background: 'var(--bg-base)',
                      borderTop: '1px solid var(--bg-border)',
                      fontFamily: 'JetBrains Mono, monospace',
                      fontSize: 'var(--text-xs)',
                      lineHeight: 1.8,
                    }}>
                      <div style={{ color: 'var(--accent-cyan)' }}>{'>'} Full query:</div>
                      <div style={{ color: 'var(--text-secondary)', margin: '4px 0 12px', whiteSpace: 'pre-wrap' }}>{item.query}</div>
                      <div style={{ color: 'var(--accent-cyan)' }}>{'>'} Response snippet:</div>
                      <div style={{ color: 'var(--text-secondary)', margin: '4px 0', whiteSpace: 'pre-wrap' }}>
                        {snippet}
                      </div>
                      <div style={{ display: 'flex', gap: 'var(--sp-6)', marginTop: 12, color: 'var(--text-muted)' }}>
                        <span>Tokens in: {item.input_tokens}</span>
                        <span>Tokens out: {item.output_tokens}</span>
                        <span>Saved: {item.tokens_saved}</span>
                        <span>Cost saved: ${(item.cost_saved || 0).toFixed(6)}</span>
                      </div>
                    </div>
                  </td>
                </tr>,
              );
            }

            return rows;
          })}
        </tbody>
      </table>
    </div>
  );
}
225
+
226
+ // ─── Analytics Page ───────────────────────────────────────────────────────────
227
+
228
/**
 * Analytics page: KPI cards, a cost-over-time area chart, a per-provider
 * usage bar chart and the query log.
 *
 * Dashboard stats and history are fetched once on mount. While the request is
 * in flight a centred spinner is shown; a failure is surfaced as an inline
 * error banner above the (then empty) dashboard.
 */
export default function Analytics() {
  const [stats, setStats] = useState<DashboardStats | null>(null);
  const [history, setHistory] = useState<HistoryItem[]>([]);
  const [loading, setLoading] = useState(true);
  const [error, setError] = useState('');
  const [chartRange, setChartRange] = useState<7 | 30>(7);

  // NOTE(review): generateDailyData is re-run on every render; if it is
  // randomised the chart will change on unrelated state updates — consider
  // memoising on chartRange.
  const costData = generateDailyData(chartRange, 0.05, 0.08);
  const modelData = stats
    ? Object.entries(stats.distribution).map(([name, pct]) => ({
        name: name.charAt(0).toUpperCase() + name.slice(1),
        value: pct,
        fill: PROVIDER_COLORS[name] || PROVIDER_COLORS.other,
      }))
    : [];

  useEffect(() => {
    // Set by the cleanup so a response that arrives after unmount (or after a
    // superseded effect run) does not touch state.
    let cancelled = false;

    const load = async () => {
      try {
        const [s, h] = await Promise.all([
          api.getDashboardStats(),
          api.getHistory(),
        ]);
        if (cancelled) return;
        setStats(s);
        setHistory(h);
      } catch (e: unknown) {
        if (cancelled) return;
        const message = (e as { message?: string } | null)?.message;
        setError(message || 'Failed to load analytics');
      } finally {
        if (!cancelled) setLoading(false);
      }
    };
    load();

    return () => {
      cancelled = true;
    };
  }, []);

  if (loading) {
    return (
      <div style={{ display: 'flex', alignItems: 'center', justifyContent: 'center', height: '100%' }}>
        <span className="spinner" style={{ width: 32, height: 32, borderWidth: 3 }} />
      </div>
    );
  }

  return (
    <div style={{ display: 'flex', flexDirection: 'column', flex: 1, minHeight: 0, overflow: 'hidden' }}>
      <div className="topbar">
        <div className="topbar-breadcrumb">
          <BarChart3 size={14} style={{ color: 'var(--accent-cyan)' }} />
          <strong>Analytics</strong>
          <span style={{ color: 'var(--text-muted)' }}>/ Observability Dashboard</span>
        </div>
      </div>

      <div className="page-content" style={{ display: 'flex', flexDirection: 'column', gap: 'var(--sp-5)', flex: 1, overflowY: 'auto' }}>
        {error && <div className="auth-error">⚠ {error}</div>}

        {/* KPI Row */}
        <div className="grid-4">
          <KPICard label="Total Saved" value={stats?.routing_savings || '$0.00'} delta="from routing" deltaPositive color="green" delay={0} />
          <KPICard label="Queries Run" value={String(stats?.prompts_improved || 0)} color="cyan" delay={0.08} />
          <KPICard label="Avg Quality Boost" value={stats?.avg_boost || '0%'} delta="complexity-adjusted" deltaPositive color="purple" delay={0.16} />
          <KPICard label="Tokens Saved" value={stats?.tokens_saved || '0'} delta="vs uncompressed" deltaPositive color="amber" delay={0.24} />
        </div>

        {/* Cost Over Time + Model Distribution */}
        <div className="grid-2">
          <div className="card">
            <div className="card-header">
              <div>
                <div className="card-title">Cost Over Time</div>
                <div className="card-subtitle">Actual vs GPT-4o baseline (USD)</div>
              </div>
              <div style={{ display: 'flex', gap: 'var(--sp-2)' }}>
                {([7, 30] as const).map((d) => (
                  <button
                    key={d}
                    className={`btn btn-ghost btn-sm${chartRange === d ? ' active' : ''}`}
                    style={chartRange === d ? { borderColor: 'var(--accent-cyan)', color: 'var(--accent-cyan)' } : {}}
                    onClick={() => setChartRange(d)}
                  >
                    {d}d
                  </button>
                ))}
              </div>
            </div>
            <div className="card-body">
              <ResponsiveContainer width="100%" height={200}>
                <AreaChart data={costData} margin={{ top: 5, right: 5, bottom: 5, left: 5 }}>
                  <defs>
                    <linearGradient id="costGrad" x1="0" y1="0" x2="0" y2="1">
                      <stop offset="5%" stopColor="#00E5FF" stopOpacity={0.3} />
                      <stop offset="95%" stopColor="#00E5FF" stopOpacity={0} />
                    </linearGradient>
                    <linearGradient id="baseGrad" x1="0" y1="0" x2="0" y2="1">
                      <stop offset="5%" stopColor="#FF3D57" stopOpacity={0.1} />
                      <stop offset="95%" stopColor="#FF3D57" stopOpacity={0} />
                    </linearGradient>
                  </defs>
                  <CartesianGrid strokeDasharray="3 3" stroke="var(--bg-border)" />
                  <XAxis dataKey="date" tick={{ fill: 'var(--text-muted)', fontSize: 11 }} />
                  <YAxis tick={{ fill: 'var(--text-muted)', fontSize: 11 }} tickFormatter={(v) => `$${v.toFixed(2)}`} />
                  <Tooltip content={<CustomTooltip />} />
                  <Legend wrapperStyle={{ fontSize: 12, color: 'var(--text-secondary)' }} />
                  <Area type="monotone" dataKey="baseline" name="GPT-4o Baseline" stroke="#FF3D57" strokeDasharray="5 5" fill="url(#baseGrad)" strokeWidth={2} />
                  <Area type="monotone" dataKey="actual" name="LLMOpt Actual" stroke="#00E5FF" fill="url(#costGrad)" strokeWidth={2} dot={{ fill: '#00E5FF', r: 3 }} />
                </AreaChart>
              </ResponsiveContainer>
            </div>
          </div>

          <div className="card">
            <div className="card-header">
              <div>
                <div className="card-title">Model Usage</div>
                <div className="card-subtitle">Distribution by provider (%)</div>
              </div>
            </div>
            <div className="card-body">
              {modelData.length > 0 ? (
                <ResponsiveContainer width="100%" height={200}>
                  <BarChart data={modelData} layout="vertical" margin={{ top: 5, right: 20, bottom: 5, left: 60 }}>
                    <CartesianGrid strokeDasharray="3 3" stroke="var(--bg-border)" horizontal={false} />
                    <XAxis type="number" tick={{ fill: 'var(--text-muted)', fontSize: 11 }} tickFormatter={(v) => `${v}%`} />
                    <YAxis type="category" dataKey="name" tick={{ fill: 'var(--text-secondary)', fontSize: 12, fontFamily: 'JetBrains Mono' }} />
                    <Tooltip
                      formatter={(v: any) => [`${v}%`, 'Share']}
                      contentStyle={{ background: 'var(--bg-elevated)', border: '1px solid var(--bg-border)', borderRadius: 8, fontSize: 12 }}
                    />
                    <Bar dataKey="value" radius={[0, 4, 4, 0]}>
                      {/* Provider names are the keys of stats.distribution, so they are unique. */}
                      {modelData.map((entry) => (
                        <Cell key={entry.name} fill={entry.fill} />
                      ))}
                    </Bar>
                  </BarChart>
                </ResponsiveContainer>
              ) : (
                <div className="empty-state" style={{ padding: 'var(--sp-8)' }}>
                  <div className="empty-state-title">No data yet</div>
                  <div className="empty-state-desc">Run queries to see model distribution.</div>
                </div>
              )}
            </div>
          </div>
        </div>

        {/* Query Log */}
        <div className="card">
          <div className="card-header">
            <div>
              <div className="card-title">Query Log</div>
              <div className="card-subtitle">Last 20 requests — click to expand</div>
            </div>
          </div>
          <QueryLogTable items={history} />
        </div>
      </div>
    </div>
  );
}
frontend/src/pages/Login.tsx ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { useState } from 'react';
2
+ import { motion, AnimatePresence } from 'framer-motion';
3
+ import { Zap } from 'lucide-react';
4
+ import { api, setStoredSession } from '../api';
5
+ import { useAppStore } from '../store';
6
+
7
/** Which of the two credential flows the auth card is currently showing. */
type Mode = 'login' | 'register';

/**
 * Sign-in / sign-up page.
 *
 * Email + password submit goes through `api.register` (register mode only)
 * followed by `api.login`; the returned session id is persisted with
 * `setStoredSession` and pushed into the app store. Connected providers are
 * then fetched on a best-effort basis. The Google / GitHub buttons navigate
 * the browser to the backend OAuth entry points.
 */
export default function LoginPage() {
  const { setAuth, setConnectedProviders } = useAppStore();
  const [mode, setMode] = useState<Mode>('login');
  const [email, setEmail] = useState('');
  const [password, setPassword] = useState('');
  const [loading, setLoading] = useState(false);
  const [error, setError] = useState('');

  const handleSubmit = async (e: React.FormEvent) => {
    e.preventDefault();
    if (!email.trim() || !password.trim()) return;
    setLoading(true);
    setError('');

    try {
      if (mode === 'register') {
        await api.register(email, password);
        // Fall through: a fresh account is logged in immediately below.
      }
      const data = await api.login(email, password);
      setStoredSession(data.session_id);
      setAuth({ isLoggedIn: true, sessionId: data.session_id });

      // Provider list is optional: a failure here must not undo a successful
      // login, but it should not vanish without a trace either.
      try {
        const keys = await api.getKeys();
        setConnectedProviders(keys.connected_providers);
      } catch (keysError) {
        console.warn('Could not load connected providers after login', keysError);
      }
    } catch (err: unknown) {
      const message = (err as { message?: string } | null)?.message;
      setError(message || 'Authentication failed');
    } finally {
      setLoading(false);
    }
  };

  const handleGoogleLogin = () => {
    window.location.href = '/auth/login/google';
  };

  const handleGithubLogin = () => {
    window.location.href = '/auth/login/github';
  };

  // Switch between login and register, dropping any stale error message.
  const switchMode = (next: Mode) => {
    setMode(next);
    setError('');
  };

  // The mode links are <span>s; give them the keyboard activation a button has.
  const activateOnKey = (next: Mode) => (event: React.KeyboardEvent) => {
    if (event.key === 'Enter' || event.key === ' ') {
      event.preventDefault();
      switchMode(next);
    }
  };

  return (
    <div className="auth-page">
      {/* Background */}
      <div className="auth-bg-grid" />
      <div className="auth-bg-glow" />
      <div className="auth-bg-glow-2" />

      {/* Floating particles */}
      {[...Array(6)].map((_, i) => (
        <motion.div
          key={i}
          style={{
            position: 'absolute',
            width: `${4 + i * 2}px`,
            height: `${4 + i * 2}px`,
            borderRadius: '50%',
            background: i % 2 === 0 ? 'var(--accent-cyan)' : 'var(--accent-purple)',
            opacity: 0.3,
            left: `${15 + i * 14}%`,
            top: `${20 + (i % 3) * 25}%`,
          }}
          animate={{
            y: [0, -20, 0],
            opacity: [0.3, 0.6, 0.3],
          }}
          transition={{
            duration: 3 + i * 0.5,
            repeat: Infinity,
            ease: 'easeInOut',
            delay: i * 0.4,
          }}
        />
      ))}

      <AnimatePresence mode="wait">
        {/* Keyed on mode so switching flows replays the enter/exit animation. */}
        <motion.div
          key={mode}
          className="auth-card"
          initial={{ opacity: 0, y: 24, scale: 0.97 }}
          animate={{ opacity: 1, y: 0, scale: 1 }}
          exit={{ opacity: 0, y: -16, scale: 0.97 }}
          transition={{ duration: 0.3 }}
        >
          {/* Logo */}
          <div className="auth-logo">
            <div className="auth-logo-icon"><Zap size={28} fill="currentColor" /></div>
            <div className="auth-logo-text">LLM<span>Opt</span></div>
          </div>

          <div className="auth-title">
            {mode === 'login' ? 'Welcome back' : 'Create account'}
          </div>
          <div className="auth-subtitle">
            {mode === 'login'
              ? 'Sign in to your LLMOpt workspace'
              : 'Start optimizing your LLM costs today'}
          </div>

          {/* OAuth */}
          <div style={{ display: 'flex', gap: 'var(--sp-3)', marginBottom: 'var(--sp-4)' }}>
            <button type="button" className="oauth-btn" onClick={handleGoogleLogin}>
              <svg width="18" height="18" viewBox="0 0 24 24">
                <path d="M22.56 12.25c0-.78-.07-1.53-.2-2.25H12v4.26h5.92c-.26 1.37-1.04 2.53-2.21 3.31v2.77h3.57c2.08-1.92 3.28-4.74 3.28-8.09z" fill="#4285F4"/>
                <path d="M12 23c2.97 0 5.46-.98 7.28-2.66l-3.57-2.77c-.98.66-2.23 1.06-3.71 1.06-2.86 0-5.29-1.93-6.16-4.53H2.18v2.84C3.99 20.53 7.7 23 12 23z" fill="#34A853"/>
                <path d="M5.84 14.09c-.22-.66-.35-1.36-.35-2.09s.13-1.43.35-2.09V7.07H2.18C1.43 8.55 1 10.22 1 12s.43 3.45 1.18 4.93l2.85-2.22.81-.62z" fill="#FBBC05"/>
                <path d="M12 5.38c1.62 0 3.06.56 4.21 1.64l3.15-3.15C17.45 2.09 14.97 1 12 1 7.7 1 3.99 3.47 2.18 7.07l3.66 2.84c.87-2.6 3.3-4.53 6.16-4.53z" fill="#EA4335"/>
              </svg>
              Continue with Google
            </button>
            <button type="button" className="oauth-btn" onClick={handleGithubLogin}>
              <svg width="18" height="18" viewBox="0 0 24 24" fill="currentColor"><path d="M12 2C6.477 2 2 6.477 2 12c0 4.42 2.865 8.167 6.839 9.49.5.092.682-.217.682-.482 0-.237-.008-.866-.013-1.7-2.782.603-3.369-1.342-3.369-1.342-.454-1.155-1.11-1.462-1.11-1.462-.908-.62.069-.608.069-.608 1.003.07 1.531 1.03 1.531 1.03.892 1.529 2.341 1.087 2.91.832.092-.647.35-1.088.636-1.338-2.22-.253-4.555-1.11-4.555-4.943 0-1.091.39-1.984 1.029-2.683-.103-.253-.446-1.27.098-2.647 0 0 .84-.268 2.75 1.026A9.578 9.578 0 0112 6.836c.85.004 1.705.115 2.504.337 1.909-1.294 2.747-1.026 2.747-1.026.546 1.377.202 2.394.1 2.647.64.699 1.028 1.592 1.028 2.683 0 3.842-2.339 4.687-4.566 4.935.359.309.678.919.678 1.852 0 1.336-.012 2.415-.012 2.743 0 .267.18.578.688.48C19.138 20.163 22 16.418 22 12c0-5.523-4.477-10-10-10z"/></svg>
              GitHub
            </button>
          </div>

          <div className="auth-divider">
            <div className="auth-divider-line" />
            <div className="auth-divider-text">or</div>
            <div className="auth-divider-line" />
          </div>

          {/* Form */}
          <form className="auth-form" onSubmit={handleSubmit}>
            <div className="input-group">
              <label className="input-label" htmlFor="auth-email">Email</label>
              <input
                id="auth-email"
                type="email"
                value={email}
                onChange={(e) => setEmail(e.target.value)}
                placeholder="you@company.com"
                autoComplete="email"
                required
              />
            </div>
            <div className="input-group">
              <label className="input-label" htmlFor="auth-password">Password</label>
              <input
                id="auth-password"
                type="password"
                value={password}
                onChange={(e) => setPassword(e.target.value)}
                placeholder="••••••••"
                autoComplete={mode === 'login' ? 'current-password' : 'new-password'}
                required
              />
            </div>

            {error && (
              <motion.div
                className="auth-error"
                initial={{ opacity: 0 }}
                animate={{ opacity: 1 }}
              >
                {error}
              </motion.div>
            )}

            <button
              id="auth-submit-btn"
              type="submit"
              className="btn btn-primary btn-lg"
              disabled={loading}
              style={{ width: '100%', marginTop: 'var(--sp-2)' }}
            >
              {loading ? (
                <>
                  <span className="spinner" />
                  {mode === 'login' ? 'Signing in...' : 'Creating account...'}
                </>
              ) : (
                mode === 'login' ? 'Sign In' : 'Create Account'
              )}
            </button>
          </form>

          <div className="auth-footer">
            {mode === 'login' ? (
              <>
                Don&apos;t have an account?{' '}
                <span
                  className="auth-link"
                  role="button"
                  tabIndex={0}
                  onClick={() => switchMode('register')}
                  onKeyDown={activateOnKey('register')}
                >
                  Sign up
                </span>
              </>
            ) : (
              <>
                Already have an account?{' '}
                <span
                  className="auth-link"
                  role="button"
                  tabIndex={0}
                  onClick={() => switchMode('login')}
                  onKeyDown={activateOnKey('login')}
                >
                  Sign in
                </span>
              </>
            )}
          </div>
        </motion.div>
      </AnimatePresence>
    </div>
  );
}
frontend/src/pages/ModelRegistry.tsx ADDED
@@ -0,0 +1,354 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { useEffect, useState } from 'react';
2
+ import { motion } from 'framer-motion';
3
+ import { Database, Star, ArrowUpDown } from 'lucide-react';
4
+ import { api } from '../api';
5
+ import type { ModelSpec } from '../types';
6
+
7
// Badge CSS class per provider keyword. Lookup is by substring, in the order
// the keys are declared here.
const PROVIDER_COLORS: Record<string, string> = {
  openai: 'badge-cyan',
  anthropic: 'badge-purple',
  google: 'badge-green',
  ollama: 'badge-amber',
  mistral: 'badge-muted',
  deepseek: 'badge-red',
  cohere: 'badge-muted',
};

/**
 * Resolve the badge class for a provider name.
 *
 * The name is lower-cased and matched against each known keyword as a
 * substring; the first keyword found wins. Unknown or missing providers get
 * the muted badge.
 */
function providerBadge(provider: string) {
  const needle = provider?.toLowerCase();
  const matchedKey = Object.keys(PROVIDER_COLORS).find((keyword) => needle?.includes(keyword));
  if (matchedKey === undefined) {
    return 'badge-muted';
  }
  return PROVIDER_COLORS[matchedKey];
}
24
+
25
/**
 * Circular gauge for a capability score.
 *
 * `score` is drawn as the filled fraction of a ring and printed in the centre
 * as a whole number out of 100. Values outside 0..1 are clamped and
 * non-finite values are treated as 0, so the SVG never receives a negative or
 * NaN dash length.
 */
function CapabilityGauge({ score }: { score: number }) {
  const r = 24;
  const circ = 2 * Math.PI * r;
  const fraction = Number.isFinite(score) ? Math.min(1, Math.max(0, score)) : 0;
  const dash = circ * fraction;
  const color = fraction >= 0.8 ? 'var(--accent-cyan)' : fraction >= 0.6 ? 'var(--accent-amber)' : 'var(--accent-red)';
  return (
    <svg width={60} height={60}>
      {/* Background track */}
      <circle cx={30} cy={30} r={r} fill="none" stroke="var(--bg-border)" strokeWidth={4} />
      {/* Filled arc; rotated so it starts at 12 o'clock */}
      <circle
        cx={30} cy={30} r={r}
        fill="none"
        stroke={color}
        strokeWidth={4}
        strokeDasharray={`${dash} ${circ - dash}`}
        strokeLinecap="round"
        transform="rotate(-90 30 30)"
        style={{ transition: 'stroke-dasharray 0.6s ease' }}
      />
      <text
        x={30} y={35}
        textAnchor="middle"
        fill="var(--text-primary)"
        style={{
          fontSize: '11px',
          fontFamily: 'JetBrains Mono, monospace',
          fontWeight: 700,
        }}
      >
        {(fraction * 100).toFixed(0)}
      </text>
    </svg>
  );
}
58
+
59
/**
 * Grid-view card for one registered model: name, provider badge, capability
 * gauge, pricing, context window, per-skill bars and optional notes.
 *
 * `index` only staggers the entrance animation.
 */
function ModelCard({ model, index }: { model: ModelSpec; index: number }) {
  // Best value: high capability + low cost
  const isBestValue = model.capability_score > 0.7 && model.input_cost_per_1k < 0.002;
  // Case-insensitive substring match, the same rule providerBadge uses, so the
  // "FREE LOCAL" badge and the provider badge colour cannot disagree.
  const isLocalFree = Boolean(model.provider?.toLowerCase().includes('ollama'));

  // A zero price is shown as FREE as well, matching the table view.
  const renderPrice = (cost: number) =>
    isLocalFree || cost === 0
      ? <span style={{ color: 'var(--accent-green)' }}>FREE</span>
      : `$${cost.toFixed(5)}`;

  const skills = [
    { label: 'Reasoning', value: model.reasoning_score, color: 'var(--accent-cyan)' },
    { label: 'Coding', value: model.coding_score, color: 'var(--accent-purple)' },
    { label: 'Math', value: model.math_score, color: 'var(--accent-amber)' },
  ];

  return (
    <motion.div
      className="model-card"
      style={isBestValue ? { borderColor: 'var(--accent-cyan)', boxShadow: '0 0 20px rgba(0,229,255,0.08)' } : {}}
      initial={{ opacity: 0, y: 16 }}
      animate={{ opacity: 1, y: 0 }}
      transition={{ delay: index * 0.04, duration: 0.35 }}
    >
      {isBestValue && (
        <div style={{ display: 'flex', alignItems: 'center', gap: 6, marginBottom: 'var(--sp-2)' }}>
          <Star size={12} fill="var(--accent-cyan)" color="var(--accent-cyan)" />
          <span style={{ fontSize: 'var(--text-xs)', color: 'var(--accent-cyan)', fontFamily: 'JetBrains Mono, monospace', fontWeight: 700 }}>
            BEST VALUE
          </span>
        </div>
      )}

      <div style={{ display: 'flex', alignItems: 'flex-start', justifyContent: 'space-between', gap: 'var(--sp-3)' }}>
        <div style={{ flex: 1 }}>
          <div className="model-card-name">{model.model_name}</div>
          <div style={{ marginTop: 'var(--sp-2)', display: 'flex', gap: 'var(--sp-2)', flexWrap: 'wrap' }}>
            <span className={`badge ${providerBadge(model.provider)}`}>{model.provider}</span>
            {isLocalFree && <span className="badge badge-green">FREE LOCAL</span>}
          </div>
        </div>
        <CapabilityGauge score={model.capability_score} />
      </div>

      <div className="model-card-pricing">
        <div className="model-card-price-item">
          <div className="model-card-price-label">Input /1k</div>
          <div className="model-card-price-value">
            {renderPrice(model.input_cost_per_1k)}
          </div>
        </div>
        <div className="model-card-price-item">
          <div className="model-card-price-label">Output /1k</div>
          <div className="model-card-price-value">
            {renderPrice(model.output_cost_per_1k)}
          </div>
        </div>
        <div className="model-card-price-item">
          <div className="model-card-price-label">Context</div>
          <div className="model-card-price-value" style={{ color: 'var(--accent-purple)' }}>
            {/* Whole millions from 1,000,000 tokens up, whole thousands below. */}
            {model.context_window >= 1000000
              ? `${(model.context_window / 1000000).toFixed(0)}M`
              : `${(model.context_window / 1000).toFixed(0)}k`}
          </div>
        </div>
      </div>

      {/* Capability breakdown bars */}
      <div style={{ display: 'flex', flexDirection: 'column', gap: 6 }}>
        {skills.map(({ label, value, color }) => (
          <div key={label} style={{ display: 'flex', alignItems: 'center', gap: 8 }}>
            <span style={{ fontSize: 'var(--text-xs)', color: 'var(--text-muted)', width: 62, flexShrink: 0 }}>{label}</span>
            <div style={{ flex: 1, height: 4, background: 'var(--bg-border)', borderRadius: 2, overflow: 'hidden' }}>
              <div style={{ width: `${value * 100}%`, height: '100%', background: color, borderRadius: 2 }} />
            </div>
            <span style={{ fontSize: 'var(--text-xs)', color, fontFamily: 'JetBrains Mono', width: 28, textAlign: 'right' }}>
              {(value * 100).toFixed(0)}
            </span>
          </div>
        ))}
      </div>

      {model.notes && (
        <div style={{ fontSize: 'var(--text-xs)', color: 'var(--text-secondary)', borderTop: '1px solid var(--bg-border)', paddingTop: 'var(--sp-3)', lineHeight: 1.5 }}>
          {model.notes}
        </div>
      )}
    </motion.div>
  );
}
142
+
143
/** Numeric ModelSpec fields the table view can be sorted by. */
type SortKey = 'capability_score' | 'input_cost_per_1k' | 'output_cost_per_1k' | 'max_complexity';

// Sortable table headers, in display order.
const SORTABLE_COLUMNS: { key: SortKey; label: string }[] = [
  { key: 'capability_score', label: 'Capability' },
  { key: 'input_cost_per_1k', label: 'Input /1k' },
  { key: 'output_cost_per_1k', label: 'Output /1k' },
  { key: 'max_complexity', label: 'Max Complexity' },
];

/**
 * Model registry page.
 *
 * Loads the model list once on mount, then lets the user filter by model or
 * provider name, toggle between a card grid and a table, and (in the table)
 * sort by any numeric column. Clicking the active sort column flips its
 * direction; clicking another column selects it, descending first.
 */
export default function ModelRegistry() {
  const [models, setModels] = useState<ModelSpec[]>([]);
  const [loading, setLoading] = useState(true);
  const [error, setError] = useState('');
  const [sortKey, setSortKey] = useState<SortKey>('capability_score');
  const [sortAsc, setSortAsc] = useState(false);
  const [view, setView] = useState<'grid' | 'table'>('grid');
  const [filter, setFilter] = useState('');

  useEffect(() => {
    // Set by the cleanup so a late response cannot update state after unmount.
    let cancelled = false;

    const load = async () => {
      try {
        const data = await api.models();
        if (cancelled) return;
        setModels(data.models || []);
      } catch (e: unknown) {
        if (cancelled) return;
        const message = (e as { message?: string } | null)?.message;
        setError(message || 'Failed to load models');
      } finally {
        if (!cancelled) setLoading(false);
      }
    };
    load();

    return () => {
      cancelled = true;
    };
  }, []);

  // Lower-case the query once rather than twice per model, and tolerate
  // entries whose name or provider is missing.
  const needle = filter.toLowerCase();
  const filtered = models.filter(
    (m) =>
      (m.model_name ?? '').toLowerCase().includes(needle) ||
      (m.provider ?? '').toLowerCase().includes(needle)
  );

  // Missing numeric fields sort as 0; without this the comparator would
  // return NaN and leave the order unspecified.
  const sorted = [...filtered].sort((a, b) => {
    const d = (a[sortKey] ?? 0) - (b[sortKey] ?? 0);
    return sortAsc ? d : -d;
  });

  const toggleSort = (k: SortKey) => {
    if (sortKey === k) setSortAsc((v) => !v);
    else { setSortKey(k); setSortAsc(false); }
  };

  // Whole millions from 1,000,000 tokens up, whole thousands below.
  const formatContext = (tokens: number) =>
    tokens >= 1000000
      ? `${(tokens / 1000000).toFixed(0)}M`
      : `${(tokens / 1000).toFixed(0)}k`;

  const activeToggleStyle = { borderColor: 'var(--accent-cyan)', color: 'var(--accent-cyan)' };

  if (loading) {
    return (
      <div style={{ display: 'flex', alignItems: 'center', justifyContent: 'center', height: '100%' }}>
        <span className="spinner" style={{ width: 32, height: 32, borderWidth: 3 }} />
      </div>
    );
  }

  return (
    <div style={{ display: 'flex', flexDirection: 'column', flex: 1, minHeight: 0, overflow: 'hidden' }}>
      <div className="topbar">
        <div className="topbar-breadcrumb">
          <Database size={14} style={{ color: 'var(--accent-cyan)' }} />
          <strong>Model Registry</strong>
          <span style={{ color: 'var(--text-muted)' }}>/ {models.length} models registered</span>
        </div>
        <div className="topbar-actions">
          <input
            type="text"
            value={filter}
            onChange={(e) => setFilter(e.target.value)}
            placeholder="Filter models..."
            style={{ width: 180, padding: '6px 12px', fontSize: 'var(--text-xs)' }}
          />
          <button
            className="btn btn-ghost btn-sm"
            style={view === 'grid' ? activeToggleStyle : {}}
            onClick={() => setView('grid')}
          >
            Grid
          </button>
          <button
            className="btn btn-ghost btn-sm"
            style={view === 'table' ? activeToggleStyle : {}}
            onClick={() => setView('table')}
          >
            Table
          </button>
        </div>
      </div>

      <div className="page-content">
        {error && <div className="auth-error mb-4">⚠ {error}</div>}

        {sorted.length === 0 && !error ? (
          <div className="empty-state">
            <div className="empty-state-icon"><Database size={48} /></div>
            <div className="empty-state-title">No Models Found</div>
            <div className="empty-state-desc">
              {filter ? `No models match "${filter}"` : 'The model registry is empty.'}
            </div>
          </div>
        ) : view === 'grid' ? (
          <div className="grid-2">
            {sorted.map((m, i) => <ModelCard key={m.model_name} model={m} index={i} />)}
          </div>
        ) : (
          <div className="card">
            <div style={{ overflowX: 'auto' }}>
              <table className="data-table">
                <thead>
                  <tr>
                    <th>Model</th>
                    <th>Provider</th>
                    {SORTABLE_COLUMNS.map(({ key, label }) => (
                      <th
                        key={key}
                        style={{ cursor: 'pointer', userSelect: 'none' }}
                        onClick={() => toggleSort(key)}
                      >
                        <div style={{ display: 'flex', alignItems: 'center', gap: 4 }}>
                          {label} <ArrowUpDown size={12} />
                        </div>
                      </th>
                    ))}
                    <th>Context</th>
                    <th>Notes</th>
                  </tr>
                </thead>
                <tbody>
                  {sorted.map((m) => {
                    // Same "best value" rule as the grid cards.
                    const isBest = m.capability_score > 0.7 && m.input_cost_per_1k < 0.002;
                    const notes = m.notes ?? '';
                    return (
                      <tr
                        key={m.model_name}
                        style={isBest ? { borderLeft: '3px solid var(--accent-cyan)' } : {}}
                      >
                        <td>
                          <div style={{ fontFamily: 'JetBrains Mono, monospace', fontSize: 'var(--text-sm)', color: 'var(--text-primary)', display: 'flex', alignItems: 'center', gap: 8 }}>
                            {isBest && <Star size={11} fill="var(--accent-cyan)" color="var(--accent-cyan)" />}
                            {m.model_name}
                          </div>
                        </td>
                        <td>
                          <span className={`badge ${providerBadge(m.provider)}`}>{m.provider}</span>
                        </td>
                        <td>
                          <div style={{ display: 'flex', alignItems: 'center', gap: 8 }}>
                            <div style={{ width: 60, height: 4, background: 'var(--bg-border)', borderRadius: 2, overflow: 'hidden' }}>
                              <div style={{ width: `${m.capability_score * 100}%`, height: '100%', background: 'var(--accent-cyan)', borderRadius: 2 }} />
                            </div>
                            <span style={{ fontFamily: 'JetBrains Mono, monospace', fontSize: 'var(--text-xs)', color: 'var(--accent-cyan)' }}>
                              {(m.capability_score * 100).toFixed(0)}
                            </span>
                          </div>
                        </td>
                        <td>
                          <span style={{ fontFamily: 'JetBrains Mono, monospace', fontSize: 'var(--text-xs)', color: m.input_cost_per_1k === 0 ? 'var(--accent-green)' : 'var(--text-primary)' }}>
                            {m.input_cost_per_1k === 0 ? 'FREE' : `$${m.input_cost_per_1k.toFixed(5)}`}
                          </span>
                        </td>
                        <td>
                          <span style={{ fontFamily: 'JetBrains Mono, monospace', fontSize: 'var(--text-xs)', color: m.output_cost_per_1k === 0 ? 'var(--accent-green)' : 'var(--text-primary)' }}>
                            {m.output_cost_per_1k === 0 ? 'FREE' : `$${m.output_cost_per_1k.toFixed(5)}`}
                          </span>
                        </td>
                        <td>
                          <div style={{ display: 'flex', alignItems: 'center', gap: 6 }}>
                            <div style={{ width: 50, height: 4, background: 'var(--bg-border)', borderRadius: 2, overflow: 'hidden' }}>
                              <div style={{ width: `${m.max_complexity * 100}%`, height: '100%', background: 'var(--accent-purple)', borderRadius: 2 }} />
                            </div>
                            <span style={{ fontFamily: 'JetBrains Mono, monospace', fontSize: 'var(--text-xs)', color: 'var(--accent-purple)' }}>
                              {(m.max_complexity * 100).toFixed(0)}
                            </span>
                          </div>
                        </td>
                        <td>
                          <span style={{ fontFamily: 'JetBrains Mono, monospace', fontSize: 'var(--text-xs)', color: 'var(--accent-purple)' }}>
                            {formatContext(m.context_window)}
                          </span>
                        </td>
                        <td style={{ maxWidth: 200 }}>
                          {/* Full text stays available via the tooltip. */}
                          <span style={{ fontSize: 'var(--text-xs)', color: 'var(--text-secondary)' }} title={m.notes}>
                            {notes.length > 50 ? notes.slice(0, 50) + '…' : notes}
                          </span>
                        </td>
                      </tr>
                    );
                  })}
                </tbody>
              </table>
            </div>
          </div>
        )}
      </div>
    </div>
  );
}
frontend/src/pages/Playground.tsx ADDED
@@ -0,0 +1,606 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { useState, useCallback } from 'react';
2
+ import { motion, AnimatePresence } from 'framer-motion';
3
+ import { Play, Zap, Eye, EyeOff, Copy, Check } from 'lucide-react';
4
+ import { useAppStore } from '../store';
5
+ import { api } from '../api';
6
+ import type { GenerateResponse, ExplainResponse, PipelineStage, PipelineStageStatus, ComplexityTier } from '../types';
7
+ import ReactMarkdown from 'react-markdown';
8
+
9
+ // ─── Tier Badge ────────────────────────────────────────────────────────────────
10
+
11
/** Accent colour (hex) used when rendering each complexity tier. */
const TIER_COLORS: Record<ComplexityTier, string> = {
  trivial: '#00ff94',
  easy: '#00e5ff',
  medium: '#ffc700',
  hard: '#ff6b35',
  expert: '#e040fb',
};
18
+
19
/**
 * Pill-shaped label that shows a complexity tier in its accent colour.
 *
 * @param tier - Tier to display; it is also rendered as the badge text.
 */
function TierBadge({ tier }: { tier: ComplexityTier }) {
  // Tiers present in TIER_COLORS get their hex colour; anything else falls
  // back to the muted text colour.
  const tierColor: string | undefined = TIER_COLORS[tier];
  const color = tierColor || 'var(--text-muted)';
  // "15" is a two-digit hex alpha suffix and is only valid after a #rrggbb
  // colour. Appending it to the CSS-variable fallback would produce an invalid
  // declaration, so the fallback uses a transparent background instead.
  const background = tierColor ? `${tierColor}15` : 'transparent';

  return (
    <span
      style={{
        display: 'inline-block',
        padding: '2px 10px',
        borderRadius: 12,
        border: `1px solid ${color}`,
        color,
        fontSize: 'var(--text-xs)',
        fontFamily: 'JetBrains Mono, monospace',
        fontWeight: 700,
        letterSpacing: 1,
        textTransform: 'uppercase',
        background,
      }}
    >
      {tier}
    </span>
  );
}
39
+
40
+ // ─── Pipeline Visualizer ─────────────────────────────────────────────────────
41
+
42
/**
 * Static definition of the five pipeline stages, in display order.
 * `desc` is the text shown for a stage until explain data is available.
 * The icons are emoji; they were previously stored as mis-encoded byte
 * sequences and are restored here.
 */
const PIPELINE_STAGES: { id: string; label: string; icon: string; desc: string }[] = [
  { id: 'analyze', label: 'Query Analyzer', icon: '🔍', desc: 'Detecting domain, complexity signals, token count' },
  { id: 'estimate', label: 'Complexity Estimator', icon: '🧠', desc: 'ML model scoring query complexity C(q) ∈ [0,1]' },
  { id: 'optimize', label: 'Optimization Engine', icon: '⚡', desc: 'Selecting optimal model via budget constraints' },
  { id: 'compress', label: 'Prompt Optimizer', icon: '📦', desc: 'Compressing tokens and injecting system prompt' },
  { id: 'route', label: 'Model Router', icon: '🚀', desc: 'Routing request to provider API' },
];
49
+
50
/**
 * Builds the one-line detail text shown under a pipeline stage.
 *
 * Returns `fallback` when no explain data is available or when `stageId` is
 * not one of the five known stage ids.
 */
function stageDetail(stageId: string, fallback: string, explainData?: ExplainResponse | null): string {
  if (!explainData) return fallback;

  switch (stageId) {
    case 'analyze': {
      const f = explainData.features;
      return `Domain: ${f.primary_domain} | Tokens: ${f.token_count} | Output: ${f.estimated_output_length}`;
    }
    case 'estimate': {
      const c = explainData.complexity;
      return `Score: ${c.score.toFixed(3)} | Tier: ${c.tier} | Reasoning req: ${c.required_reasoning.toFixed(2)}`;
    }
    case 'optimize': {
      const opt = explainData.optimization;
      return `Selected: ${opt.selected_model} | Budget: ${opt.budget_mode} | Compression: ${opt.compression_enabled ? 'yes' : 'no'}`;
    }
    case 'compress': {
      const p = explainData.optimized_prompt;
      // A missing or zero tokens_saved is displayed as 0.
      const saved = p.tokens_saved || 0;
      return `Tokens before: ${p.tokens_before} → after: ${p.tokens_after} | Saved: ${saved} tokens`;
    }
    case 'route': {
      const opt = explainData.optimization;
      return `Provider: ${opt.provider} | Max tokens: ${opt.estimated_output_tokens} | Style: ${opt.system_prompt_style}`;
    }
    default:
      return fallback;
  }
}

/** Returns the glyph for a stage: spinner, check, skip mark, cross, or the dimmed stage icon when idle. */
function stageStatusIcon(status: PipelineStageStatus, idleIcon: string) {
  switch (status) {
    case 'active':
      return <span className="spinner" style={{ width: 16, height: 16, borderWidth: 2 }} />;
    case 'complete':
      return <span style={{ color: 'var(--accent-green)', fontSize: 18 }}>✓</span>;
    case 'skipped':
      return <span style={{ color: 'var(--text-muted)', fontSize: 14 }}>⊘</span>;
    case 'error':
      return <span style={{ color: 'var(--accent-red)', fontSize: 18 }}>✕</span>;
    default:
      return <span style={{ fontSize: 14, opacity: 0.5 }}>{idleIcon}</span>;
  }
}

/**
 * Vertical list of the pipeline stages with their current status.
 *
 * Every entry of PIPELINE_STAGES is always rendered; a stage with no matching
 * entry in `stages` is shown as idle.
 *
 * @param stages - Current status per stage id.
 * @param explainData - When present, replaces the static stage descriptions
 *   with values taken from the explain response.
 */
function PipelineVisualizer({
  stages,
  explainData,
}: {
  stages: PipelineStage[];
  explainData?: ExplainResponse | null;
}) {
  const lastIndex = PIPELINE_STAGES.length - 1;

  return (
    <div className="pipeline-wrapper">
      {PIPELINE_STAGES.map((def, i) => {
        const status: PipelineStageStatus = stages.find((s) => s.id === def.id)?.status || 'idle';
        const detail = stageDetail(def.id, def.desc, explainData);

        return (
          <div key={def.id} className={`pipeline-stage ${status}`}>
            <div className="pipeline-stage-icon">{stageStatusIcon(status, def.icon)}</div>
            <div className="pipeline-stage-body">
              <div className="pipeline-stage-label">{def.label}</div>
              <div className="pipeline-stage-detail">{detail}</div>
            </div>
            {/* No connector after the final stage. */}
            {i < lastIndex && (
              <div
                className="pipeline-connector"
                style={{
                  background:
                    status === 'complete'
                      ? 'linear-gradient(180deg, var(--accent-cyan) 0%, var(--bg-border) 100%)'
                      : 'var(--bg-border)',
                }}
              />
            )}
          </div>
        );
      })}
    </div>
  );
}
118
+
119
+ // ─── Rationale Card ──────────────────────────────────────────────────────────
120
+
121
/** Boolean feature flags that are rendered as badges, with their display labels. */
const RATIONALE_FEATURE_BADGES: { key: string; label: string }[] = [
  { key: 'domain_code', label: 'Code' },
  { key: 'domain_math', label: 'Math' },
  { key: 'domain_science', label: 'Science' },
  { key: 'domain_reasoning', label: 'Reasoning' },
  { key: 'domain_creative', label: 'Creative' },
  { key: 'multi_step', label: 'Multi-step' },
  { key: 'requires_comparison', label: 'Comparison' },
  { key: 'requires_analysis', label: 'Analysis' },
  { key: 'has_math_notation', label: 'Math notation' },
  { key: 'has_code_block', label: 'Code block' },
];

/**
 * Card that explains a routing decision: complexity breakdown, detected
 * features, the chosen model and settings, compression savings, and the
 * optimizer's rationale bullets.
 *
 * @param explain - Explain response to render.
 */
function RationaleCard({ explain }: { explain: ExplainResponse }) {
  const { complexity, optimization, optimized_prompt, features } = explain;

  // Requirement scores are multiplied by 100 for display, so they are treated
  // as fractions. NOTE(review): assumes each value lies in [0, 1] - confirm.
  const requirementBars = [
    { label: 'Reasoning req.', value: complexity.required_reasoning, color: 'var(--accent-cyan)' },
    { label: 'Coding req.', value: complexity.required_coding, color: 'var(--accent-purple)' },
    { label: 'Math req.', value: complexity.required_math, color: 'var(--accent-amber)' },
  ];

  // The same list drives both the badges and the empty-state message. The
  // empty-state test used to look only at keys prefixed domain_/requires_/has_,
  // so a query whose only flag was `multi_step` showed the "Multi-step" badge
  // and "No special features detected" side by side.
  const activeFeatures = RATIONALE_FEATURE_BADGES.filter(
    (f) => features[f.key as keyof typeof features] === true,
  );

  const routingFields = [
    { label: 'Selected Model', value: optimization.selected_model, mono: true },
    { label: 'Provider', value: optimization.provider, mono: true },
    { label: 'Budget Mode', value: optimization.budget_mode, mono: false },
    { label: 'System Prompt', value: optimization.system_prompt_style, mono: false },
    { label: 'Compression', value: optimization.compression_enabled ? 'enabled' : 'disabled', mono: false },
    { label: 'Fallback', value: optimization.fallback_model || 'N/A', mono: true },
  ];

  return (
    <div className="rationale-card">
      <div className="rationale-card-title">
        <Zap size={14} />
        LLMOpt Decision Rationale
      </div>

      {/* Complexity breakdown */}
      <div className="rationale-section">
        <div className="rationale-label">Complexity Analysis</div>
        <div style={{ display: 'flex', alignItems: 'center', gap: 12, marginBottom: 12 }}>
          <TierBadge tier={complexity.tier} />
          <span style={{ fontFamily: 'JetBrains Mono', fontSize: 'var(--text-sm)', color: 'var(--text-primary)' }}>
            Score: <strong style={{ color: 'var(--accent-cyan)' }}>{complexity.score.toFixed(3)}</strong>
          </span>
        </div>
        <div style={{ display: 'flex', flexDirection: 'column', gap: 8 }}>
          {requirementBars.map(({ label, value, color }) => (
            <div key={label} style={{ display: 'flex', alignItems: 'center', gap: 8 }}>
              <span style={{ fontSize: 'var(--text-xs)', color: 'var(--text-muted)', width: 100, flexShrink: 0 }}>{label}</span>
              <div style={{ flex: 1, height: 4, background: 'var(--bg-border)', borderRadius: 2 }}>
                <div style={{ width: `${value * 100}%`, height: '100%', background: color, borderRadius: 2, transition: 'width 0.6s ease' }} />
              </div>
              <span style={{ fontSize: 'var(--text-xs)', color, fontFamily: 'JetBrains Mono', width: 32, textAlign: 'right' }}>
                {(value * 100).toFixed(0)}
              </span>
            </div>
          ))}
        </div>
      </div>

      {/* Feature flags */}
      <div className="rationale-section">
        <div className="rationale-label">Detected Features</div>
        <div style={{ display: 'flex', flexWrap: 'wrap', gap: 6 }}>
          {activeFeatures.map((f) => (
            <span key={f.key} className="badge badge-cyan" style={{ fontSize: '10px' }}>
              {f.label}
            </span>
          ))}
          {activeFeatures.length === 0 && (
            <span style={{ fontSize: 'var(--text-xs)', color: 'var(--text-muted)' }}>No special features detected</span>
          )}
        </div>
      </div>

      {/* Routing decision */}
      <div className="rationale-section">
        <div className="rationale-label">Routing Decision</div>
        <div style={{ display: 'grid', gridTemplateColumns: '1fr 1fr', gap: 8 }}>
          {routingFields.map(({ label, value, mono }) => (
            <div key={label}>
              <div style={{ fontSize: '10px', color: 'var(--text-muted)', marginBottom: 2 }}>{label}</div>
              <div
                style={{
                  fontFamily: mono ? 'JetBrains Mono, monospace' : 'inherit',
                  fontSize: 'var(--text-xs)',
                  color: mono ? 'var(--accent-cyan)' : 'var(--text-primary)',
                }}
              >
                {value}
              </div>
            </div>
          ))}
        </div>
      </div>

      {/* Compression: only shown when tokens were actually saved */}
      {optimized_prompt.tokens_saved > 0 && (
        <div className="rationale-section">
          <div className="rationale-label">Prompt Compression</div>
          <div style={{ display: 'flex', alignItems: 'center', gap: 8 }}>
            <span style={{ fontFamily: 'JetBrains Mono', fontSize: 'var(--text-xs)', color: 'var(--text-muted)' }}>
              {optimized_prompt.tokens_before}
            </span>
            <span style={{ color: 'var(--accent-green)', fontSize: 'var(--text-xs)' }}>→</span>
            <span style={{ fontFamily: 'JetBrains Mono', fontSize: 'var(--text-xs)', color: 'var(--accent-green)' }}>
              {optimized_prompt.tokens_after} tokens
            </span>
            <span style={{ marginLeft: 'auto', color: 'var(--accent-amber)', fontFamily: 'JetBrains Mono', fontSize: 'var(--text-xs)' }}>
              -{optimized_prompt.tokens_saved} saved ({(optimized_prompt.compression_ratio * 100).toFixed(1)}%)
            </span>
          </div>
        </div>
      )}

      {/* Rationale bullets */}
      {optimization.rationale?.length > 0 && (
        <div className="rationale-section">
          <div className="rationale-label">Optimizer Rationale</div>
          <ul style={{ margin: 0, padding: '0 0 0 16px', display: 'flex', flexDirection: 'column', gap: 4 }}>
            {optimization.rationale.map((r, i) => (
              <li key={i} style={{ fontSize: 'var(--text-xs)', color: 'var(--text-secondary)' }}>{r}</li>
            ))}
          </ul>
        </div>
      )}
    </div>
  );
}
246
+
247
+ // ─── Metrics Bar ─────────────────────────────────────────────────────────────
248
+
249
/**
 * Horizontal strip of headline numbers for a generate response: model,
 * token count, cost, savings, latency and complexity tier. A "Tokens
 * Compressed" entry is appended only when `tokens_saved` is positive.
 *
 * @param result - Generate response whose metrics are displayed.
 */
function MetricsBar({ result }: { result: GenerateResponse }) {
  const {
    model_used,
    total_tokens,
    estimated_cost,
    cost_saved,
    latency_ms,
    complexity_tier,
    tokens_saved,
  } = result;

  const tokensText = total_tokens.toLocaleString();
  const costText = estimated_cost.toFixed(6);
  const savedText = cost_saved.toFixed(6);
  const latencyText = latency_ms.toFixed(0);
  const showCompressed = tokens_saved > 0;

  return (
    <div className="metrics-bar">
      <div className="metric-item">
        <div className="metric-label">Model</div>
        <div className="metric-value" style={{ fontFamily: 'JetBrains Mono', fontSize: 'var(--text-xs)' }}>
          {model_used}
        </div>
      </div>
      <div className="metric-divider" />

      <div className="metric-item">
        <div className="metric-label">Tokens</div>
        <div className="metric-value">{tokensText}</div>
      </div>
      <div className="metric-divider" />

      <div className="metric-item">
        <div className="metric-label">Cost</div>
        <div className="metric-value" style={{ color: 'var(--accent-green)' }}>${costText}</div>
      </div>
      <div className="metric-divider" />

      <div className="metric-item">
        <div className="metric-label">Saved</div>
        <div className="metric-value" style={{ color: 'var(--accent-amber)' }}>${savedText}</div>
      </div>
      <div className="metric-divider" />

      <div className="metric-item">
        <div className="metric-label">Latency</div>
        <div className="metric-value">{latencyText}ms</div>
      </div>
      <div className="metric-divider" />

      <div className="metric-item">
        <div className="metric-label">Complexity</div>
        <div className="metric-value">
          <TierBadge tier={complexity_tier} />
        </div>
      </div>

      {showCompressed ? (
        <>
          <div className="metric-divider" />
          <div className="metric-item">
            <div className="metric-label">Tokens Compressed</div>
            <div className="metric-value" style={{ color: 'var(--accent-purple)' }}>-{tokens_saved}</div>
          </div>
        </>
      ) : null}
    </div>
  );
}
295
+
296
+ // ─── Main Playground Page ────────────────────────────────────────────────────
297
+
298
/**
 * Playground page: takes a query, requests an explanation of the routing
 * decision while a staged pipeline animation plays, then requests the actual
 * generation and shows the response, its metrics and (on demand) the rationale.
 *
 * Routing settings (budget mode, weights, compression, evaluation) and the
 * list of connected providers are read from the app store.
 */
export default function Playground() {
  const {
    budgetMode,
    alphaWeight,
    betaWeight,
    gammaWeight,
    compressionEnabled,
    evaluationEnabled,
    connectedProviders,
  } = useAppStore();
  const [query, setQuery] = useState('');
  // Empty selection means "no provider constraint".
  const [selectedProviders, setSelectedProviders] = useState<string[]>([]);
  const [loading, setLoading] = useState(false);
  const [error, setError] = useState('');
  const [result, setResult] = useState<GenerateResponse | null>(null);
  const [explain, setExplain] = useState<ExplainResponse | null>(null);
  const [showExplain, setShowExplain] = useState(false);
  const [copied, setCopied] = useState(false);
  const [stages, setStages] = useState<PipelineStage[]>([]);

  // Updates the status of one stage, adding the stage if it is not tracked yet.
  const setStageStatus = useCallback((id: string, status: PipelineStageStatus, detail?: string) => {
    setStages((prev) => {
      const existing = prev.find((s) => s.id === id);
      if (existing) {
        return prev.map((s) => (s.id === id ? { ...s, status, detail: detail ?? s.detail } : s));
      }
      return [...prev, { id, label: id, icon: '', status, detail }];
    });
  }, []);

  // Puts every known stage back to idle.
  const resetStages = useCallback(() => {
    setStages(PIPELINE_STAGES.map((s) => ({ id: s.id, label: s.label, icon: s.icon, status: 'idle' as PipelineStageStatus })));
  }, []);

  // Steps through the first four stages on fixed timers, then leaves the
  // route stage active; the caller completes it once the real request returns.
  const runPipelineAnimation = useCallback(async () => {
    const timings = [300, 600, 400, 300, 0]; // ms per stage (route waits for real API)
    const ids = ['analyze', 'estimate', 'optimize', 'compress', 'route'];

    for (let i = 0; i < ids.length - 1; i++) {
      setStageStatus(ids[i], 'active');
      await new Promise((r) => setTimeout(r, timings[i]));
      setStageStatus(ids[i], 'complete');
    }
    setStageStatus('route', 'active');
  }, [setStageStatus]);

  const handleSubmit = async () => {
    if (!query.trim() || loading) return;
    setLoading(true);
    setError('');
    setResult(null);
    setExplain(null);
    resetStages();

    const providerConstraints = selectedProviders.length > 0 ? selectedProviders : undefined;

    try {
      // 1. Start the explain request so it runs while the animation plays.
      const explainPromise = api.explain(query, budgetMode, {
        alpha: alphaWeight,
        beta: betaWeight,
        gamma: gammaWeight,
        compression_enabled: compressionEnabled,
        only_providers: providerConstraints,
      });
      // Register a handler immediately: without it, a rejection that arrives
      // during the animation is reported as unhandled. The failure is still
      // surfaced by the `await explainPromise` below.
      explainPromise.catch(() => {});

      // 2. Play the animation; it ends with the route stage marked active.
      await runPipelineAnimation();

      // 3. Collect the explain data (already resolved, or wait for it).
      const explainData = await explainPromise;
      setExplain(explainData);
      setShowExplain(false);

      // 4. Generate with the real API.
      const genResult = await api.generate({
        query,
        budget_mode: budgetMode,
        alpha: alphaWeight,
        beta: betaWeight,
        gamma: gammaWeight,
        compression_enabled: compressionEnabled,
        evaluate: evaluationEnabled,
        only_providers: providerConstraints,
      });

      setStageStatus('route', 'complete');
      setResult(genResult);
    } catch (e: unknown) {
      // The thrown value is not guaranteed to be an object, so read it defensively.
      const err = (e ?? {}) as { status?: number; message?: string };
      if (err.status === 401) {
        setError('Please add your API keys in Settings to generate responses.');
      } else if (err.status === 503) {
        setError('Redis is unavailable — sessions require Redis. Try adding ?session= to the request or run Redis locally.');
      } else {
        setError(err.message || 'Generation failed');
      }
      setStageStatus('route', 'error');
    } finally {
      setLoading(false);
    }
  };

  const handleCopy = async () => {
    if (!result?.response) return;
    try {
      // Wait for the write so "Copied!" is only shown when it succeeded.
      // `navigator.clipboard` may be undefined (e.g. insecure context); the
      // resulting TypeError is handled by the same catch.
      await navigator.clipboard.writeText(result.response);
      setCopied(true);
      setTimeout(() => setCopied(false), 2000);
    } catch {
      setError('Could not copy the response to the clipboard.');
    }
  };

  const examplePrompts = [
    'Explain quicksort with Python code and time complexity analysis',
    'What is the derivative of x²·sin(x)?',
    'Write a haiku about machine learning',
    'Design a distributed rate limiter for 1M RPS',
    'Summarize the French Revolution in 3 bullet points',
  ];

  return (
    <div style={{ display: 'flex', flexDirection: 'column', flex: 1, minHeight: 0, overflow: 'hidden' }}>
      <div className="topbar">
        <div className="topbar-breadcrumb">
          <Zap size={14} style={{ color: 'var(--accent-cyan)' }} />
          <strong>Playground</strong>
          <span style={{ color: 'var(--text-muted)' }}>/ Query Optimizer</span>
        </div>
        <div className="topbar-actions">
          {connectedProviders.length > 0 && (
            <div style={{ display: 'flex', alignItems: 'center', gap: '6px', marginRight: '16px', borderRight: '1px solid var(--bg-border)', paddingRight: '16px' }}>
              <span style={{ fontSize: 'var(--text-xs)', color: 'var(--text-muted)' }}>Routing Pool:</span>
              {connectedProviders.map((prov) => {
                const isSelected = selectedProviders.includes(prov);
                // With nothing selected every provider is in the pool, so all are shown as active.
                const isActive = selectedProviders.length === 0 || isSelected;
                return (
                  <button
                    key={prov}
                    className="btn btn-xs"
                    style={{
                      textTransform: 'capitalize',
                      fontSize: '10px',
                      padding: '2px 8px',
                      borderColor: isActive ? 'var(--accent-cyan)' : 'var(--bg-border)',
                      color: isActive ? 'var(--accent-cyan)' : 'var(--text-muted)',
                      opacity: isActive ? 1 : 0.4,
                      transition: 'all 0.2s ease',
                      background: 'transparent',
                    }}
                    onClick={() => {
                      setSelectedProviders((prev) =>
                        prev.includes(prov) ? prev.filter((p) => p !== prov) : [...prev, prov],
                      );
                    }}
                  >
                    {prov}
                  </button>
                );
              })}
            </div>
          )}
          <span style={{ fontSize: 'var(--text-xs)', color: 'var(--text-muted)' }}>Budget:</span>
          {(['cheap', 'balanced', 'quality'] as const).map((m) => (
            <button
              key={m}
              className={`btn btn-ghost btn-sm ${budgetMode === m ? 'active' : ''}`}
              style={budgetMode === m ? { borderColor: 'var(--accent-cyan)', color: 'var(--accent-cyan)' } : {}}
              onClick={() => useAppStore.getState().setBudgetMode(m)}
            >
              {m}
            </button>
          ))}
        </div>
      </div>

      <div className="page-content" style={{ display: 'flex', gap: 'var(--sp-4)', alignItems: 'flex-start', overflow: 'auto' }}>
        {/* Left panel: Input + Examples + Result */}
        <div style={{ flex: 1, display: 'flex', flexDirection: 'column', gap: 'var(--sp-4)', minWidth: 0 }}>
          {/* Query input */}
          <div className="card">
            <div className="card-header">Query</div>
            <textarea
              id="playground-query-input"
              value={query}
              onChange={(e) => setQuery(e.target.value)}
              onKeyDown={(e) => {
                if (e.key === 'Enter' && (e.ctrlKey || e.metaKey)) {
                  // Keep the shortcut from also acting on the textarea.
                  e.preventDefault();
                  handleSubmit();
                }
              }}
              placeholder="Ask anything — LLMOpt will analyze complexity, route to the optimal model, and compress the prompt to save cost..."
              style={{ minHeight: 120, resize: 'vertical', fontFamily: 'inherit' }}
            />
            <div style={{ display: 'flex', alignItems: 'center', justifyContent: 'space-between', marginTop: 'var(--sp-3)' }}>
              <span style={{ fontSize: 'var(--text-xs)', color: 'var(--text-muted)' }}>
                Ctrl+Enter to run
              </span>
              <button
                id="playground-submit-btn"
                className="btn btn-primary"
                onClick={handleSubmit}
                disabled={loading || !query.trim()}
              >
                {loading ? (
                  <><span className="spinner" /> Optimizing…</>
                ) : (
                  <><Play size={14} /> Run</>
                )}
              </button>
            </div>
          </div>

          {/* Example prompts: hidden while loading and once a result exists */}
          {!result && !loading && (
            <div className="card">
              <div className="card-header">Example Prompts</div>
              <div style={{ display: 'flex', flexDirection: 'column', gap: 'var(--sp-2)' }}>
                {examplePrompts.map((p) => (
                  <button
                    key={p}
                    className="example-prompt-btn"
                    onClick={() => setQuery(p)}
                  >
                    <span className="example-prompt-icon">→</span>
                    {p}
                  </button>
                ))}
              </div>
            </div>
          )}

          {/* Error */}
          {error && (
            <motion.div
              className="auth-error"
              initial={{ opacity: 0, y: -8 }}
              animate={{ opacity: 1, y: 0 }}
            >
              ⚠ {error}
            </motion.div>
          )}

          {/* Result */}
          <AnimatePresence>
            {result && (
              <motion.div
                className="card"
                initial={{ opacity: 0, y: 12 }}
                animate={{ opacity: 1, y: 0 }}
                exit={{ opacity: 0, y: -8 }}
              >
                <div className="card-header" style={{ display: 'flex', justifyContent: 'space-between', alignItems: 'center' }}>
                  <span>Response</span>
                  <div style={{ display: 'flex', gap: 'var(--sp-2)' }}>
                    <button className="btn btn-ghost btn-sm" onClick={() => setShowExplain((v) => !v)}>
                      {showExplain ? <EyeOff size={12} /> : <Eye size={12} />}
                      {showExplain ? 'Hide Explain' : 'Explain'}
                    </button>
                    <button className="btn btn-ghost btn-sm" onClick={handleCopy}>
                      {copied ? <Check size={12} /> : <Copy size={12} />}
                      {copied ? 'Copied!' : 'Copy'}
                    </button>
                  </div>
                </div>
                <MetricsBar result={result} />
                <div className="response-content">
                  <ReactMarkdown>{result.response}</ReactMarkdown>
                </div>

                {/* Inline explain panel */}
                <AnimatePresence>
                  {showExplain && explain && (
                    <motion.div
                      initial={{ opacity: 0, height: 0 }}
                      animate={{ opacity: 1, height: 'auto' }}
                      exit={{ opacity: 0, height: 0 }}
                      style={{ overflow: 'hidden' }}
                    >
                      <div style={{ borderTop: '1px solid var(--bg-border)', paddingTop: 'var(--sp-4)', marginTop: 'var(--sp-4)' }}>
                        <RationaleCard explain={explain} />
                      </div>
                    </motion.div>
                  )}
                </AnimatePresence>
              </motion.div>
            )}
          </AnimatePresence>
        </div>

        {/* Right panel: Pipeline */}
        <div style={{ width: 280, flexShrink: 0 }}>
          <div className="card" style={{ position: 'sticky', top: 0 }}>
            <div className="card-header">Optimization Pipeline</div>
            <PipelineVisualizer stages={stages} explainData={explain} />
            {!loading && stages.length === 0 && (
              <div style={{ padding: 'var(--sp-4)', textAlign: 'center', color: 'var(--text-muted)', fontSize: 'var(--text-xs)' }}>
                Run a query to see the pipeline in action
              </div>
            )}
          </div>
        </div>
      </div>
    </div>
  );
}
frontend/src/pages/Settings.tsx ADDED
@@ -0,0 +1,349 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { useState, useEffect } from 'react';
2
+ import { Settings, Eye, EyeOff, CheckCircle } from 'lucide-react';
3
+ import { api } from '../api';
4
+ import { useAppStore } from '../store';
5
+
6
/** One row of the API-keys section: a provider plus its connection state. */
interface KeyEntry {
  /** Provider id used when saving or deleting the key (e.g. 'openai'). */
  provider: string;
  /** Human-readable provider name shown in the row. */
  label: string;
  /** Placeholder text for the key input. */
  placeholder: string;
  /** Whether the provider currently appears in the connected-providers list. */
  connected: boolean;
}
12
+
13
/**
 * Providers offered in the API-keys section, in display order.
 * `connected` is omitted here; it is derived per render from the store.
 */
const PROVIDERS: Omit<KeyEntry, 'connected'>[] = [
  {
    provider: 'openai',
    label: 'OpenAI',
    placeholder: 'sk-...',
  },
  {
    provider: 'anthropic',
    label: 'Anthropic',
    placeholder: 'sk-ant-...',
  },
  {
    provider: 'google',
    label: 'Google AI',
    placeholder: 'AI...',
  },
  {
    provider: 'deepseek',
    label: 'DeepSeek',
    placeholder: 'sk-...',
  },
  {
    provider: 'mistral',
    label: 'Mistral',
    placeholder: '...',
  },
  {
    provider: 'cohere',
    label: 'Cohere',
    placeholder: 'co-...',
  },
  {
    // For Ollama the stored value is a server URL rather than a key.
    provider: 'ollama',
    label: 'Ollama URL',
    placeholder: 'http://localhost:11434',
  },
];
22
+
23
/**
 * Settings row for one provider credential: connection status, a masked
 * input with a show/hide toggle, a Save button and, when connected, a
 * two-step Clear button.
 *
 * @param entry - Provider metadata and current connection state.
 * @param onSave - Called with the provider id and the entered value.
 * @param onDelete - Called with the provider id after the user confirms.
 */
function ApiKeyRow({
  entry,
  onSave,
  onDelete,
}: {
  entry: KeyEntry;
  onSave: (provider: string, key: string) => Promise<void>;
  onDelete: (provider: string) => Promise<void>;
}) {
  const [value, setValue] = useState('');
  const [visible, setVisible] = useState(false);
  const [saving, setSaving] = useState(false);
  const [saved, setSaved] = useState(false);
  const [deleting, setDeleting] = useState(false);
  const [confirmDelete, setConfirmDelete] = useState(false);

  const handleSave = async () => {
    // The Save button is disabled while saving, but Enter in the input is
    // not, so the in-flight check is repeated here to prevent a second
    // concurrent save.
    if (!value.trim() || saving) return;
    setSaving(true);
    try {
      await onSave(entry.provider, value);
      setSaved(true);
      // Do not keep the secret in component state once it has been sent.
      setValue('');
      setTimeout(() => setSaved(false), 3000);
    } finally {
      setSaving(false);
    }
  };

  const handleDelete = async () => {
    // First click arms the confirmation for three seconds; second click deletes.
    if (!confirmDelete) {
      setConfirmDelete(true);
      setTimeout(() => setConfirmDelete(false), 3000);
      return;
    }
    setDeleting(true);
    try {
      await onDelete(entry.provider);
      setValue('');
      setConfirmDelete(false);
    } finally {
      setDeleting(false);
    }
  };

  return (
    <div className="settings-row">
      <div className="settings-row-info">
        <div className="settings-row-label">{entry.label}</div>
        <div className="settings-row-desc">
          {entry.connected ? (
            <span style={{ color: 'var(--accent-green)', display: 'flex', alignItems: 'center', gap: 4 }}>
              <CheckCircle size={12} /> Connected
            </span>
          ) : (
            <span style={{ color: 'var(--text-muted)' }}>No key set</span>
          )}
        </div>
      </div>
      <div className="settings-key-input-wrapper">
        <div style={{ position: 'relative', flex: 1 }}>
          <input
            type={visible ? 'text' : 'password'}
            value={value}
            onChange={(e) => setValue(e.target.value)}
            placeholder={entry.placeholder}
            onKeyDown={(e) => e.key === 'Enter' && handleSave()}
            style={{ paddingRight: '40px' }}
          />
          {/* Icon-only control: explicit type so it never submits an enclosing
              form, and an accessible name because it has no text content. */}
          <button
            type="button"
            aria-label={visible ? 'Hide key' : 'Show key'}
            onClick={() => setVisible((v) => !v)}
            style={{
              position: 'absolute', right: 10, top: '50%', transform: 'translateY(-50%)',
              background: 'none', border: 'none', cursor: 'pointer',
              color: 'var(--text-muted)', display: 'flex',
            }}
          >
            {visible ? <EyeOff size={14} /> : <Eye size={14} />}
          </button>
        </div>
        <button
          className="btn btn-ghost btn-sm"
          onClick={handleSave}
          disabled={saving || !value.trim()}
          style={saved ? { borderColor: 'var(--accent-green)', color: 'var(--accent-green)' } : {}}
        >
          {saving ? <span className="spinner" /> : saved ? '✓ Saved' : 'Save'}
        </button>
        {entry.connected && (
          <button
            className="btn btn-ghost btn-sm"
            onClick={handleDelete}
            disabled={deleting}
            style={{
              borderColor: confirmDelete ? 'var(--accent-red)' : 'var(--bg-border)',
              color: 'var(--accent-red)',
            }}
          >
            {deleting ? <span className="spinner" /> : confirmDelete ? 'Sure?' : 'Clear'}
          </button>
        )}
      </div>
    </div>
  );
}
128
+
129
/** Props for {@link SliderRow}. */
interface SliderRowProps {
  /** Row title. */
  label: string;
  /** Secondary text shown under the title. */
  desc: string;
  /** Current slider value. */
  value: number;
  /** Called with the parsed numeric value whenever the slider moves. */
  onChange: (v: number) => void;
  /** Lower bound of the range input. */
  min?: number;
  /** Upper bound of the range input. */
  max?: number;
  /** Increment of the range input. */
  step?: number;
  /** Text appended to the displayed value. */
  unit?: string;
}
139
+
140
/**
 * Settings row with a title, description, the current value printed to two
 * decimals, and a full-width range input underneath.
 *
 * Defaults: `min` 0, `max` 1, `step` 0.01, `unit` empty.
 */
function SliderRow(props: SliderRowProps) {
  const { label, desc, value, onChange } = props;
  const { min = 0, max = 1, step = 0.01, unit = '' } = props;

  const readout = (
    <span style={{ fontFamily: 'JetBrains Mono, monospace', fontSize: 'var(--text-sm)', color: 'var(--accent-cyan)', minWidth: 48, textAlign: 'right' }}>
      {value.toFixed(2)}{unit}
    </span>
  );

  return (
    <div className="settings-row" style={{ flexDirection: 'column', alignItems: 'flex-start', gap: 'var(--sp-3)' }}>
      <div style={{ display: 'flex', justifyContent: 'space-between', width: '100%', alignItems: 'center' }}>
        <div>
          <div className="settings-row-label">{label}</div>
          <div className="settings-row-desc">{desc}</div>
        </div>
        {readout}
      </div>
      <input
        type="range"
        min={min}
        max={max}
        step={step}
        value={value}
        onChange={(event) => {
          // Range inputs report their value as a string.
          onChange(parseFloat(event.target.value));
        }}
        style={{ width: '100%' }}
      />
    </div>
  );
}
164
+
165
/**
 * Settings row with a title, description and an on/off switch.
 *
 * @param label - Row title.
 * @param desc - Secondary text shown under the title.
 * @param value - Current switch state.
 * @param onChange - Called with the new checked state.
 */
function ToggleRow(props: {
  label: string;
  desc: string;
  value: boolean;
  onChange: (v: boolean) => void;
}) {
  const { label, desc, value, onChange } = props;

  return (
    <div className="settings-row">
      <div className="settings-row-info">
        <div className="settings-row-label">{label}</div>
        <div className="settings-row-desc">{desc}</div>
      </div>
      <label className="toggle-switch">
        <input
          type="checkbox"
          checked={value}
          onChange={(event) => {
            onChange(event.target.checked);
          }}
        />
        <span className="toggle-slider" />
      </label>
    </div>
  );
}
183
+
184
/**
 * Settings page: provider API keys, the three budget-optimization weights,
 * prompt compression, LLM-as-judge evaluation and the Redis URL.
 *
 * All values except the keys themselves are read from and written to the app
 * store; keys are sent through the `api` client and only the list of
 * connected providers is kept locally.
 */
export default function SettingsPage() {
  const {
    connectedProviders,
    setConnectedProviders,
    alphaWeight,
    setAlphaWeight,
    betaWeight,
    setBetaWeight,
    gammaWeight,
    setGammaWeight,
    compressionEnabled,
    setCompressionEnabled,
    compressionThreshold,
    setCompressionThreshold,
    evaluationEnabled,
    setEvaluationEnabled,
    redisUrl,
    setRedisUrl,
  } = useAppStore();

  useEffect(() => {
    // Refresh connected providers on mount. Best effort: a failure here just
    // leaves the previously known list in place.
    api.getKeys().then((d) => setConnectedProviders(d.connected_providers)).catch(() => {});
  }, [setConnectedProviders]);

  // Re-reads the connected-provider list after a key was saved or removed.
  const refreshConnectedProviders = async () => {
    const data = await api.getKeys();
    setConnectedProviders(data.connected_providers);
  };

  const handleSaveKey = async (provider: string, key: string) => {
    await api.updateKeys({ [provider]: key });
    await refreshConnectedProviders();
  };

  const handleDeleteKey = async (provider: string) => {
    await api.deleteKey(provider);
    await refreshConnectedProviders();
  };

  const providerEntries: KeyEntry[] = PROVIDERS.map((p) => ({
    ...p,
    connected: connectedProviders.includes(p.provider),
  }));

  const totalWeight = alphaWeight + betaWeight + gammaWeight;
  const formulaDisplay = `score = ${alphaWeight.toFixed(2)}·cost + ${betaWeight.toFixed(2)}·tokens + ${gammaWeight.toFixed(2)}·quality`;
  // Warn when the weights are more than 0.01 away from summing to 1.
  const weightsUnbalanced = Math.abs(totalWeight - 1) > 0.01;

  return (
    <div style={{ display: 'flex', flexDirection: 'column', flex: 1, minHeight: 0, overflow: 'hidden' }}>
      <div className="topbar">
        <div className="topbar-breadcrumb">
          <Settings size={14} style={{ color: 'var(--accent-cyan)' }} />
          <strong>Settings</strong>
          <span style={{ color: 'var(--text-muted)' }}>/ Configuration</span>
        </div>
      </div>

      <div className="page-content" style={{ display: 'flex', flexDirection: 'column', gap: 'var(--sp-4)', flex: 1, overflowY: 'auto' }}>
        {/* API Keys */}
        <div className="settings-section">
          <div className="settings-section-header">
            <div className="settings-section-title">API Keys</div>
            <div className="settings-section-desc">Provider credentials are encrypted and stored server-side in your session</div>
          </div>
          {providerEntries.map((entry) => (
            <ApiKeyRow key={entry.provider} entry={entry} onSave={handleSaveKey} onDelete={handleDeleteKey} />
          ))}
        </div>

        {/* Budget Weights */}
        <div className="settings-section">
          <div className="settings-section-header">
            <div className="settings-section-title">Budget Optimization Weights</div>
            <div className="settings-section-desc">
              Control how the routing optimizer balances cost, tokens, and quality
            </div>
          </div>
          <div style={{ padding: 'var(--sp-4) var(--sp-5)' }}>
            <div
              style={{
                background: 'var(--bg-base)',
                border: '1px solid var(--bg-border)',
                borderRadius: 'var(--radius-md)',
                padding: 'var(--sp-3) var(--sp-4)',
                fontFamily: 'Fira Code, monospace',
                fontSize: 'var(--text-sm)',
                color: 'var(--accent-cyan)',
                marginBottom: 'var(--sp-4)',
              }}
            >
              {formulaDisplay}
              {weightsUnbalanced && (
                <span style={{ color: 'var(--accent-amber)', marginLeft: 12 }}>
                  ⚠ sum = {totalWeight.toFixed(2)} (should be 1.0)
                </span>
              )}
            </div>
          </div>
          <SliderRow label="α — Cost Weight" desc="Penalize expensive routes" value={alphaWeight} onChange={setAlphaWeight} />
          <SliderRow label="β — Token Weight" desc="Penalize high token usage" value={betaWeight} onChange={setBetaWeight} />
          <SliderRow label="γ — Quality Weight" desc="Reward high-capability models" value={gammaWeight} onChange={setGammaWeight} />
        </div>

        {/* Compression */}
        <div className="settings-section">
          <div className="settings-section-header">
            <div className="settings-section-title">Prompt Compression</div>
            <div className="settings-section-desc">Automatically compress prompts to reduce token usage and cost</div>
          </div>
          <ToggleRow
            label="Enable Compression"
            desc="Apply LLM-based prompt compression before routing"
            value={compressionEnabled}
            onChange={setCompressionEnabled}
          />
          {/* The threshold only matters while compression is on. */}
          {compressionEnabled && (
            <SliderRow
              label="Compression Threshold"
              desc="Minimum compression ratio to apply (lower = more aggressive)"
              value={compressionThreshold}
              min={0.05}
              max={0.5}
              step={0.01}
              onChange={setCompressionThreshold}
              unit=" ratio"
            />
          )}
        </div>

        {/* Evaluation */}
        <div className="settings-section">
          <div className="settings-section-header">
            <div className="settings-section-title">LLM-as-Judge Evaluation</div>
            <div className="settings-section-desc">Use a secondary LLM to evaluate response quality (adds cost)</div>
          </div>
          <ToggleRow
            label="Enable Evaluation"
            desc="Score each response using an independent judge model"
            value={evaluationEnabled}
            onChange={setEvaluationEnabled}
          />
        </div>

        {/* Infrastructure */}
        <div className="settings-section">
          <div className="settings-section-header">
            <div className="settings-section-title">Infrastructure</div>
            <div className="settings-section-desc">Connection settings for cache and queue</div>
          </div>
          <div className="settings-row">
            <div className="settings-row-info">
              <div className="settings-row-label">Redis URL</div>
              <div className="settings-row-desc">Used for session storage and response caching</div>
            </div>
            <div className="settings-key-input-wrapper">
              <input
                type="text"
                value={redisUrl}
                onChange={(e) => setRedisUrl(e.target.value)}
                placeholder="redis://localhost:6379"
              />
            </div>
          </div>
        </div>
      </div>
    </div>
  );
}
frontend/src/store.ts ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { create } from 'zustand';
2
+ import type { BudgetMode, GenerateResponse, HealthStatus } from './types';
3
+
4
+ const getLocal = <T>(key: string, def: T): T => { // Read a persisted setting; returns def when the key is absent or the stored value has the wrong type
5
+ const val = localStorage.getItem(key);
6
+ if (val === null) return def;
7
+ try {
8
+ const parsed: unknown = JSON.parse(val);
9
+ if (typeof parsed === typeof def) return parsed as T; // only accept a value of the same primitive type as the default
10
+ } catch { /* not JSON: a plain stored string, handled below */ }
11
+ return typeof def === 'string' ? (val as unknown as T) : def; // raw text is valid only for string settings; never hand back a mistyped value
12
+ };
13
+
14
+ interface AuthState {
15
+ isLoggedIn: boolean;
16
+ sessionId: string | null;
17
+ }
18
+
19
+ interface AppState {
20
+ auth: AuthState;
21
+ health: HealthStatus;
22
+ budgetMode: BudgetMode;
23
+ connectedProviders: string[];
24
+ sidebarCollapsed: boolean;
25
+ setBudgetMode: (mode: BudgetMode) => void;
26
+ setAuth: (auth: Partial<AuthState>) => void;
27
+ setHealth: (health: Partial<HealthStatus>) => void;
28
+ setConnectedProviders: (providers: string[]) => void;
29
+ setSidebarCollapsed: (v: boolean) => void;
30
+ lastResult: GenerateResponse | null;
31
+ setLastResult: (r: GenerateResponse | null) => void;
32
+
33
+ // Settings
34
+ alphaWeight: number;
35
+ betaWeight: number;
36
+ gammaWeight: number;
37
+ compressionEnabled: boolean;
38
+ compressionThreshold: number;
39
+ evaluationEnabled: boolean;
40
+ redisUrl: string;
41
+ setAlphaWeight: (v: number) => void;
42
+ setBetaWeight: (v: number) => void;
43
+ setGammaWeight: (v: number) => void;
44
+ setCompressionEnabled: (v: boolean) => void;
45
+ setCompressionThreshold: (v: number) => void;
46
+ setEvaluationEnabled: (v: boolean) => void;
47
+ setRedisUrl: (v: string) => void;
48
+ }
49
+
50
+ export const useAppStore = create<AppState>((set) => ({
51
+ auth: {
52
+ isLoggedIn: false,
53
+ sessionId: null,
54
+ },
55
+ health: {
56
+ redis: 'unknown',
57
+ ml_deps: 'unknown',
58
+ api: 'ok',
59
+ },
60
+ budgetMode: 'balanced',
61
+ connectedProviders: [],
62
+ sidebarCollapsed: false,
63
+ lastResult: null,
64
+ setBudgetMode: (mode) => set({ budgetMode: mode }),
65
+ setAuth: (auth) => set((s) => ({ auth: { ...s.auth, ...auth } })),
66
+ setHealth: (health) => set((s) => ({ health: { ...s.health, ...health } })),
67
+ setConnectedProviders: (providers) => set({ connectedProviders: providers }),
68
+ setSidebarCollapsed: (v) => set({ sidebarCollapsed: v }),
69
+ setLastResult: (r) => set({ lastResult: r }),
70
+
71
+ // Settings initial state from localStorage
72
+ alphaWeight: getLocal('llmopt_alpha', 0.4),
73
+ betaWeight: getLocal('llmopt_beta', 0.3),
74
+ gammaWeight: getLocal('llmopt_gamma', 0.3),
75
+ compressionEnabled: getLocal('llmopt_compression', true),
76
+ compressionThreshold: getLocal('llmopt_compression_threshold', 0.15),
77
+ evaluationEnabled: getLocal('llmopt_evaluation', false),
78
+ redisUrl: getLocal('llmopt_redis_url', 'redis://localhost:6379'),
79
+
80
+ // Settings setters
81
+ setAlphaWeight: (v) => { localStorage.setItem('llmopt_alpha', JSON.stringify(v)); set({ alphaWeight: v }); },
82
+ setBetaWeight: (v) => { localStorage.setItem('llmopt_beta', JSON.stringify(v)); set({ betaWeight: v }); },
83
+ setGammaWeight: (v) => { localStorage.setItem('llmopt_gamma', JSON.stringify(v)); set({ gammaWeight: v }); },
84
+ setCompressionEnabled: (v) => { localStorage.setItem('llmopt_compression', JSON.stringify(v)); set({ compressionEnabled: v }); },
85
+ setCompressionThreshold: (v) => { localStorage.setItem('llmopt_compression_threshold', JSON.stringify(v)); set({ compressionThreshold: v }); },
86
+ setEvaluationEnabled: (v) => { localStorage.setItem('llmopt_evaluation', JSON.stringify(v)); set({ evaluationEnabled: v }); },
87
+ setRedisUrl: (v) => { localStorage.setItem('llmopt_redis_url', JSON.stringify(v)); set({ redisUrl: v }); }, // JSON-encode like the other settings so input such as "123" or "true" is read back as a string, not a number/boolean
88
+ }));
frontend/src/theme.css ADDED
@@ -0,0 +1,1982 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* ============================================================
2
+ LLMOpt Design System — CSS Custom Properties
3
+ Aesthetic: Dark Industrial Dashboard ("Bloomberg meets cyberpunk")
4
+ ============================================================ */
5
+
6
+ /* ---- Google Fonts (loaded in index.html) ---- */
7
+
8
+ :root {
9
+ /* Backgrounds */
10
+ --bg-base: #0A0B0E;
11
+ --bg-surface: #111318;
12
+ --bg-elevated: #1A1D26;
13
+ --bg-border: #252A38;
14
+
15
+ /* Accents */
16
+ --accent-cyan: #00E5FF;
17
+ --accent-green: #00FF94;
18
+ --accent-amber: #FFB300;
19
+ --accent-red: #FF3D57;
20
+ --accent-purple: #7C4DFF;
21
+
22
+ /* Text */
23
+ --text-primary: #E8ECF4;
24
+ --text-secondary: #C5CBE0;
25
+ --text-muted: #9098B0;
26
+
27
+ /* Gradients */
28
+ --gradient-glow: linear-gradient(135deg, #00E5FF22, #7C4DFF11);
29
+ --gradient-card: linear-gradient(145deg, #111318, #1A1D26);
30
+
31
+ /* Typography sizes */
32
+ --text-xs: 11px;
33
+ --text-sm: 13px;
34
+ --text-base: 15px;
35
+ --text-lg: 18px;
36
+ --text-xl: 24px;
37
+ --text-2xl: 32px;
38
+ --text-3xl: 48px;
39
+
40
+ /* Spacing */
41
+ --sp-1: 4px;
42
+ --sp-2: 8px;
43
+ --sp-3: 12px;
44
+ --sp-4: 16px;
45
+ --sp-5: 20px;
46
+ --sp-6: 24px;
47
+ --sp-8: 32px;
48
+ --sp-10: 40px;
49
+ --sp-12: 48px;
50
+
51
+ /* Border radius */
52
+ --radius-sm: 4px;
53
+ --radius-md: 8px;
54
+ --radius-lg: 12px;
55
+ --radius-xl: 16px;
56
+ --radius-full: 9999px;
57
+
58
+ /* Transitions */
59
+ --transition-fast: 150ms ease;
60
+ --transition-normal: 250ms ease;
61
+ --transition-slow: 400ms ease;
62
+
63
+ /* Z-index layers */
64
+ --z-sidebar: 100;
65
+ --z-modal: 200;
66
+ --z-toast: 300;
67
+
68
+ /* Sidebar */
69
+ --sidebar-w: 280px;
70
+ --sidebar-w-collapsed: 56px;
71
+ }
72
+
73
+ /* ============================================================
74
+ Reset & Base
75
+ ============================================================ */
76
+
77
+ *, *::before, *::after {
78
+ box-sizing: border-box;
79
+ margin: 0;
80
+ padding: 0;
81
+ }
82
+
83
+ html {
84
+ font-size: 16px;
85
+ scroll-behavior: smooth;
86
+ }
87
+
88
+ body {
89
+ background-color: var(--bg-base);
90
+ color: var(--text-primary);
91
+ font-family: 'DM Sans', -apple-system, BlinkMacSystemFont, sans-serif;
92
+ font-size: var(--text-base);
93
+ line-height: 1.6;
94
+ -webkit-font-smoothing: antialiased;
95
+ -moz-osx-font-smoothing: grayscale;
96
+ overflow-x: hidden;
97
+ }
98
+
99
+ /* ============================================================
100
+ Typography
101
+ ============================================================ */
102
+
103
+ h1, h2, h3, h4, h5, h6 {
104
+ font-family: 'JetBrains Mono', monospace;
105
+ line-height: 1.2;
106
+ letter-spacing: -0.02em;
107
+ }
108
+
109
+ code, pre, .mono {
110
+ font-family: 'Fira Code', 'JetBrains Mono', monospace;
111
+ }
112
+
113
+ /* ============================================================
114
+ Scrollbar
115
+ ============================================================ */
116
+
117
+ ::-webkit-scrollbar { width: 6px; height: 6px; }
118
+ ::-webkit-scrollbar-track { background: var(--bg-base); }
119
+ ::-webkit-scrollbar-thumb { background: var(--bg-border); border-radius: 3px; }
120
+ ::-webkit-scrollbar-thumb:hover { background: var(--text-muted); }
121
+
122
+ /* ============================================================
123
+ Layout
124
+ ============================================================ */
125
+
126
+ .app-layout {
127
+ display: flex;
128
+ height: 100vh;
129
+ overflow: hidden;
130
+ }
131
+
132
+ .main-content {
133
+ flex: 1;
134
+ display: flex;
135
+ flex-direction: column;
136
+ overflow: hidden;
137
+ margin-left: var(--sidebar-w);
138
+ transition: margin-left var(--transition-normal);
139
+ }
140
+
141
+ .main-content.sidebar-collapsed {
142
+ margin-left: var(--sidebar-w-collapsed);
143
+ }
144
+
145
+ .page-content {
146
+ flex: 1;
147
+ overflow-y: auto;
148
+ padding: var(--sp-6);
149
+ background: var(--bg-base);
150
+ }
151
+
152
+ /* ============================================================
153
+ Sidebar
154
+ ============================================================ */
155
+
156
+ .sidebar {
157
+ width: var(--sidebar-w);
158
+ min-width: var(--sidebar-w);
159
+ background: var(--bg-surface);
160
+ border-right: 1px solid var(--bg-border);
161
+ display: flex;
162
+ flex-direction: column;
163
+ position: fixed;
164
+ left: 0;
165
+ top: 0;
166
+ height: 100vh;
167
+ z-index: var(--z-sidebar);
168
+ transition: width var(--transition-normal), min-width var(--transition-normal);
169
+ overflow: hidden;
170
+ }
171
+
172
+ .sidebar.collapsed {
173
+ width: var(--sidebar-w-collapsed);
174
+ min-width: var(--sidebar-w-collapsed);
175
+ }
176
+
177
+ .sidebar-logo {
178
+ display: flex;
179
+ align-items: center;
180
+ gap: var(--sp-3);
181
+ padding: var(--sp-5) var(--sp-5);
182
+ border-bottom: 1px solid var(--bg-border);
183
+ min-height: 64px;
184
+ }
185
+
186
+ .sidebar-logo-icon {
187
+ color: var(--accent-cyan);
188
+ font-size: 22px;
189
+ flex-shrink: 0;
190
+ display: flex;
191
+ align-items: center;
192
+ }
193
+
194
+ .sidebar-logo-text {
195
+ font-family: 'JetBrains Mono', monospace;
196
+ font-size: var(--text-lg);
197
+ font-weight: 700;
198
+ color: var(--text-primary);
199
+ white-space: nowrap;
200
+ }
201
+
202
+ .sidebar-logo-text span {
203
+ color: var(--accent-cyan);
204
+ }
205
+
206
+ .sidebar-nav {
207
+ flex: 1;
208
+ padding: var(--sp-4) var(--sp-3);
209
+ display: flex;
210
+ flex-direction: column;
211
+ gap: var(--sp-1);
212
+ }
213
+
214
+ .sidebar-nav-item {
215
+ display: flex;
216
+ align-items: center;
217
+ gap: var(--sp-3);
218
+ padding: var(--sp-3) var(--sp-3);
219
+ border-radius: var(--radius-md);
220
+ color: var(--text-secondary);
221
+ text-decoration: none;
222
+ font-size: var(--text-sm);
223
+ font-weight: 500;
224
+ transition: background var(--transition-fast), color var(--transition-fast), border-color var(--transition-fast);
225
+ cursor: pointer;
226
+ border: none;
227
+ background: transparent;
228
+ width: 100%;
229
+ text-align: left;
230
+ border-left: 2px solid transparent;
231
+ white-space: nowrap;
232
+ }
233
+
234
+ .sidebar-nav-item:hover {
235
+ background: var(--bg-elevated);
236
+ color: var(--text-primary);
237
+ }
238
+
239
+ .sidebar-nav-item.active {
240
+ background: rgba(0, 229, 255, 0.08);
241
+ color: var(--accent-cyan);
242
+ border-left-color: var(--accent-cyan);
243
+ }
244
+
245
+ .sidebar-nav-icon {
246
+ flex-shrink: 0;
247
+ width: 20px;
248
+ display: flex;
249
+ align-items: center;
250
+ justify-content: center;
251
+ }
252
+
253
+ .sidebar-section-label {
254
+ font-size: var(--text-xs);
255
+ font-weight: 600;
256
+ text-transform: uppercase;
257
+ letter-spacing: 0.1em;
258
+ color: var(--text-muted);
259
+ padding: var(--sp-4) var(--sp-3) var(--sp-2);
260
+ white-space: nowrap;
261
+ }
262
+
263
+ .sidebar-status {
264
+ padding: var(--sp-4);
265
+ border-top: 1px solid var(--bg-border);
266
+ }
267
+
268
+ .sidebar-status-title {
269
+ font-size: var(--text-xs);
270
+ font-weight: 600;
271
+ text-transform: uppercase;
272
+ letter-spacing: 0.1em;
273
+ color: var(--text-muted);
274
+ margin-bottom: var(--sp-3);
275
+ }
276
+
277
+ .sidebar-status-item {
278
+ display: flex;
279
+ align-items: center;
280
+ gap: var(--sp-2);
281
+ padding: var(--sp-1) 0;
282
+ font-size: var(--text-xs);
283
+ font-family: 'JetBrains Mono', monospace;
284
+ color: var(--text-secondary);
285
+ }
286
+
287
+ /* ============================================================
288
+ Status Dot
289
+ ============================================================ */
290
+
291
+ .dot {
292
+ width: 8px;
293
+ height: 8px;
294
+ border-radius: 50%;
295
+ flex-shrink: 0;
296
+ }
297
+
298
+ .dot-live {
299
+ background: var(--accent-green);
300
+ box-shadow: 0 0 8px var(--accent-green);
301
+ animation: pulse 2s infinite;
302
+ }
303
+
304
+ .dot-warning {
305
+ background: var(--accent-amber);
306
+ box-shadow: 0 0 8px var(--accent-amber);
307
+ }
308
+
309
+ .dot-error {
310
+ background: var(--accent-red);
311
+ box-shadow: 0 0 8px var(--accent-red);
312
+ }
313
+
314
+ .dot-muted {
315
+ background: var(--text-muted);
316
+ }
317
+
318
+ @keyframes pulse {
319
+ 0%, 100% { opacity: 1; transform: scale(1); }
320
+ 50% { opacity: 0.7; transform: scale(0.9); }
321
+ }
322
+
323
+ /* ============================================================
324
+ Metric Cards
325
+ ============================================================ */
326
+
327
+ .metric-card {
328
+ background: var(--bg-surface);
329
+ border: 1px solid var(--bg-border);
330
+ border-radius: var(--radius-lg);
331
+ padding: var(--sp-5) var(--sp-6);
332
+ position: relative;
333
+ overflow: hidden;
334
+ transition: border-color var(--transition-normal), box-shadow var(--transition-normal);
335
+ }
336
+
337
+ .metric-card:hover {
338
+ border-color: rgba(0, 229, 255, 0.3);
339
+ box-shadow: 0 0 20px rgba(0, 229, 255, 0.05);
340
+ }
341
+
342
+ .metric-card::before {
343
+ content: '';
344
+ position: absolute;
345
+ top: 0; left: 0; right: 0;
346
+ height: 2px;
347
+ }
348
+
349
+ .metric-card.cyan::before { background: var(--accent-cyan); }
350
+ .metric-card.green::before { background: var(--accent-green); }
351
+ .metric-card.amber::before { background: var(--accent-amber); }
352
+ .metric-card.purple::before { background: var(--accent-purple); }
353
+
354
+ .metric-card-label {
355
+ font-size: var(--text-xs);
356
+ font-weight: 600;
357
+ text-transform: uppercase;
358
+ letter-spacing: 0.1em;
359
+ color: var(--text-secondary);
360
+ margin-bottom: var(--sp-2);
361
+ }
362
+
363
+ .metric-card-value {
364
+ font-family: 'JetBrains Mono', monospace;
365
+ font-size: var(--text-2xl);
366
+ font-weight: 700;
367
+ color: var(--text-primary);
368
+ line-height: 1;
369
+ margin-bottom: var(--sp-2);
370
+ }
371
+
372
+ .metric-card-delta {
373
+ font-size: var(--text-xs);
374
+ font-weight: 600;
375
+ display: flex;
376
+ align-items: center;
377
+ gap: 4px;
378
+ }
379
+
380
+ .delta-up { color: var(--accent-green); }
381
+ .delta-down { color: var(--accent-red); }
382
+ .delta-neutral { color: var(--text-secondary); }
383
+
384
+ /* ============================================================
385
+ Buttons
386
+ ============================================================ */
387
+
388
+ .btn {
389
+ display: inline-flex;
390
+ align-items: center;
391
+ justify-content: center;
392
+ gap: var(--sp-2);
393
+ padding: var(--sp-2) var(--sp-5);
394
+ border-radius: var(--radius-md);
395
+ font-family: 'JetBrains Mono', monospace;
396
+ font-size: var(--text-sm);
397
+ font-weight: 600;
398
+ letter-spacing: 0.05em;
399
+ cursor: pointer;
400
+ border: none;
401
+ transition: all var(--transition-fast);
402
+ text-transform: uppercase;
403
+ white-space: nowrap;
404
+ }
405
+
406
+ .btn:disabled {
407
+ opacity: 0.4;
408
+ cursor: not-allowed;
409
+ }
410
+
411
+ .btn-primary {
412
+ background: var(--accent-cyan);
413
+ color: #000;
414
+ }
415
+
416
+ .btn-primary:hover:not(:disabled) {
417
+ background: #33EAFF;
418
+ box-shadow: 0 0 20px rgba(0, 229, 255, 0.4);
419
+ transform: scale(1.02);
420
+ }
421
+
422
+ .btn-ghost {
423
+ background: transparent;
424
+ color: var(--text-secondary);
425
+ border: 1px solid var(--bg-border);
426
+ }
427
+
428
+ .btn-ghost:hover:not(:disabled) {
429
+ border-color: var(--accent-cyan);
430
+ color: var(--accent-cyan);
431
+ }
432
+
433
+ .btn-danger {
434
+ background: rgba(255, 61, 87, 0.1);
435
+ color: var(--accent-red);
436
+ border: 1px solid rgba(255, 61, 87, 0.3);
437
+ }
438
+
439
+ .btn-danger:hover:not(:disabled) {
440
+ background: var(--accent-red);
441
+ color: #fff;
442
+ }
443
+
444
+ .btn-sm {
445
+ padding: 6px 12px;
446
+ font-size: var(--text-xs);
447
+ }
448
+
449
+ .btn-lg {
450
+ padding: var(--sp-3) var(--sp-8);
451
+ font-size: var(--text-base);
452
+ }
453
+
454
+ /* ============================================================
455
+ Budget Pills
456
+ ============================================================ */
457
+
458
+ .budget-pills {
459
+ display: flex;
460
+ gap: var(--sp-2);
461
+ }
462
+
463
+ .budget-pill {
464
+ padding: 6px 14px;
465
+ border-radius: var(--radius-full);
466
+ font-family: 'JetBrains Mono', monospace;
467
+ font-size: var(--text-xs);
468
+ font-weight: 700;
469
+ letter-spacing: 0.08em;
470
+ cursor: pointer;
471
+ border: 1px solid var(--bg-border);
472
+ background: var(--bg-elevated);
473
+ color: var(--text-secondary);
474
+ transition: all var(--transition-fast);
475
+ text-transform: uppercase;
476
+ }
477
+
478
+ .budget-pill:hover {
479
+ border-color: var(--text-secondary);
480
+ color: var(--text-primary);
481
+ }
482
+
483
+ .budget-pill.cheap.active { background: rgba(0, 255, 148, 0.15); border-color: var(--accent-green); color: var(--accent-green); }
484
+ .budget-pill.balanced.active { background: rgba(255, 179, 0, 0.15); border-color: var(--accent-amber); color: var(--accent-amber); }
485
+ .budget-pill.quality.active { background: rgba(0, 229, 255, 0.15); border-color: var(--accent-cyan); color: var(--accent-cyan); }
486
+
487
+ /* ============================================================
488
+ Form elements
489
+ ============================================================ */
490
+
491
+ .input-group {
492
+ display: flex;
493
+ flex-direction: column;
494
+ gap: var(--sp-2);
495
+ }
496
+
497
+ .input-label {
498
+ font-size: var(--text-xs);
499
+ font-weight: 600;
500
+ text-transform: uppercase;
501
+ letter-spacing: 0.08em;
502
+ color: var(--text-secondary);
503
+ }
504
+
505
+ input[type="text"],
506
+ input[type="email"],
507
+ input[type="password"],
508
+ input[type="url"],
509
+ input[type="number"],
510
+ textarea,
511
+ select {
512
+ width: 100%;
513
+ background: var(--bg-elevated);
514
+ border: 1px solid var(--bg-border);
515
+ border-radius: var(--radius-md);
516
+ color: var(--text-primary);
517
+ font-family: 'JetBrains Mono', monospace;
518
+ font-size: var(--text-sm);
519
+ padding: var(--sp-3) var(--sp-4);
520
+ transition: border-color var(--transition-fast), box-shadow var(--transition-fast);
521
+ outline: none;
522
+ resize: vertical;
523
+ }
524
+
525
+ input[type="text"]:focus,
526
+ input[type="email"]:focus,
527
+ input[type="password"]:focus,
528
+ input[type="url"]:focus,
529
+ input[type="number"]:focus,
530
+ textarea:focus,
531
+ select:focus {
532
+ border-color: var(--accent-cyan);
533
+ box-shadow: 0 0 0 3px rgba(0, 229, 255, 0.1);
534
+ }
535
+
536
+ input::placeholder,
537
+ textarea::placeholder {
538
+ color: var(--text-muted);
539
+ }
540
+
541
+ select {
542
+ cursor: pointer;
543
+ appearance: none;
544
+ background-image: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' fill='none' viewBox='0 0 24 24' stroke='%237A8299'%3E%3Cpath stroke-linecap='round' stroke-linejoin='round' stroke-width='2' d='M19 9l-7 7-7-7'/%3E%3C/svg%3E");
545
+ background-repeat: no-repeat;
546
+ background-position: right 12px center;
547
+ background-size: 16px;
548
+ padding-right: 40px;
549
+ }
550
+
551
+ /* ============================================================
552
+ Toggle Switch
553
+ ============================================================ */
554
+
555
+ .toggle-switch {
556
+ position: relative;
557
+ display: inline-block;
558
+ width: 44px;
559
+ height: 24px;
560
+ flex-shrink: 0;
561
+ }
562
+
563
+ .toggle-switch input {
564
+ opacity: 0;
565
+ width: 0;
566
+ height: 0;
567
+ }
568
+
569
+ .toggle-slider {
570
+ position: absolute;
571
+ cursor: pointer;
572
+ top: 0; left: 0; right: 0; bottom: 0;
573
+ background: var(--bg-border);
574
+ border-radius: var(--radius-full);
575
+ transition: var(--transition-fast);
576
+ }
577
+
578
+ .toggle-slider::before {
579
+ content: '';
580
+ position: absolute;
581
+ height: 18px;
582
+ width: 18px;
583
+ left: 3px;
584
+ bottom: 3px;
585
+ background: var(--text-secondary);
586
+ border-radius: 50%;
587
+ transition: var(--transition-fast);
588
+ }
589
+
590
+ .toggle-switch input:checked + .toggle-slider {
591
+ background: rgba(0, 229, 255, 0.2);
592
+ border: 1px solid var(--accent-cyan);
593
+ }
594
+
595
+ .toggle-switch input:checked + .toggle-slider::before {
596
+ transform: translateX(20px);
597
+ background: var(--accent-cyan);
598
+ }
599
+
600
+ /* ============================================================
601
+ Range Slider
602
+ ============================================================ */
603
+
604
+ input[type="range"] {
605
+ -webkit-appearance: none;
606
+ appearance: none;
607
+ width: 100%;
608
+ height: 4px;
609
+ background: var(--bg-border);
610
+ border-radius: 2px;
611
+ outline: none;
612
+ padding: 0;
613
+ }
614
+
615
+ input[type="range"]::-webkit-slider-thumb {
616
+ -webkit-appearance: none;
617
+ appearance: none;
618
+ width: 16px;
619
+ height: 16px;
620
+ border-radius: 50%;
621
+ background: var(--accent-cyan);
622
+ cursor: pointer;
623
+ box-shadow: 0 0 8px rgba(0, 229, 255, 0.5);
624
+ }
625
+
626
+ input[type="range"]::-moz-range-thumb {
627
+ width: 16px;
628
+ height: 16px;
629
+ border-radius: 50%;
630
+ background: var(--accent-cyan);
631
+ cursor: pointer;
632
+ border: none;
633
+ }
634
+
635
+ /* ============================================================
636
+ Pipeline Visualizer
637
+ ============================================================ */
638
+
639
+ .pipeline-wrapper {
640
+ background: var(--bg-surface);
641
+ border: 1px solid var(--bg-border);
642
+ border-radius: var(--radius-lg);
643
+ padding: var(--sp-5);
644
+ overflow-x: auto;
645
+ }
646
+
647
+ .pipeline-stages {
648
+ display: flex;
649
+ align-items: center;
650
+ gap: 0;
651
+ min-width: max-content;
652
+ padding: var(--sp-2) 0;
653
+ }
654
+
655
+ .pipeline-stage {
656
+ display: flex;
657
+ flex-direction: column;
658
+ align-items: center;
659
+ gap: var(--sp-2);
660
+ min-width: 90px;
661
+ }
662
+
663
+ .pipeline-node {
664
+ display: flex;
665
+ flex-direction: column;
666
+ align-items: center;
667
+ justify-content: center;
668
+ gap: 4px;
669
+ width: 80px;
670
+ height: 64px;
671
+ border-radius: var(--radius-md);
672
+ border: 1px solid var(--bg-border);
673
+ background: var(--bg-elevated);
674
+ color: var(--text-muted);
675
+ font-size: var(--text-xs);
676
+ font-family: 'JetBrains Mono', monospace;
677
+ font-weight: 600;
678
+ text-transform: uppercase;
679
+ letter-spacing: 0.05em;
680
+ transition: all var(--transition-normal);
681
+ position: relative;
682
+ cursor: default;
683
+ text-align: center;
684
+ }
685
+
686
+ .pipeline-node-icon {
687
+ font-size: 18px;
688
+ line-height: 1;
689
+ }
690
+
691
+ .pipeline-node.active {
692
+ border-color: var(--accent-cyan);
693
+ box-shadow: 0 0 20px rgba(0, 229, 255, 0.3);
694
+ color: var(--accent-cyan);
695
+ animation: nodePulse 1s ease-in-out infinite;
696
+ }
697
+
698
+ .pipeline-node.complete {
699
+ border-color: var(--accent-green);
700
+ background: rgba(0, 255, 148, 0.08);
701
+ color: var(--accent-green);
702
+ }
703
+
704
+ .pipeline-node.skipped {
705
+ border-color: var(--accent-amber);
706
+ background: rgba(255, 179, 0, 0.08);
707
+ color: var(--accent-amber);
708
+ }
709
+
710
+ .pipeline-node.error {
711
+ border-color: var(--accent-red);
712
+ background: rgba(255, 61, 87, 0.08);
713
+ color: var(--accent-red);
714
+ }
715
+
716
+ .pipeline-latency {
717
+ font-size: var(--text-xs);
718
+ font-family: 'JetBrains Mono', monospace;
719
+ color: var(--text-muted);
720
+ min-height: 16px;
721
+ }
722
+
723
+ .pipeline-latency.visible {
724
+ color: var(--accent-green);
725
+ }
726
+
727
+ .pipeline-connector {
728
+ flex: 1;
729
+ height: 2px;
730
+ background: var(--bg-border);
731
+ position: relative;
732
+ min-width: 20px;
733
+ overflow: hidden;
734
+ }
735
+
736
+ .pipeline-connector-fill {
737
+ position: absolute;
738
+ left: 0; top: 0; bottom: 0;
739
+ background: var(--accent-cyan);
740
+ transition: width 0.3s ease;
741
+ box-shadow: 0 0 6px var(--accent-cyan);
742
+ }
743
+
744
+ @keyframes nodePulse {
745
+ 0%, 100% { box-shadow: 0 0 10px rgba(0, 229, 255, 0.3); }
746
+ 50% { box-shadow: 0 0 25px rgba(0, 229, 255, 0.6); }
747
+ }
748
+
749
+ /* ============================================================
750
+ Query Input Area
751
+ ============================================================ */
752
+
753
+ .query-box-wrapper {
754
+ background: var(--bg-surface);
755
+ border: 1px solid var(--bg-border);
756
+ border-radius: var(--radius-lg);
757
+ overflow: hidden;
758
+ transition: border-color var(--transition-fast), box-shadow var(--transition-fast);
759
+ }
760
+
761
+ .query-box-wrapper:focus-within {
762
+ border-color: var(--accent-cyan);
763
+ box-shadow: 0 0 0 3px rgba(0, 229, 255, 0.1);
764
+ }
765
+
766
+ .query-textarea {
767
+ width: 100%;
768
+ background: transparent;
769
+ border: none;
770
+ padding: var(--sp-5);
771
+ color: var(--text-primary);
772
+ font-family: 'JetBrains Mono', monospace;
773
+ font-size: var(--text-base);
774
+ resize: none;
775
+ min-height: 140px;
776
+ outline: none;
777
+ line-height: 1.7;
778
+ }
779
+
780
+ .query-toolbar {
781
+ display: flex;
782
+ align-items: center;
783
+ justify-content: space-between;
784
+ padding: var(--sp-3) var(--sp-4);
785
+ border-top: 1px solid var(--bg-border);
786
+ background: var(--bg-elevated);
787
+ }
788
+
789
+ .query-toolbar-left {
790
+ display: flex;
791
+ align-items: center;
792
+ gap: var(--sp-4);
793
+ }
794
+
795
+ .query-token-count {
796
+ font-family: 'JetBrains Mono', monospace;
797
+ font-size: var(--text-xs);
798
+ color: var(--text-muted);
799
+ }
800
+
801
+ /* ============================================================
802
+ Response Panel
803
+ ============================================================ */
804
+
805
+ .response-panel {
806
+ background: var(--bg-surface);
807
+ border: 1px solid var(--bg-border);
808
+ border-radius: var(--radius-lg);
809
+ overflow: hidden;
810
+ }
811
+
812
+ .response-header {
813
+ display: flex;
814
+ align-items: center;
815
+ justify-content: space-between;
816
+ padding: var(--sp-3) var(--sp-5);
817
+ border-bottom: 1px solid var(--bg-border);
818
+ background: var(--bg-elevated);
819
+ }
820
+
821
+ .response-meta {
822
+ display: flex;
823
+ align-items: center;
824
+ gap: var(--sp-4);
825
+ }
826
+
827
+ .response-meta-item {
828
+ font-family: 'JetBrains Mono', monospace;
829
+ font-size: var(--text-xs);
830
+ color: var(--text-secondary);
831
+ }
832
+
833
+ .response-meta-item span {
834
+ color: var(--accent-cyan);
835
+ }
836
+
837
+ .response-body {
838
+ padding: var(--sp-5);
839
+ font-size: var(--text-base);
840
+ line-height: 1.8;
841
+ max-height: 500px;
842
+ overflow-y: auto;
843
+ }
844
+
845
+ .response-body p { margin-bottom: var(--sp-4); }
846
+ .response-body code {
847
+ background: var(--bg-elevated);
848
+ padding: 2px 6px;
849
+ border-radius: var(--radius-sm);
850
+ font-family: 'Fira Code', monospace;
851
+ font-size: 0.9em;
852
+ color: var(--accent-cyan);
853
+ }
854
+ .response-body pre {
855
+ background: var(--bg-elevated);
856
+ border: 1px solid var(--bg-border);
857
+ border-radius: var(--radius-md);
858
+ padding: var(--sp-4);
859
+ overflow-x: auto;
860
+ margin-bottom: var(--sp-4);
861
+ }
862
+ .response-body pre code {
863
+ background: transparent;
864
+ padding: 0;
865
+ color: var(--text-primary);
866
+ }
867
+
868
+ /* ============================================================
869
+ Explain Card
870
+ ============================================================ */
871
+
872
+ .explain-card {
873
+ background: var(--bg-elevated);
874
+ border: 1px solid var(--bg-border);
875
+ border-radius: var(--radius-md);
876
+ overflow: hidden;
877
+ }
878
+
879
+ .explain-card-header {
880
+ display: flex;
881
+ align-items: center;
882
+ justify-content: space-between;
883
+ padding: var(--sp-3) var(--sp-4);
884
+ border-bottom: 1px solid var(--bg-border);
885
+ cursor: pointer;
886
+ user-select: none;
887
+ }
888
+
889
+ .explain-card-header:hover {
890
+ background: rgba(0, 229, 255, 0.04);
891
+ }
892
+
893
+ .explain-title {
894
+ font-family: 'JetBrains Mono', monospace;
895
+ font-size: var(--text-xs);
896
+ font-weight: 700;
897
+ text-transform: uppercase;
898
+ letter-spacing: 0.1em;
899
+ color: var(--accent-cyan);
900
+ }
901
+
902
+ .explain-body {
903
+ padding: var(--sp-4);
904
+ font-family: 'JetBrains Mono', monospace;
905
+ font-size: var(--text-xs);
906
+ line-height: 2;
907
+ color: var(--text-secondary);
908
+ }
909
+
910
+ .explain-line::before {
911
+ content: '> ';
912
+ color: var(--accent-cyan);
913
+ }
914
+
915
+ .explain-line {
916
+ display: block;
917
+ }
918
+
919
+ .explain-highlight {
920
+ color: var(--accent-green);
921
+ }
922
+
923
+ /* ============================================================
924
+ Tables
925
+ ============================================================ */
926
+
927
+ .data-table {
928
+ width: 100%;
929
+ border-collapse: collapse;
930
+ font-size: var(--text-sm);
931
+ }
932
+
933
+ .data-table th {
934
+ padding: var(--sp-3) var(--sp-4);
935
+ text-align: left;
936
+ font-size: var(--text-xs);
937
+ font-weight: 600;
938
+ text-transform: uppercase;
939
+ letter-spacing: 0.08em;
940
+ color: var(--text-secondary);
941
+ border-bottom: 1px solid var(--bg-border);
942
+ white-space: nowrap;
943
+ }
944
+
945
+ .data-table td {
946
+ padding: var(--sp-3) var(--sp-4);
947
+ border-bottom: 1px solid rgba(37, 42, 56, 0.5);
948
+ vertical-align: middle;
949
+ }
950
+
951
+ .data-table tbody tr:nth-child(odd) { background: var(--bg-surface); }
952
+ .data-table tbody tr:nth-child(even) { background: var(--bg-elevated); }
953
+ .data-table tbody tr:hover { background: rgba(0, 229, 255, 0.04); cursor: pointer; }
954
+
955
+ .complexity-bar {
956
+ display: flex;
957
+ align-items: center;
958
+ gap: var(--sp-2);
959
+ }
960
+
961
+ .complexity-bar-track {
962
+ width: 60px;
963
+ height: 4px;
964
+ background: var(--bg-border);
965
+ border-radius: 2px;
966
+ overflow: hidden;
967
+ flex-shrink: 0;
968
+ }
969
+
970
+ .complexity-bar-fill {
971
+ height: 100%;
972
+ border-radius: 2px;
973
+ transition: width var(--transition-slow);
974
+ }
975
+
976
+ /* ============================================================
977
+ Badge / Pill
978
+ ============================================================ */
979
+
980
+ .badge {
981
+ display: inline-flex;
982
+ align-items: center;
983
+ padding: 2px 8px;
984
+ border-radius: var(--radius-full);
985
+ font-size: var(--text-xs);
986
+ font-weight: 600;
987
+ font-family: 'JetBrains Mono', monospace;
988
+ letter-spacing: 0.04em;
989
+ text-transform: uppercase;
990
+ }
991
+
992
+ .badge-cyan { background: rgba(0, 229, 255, 0.12); color: var(--accent-cyan); border: 1px solid rgba(0, 229, 255, 0.3); }
993
+ .badge-green { background: rgba(0, 255, 148, 0.12); color: var(--accent-green); border: 1px solid rgba(0, 255, 148, 0.3); }
994
+ .badge-amber { background: rgba(255, 179, 0, 0.12); color: var(--accent-amber); border: 1px solid rgba(255, 179, 0, 0.3); }
995
+ .badge-red { background: rgba(255, 61, 87, 0.12); color: var(--accent-red); border: 1px solid rgba(255, 61, 87, 0.3); }
996
+ .badge-purple { background: rgba(124, 77, 255, 0.12); color: var(--accent-purple); border: 1px solid rgba(124, 77, 255, 0.3); }
997
+ .badge-muted { background: rgba(61, 67, 87, 0.3); color: var(--text-secondary); border: 1px solid var(--bg-border); }
998
+
999
+ /* ============================================================
1000
+ Cards
1001
+ ============================================================ */
1002
+
1003
+ .card {
1004
+ background: var(--bg-surface);
1005
+ border: 1px solid var(--bg-border);
1006
+ border-radius: var(--radius-lg);
1007
+ overflow: hidden;
1008
+ transition: border-color var(--transition-normal), box-shadow var(--transition-normal);
1009
+ flex-shrink: 0;
1010
+ }
1011
+
1012
+ .card:hover {
1013
+ border-color: rgba(0, 229, 255, 0.2);
1014
+ box-shadow: 0 4px 20px rgba(0, 0, 0, 0.3);
1015
+ }
1016
+
1017
+ .card-header {
1018
+ padding: var(--sp-4) var(--sp-5);
1019
+ border-bottom: 1px solid var(--bg-border);
1020
+ display: flex;
1021
+ align-items: center;
1022
+ justify-content: space-between;
1023
+ }
1024
+
1025
+ .card-title {
1026
+ font-family: 'JetBrains Mono', monospace;
1027
+ font-size: var(--text-base);
1028
+ font-weight: 700;
1029
+ color: var(--text-primary);
1030
+ }
1031
+
1032
+ .card-subtitle {
1033
+ font-size: var(--text-xs);
1034
+ color: var(--text-secondary);
1035
+ margin-top: 2px;
1036
+ }
1037
+
1038
+ .card-body {
1039
+ padding: var(--sp-5);
1040
+ }
1041
+
1042
+ /* ============================================================
1043
+ Grid layouts
1044
+ ============================================================ */
1045
+
1046
+ .grid-2 { display: grid; grid-template-columns: repeat(2, 1fr); gap: var(--sp-4); }
1047
+ .grid-3 { display: grid; grid-template-columns: repeat(3, 1fr); gap: var(--sp-4); }
1048
+ .grid-4 { display: grid; grid-template-columns: repeat(4, 1fr); gap: var(--sp-4); }
1049
+
1050
+ @media (max-width: 1280px) {
1051
+ .grid-4 { grid-template-columns: repeat(2, 1fr); }
1052
+ }
1053
+
1054
+ @media (max-width: 1024px) {
1055
+ /* Settings responsive overrides - stacked at 1024px to prevent overlapping when sidebar is open */
1056
+ .settings-row {
1057
+ flex-direction: column;
1058
+ align-items: stretch;
1059
+ gap: var(--sp-3);
1060
+ }
1061
+ .settings-key-input-wrapper {
1062
+ width: 100%;
1063
+ max-width: none;
1064
+ }
1065
+ }
1066
+
1067
+ @media (max-width: 768px) {
1068
+ .grid-2,
1069
+ .grid-3,
1070
+ .grid-4 { grid-template-columns: 1fr; }
1071
+
1072
+ .main-content { margin-left: var(--sidebar-w-collapsed) !important; }
1073
+ .sidebar { width: var(--sidebar-w-collapsed) !important; }
1074
+ .sidebar-logo-text,
1075
+ .sidebar-nav-item span,
1076
+ .sidebar-section-label,
1077
+ .sidebar-status { display: none; }
1078
+
1079
+ .page-content { padding: var(--sp-4); }
1080
+ }
1081
+
1082
+ /* ============================================================
1083
+ Page headings
1084
+ ============================================================ */
1085
+
1086
+ .page-header {
1087
+ margin-bottom: var(--sp-6);
1088
+ }
1089
+
1090
+ .page-title {
1091
+ font-family: 'JetBrains Mono', monospace;
1092
+ font-size: var(--text-xl);
1093
+ font-weight: 700;
1094
+ color: var(--text-primary);
1095
+ margin-bottom: var(--sp-1);
1096
+ }
1097
+
1098
+ .page-subtitle {
1099
+ font-size: var(--text-sm);
1100
+ color: var(--text-secondary);
1101
+ }
1102
+
1103
+ /* ============================================================
1104
+ Section
1105
+ ============================================================ */
1106
+
1107
+ .section {
1108
+ margin-bottom: var(--sp-6);
1109
+ }
1110
+
1111
+ .section-title {
1112
+ font-family: 'JetBrains Mono', monospace;
1113
+ font-size: var(--text-sm);
1114
+ font-weight: 700;
1115
+ text-transform: uppercase;
1116
+ letter-spacing: 0.1em;
1117
+ color: var(--text-secondary);
1118
+ margin-bottom: var(--sp-4);
1119
+ display: flex;
1120
+ align-items: center;
1121
+ gap: var(--sp-3);
1122
+ }
1123
+
1124
+ .section-title::after {
1125
+ content: '';
1126
+ flex: 1;
1127
+ height: 1px;
1128
+ background: var(--bg-border);
1129
+ }
1130
+
1131
+ /* ============================================================
1132
+ Auth / Login page
1133
+ ============================================================ */
1134
+
1135
+ .auth-page {
1136
+ min-height: 100vh;
1137
+ background: var(--bg-base);
1138
+ display: flex;
1139
+ align-items: center;
1140
+ justify-content: center;
1141
+ position: relative;
1142
+ overflow: hidden;
1143
+ }
1144
+
1145
+ .auth-bg-grid {
1146
+ position: absolute;
1147
+ inset: 0;
1148
+ background-image:
1149
+ linear-gradient(rgba(0, 229, 255, 0.03) 1px, transparent 1px),
1150
+ linear-gradient(90deg, rgba(0, 229, 255, 0.03) 1px, transparent 1px);
1151
+ background-size: 40px 40px;
1152
+ pointer-events: none;
1153
+ }
1154
+
1155
+ .auth-bg-glow {
1156
+ position: absolute;
1157
+ width: 600px;
1158
+ height: 600px;
1159
+ border-radius: 50%;
1160
+ background: radial-gradient(circle, rgba(0, 229, 255, 0.06) 0%, transparent 70%);
1161
+ top: -100px;
1162
+ left: -100px;
1163
+ pointer-events: none;
1164
+ }
1165
+
1166
+ .auth-bg-glow-2 {
1167
+ position: absolute;
1168
+ width: 400px;
1169
+ height: 400px;
1170
+ border-radius: 50%;
1171
+ background: radial-gradient(circle, rgba(124, 77, 255, 0.06) 0%, transparent 70%);
1172
+ bottom: -50px;
1173
+ right: -50px;
1174
+ pointer-events: none;
1175
+ }
1176
+
1177
+ .auth-card {
1178
+ background: var(--bg-surface);
1179
+ border: 1px solid var(--bg-border);
1180
+ border-radius: var(--radius-xl);
1181
+ padding: var(--sp-10);
1182
+ width: 100%;
1183
+ max-width: 440px;
1184
+ position: relative;
1185
+ z-index: 1;
1186
+ }
1187
+
1188
+ .auth-logo {
1189
+ display: flex;
1190
+ align-items: center;
1191
+ gap: var(--sp-3);
1192
+ margin-bottom: var(--sp-8);
1193
+ }
1194
+
1195
+ .auth-logo-icon {
1196
+ color: var(--accent-cyan);
1197
+ font-size: 28px;
1198
+ }
1199
+
1200
+ .auth-logo-text {
1201
+ font-family: 'JetBrains Mono', monospace;
1202
+ font-size: var(--text-xl);
1203
+ font-weight: 700;
1204
+ }
1205
+
1206
+ .auth-logo-text span {
1207
+ color: var(--accent-cyan);
1208
+ }
1209
+
1210
+ .auth-title {
1211
+ font-family: 'JetBrains Mono', monospace;
1212
+ font-size: var(--text-lg);
1213
+ font-weight: 700;
1214
+ margin-bottom: var(--sp-1);
1215
+ }
1216
+
1217
+ .auth-subtitle {
1218
+ font-size: var(--text-sm);
1219
+ color: var(--text-secondary);
1220
+ margin-bottom: var(--sp-8);
1221
+ }
1222
+
1223
+ .auth-form {
1224
+ display: flex;
1225
+ flex-direction: column;
1226
+ gap: var(--sp-4);
1227
+ }
1228
+
1229
+ .auth-divider {
1230
+ display: flex;
1231
+ align-items: center;
1232
+ gap: var(--sp-3);
1233
+ margin: var(--sp-5) 0;
1234
+ }
1235
+
1236
+ .auth-divider-line {
1237
+ flex: 1;
1238
+ height: 1px;
1239
+ background: var(--bg-border);
1240
+ }
1241
+
1242
+ .auth-divider-text {
1243
+ font-size: var(--text-xs);
1244
+ color: var(--text-muted);
1245
+ text-transform: uppercase;
1246
+ letter-spacing: 0.08em;
1247
+ }
1248
+
1249
+ .oauth-btn {
1250
+ display: flex;
1251
+ align-items: center;
1252
+ justify-content: center;
1253
+ gap: var(--sp-3);
1254
+ padding: var(--sp-3);
1255
+ border: 1px solid var(--bg-border);
1256
+ border-radius: var(--radius-md);
1257
+ background: var(--bg-elevated);
1258
+ color: var(--text-primary);
1259
+ font-size: var(--text-sm);
1260
+ font-weight: 500;
1261
+ cursor: pointer;
1262
+ transition: all var(--transition-fast);
1263
+ text-decoration: none;
1264
+ width: 100%;
1265
+ }
1266
+
1267
+ .oauth-btn:hover {
1268
+ border-color: var(--text-muted);
1269
+ background: rgba(255, 255, 255, 0.04);
1270
+ }
1271
+
1272
+ .auth-footer {
1273
+ margin-top: var(--sp-6);
1274
+ text-align: center;
1275
+ font-size: var(--text-sm);
1276
+ color: var(--text-secondary);
1277
+ }
1278
+
1279
+ .auth-footer a, .auth-link {
1280
+ color: var(--accent-cyan);
1281
+ cursor: pointer;
1282
+ text-decoration: none;
1283
+ }
1284
+
1285
+ .auth-footer a:hover, .auth-link:hover {
1286
+ text-decoration: underline;
1287
+ }
1288
+
1289
+ .auth-error {
1290
+ padding: var(--sp-3) var(--sp-4);
1291
+ background: rgba(255, 61, 87, 0.1);
1292
+ border: 1px solid rgba(255, 61, 87, 0.3);
1293
+ border-radius: var(--radius-md);
1294
+ color: var(--accent-red);
1295
+ font-size: var(--text-sm);
1296
+ }
1297
+
1298
+ /* ============================================================
1299
+ Model Registry Card
1300
+ ============================================================ */
1301
+
1302
+ .model-card {
1303
+ background: var(--bg-surface);
1304
+ border: 1px solid var(--bg-border);
1305
+ border-radius: var(--radius-lg);
1306
+ padding: var(--sp-5);
1307
+ display: flex;
1308
+ flex-direction: column;
1309
+ gap: var(--sp-4);
1310
+ transition: border-color var(--transition-normal), box-shadow var(--transition-normal);
1311
+ }
1312
+
1313
+ .model-card:hover {
1314
+ border-color: rgba(0, 229, 255, 0.3);
1315
+ box-shadow: 0 0 20px rgba(0, 229, 255, 0.06);
1316
+ }
1317
+
1318
+ .model-card-name {
1319
+ font-family: 'JetBrains Mono', monospace;
1320
+ font-size: var(--text-base);
1321
+ font-weight: 700;
1322
+ color: var(--text-primary);
1323
+ }
1324
+
1325
+ .model-card-pricing {
1326
+ display: flex;
1327
+ gap: var(--sp-6);
1328
+ }
1329
+
1330
+ .model-card-price-item {
1331
+ display: flex;
1332
+ flex-direction: column;
1333
+ gap: 2px;
1334
+ }
1335
+
1336
+ .model-card-price-label {
1337
+ font-size: var(--text-xs);
1338
+ color: var(--text-muted);
1339
+ text-transform: uppercase;
1340
+ letter-spacing: 0.08em;
1341
+ }
1342
+
1343
+ .model-card-price-value {
1344
+ font-family: 'JetBrains Mono', monospace;
1345
+ font-size: var(--text-sm);
1346
+ font-weight: 600;
1347
+ color: var(--accent-green);
1348
+ }
1349
+
1350
+ .capability-gauge {
1351
+ position: relative;
1352
+ width: 60px;
1353
+ height: 60px;
1354
+ flex-shrink: 0;
1355
+ }
1356
+
1357
+ /* ============================================================
1358
+ Settings
1359
+ ============================================================ */
1360
+
1361
+ .settings-section {
1362
+ background: var(--bg-surface);
1363
+ border: 1px solid var(--bg-border);
1364
+ border-radius: var(--radius-lg);
1365
+ overflow: hidden;
1366
+ margin-bottom: var(--sp-5);
1367
+ flex-shrink: 0;
1368
+ }
1369
+
1370
+ .settings-section-header {
1371
+ padding: var(--sp-4) var(--sp-5);
1372
+ border-bottom: 1px solid var(--bg-border);
1373
+ background: var(--bg-elevated);
1374
+ }
1375
+
1376
+ .settings-section-title {
1377
+ font-family: 'JetBrains Mono', monospace;
1378
+ font-size: var(--text-sm);
1379
+ font-weight: 700;
1380
+ text-transform: uppercase;
1381
+ letter-spacing: 0.1em;
1382
+ color: var(--text-primary);
1383
+ }
1384
+
1385
+ .settings-section-desc {
1386
+ font-size: var(--text-xs);
1387
+ color: var(--text-secondary);
1388
+ margin-top: 2px;
1389
+ }
1390
+
1391
+ .settings-row {
1392
+ display: flex;
1393
+ align-items: center;
1394
+ justify-content: space-between;
1395
+ gap: var(--sp-4);
1396
+ padding: var(--sp-4) var(--sp-5);
1397
+ border-bottom: 1px solid rgba(37, 42, 56, 0.5);
1398
+ }
1399
+
1400
+ .settings-row:last-child {
1401
+ border-bottom: none;
1402
+ }
1403
+
1404
+ .settings-row-info {
1405
+ flex: 1;
1406
+ }
1407
+
1408
+ .settings-row-label {
1409
+ font-size: var(--text-sm);
1410
+ font-weight: 500;
1411
+ color: var(--text-primary);
1412
+ margin-bottom: 2px;
1413
+ }
1414
+
1415
+ .settings-row-desc {
1416
+ font-size: var(--text-xs);
1417
+ color: var(--text-secondary);
1418
+ }
1419
+
1420
+ .settings-key-input-wrapper {
1421
+ position: relative;
1422
+ display: flex;
1423
+ gap: var(--sp-2);
1424
+ align-items: center;
1425
+ width: 100%;
1426
+ max-width: 450px;
1427
+ }
1428
+
1429
+ /* ============================================================
1430
+ Loading spinner
1431
+ ============================================================ */
1432
+
1433
+ .spinner {
1434
+ width: 16px;
1435
+ height: 16px;
1436
+ border: 2px solid var(--bg-border);
1437
+ border-top-color: var(--accent-cyan);
1438
+ border-radius: 50%;
1439
+ animation: spin 0.7s linear infinite;
1440
+ }
1441
+
1442
+ @keyframes spin {
1443
+ to { transform: rotate(360deg); }
1444
+ }
1445
+
1446
+ /* ============================================================
1447
+ Empty states
1448
+ ============================================================ */
1449
+
1450
+ .empty-state {
1451
+ display: flex;
1452
+ flex-direction: column;
1453
+ align-items: center;
1454
+ justify-content: center;
1455
+ padding: var(--sp-12) var(--sp-8);
1456
+ text-align: center;
1457
+ color: var(--text-secondary);
1458
+ gap: var(--sp-4);
1459
+ }
1460
+
1461
+ .empty-state-icon {
1462
+ font-size: 48px;
1463
+ opacity: 0.3;
1464
+ }
1465
+
1466
+ .empty-state-title {
1467
+ font-family: 'JetBrains Mono', monospace;
1468
+ font-size: var(--text-lg);
1469
+ font-weight: 700;
1470
+ color: var(--text-muted);
1471
+ }
1472
+
1473
+ .empty-state-desc {
1474
+ font-size: var(--text-sm);
1475
+ max-width: 320px;
1476
+ }
1477
+
1478
+ /* ============================================================
1479
+ Topbar / Header strip
1480
+ ============================================================ */
1481
+
1482
+ .topbar {
1483
+ height: 52px;
1484
+ background: var(--bg-surface);
1485
+ border-bottom: 1px solid var(--bg-border);
1486
+ display: flex;
1487
+ align-items: center;
1488
+ justify-content: space-between;
1489
+ padding: 0 var(--sp-6);
1490
+ flex-shrink: 0;
1491
+ }
1492
+
1493
+ .topbar-breadcrumb {
1494
+ font-family: 'JetBrains Mono', monospace;
1495
+ font-size: var(--text-xs);
1496
+ color: var(--text-secondary);
1497
+ display: flex;
1498
+ align-items: center;
1499
+ gap: var(--sp-2);
1500
+ }
1501
+
1502
+ .topbar-breadcrumb strong {
1503
+ color: var(--text-primary);
1504
+ font-weight: 700;
1505
+ }
1506
+
1507
+ .topbar-actions {
1508
+ display: flex;
1509
+ align-items: center;
1510
+ gap: var(--sp-3);
1511
+ }
1512
+
1513
+ .topbar-health {
1514
+ display: flex;
1515
+ align-items: center;
1516
+ gap: var(--sp-2);
1517
+ font-family: 'JetBrains Mono', monospace;
1518
+ font-size: var(--text-xs);
1519
+ color: var(--text-secondary);
1520
+ }
1521
+
1522
+ /* ============================================================
1523
+ Tooltip
1524
+ ============================================================ */
1525
+
1526
+ [data-tooltip] {
1527
+ position: relative;
1528
+ }
1529
+
1530
+ [data-tooltip]::after {
1531
+ content: attr(data-tooltip);
1532
+ position: absolute;
1533
+ bottom: calc(100% + 8px);
1534
+ left: 50%;
1535
+ transform: translateX(-50%);
1536
+ background: var(--bg-elevated);
1537
+ border: 1px solid var(--bg-border);
1538
+ border-radius: var(--radius-sm);
1539
+ padding: 4px 8px;
1540
+ font-size: var(--text-xs);
1541
+ white-space: nowrap;
1542
+ pointer-events: none;
1543
+ opacity: 0;
1544
+ transition: opacity var(--transition-fast);
1545
+ z-index: 100;
1546
+ }
1547
+
1548
+ [data-tooltip]:hover::after {
1549
+ opacity: 1;
1550
+ }
1551
+
1552
+ /* ============================================================
1553
+ Misc utilities
1554
+ ============================================================ */
1555
+
1556
+ .flex { display: flex; }
1557
+ .flex-col { display: flex; flex-direction: column; }
1558
+ .items-center { align-items: center; }
1559
+ .justify-between { justify-content: space-between; }
1560
+ .gap-2 { gap: var(--sp-2); }
1561
+ .gap-3 { gap: var(--sp-3); }
1562
+ .gap-4 { gap: var(--sp-4); }
1563
+ .gap-6 { gap: var(--sp-6); }
1564
+ .flex-1 { flex: 1; }
1565
+ .w-full { width: 100%; }
1566
+ .text-cyan { color: var(--accent-cyan); }
1567
+ .text-green { color: var(--accent-green); }
1568
+ .text-amber { color: var(--accent-amber); }
1569
+ .text-red { color: var(--accent-red); }
1570
+ .text-muted { color: var(--text-muted); }
1571
+ .text-secondary { color: var(--text-secondary); }
1572
+ .font-mono { font-family: 'JetBrains Mono', monospace; }
1573
+ .font-sm { font-size: var(--text-sm); }
1574
+ .font-xs { font-size: var(--text-xs); }
1575
+ .truncate { overflow: hidden; text-overflow: ellipsis; white-space: nowrap; }
1576
+ .mb-4 { margin-bottom: var(--sp-4); }
1577
+ .mb-6 { margin-bottom: var(--sp-6); }
1578
+ .mt-4 { margin-top: var(--sp-4); }
1579
+
1580
+ /* ============================================================
1581
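+ Badge Variants (re-declares .badge-red from the Badge / Pill section above; within this stylesheet these later color, border-color and background values win)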
1582
+ ============================================================ */
1583
+
1584
+ .badge-red {
1585
+ color: var(--accent-red);
1586
+ border-color: rgba(255, 61, 87, 0.4);
1587
+ background: rgba(255, 61, 87, 0.08);
1588
+ }
1589
+
1590
+ /* ============================================================
1591
+ Pipeline Visualizer — Vertical Layout (sidebar)
1592
+ ============================================================ */
1593
+
1594
+ .pipeline-wrapper {
1595
+ display: flex;
1596
+ flex-direction: column;
1597
+ gap: 0;
1598
+ position: relative;
1599
+ padding: var(--sp-3) 0;
1600
+ }
1601
+
1602
+ .pipeline-stage {
1603
+ display: flex;
1604
+ align-items: flex-start;
1605
+ gap: var(--sp-3);
1606
+ padding: var(--sp-3) var(--sp-4);
1607
+ position: relative;
1608
+ transition: background var(--transition-fast);
1609
+ border-radius: var(--radius-md);
1610
+ }
1611
+
1612
+ .pipeline-stage.active {
1613
+ background: rgba(0, 229, 255, 0.05);
1614
+ }
1615
+
1616
+ .pipeline-stage.complete .pipeline-stage-label {
1617
+ color: var(--accent-green);
1618
+ }
1619
+
1620
+ .pipeline-stage.error .pipeline-stage-label {
1621
+ color: var(--accent-red);
1622
+ }
1623
+
1624
+ .pipeline-stage-icon {
1625
+ width: 24px;
1626
+ height: 24px;
1627
+ display: flex;
1628
+ align-items: center;
1629
+ justify-content: center;
1630
+ flex-shrink: 0;
1631
+ font-size: 14px;
1632
+ }
1633
+
1634
+ .pipeline-stage-body {
1635
+ flex: 1;
1636
+ min-width: 0;
1637
+ }
1638
+
1639
+ .pipeline-stage-label {
1640
+ font-size: var(--text-xs);
1641
+ font-weight: 600;
1642
+ font-family: 'JetBrains Mono', monospace;
1643
+ color: var(--text-secondary);
1644
+ transition: color var(--transition-fast);
1645
+ margin-bottom: 3px;
1646
+ }
1647
+
1648
+ .pipeline-stage.active .pipeline-stage-label {
1649
+ color: var(--accent-cyan);
1650
+ }
1651
+
1652
+ .pipeline-stage-detail {
1653
+ font-size: 10px;
1654
+ color: var(--text-muted);
1655
+ line-height: 1.4;
1656
+ word-break: break-word;
1657
+ }
1658
+
1659
+ .pipeline-stage.complete .pipeline-stage-detail {
1660
+ color: var(--text-secondary);
1661
+ }
1662
+
1663
+ .pipeline-connector {
1664
+ position: absolute;
1665
+ left: calc(var(--sp-4) + 11px);
1666
+ top: calc(100% - var(--sp-3));
1667
+ width: 2px;
1668
+ height: var(--sp-3);
1669
+ background: var(--bg-border);
1670
+ z-index: 0;
1671
+ }
1672
+
1673
+ /* ============================================================
1674
+ Metrics Bar (Response)
1675
+ ============================================================ */
1676
+
1677
+ .metrics-bar {
1678
+ display: flex;
1679
+ align-items: center;
1680
+ flex-wrap: wrap;
1681
+ gap: 0;
1682
+ background: var(--bg-base);
1683
+ border: 1px solid var(--bg-border);
1684
+ border-radius: var(--radius-md);
1685
+ overflow: hidden;
1686
+ margin-bottom: var(--sp-4);
1687
+ }
1688
+
1689
+ .metric-item {
1690
+ flex: 1;
1691
+ min-width: 80px;
1692
+ padding: var(--sp-3) var(--sp-4);
1693
+ text-align: center;
1694
+ }
1695
+
1696
+ .metric-label {
1697
+ font-size: 10px;
1698
+ text-transform: uppercase;
1699
+ letter-spacing: 0.08em;
1700
+ color: var(--text-muted);
1701
+ margin-bottom: 4px;
1702
+ }
1703
+
1704
+ .metric-value {
1705
+ font-family: 'JetBrains Mono', monospace;
1706
+ font-size: var(--text-xs);
1707
+ font-weight: 700;
1708
+ color: var(--text-primary);
1709
+ }
1710
+
1711
+ .metric-divider {
1712
+ width: 1px;
1713
+ height: 40px;
1714
+ background: var(--bg-border);
1715
+ flex-shrink: 0;
1716
+ }
1717
+
1718
+ /* ============================================================
1719
+ Response Content (markdown rendering)
1720
+ ============================================================ */
1721
+
1722
+ .response-content {
1723
+ font-size: var(--text-sm);
1724
+ line-height: 1.8;
1725
+ color: var(--text-primary);
1726
+ }
1727
+
1728
+ .response-content p { margin-bottom: var(--sp-4); }
1729
+ .response-content p:last-child { margin-bottom: 0; }
1730
+
1731
+ .response-content h1,
1732
+ .response-content h2,
1733
+ .response-content h3 {
1734
+ font-family: 'JetBrains Mono', monospace;
1735
+ color: var(--accent-cyan);
1736
+ margin: var(--sp-5) 0 var(--sp-3);
1737
+ font-size: var(--text-base);
1738
+ }
1739
+
1740
+ .response-content code {
1741
+ font-family: 'Fira Code', 'JetBrains Mono', monospace;
1742
+ font-size: 12px;
1743
+ background: var(--bg-base);
1744
+ border: 1px solid var(--bg-border);
1745
+ border-radius: var(--radius-sm);
1746
+ padding: 2px 6px;
1747
+ color: var(--accent-cyan);
1748
+ }
1749
+
1750
+ .response-content pre {
1751
+ background: var(--bg-base);
1752
+ border: 1px solid var(--bg-border);
1753
+ border-radius: var(--radius-md);
1754
+ padding: var(--sp-4);
1755
+ overflow-x: auto;
1756
+ margin: var(--sp-4) 0;
1757
+ }
1758
+
1759
+ .response-content pre code {
1760
+ background: none;
1761
+ border: none;
1762
+ padding: 0;
1763
+ color: var(--text-primary);
1764
+ font-size: 13px;
1765
+ line-height: 1.6;
1766
+ }
1767
+
1768
+ .response-content ul,
1769
+ .response-content ol {
1770
+ padding-left: var(--sp-6);
1771
+ margin-bottom: var(--sp-4);
1772
+ }
1773
+
1774
+ .response-content li {
1775
+ margin-bottom: var(--sp-2);
1776
+ }
1777
+
1778
+ .response-content blockquote {
1779
+ border-left: 3px solid var(--accent-cyan);
1780
+ padding-left: var(--sp-4);
1781
+ color: var(--text-secondary);
1782
+ margin: var(--sp-4) 0;
1783
+ }
1784
+
1785
+ .response-content table {
1786
+ width: 100%;
1787
+ border-collapse: collapse;
1788
+ margin: var(--sp-4) 0;
1789
+ font-size: var(--text-xs);
1790
+ }
1791
+
1792
+ .response-content th,
1793
+ .response-content td {
1794
+ padding: var(--sp-2) var(--sp-3);
1795
+ border: 1px solid var(--bg-border);
1796
+ text-align: left;
1797
+ }
1798
+
1799
+ .response-content th {
1800
+ background: var(--bg-elevated);
1801
+ color: var(--accent-cyan);
1802
+ font-weight: 700;
1803
+ }
1804
+
1805
+ /* ============================================================
1806
+ Example Prompt Buttons
1807
+ ============================================================ */
1808
+
1809
+ .example-prompt-btn {
1810
+ display: flex;
1811
+ align-items: flex-start;
1812
+ gap: var(--sp-3);
1813
+ padding: var(--sp-3) var(--sp-4);
1814
+ background: var(--bg-elevated);
1815
+ border: 1px solid var(--bg-border);
1816
+ border-radius: var(--radius-md);
1817
+ color: var(--text-secondary);
1818
+ font-size: var(--text-sm);
1819
+ text-align: left;
1820
+ cursor: pointer;
1821
+ transition: all var(--transition-fast);
1822
+ width: 100%;
1823
+ font-family: inherit;
1824
+ }
1825
+
1826
+ .example-prompt-btn:hover {
1827
+ border-color: var(--accent-cyan);
1828
+ color: var(--text-primary);
1829
+ background: rgba(0, 229, 255, 0.04);
1830
+ }
1831
+
1832
+ .example-prompt-icon {
1833
+ color: var(--accent-cyan);
1834
+ font-weight: 700;
1835
+ flex-shrink: 0;
1836
+ }
1837
+
1838
+ /* ============================================================
1839
+ Rationale Card
1840
+ ============================================================ */
1841
+
1842
+ .rationale-card {
1843
+ background: var(--bg-surface);
1844
+ border: 1px solid var(--bg-border);
1845
+ border-radius: var(--radius-lg);
1846
+ overflow: hidden;
1847
+ }
1848
+
1849
+ .rationale-card-title {
1850
+ display: flex;
1851
+ align-items: center;
1852
+ gap: var(--sp-2);
1853
+ padding: var(--sp-4) var(--sp-5);
1854
+ border-bottom: 1px solid var(--bg-border);
1855
+ font-size: var(--text-xs);
1856
+ font-weight: 700;
1857
+ font-family: 'JetBrains Mono', monospace;
1858
+ text-transform: uppercase;
1859
+ letter-spacing: 0.08em;
1860
+ color: var(--accent-cyan);
1861
+ }
1862
+
1863
+ .rationale-section {
1864
+ padding: var(--sp-4) var(--sp-5);
1865
+ border-bottom: 1px solid var(--bg-border);
1866
+ }
1867
+
1868
+ .rationale-section:last-child {
1869
+ border-bottom: none;
1870
+ }
1871
+
1872
+ .rationale-label {
1873
+ font-size: 10px;
1874
+ font-weight: 700;
1875
+ text-transform: uppercase;
1876
+ letter-spacing: 0.1em;
1877
+ color: var(--text-muted);
1878
+ margin-bottom: var(--sp-3);
1879
+ }
1880
+
1881
+ /* ============================================================
1882
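+ Card component (re-declares .card, .card:hover and .card-header from the Cards section above; where both set a property, these later values win)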
1883
+ ============================================================ */
1884
+
1885
+ .card {
1886
+ background: var(--bg-surface);
1887
+ border: 1px solid var(--bg-border);
1888
+ border-radius: var(--radius-lg);
1889
+ padding: var(--sp-5);
1890
+ transition: border-color var(--transition-normal);
1891
+ flex-shrink: 0;
1892
+ }
1893
+
1894
+ .card:hover {
1895
+ border-color: rgba(0, 229, 255, 0.15);
1896
+ }
1897
+
1898
+ .card-header {
1899
+ font-family: 'JetBrains Mono', monospace;
1900
+ font-size: var(--text-xs);
1901
+ font-weight: 700;
1902
+ text-transform: uppercase;
1903
+ letter-spacing: 0.08em;
1904
+ color: var(--text-muted);
1905
+ margin-bottom: var(--sp-4);
1906
+ }
1907
+
1908
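+ /* Grid layout helpers. NOTE: this re-declares .grid-2 after the responsive rules, so its auto-fill template also overrides the single-column .grid-2 set in the max-width: 768px block above. */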
1909
+ .grid-2 {
1910
+ display: grid;
1911
+ grid-template-columns: repeat(auto-fill, minmax(340px, 1fr));
1912
+ gap: var(--sp-4);
1913
+ }
1914
+
1915
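+ /* Model card: re-declares the .model-card* rules from the Model Registry Card section above; overlapping properties take these later values. */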
1916
+ .model-card {
1917
+ background: var(--bg-surface);
1918
+ border: 1px solid var(--bg-border);
1919
+ border-radius: var(--radius-lg);
1920
+ padding: var(--sp-5);
1921
+ display: flex;
1922
+ flex-direction: column;
1923
+ gap: var(--sp-4);
1924
+ transition: border-color var(--transition-normal), box-shadow var(--transition-normal);
1925
+ }
1926
+
1927
+ .model-card:hover {
1928
+ border-color: rgba(0, 229, 255, 0.2);
1929
+ }
1930
+
1931
+ .model-card-name {
1932
+ font-family: 'JetBrains Mono', monospace;
1933
+ font-size: var(--text-sm);
1934
+ font-weight: 700;
1935
+ color: var(--text-primary);
1936
+ word-break: break-all;
1937
+ }
1938
+
1939
+ .model-card-pricing {
1940
+ display: flex;
1941
+ gap: var(--sp-4);
1942
+ padding: var(--sp-3) 0;
1943
+ border-top: 1px solid var(--bg-border);
1944
+ border-bottom: 1px solid var(--bg-border);
1945
+ }
1946
+
1947
+ .model-card-price-item {
1948
+ flex: 1;
1949
+ text-align: center;
1950
+ }
1951
+
1952
+ .model-card-price-label {
1953
+ font-size: 10px;
1954
+ text-transform: uppercase;
1955
+ letter-spacing: 0.08em;
1956
+ color: var(--text-muted);
1957
+ margin-bottom: 4px;
1958
+ }
1959
+
1960
+ .model-card-price-value {
1961
+ font-family: 'JetBrains Mono', monospace;
1962
+ font-size: var(--text-xs);
1963
+ font-weight: 700;
1964
+ color: var(--text-primary);
1965
+ }
1966
+
1967
+ /* Prevent browser autofill style overrides */
1968
+ input:-webkit-autofill,
1969
+ input:-webkit-autofill:hover,
1970
+ input:-webkit-autofill:focus,
1971
+ textarea:-webkit-autofill,
1972
+ textarea:-webkit-autofill:hover,
1973
+ textarea:-webkit-autofill:focus,
1974
+ select:-webkit-autofill,
1975
+ select:-webkit-autofill:hover,
1976
+ select:-webkit-autofill:focus {
1977
+ border: 1px solid var(--accent-cyan) !important;
1978
+ -webkit-text-fill-color: var(--text-primary) !important;
1979
+ -webkit-box-shadow: 0 0 0px 1000px var(--bg-elevated) inset !important;
1980
+ transition: background-color 5000s ease-in-out 0s;
1981
+ }
1982
+
frontend/src/types.ts ADDED
@@ -0,0 +1,170 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // Global types for LLMOpt frontend — aligned with backend schemas
2
+
3
+ export type BudgetMode = 'cheap' | 'balanced' | 'quality';
4
+
5
+ export type PipelineStageStatus = 'idle' | 'active' | 'complete' | 'skipped' | 'error';
6
+
7
+ export type ComplexityTier = 'trivial' | 'easy' | 'medium' | 'hard' | 'expert';
8
+
9
+ export interface PipelineStage {
10
+ id: string;
11
+ label: string;
12
+ icon: string;
13
+ status: PipelineStageStatus;
14
+ latencyMs?: number;
15
+ detail?: string;
16
+ }
17
+
18
+ export interface GenerateRequest {
19
+ query: string;
20
+ budget_mode: BudgetMode;
21
+ max_cost_per_request?: number;
22
+ quality_threshold?: number;
23
+ exclude_providers?: string[];
24
+ only_providers?: string[];
25
+ prefer_local?: boolean;
26
+ conversation_history?: { role: string; content: string }[];
27
+ temperature?: number;
28
+ api_keys?: Record<string, string>;
29
+ alpha?: number;
30
+ beta?: number;
31
+ gamma?: number;
32
+ compression_enabled?: boolean;
33
+ evaluate?: boolean;
34
+ }
35
+
36
+ export interface GenerateResponse {
37
+ response: string;
38
+ model_used: string;
39
+ provider: string;
40
+ input_tokens: number;
41
+ output_tokens: number;
42
+ total_tokens: number;
43
+ estimated_cost: number;
44
+ tokens_saved: number;
45
+ cost_saved: number;
46
+ compression_ratio: number;
47
+ complexity_score: number;
48
+ complexity_tier: ComplexityTier;
49
+ latency_ms: number;
50
+ }
51
+
52
+ // The /explain endpoint returns nested objects from core.py
53
+ export interface ExplainResponse {
54
+ query: string;
55
+ features: {
56
+ token_count: number;
57
+ sentence_count: number;
58
+ primary_domain: string;
59
+ estimated_output_length: string;
60
+ domain_code: boolean;
61
+ domain_math: boolean;
62
+ domain_science: boolean;
63
+ domain_reasoning: boolean;
64
+ domain_creative: boolean;
65
+ multi_step: boolean;
66
+ requires_comparison: boolean;
67
+ requires_generation: boolean;
68
+ requires_analysis: boolean;
69
+ requires_debate: boolean;
70
+ has_math_notation: boolean;
71
+ has_code_block: boolean;
72
+ [key: string]: unknown;
73
+ };
74
+ complexity: {
75
+ score: number;
76
+ tier: ComplexityTier;
77
+ required_reasoning: number;
78
+ required_coding: number;
79
+ required_math: number;
80
+ rationale: string[];
81
+ estimated_input_tokens: number;
82
+ estimated_output_tokens: number;
83
+ [key: string]: unknown;
84
+ };
85
+ optimization: {
86
+ selected_model: string;
87
+ provider: string;
88
+ fallback_model: string | null;
89
+ compression_enabled: boolean;
90
+ system_prompt_style: string;
91
+ estimated_input_tokens: number;
92
+ estimated_output_tokens: number;
93
+ estimated_cost: number;
94
+ rationale: string[];
95
+ budget_mode: string;
96
+ [key: string]: unknown;
97
+ };
98
+ optimized_prompt: {
99
+ tokens_before: number;
100
+ tokens_after: number;
101
+ tokens_saved: number;
102
+ compression_ratio: number;
103
+ [key: string]: unknown;
104
+ };
105
+ }
106
+
107
+ export interface HistoryItem {
108
+ id: number;
109
+ query: string;
110
+ response: string;
111
+ model_used: string;
112
+ provider: string;
113
+ input_tokens: number;
114
+ output_tokens: number;
115
+ total_tokens: number;
116
+ estimated_cost: number;
117
+ tokens_saved: number;
118
+ cost_saved: number;
119
+ latency_ms: number;
120
+ complexity_score: number;
121
+ complexity_tier: ComplexityTier;
122
+ time_ago: string;
123
+ }
124
+
125
+ export interface DashboardStats {
126
+ tokens_saved: string;
127
+ prompts_improved: number;
128
+ routing_savings: string;
129
+ avg_boost: string;
130
+ distribution: Record<string, number>;
131
+ recent_decisions: {
132
+ id: string;
133
+ time_ago: string;
134
+ model: string;
135
+ provider: string;
136
+ tier: string;
137
+ score: number;
138
+ reason: string;
139
+ }[];
140
+ recent_optimizations: {
141
+ name: string;
142
+ model_used: string;
143
+ time_ago: string;
144
+ score: string;
145
+ tokens_saved: string;
146
+ }[];
147
+ }
148
+
149
+ // ModelSpec from registry — uses model_name not id
150
+ export interface ModelSpec {
151
+ model_name: string;
152
+ provider: string;
153
+ input_cost_per_1k: number;
154
+ output_cost_per_1k: number;
155
+ context_window: number;
156
+ reasoning_score: number;
157
+ coding_score: number;
158
+ math_score: number;
159
+ instruction_following_score: number;
160
+ latency_score: number;
161
+ max_complexity: number;
162
+ capability_score: number;
163
+ notes: string;
164
+ }
165
+
166
+ export interface HealthStatus {
167
+ redis: 'ok' | 'error' | 'unknown';
168
+ ml_deps: 'ok' | 'error' | 'unknown';
169
+ api: 'ok' | 'error';
170
+ }
frontend/src/vite-env.d.ts ADDED
@@ -0,0 +1 @@
 
 
1
+ /// <reference types="vite/client" />
frontend/tsconfig.app.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "compilerOptions": {
3
+ "tsBuildInfoFile": "./node_modules/.tmp/tsconfig.app.tsbuildinfo",
4
+ "target": "ES2020",
5
+ "useDefineForClassFields": true,
6
+ "lib": ["ES2020", "DOM", "DOM.Iterable"],
7
+ "module": "ESNext",
8
+ "skipLibCheck": true,
9
+
10
+ /* Bundler mode */
11
+ "moduleResolution": "Bundler",
12
+ "allowImportingTsExtensions": true,
13
+ "isolatedModules": true,
14
+ "moduleDetection": "force",
15
+ "noEmit": true,
16
+ "jsx": "react-jsx",
17
+
18
+ /* Linting */
19
+ "strict": true,
20
+ "noUnusedLocals": true,
21
+ "noUnusedParameters": true,
22
+ "noFallthroughCasesInSwitch": true,
23
+ "noUncheckedSideEffectImports": true
24
+ },
25
+ "include": ["src"]
26
+ }
frontend/tsconfig.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "files": [],
3
+ "references": [
4
+ { "path": "./tsconfig.app.json" },
5
+ { "path": "./tsconfig.node.json" }
6
+ ]
7
+ }
frontend/tsconfig.node.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "compilerOptions": {
3
+ "tsBuildInfoFile": "./node_modules/.tmp/tsconfig.node.tsbuildinfo",
4
+ "target": "ES2022",
5
+ "lib": ["ES2023"],
6
+ "module": "ESNext",
7
+ "skipLibCheck": true,
8
+
9
+ /* Bundler mode */
10
+ "moduleResolution": "Bundler",
11
+ "allowImportingTsExtensions": true,
12
+ "isolatedModules": true,
13
+ "moduleDetection": "force",
14
+ "noEmit": true,
15
+
16
+ /* Linting */
17
+ "strict": true,
18
+ "noUnusedLocals": true,
19
+ "noUnusedParameters": true,
20
+ "noFallthroughCasesInSwitch": true,
21
+ "noUncheckedSideEffectImports": true
22
+ },
23
+ "include": ["vite.config.ts"]
24
+ }
frontend/vite.config.ts ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { defineConfig } from 'vite'
2
+ import react from '@vitejs/plugin-react'
3
+ import path from 'path'
4
+
5
+ // https://vite.dev/config/
6
+ export default defineConfig({
7
+ plugins: [react()],
8
+ base: '/ui/',
9
+ build: {
10
+ outDir: '../static',
11
+ emptyOutDir: true,
12
+ chunkSizeWarningLimit: 1600,
13
+ rollupOptions: {
14
+ output: {
15
+ manualChunks: {
16
+ vendor: ['react', 'react-dom', 'react-router-dom'],
17
+ charts: ['recharts'],
18
+ motion: ['framer-motion'],
19
+ markdown: ['react-markdown', 'react-syntax-highlighter'],
20
+ },
21
+ },
22
+ },
23
+ },
24
+ resolve: {
25
+ alias: {
26
+ '@': path.resolve(__dirname, './src'),
27
+ },
28
+ },
29
+ server: {
30
+ proxy: {
31
+ '/generate': 'http://localhost:8000',
32
+ '/explain': 'http://localhost:8000',
33
+ '/models': 'http://localhost:8000',
34
+ '/health': 'http://localhost:8000',
35
+ '/auth': 'http://localhost:8000',
36
+ '/stream': 'http://localhost:8000',
37
+ },
38
+ },
39
+ })
40
+
llmopt/analyzer/query_analyzer.py CHANGED
@@ -161,18 +161,21 @@ class QueryAnalyzer:
161
  "code", "math", "science", "creative",
162
  "reasoning", "summarization", "translation", "factual"
163
  ]
164
- try:
165
- from transformers import pipeline # type: ignore
166
- logger.info("Loading ML Zero-Shot Classifier for Query Analyzer...")
167
- self.ml_classifier = pipeline(
168
- "zero-shot-classification",
169
- model="cross-encoder/nli-distilroberta-base",
170
- device=-1
171
- )
172
- except ImportError:
173
- logger.info("transformers not found, using V1 heuristic Query Analyzer.")
174
- except Exception as e:
175
- logger.warning(f"Failed to load ML classifier: {e}. Falling back to V1.")
 
 
 
176
 
177
  def analyze(self, query: str) -> QueryFeatures:
178
  q = query.strip()
 
161
  "code", "math", "science", "creative",
162
  "reasoning", "summarization", "translation", "factual"
163
  ]
164
+ import os
165
+ if os.getenv("USE_ML_ANALYZER", "false").lower() == "true":
166
+ try:
167
+ from transformers import pipeline # type: ignore
168
+ logger.info("Loading ML Zero-Shot Classifier for Query Analyzer...")
169
+ self.ml_classifier = pipeline(
170
+ "zero-shot-classification",
171
+ model="cross-encoder/nli-distilroberta-base",
172
+ device=-1,
173
+ local_files_only=True
174
+ )
175
+ except ImportError:
176
+ logger.info("transformers not found, using V1 heuristic Query Analyzer.")
177
+ except Exception as e:
178
+ logger.warning(f"Failed to load ML classifier: {e}. Falling back to V1.")
179
 
180
  def analyze(self, query: str) -> QueryFeatures:
181
  q = query.strip()
llmopt/api/app.py CHANGED
@@ -11,15 +11,31 @@ Endpoints:
11
  from __future__ import annotations
12
 
13
  import os
 
 
 
14
  import logging
15
  from typing import Optional, Dict
16
 
17
- from fastapi import FastAPI, HTTPException
18
- from fastapi.responses import StreamingResponse
 
 
19
  from pydantic import BaseModel, Field
20
 
21
  from llmopt.core import LLMOpt
22
-
 
 
 
 
 
 
 
 
 
 
 
23
  logger = logging.getLogger(__name__)
24
 
25
  # ---------------------------------------------------------------------------
@@ -32,9 +48,39 @@ app = FastAPI(
32
  version="0.1.0",
33
  )
34
 
35
- # Single shared client (stateless — safe for concurrent use)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  _client = LLMOpt(log_level=os.getenv("LOG_LEVEL", "WARNING"))
37
 
 
 
 
 
 
 
 
 
 
38
 
39
  # ---------------------------------------------------------------------------
40
  # Request / Response schemas
@@ -57,6 +103,11 @@ class GenerateRequest(BaseModel):
57
  None,
58
  description="Optional provider API keys (e.g. {'openai': 'sk-...', 'anthropic': '...' })"
59
  )
 
 
 
 
 
60
 
61
 
62
  class GenerateResponse(BaseModel):
@@ -78,20 +129,469 @@ class GenerateResponse(BaseModel):
78
  class ExplainRequest(BaseModel):
79
  query: str = Field(..., min_length=1, max_length=32000)
80
  budget_mode: str = Field("balanced")
 
 
 
 
 
 
 
 
81
 
 
 
82
 
83
  # ---------------------------------------------------------------------------
84
  # Endpoints
85
  # ---------------------------------------------------------------------------
86
 
87
- @app.get("/")
88
- def root():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  return {
90
- "message": "LLMOpt V2 API is running!",
91
- "docs": "/docs",
92
- "health": "/health"
 
 
 
 
 
 
93
  }
94
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
 
96
  @app.get("/health")
97
  def health():
@@ -105,11 +605,24 @@ def list_models():
105
 
106
 
107
  @app.post("/generate", response_model=GenerateResponse)
108
- def generate(req: GenerateRequest):
109
  """
110
  Full pipeline: analyze → optimize → route → return response + metrics.
111
  """
 
 
 
 
 
 
 
 
 
 
112
  try:
 
 
 
113
  result = _client.generate(
114
  query=req.query,
115
  budget_mode=req.budget_mode,
@@ -122,31 +635,85 @@ def generate(req: GenerateRequest):
122
  temperature=req.temperature,
123
  dry_run=req.dry_run,
124
  api_keys=req.api_keys, # Pass BYOK keys
 
 
 
 
 
125
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
  return GenerateResponse(**result.to_dict())
127
  except KeyError as e:
128
  raise HTTPException(status_code=400, detail=f"Model not found: {e}")
129
  except Exception as e:
130
  logger.exception("generate() failed")
 
 
 
 
 
131
  raise HTTPException(status_code=500, detail=str(e))
132
 
133
 
134
  @app.post("/explain")
135
- def explain(req: ExplainRequest):
136
  """
137
  Returns the full routing decision for a query WITHOUT making an LLM API call.
138
  Useful for debugging, testing, and understanding optimization decisions.
139
  """
 
 
140
  try:
141
- return _client.explain(query=req.query, budget_mode=req.budget_mode)
 
 
 
 
 
 
 
 
 
 
142
  except Exception as e:
143
  logger.exception("explain() failed")
144
  raise HTTPException(status_code=500, detail=str(e))
145
 
146
 
 
147
  @app.post("/stream")
148
- def stream_generate(req: GenerateRequest):
149
  """Server-sent stream of response tokens."""
 
 
 
 
 
 
 
150
  def token_generator():
151
  try:
152
  for chunk in _client.stream(
 
11
  from __future__ import annotations
12
 
13
  import os
14
+ from dotenv import load_dotenv
15
+ load_dotenv(os.path.join(os.path.dirname(__file__), "..", "..", "config", ".env"))
16
+
17
  import logging
18
  from typing import Optional, Dict
19
 
20
+ from fastapi import FastAPI, HTTPException, Depends, Request, Response, status
21
+ from fastapi.responses import StreamingResponse, RedirectResponse
22
+ from fastapi.staticfiles import StaticFiles
23
+ from fastapi.middleware.cors import CORSMiddleware
24
  from pydantic import BaseModel, Field
25
 
26
  from llmopt.core import LLMOpt
27
+ from llmopt.cache.redis_client import redis_manager
28
+ from llmopt.api.security import (
29
+ create_session,
30
+ delete_session,
31
+ get_session_payload,
32
+ check_rate_limit,
33
+ get_session_id_from_request
34
+ )
35
+ from sqlalchemy.orm import Session
36
+ from llmopt.db.session import engine, get_db
37
+ from llmopt.db import models
38
+ from llmopt.api import crud
39
  logger = logging.getLogger(__name__)
40
 
41
  # ---------------------------------------------------------------------------
 
48
  version="0.1.0",
49
  )
50
 
51
+ # Detect dev mode — disable secure cookies on localhost
52
+ IS_DEV = os.getenv("ENVIRONMENT", "development").lower() in ("development", "dev", "local")
53
+ COOKIE_SECURE = not IS_DEV # True only in production (HTTPS)
54
+ COOKIE_SAMESITE = "lax" if IS_DEV else "none" # lax works on HTTP localhost
55
+
56
+ # Configure CORS — allow localhost in dev, full regex in prod
57
+ if IS_DEV:
58
+ app.add_middleware(
59
+ CORSMiddleware,
60
+ allow_origins=["http://localhost:5173", "http://localhost:8000", "http://127.0.0.1:8000"],
61
+ allow_credentials=True,
62
+ allow_methods=["*"],
63
+ allow_headers=["*"],
64
+ )
65
+ else:
66
+ app.add_middleware(
67
+ CORSMiddleware,
68
+ allow_origin_regex=r"https?://.*",
69
+ allow_credentials=True,
70
+ allow_methods=["*"],
71
+ allow_headers=["*"],
72
+ )
73
  _client = LLMOpt(log_level=os.getenv("LOG_LEVEL", "WARNING"))
74
 
75
+ @app.on_event("startup")
76
+ async def startup_event():
77
+ models.Base.metadata.create_all(bind=engine)
78
+ await redis_manager.connect()
79
+
80
+ @app.on_event("shutdown")
81
+ async def shutdown_event():
82
+ await redis_manager.close()
83
+
84
 
85
  # ---------------------------------------------------------------------------
86
  # Request / Response schemas
 
103
  None,
104
  description="Optional provider API keys (e.g. {'openai': 'sk-...', 'anthropic': '...' })"
105
  )
106
+ alpha: Optional[float] = Field(None, description="Custom cost weight")
107
+ beta: Optional[float] = Field(None, description="Custom token weight")
108
+ gamma: Optional[float] = Field(None, description="Custom quality weight")
109
+ compression_enabled: Optional[bool] = Field(None, description="Force enable/disable prompt compression")
110
+ evaluate: bool = Field(False, description="Enable LLM-as-judge evaluation")
111
 
112
 
113
  class GenerateResponse(BaseModel):
 
129
  class ExplainRequest(BaseModel):
130
  query: str = Field(..., min_length=1, max_length=32000)
131
  budget_mode: str = Field("balanced")
132
+ alpha: Optional[float] = Field(None)
133
+ beta: Optional[float] = Field(None)
134
+ gamma: Optional[float] = Field(None)
135
+ compression_enabled: Optional[bool] = Field(None)
136
+ exclude_providers: list[str] = Field(default_factory=list)
137
+ only_providers: list[str] = Field(default_factory=list)
138
+
139
+
140
 
141
+ class AuthRequest(BaseModel):
142
+ api_keys: Dict[str, str] = Field(..., description="Provider API keys")
143
 
144
  # ---------------------------------------------------------------------------
145
  # Endpoints
146
  # ---------------------------------------------------------------------------
147
 
148
+ @app.post("/auth/register")
149
+ async def register(user: crud.UserCreate, db: Session = Depends(get_db)):
150
+ db_user = crud.get_user_by_email(db, email=user.email)
151
+ if db_user:
152
+ raise HTTPException(status_code=400, detail="Email already registered")
153
+ crud.create_user(db=db, user=user)
154
+ return {"message": "User created successfully"}
155
+
156
+ @app.post("/auth/login")
157
+ async def login_user(user: crud.UserLogin, response: Response, db: Session = Depends(get_db)):
158
+ db_user = crud.get_user_by_email(db, email=user.email)
159
+ if not db_user or not crud.verify_password(user.password, db_user.hashed_password):
160
+ raise HTTPException(status_code=400, detail="Incorrect email or password")
161
+
162
+ api_keys = {}
163
+ from llmopt.api.security import decrypt_string
164
+ user_keys_encrypted = crud.get_user_api_keys(db, db_user.id)
165
+ if user_keys_encrypted:
166
+ try:
167
+ api_keys = {p: decrypt_string(k) for p, k in user_keys_encrypted.items()}
168
+ except Exception:
169
+ pass
170
+
171
+ session_id = await create_session(api_keys, user_id=db_user.id)
172
+ response.set_cookie(
173
+ key="session_id",
174
+ value=session_id,
175
+ httponly=True,
176
+ secure=COOKIE_SECURE,
177
+ samesite=COOKIE_SAMESITE,
178
+ max_age=int(os.getenv("SESSION_TTL", 7200))
179
+ )
180
+ return {"message": "Logged in successfully", "session_id": session_id}
181
+
182
+ @app.post("/auth/keys")
183
+ async def update_keys(req: AuthRequest, session_id: str = Depends(get_session_id_from_request), db: Session = Depends(get_db)):
184
+ """
185
+ Securely store API keys in Redis and the persistent database.
186
+ """
187
+ if not req.api_keys:
188
+ raise HTTPException(status_code=400, detail="No API keys provided.")
189
+
190
+ payload = await get_session_payload(session_id)
191
+ user_id = payload.get("user_id")
192
+
193
+ # Merge keys with existing ones in Redis session
194
+ current_keys = payload.get("api_keys", {})
195
+ updated_keys = {**current_keys, **req.api_keys}
196
+
197
+ # Update redis session in-place
198
+ payload["api_keys"] = updated_keys
199
+ from llmopt.api.security import update_session_payload
200
+ await update_session_payload(session_id, payload)
201
+
202
+ # Save to db if authenticated
203
+ if user_id:
204
+ from llmopt.api.security import encrypt_string
205
+ encrypted_keys = {p: encrypt_string(k) for p, k in req.api_keys.items()}
206
+ crud.update_user_api_keys(db, user_id, encrypted_keys)
207
+
208
+ return {"message": "Keys updated securely"}
209
+
210
+ @app.delete("/auth/keys/{provider}")
211
+ async def delete_key(provider: str, session_id: str = Depends(get_session_id_from_request), db: Session = Depends(get_db)):
212
+ """
213
+ Delete an API key for a specific provider.
214
+ """
215
+ provider = provider.lower()
216
+ payload = await get_session_payload(session_id)
217
+ user_id = payload.get("user_id")
218
+
219
+ current_keys = payload.get("api_keys", {})
220
+ if provider in current_keys:
221
+ del current_keys[provider]
222
+
223
+ payload["api_keys"] = current_keys
224
+ from llmopt.api.security import update_session_payload
225
+ await update_session_payload(session_id, payload)
226
+
227
+ if user_id:
228
+ crud.delete_user_api_key(db, user_id, provider)
229
+
230
+ return {"message": f"Key for {provider} deleted successfully"}
231
+
232
+
233
+ @app.get("/auth/keys")
234
+ async def get_keys(session_id: str = Depends(get_session_id_from_request)):
235
+ """
236
+ Get the list of providers that have API keys configured in the current session.
237
+ """
238
+ payload = await get_session_payload(session_id)
239
+ api_keys = payload.get("api_keys", {})
240
+ connected = [provider for provider, key in api_keys.items() if key]
241
+ return {"connected_providers": connected}
242
+
243
+ # OAuth configuration
244
+ GOOGLE_CLIENT_ID = os.getenv("GOOGLE_CLIENT_ID")
245
+ GOOGLE_CLIENT_SECRET = os.getenv("GOOGLE_CLIENT_SECRET")
246
+ GITHUB_CLIENT_ID = os.getenv("GITHUB_CLIENT_ID")
247
+ GITHUB_CLIENT_SECRET = os.getenv("GITHUB_CLIENT_SECRET")
248
+ REDIRECT_URI_HOST = os.getenv("REDIRECT_URI_HOST", "http://localhost:8000")
249
+
250
+ @app.get("/auth/login/google")
251
+ def login_google():
252
+ if not GOOGLE_CLIENT_ID:
253
+ raise HTTPException(status_code=400, detail="Google Auth is not configured. Please set GOOGLE_CLIENT_ID env variable.")
254
+ redirect_uri = f"{REDIRECT_URI_HOST}/auth/callback/google"
255
+ auth_url = (
256
+ "https://accounts.google.com/o/oauth2/v2/auth"
257
+ f"?response_type=code"
258
+ f"&client_id={GOOGLE_CLIENT_ID}"
259
+ f"&redirect_uri={redirect_uri}"
260
+ f"&scope=openid%20email%20profile"
261
+ f"&state=google_auth_state"
262
+ )
263
+ return RedirectResponse(url=auth_url)
264
+
265
+ @app.get("/auth/callback/google")
266
+ async def callback_google(code: str, response: Response, db: Session = Depends(get_db)):
267
+ if not GOOGLE_CLIENT_ID or not GOOGLE_CLIENT_SECRET:
268
+ raise HTTPException(status_code=400, detail="Google Auth credentials missing.")
269
+
270
+ redirect_uri = f"{REDIRECT_URI_HOST}/auth/callback/google"
271
+ token_url = "https://oauth2.googleapis.com/token"
272
+ data = {
273
+ "code": code,
274
+ "client_id": GOOGLE_CLIENT_ID,
275
+ "client_secret": GOOGLE_CLIENT_SECRET,
276
+ "redirect_uri": redirect_uri,
277
+ "grant_type": "authorization_code",
278
+ }
279
+
280
+ import urllib.request
281
+ import urllib.parse
282
+ import json
283
+
284
+ try:
285
+ req_data = urllib.parse.urlencode(data).encode("utf-8")
286
+ req = urllib.request.Request(token_url, data=req_data, method="POST")
287
+ with urllib.request.urlopen(req) as r:
288
+ token_res = json.loads(r.read().decode("utf-8"))
289
+
290
+ access_token = token_res.get("access_token")
291
+ if not access_token:
292
+ raise HTTPException(status_code=400, detail="Failed to retrieve access token from Google.")
293
+
294
+ userinfo_url = "https://www.googleapis.com/oauth2/v3/userinfo"
295
+ req_user = urllib.request.Request(
296
+ userinfo_url,
297
+ headers={"Authorization": f"Bearer {access_token}"}
298
+ )
299
+ with urllib.request.urlopen(req_user) as r_user:
300
+ user_info = json.loads(r_user.read().decode("utf-8"))
301
+
302
+ email = user_info.get("email")
303
+ if not email:
304
+ raise HTTPException(status_code=400, detail="Google account has no email associated.")
305
+
306
+ db_user = crud.get_user_by_email(db, email=email)
307
+ if not db_user:
308
+ import secrets
309
+ random_pw = secrets.token_hex(16)
310
+ user_in = crud.UserCreate(email=email, password=random_pw)
311
+ db_user = crud.create_user(db, user_in)
312
+
313
+ api_keys = {}
314
+ from llmopt.api.security import decrypt_string
315
+ user_keys_encrypted = crud.get_user_api_keys(db, db_user.id)
316
+ if user_keys_encrypted:
317
+ try:
318
+ api_keys = {p: decrypt_string(k) for p, k in user_keys_encrypted.items()}
319
+ except Exception:
320
+ pass
321
+
322
+ session_id = await create_session(api_keys, user_id=db_user.id)
323
+ redirect_url = f"{REDIRECT_URI_HOST}/ui/workspace.html#api"
324
+ res = RedirectResponse(url=redirect_url)
325
+ res.set_cookie(
326
+ key="session_id",
327
+ value=session_id,
328
+ httponly=True,
329
+ secure=COOKIE_SECURE,
330
+ samesite=COOKIE_SAMESITE,
331
+ max_age=int(os.getenv("SESSION_TTL", 7200))
332
+ )
333
+ return res
334
+
335
+ except Exception as e:
336
+ logger.error(f"Google OAuth failed: {e}")
337
+ raise HTTPException(status_code=500, detail=f"Google OAuth failed: {str(e)}")
338
+
339
+ @app.get("/auth/login/github")
340
+ def login_github():
341
+ if not GITHUB_CLIENT_ID:
342
+ raise HTTPException(status_code=400, detail="GitHub Auth is not configured. Please set GITHUB_CLIENT_ID env variable.")
343
+ redirect_uri = f"{REDIRECT_URI_HOST}/auth/callback/github"
344
+ auth_url = (
345
+ "https://github.com/login/oauth/authorize"
346
+ f"?client_id={GITHUB_CLIENT_ID}"
347
+ f"&redirect_uri={redirect_uri}"
348
+ f"&scope=user:email"
349
+ f"&state=github_auth_state"
350
+ )
351
+ return RedirectResponse(url=auth_url)
352
+
353
+ @app.get("/auth/callback/github")
354
+ async def callback_github(code: str, response: Response, db: Session = Depends(get_db)):
355
+ if not GITHUB_CLIENT_ID or not GITHUB_CLIENT_SECRET:
356
+ raise HTTPException(status_code=400, detail="GitHub Auth credentials missing.")
357
+
358
+ redirect_uri = f"{REDIRECT_URI_HOST}/auth/callback/github"
359
+ token_url = "https://github.com/login/oauth/access_token"
360
+ data = {
361
+ "code": code,
362
+ "client_id": GITHUB_CLIENT_ID,
363
+ "client_secret": GITHUB_CLIENT_SECRET,
364
+ "redirect_uri": redirect_uri,
365
+ }
366
+
367
+ import urllib.request
368
+ import urllib.parse
369
+ import json
370
+
371
+ try:
372
+ req_data = urllib.parse.urlencode(data).encode("utf-8")
373
+ req = urllib.request.Request(
374
+ token_url,
375
+ data=req_data,
376
+ headers={"Accept": "application/json"},
377
+ method="POST"
378
+ )
379
+ with urllib.request.urlopen(req) as r:
380
+ token_res = json.loads(r.read().decode("utf-8"))
381
+
382
+ access_token = token_res.get("access_token")
383
+ if not access_token:
384
+ raise HTTPException(status_code=400, detail="Failed to retrieve access token from GitHub.")
385
+
386
+ email_url = "https://api.github.com/user/emails"
387
+ req_email = urllib.request.Request(
388
+ email_url,
389
+ headers={
390
+ "Authorization": f"token {access_token}",
391
+ "User-Agent": "LLMOpt-Server"
392
+ }
393
+ )
394
+ with urllib.request.urlopen(req_email) as r_email:
395
+ emails = json.loads(r_email.read().decode("utf-8"))
396
+
397
+ email = None
398
+ for email_info in emails:
399
+ if email_info.get("primary"):
400
+ email = email_info.get("email")
401
+ break
402
+ if not email and emails:
403
+ email = emails[0].get("email")
404
+
405
+ if not email:
406
+ raise HTTPException(status_code=400, detail="GitHub account has no email associated.")
407
+
408
+ db_user = crud.get_user_by_email(db, email=email)
409
+ if not db_user:
410
+ import secrets
411
+ random_pw = secrets.token_hex(16)
412
+ user_in = crud.UserCreate(email=email, password=random_pw)
413
+ db_user = crud.create_user(db, user_in)
414
+
415
+ api_keys = {}
416
+ from llmopt.api.security import decrypt_string
417
+ user_keys_encrypted = crud.get_user_api_keys(db, db_user.id)
418
+ if user_keys_encrypted:
419
+ try:
420
+ api_keys = {p: decrypt_string(k) for p, k in user_keys_encrypted.items()}
421
+ except Exception:
422
+ pass
423
+
424
+ session_id = await create_session(api_keys, user_id=db_user.id)
425
+ redirect_url = f"{REDIRECT_URI_HOST}/ui/workspace.html#api"
426
+ res = RedirectResponse(url=redirect_url)
427
+ res.set_cookie(
428
+ key="session_id",
429
+ value=session_id,
430
+ httponly=True,
431
+ secure=COOKIE_SECURE,
432
+ samesite=COOKIE_SAMESITE,
433
+ max_age=int(os.getenv("SESSION_TTL", 7200))
434
+ )
435
+ return res
436
+
437
+ except Exception as e:
438
+ logger.error(f"GitHub OAuth failed: {e}")
439
+ raise HTTPException(status_code=500, detail=f"GitHub OAuth failed: {str(e)}")
440
+
441
+ @app.get("/auth/dashboard-stats")
442
+ async def get_dashboard_stats(session_id: str = Depends(get_session_id_from_request), db: Session = Depends(get_db)):
443
+ """Calculate dashboard statistics dynamically from the generation logs in the database."""
444
+ session_payload = await get_session_payload(session_id)
445
+ user_id = session_payload.get("user_id")
446
+
447
+ logs = db.query(models.GenerationLog).filter(models.GenerationLog.user_id == user_id).order_by(models.GenerationLog.created_at.desc()).all()
448
+
449
+ prompts_improved = len(logs)
450
+
451
+ total_tokens_saved = sum(log.tokens_saved for log in logs if log.tokens_saved)
452
+ total_cost_saved = sum(log.cost_saved for log in logs if log.cost_saved)
453
+
454
+ distribution = {}
455
+ total_with_provider = 0
456
+ for log in logs:
457
+ if log.provider:
458
+ provider = log.provider.lower()
459
+ distribution[provider] = distribution.get(provider, 0) + 1
460
+ total_with_provider += 1
461
+
462
+ distribution_percentages = {}
463
+ if total_with_provider > 0:
464
+ for provider, count in distribution.items():
465
+ distribution_percentages[provider] = round((count / total_with_provider) * 100, 1)
466
+
467
+ recent_decisions = []
468
+ from datetime import datetime
469
+ for log in logs[:5]:
470
+ time_diff = datetime.utcnow() - log.created_at
471
+ if time_diff.days > 0:
472
+ time_str = f"{time_diff.days}d ago"
473
+ elif time_diff.seconds // 3600 > 0:
474
+ time_str = f"{time_diff.seconds // 3600}h ago"
475
+ else:
476
+ time_str = f"{(time_diff.seconds % 3600) // 60}m ago"
477
+ if time_str == "0m ago":
478
+ time_str = "just now"
479
+
480
+ recent_decisions.append({
481
+ "id": f"PROMPT_{log.id}",
482
+ "time_ago": time_str,
483
+ "model": log.model_used,
484
+ "provider": log.provider,
485
+ "tier": log.complexity_tier or "standard",
486
+ "score": round((log.complexity_score or 0.72) * 100, 1),
487
+ "reason": f"Routed based on {log.complexity_tier or 'standard'} tier (complexity score: {round((log.complexity_score or 0.72)*100)}/100)."
488
+ })
489
+
490
+ recent_optimizations = []
491
+ for log in logs[:3]:
492
+ time_diff = datetime.utcnow() - log.created_at
493
+ if time_diff.days > 0:
494
+ time_str = f"{time_diff.days}d ago"
495
+ elif time_diff.seconds // 3600 > 0:
496
+ time_str = f"{time_diff.seconds // 3600}h ago"
497
+ else:
498
+ time_str = f"{(time_diff.seconds % 3600) // 60}m ago"
499
+ if time_str == "0m ago":
500
+ time_str = "just now"
501
+
502
+ recent_optimizations.append({
503
+ "name": log.query[:40] + ("..." if len(log.query) > 40 else ""),
504
+ "model_used": log.model_used,
505
+ "time_ago": time_str,
506
+ "score": f"{round((log.complexity_score or 0.72) * 100, 1)}%",
507
+ "tokens_saved": f"-{log.tokens_saved or 0} tokens/avg"
508
+ })
509
+
510
+ avg_complexity = 0.0
511
+ valid_scores = [log.complexity_score for log in logs if log.complexity_score is not None]
512
+ if valid_scores:
513
+ avg_complexity = sum(valid_scores) / len(valid_scores)
514
+ avg_boost = f"+{round(avg_complexity * 30, 1)}%" if avg_complexity > 0 else "0%"
515
+
516
  return {
517
+ "tokens_saved": f"{total_tokens_saved:,}" if total_tokens_saved > 0 else "0",
518
+ "prompts_improved": prompts_improved,
519
+ "routing_savings": f"${total_cost_saved:,.2f}" if total_cost_saved > 0 else "$0.00",
520
+ "avg_boost": avg_boost,
521
+ "distribution": distribution_percentages,
522
+ "recent_decisions": recent_decisions,
523
+ "recent_optimizations": recent_optimizations,
524
+ "running_workflows": 0,
525
+ "queued_workflows": 0
526
  }
527
 
528
@app.get("/auth/history")
async def get_history(session_id: str = Depends(get_session_id_from_request), db: Session = Depends(get_db)):
    """Fetch the list of recent generation logs for the authenticated user.

    Returns up to 20 rows, newest first, each flattened into a plain dict that
    carries a human-readable ``time_ago`` label for the frontend.

    Raises:
        HTTPException: 401 when the session payload carries no ``user_id``.
    """
    from datetime import datetime, timedelta

    session_payload = await get_session_payload(session_id)
    user_id = session_payload.get("user_id")
    if not user_id:
        raise HTTPException(status_code=401, detail="Not authenticated")

    logs = (
        db.query(models.GenerationLog)
        .filter(models.GenerationLog.user_id == user_id)
        .order_by(models.GenerationLog.created_at.desc())
        .limit(20)
        .all()
    )

    # One reference instant for the whole response so every label is measured
    # against the same "now".
    # NOTE(review): utcnow() is naive, so this assumes created_at is stored as
    # naive UTC -- confirm against the GenerationLog model.
    now = datetime.utcnow()

    def describe_age(created_at):
        """Render a timestamp's age as "Nd ago" / "Nh ago" / "Nm ago" / "just now"."""
        if created_at is None:
            # A row without a timestamp must not take the whole endpoint down.
            return "unknown"
        # Clamp at zero: a timestamp slightly ahead of this host's clock gives a
        # negative timedelta (days == -1, seconds ~ 86395), which the unclamped
        # arithmetic would report as "23h ago".
        elapsed = max(now - created_at, timedelta(0))
        if elapsed.days > 0:
            return f"{elapsed.days}d ago"
        hours = elapsed.seconds // 3600
        if hours > 0:
            return f"{hours}h ago"
        minutes = (elapsed.seconds % 3600) // 60
        return f"{minutes}m ago" if minutes else "just now"

    return [
        {
            "id": log.id,
            "query": log.query,
            "response": log.response,
            "model_used": log.model_used,
            "provider": log.provider,
            "input_tokens": log.input_tokens,
            "output_tokens": log.output_tokens,
            "total_tokens": log.total_tokens,
            "estimated_cost": log.estimated_cost,
            "tokens_saved": log.tokens_saved,
            "cost_saved": log.cost_saved,
            "latency_ms": log.latency_ms,
            "complexity_score": log.complexity_score,
            "complexity_tier": log.complexity_tier,
            "time_ago": describe_age(log.created_at),
        }
        for log in logs
    ]
572
+
573
@app.post("/auth/logout")
async def logout(response: Response, session_id: str = Depends(get_session_id_from_request)):
    """Delete the caller's session and expire the ``session_id`` cookie."""
    await delete_session(session_id)
    # The cookie attributes are passed through from the module-level settings.
    cookie_attrs = {"samesite": COOKIE_SAMESITE, "secure": COOKIE_SECURE}
    response.delete_cookie("session_id", **cookie_attrs)
    confirmation = {"message": "Logged out"}
    return confirmation
579
+
580
+
581
@app.get("/")
def root():
    """Redirect the bare root URL to the UI: the Vite dev server when IS_DEV, ``/ui/`` otherwise."""
    destination = "http://localhost:5173/ui/" if IS_DEV else "/ui/"
    return RedirectResponse(url=destination)
586
+
587
# Development: forward every /ui request to the Vite dev server.
# Otherwise: serve the frontend bundle from the local "static" directory.
if IS_DEV:
    @app.get("/ui")
    @app.get("/ui/{path:path}")
    def redirect_to_vite(path: str = ""):
        """Redirect ``/ui`` and anything beneath it to the same path on the Vite dev server."""
        vite_url = f"http://localhost:5173/ui/{path}"
        return RedirectResponse(url=vite_url)
else:
    app.mount("/ui", StaticFiles(directory="static", html=True), name="static")
594
+
595
 
596
  @app.get("/health")
597
  def health():
 
605
 
606
 
607
  @app.post("/generate", response_model=GenerateResponse)
608
+ async def generate(req: GenerateRequest, session_id: str = Depends(get_session_id_from_request), db: Session = Depends(get_db)):
609
  """
610
  Full pipeline: analyze β†’ optimize β†’ route β†’ return response + metrics.
611
  """
612
+ await check_rate_limit(session_id)
613
+
614
+ # Override req.api_keys with the ones securely stored in the session
615
+ session_payload = await get_session_payload(session_id)
616
+ session_keys = session_payload.get("api_keys", {})
617
+ user_id = session_payload.get("user_id")
618
+ if not req.api_keys:
619
+ req.api_keys = {}
620
+ req.api_keys.update(session_keys)
621
+
622
  try:
623
+ # LLMOpt core relies on synchronous execution right now (litellm async is separate)
624
+ # Assuming _client.generate is synchronous, we run it normally
625
+ # In a high-concurrency async app, we might want run_in_threadpool
626
  result = _client.generate(
627
  query=req.query,
628
  budget_mode=req.budget_mode,
 
635
  temperature=req.temperature,
636
  dry_run=req.dry_run,
637
  api_keys=req.api_keys, # Pass BYOK keys
638
+ alpha=req.alpha,
639
+ beta=req.beta,
640
+ gamma=req.gamma,
641
+ compression_enabled=req.compression_enabled,
642
+ evaluate=req.evaluate,
643
  )
644
+
645
+ # Save generation log to database
646
+ try:
647
+ log_entry = models.GenerationLog(
648
+ user_id=user_id,
649
+ query=req.query,
650
+ response=result.response,
651
+ model_used=result.model_used,
652
+ provider=result.provider,
653
+ input_tokens=result.input_tokens,
654
+ output_tokens=result.output_tokens,
655
+ total_tokens=result.total_tokens,
656
+ estimated_cost=result.estimated_cost,
657
+ tokens_saved=result.tokens_saved,
658
+ cost_saved=result.cost_saved,
659
+ latency_ms=result.latency_ms,
660
+ complexity_score=result.complexity.score,
661
+ complexity_tier=result.complexity.tier
662
+ )
663
+ db.add(log_entry)
664
+ db.commit()
665
+ except Exception as log_err:
666
+ logger.error(f"Failed to save generation log: {log_err}")
667
+
668
  return GenerateResponse(**result.to_dict())
669
  except KeyError as e:
670
  raise HTTPException(status_code=400, detail=f"Model not found: {e}")
671
  except Exception as e:
672
  logger.exception("generate() failed")
673
+ error_msg = str(e).lower()
674
+ if "authentication" in error_msg or "unauthorized" in error_msg or "invalid api key" in error_msg or "401" in error_msg:
675
+ raise HTTPException(status_code=401, detail="API is expired or token limit ended")
676
+ elif "rate limit" in error_msg or "429" in error_msg:
677
+ raise HTTPException(status_code=429, detail="API is expired or token limit ended")
678
  raise HTTPException(status_code=500, detail=str(e))
679
 
680
 
681
  @app.post("/explain")
682
+ async def explain(req: ExplainRequest, session_id: str = Depends(get_session_id_from_request)):
683
  """
684
  Returns the full routing decision for a query WITHOUT making an LLM API call.
685
  Useful for debugging, testing, and understanding optimization decisions.
686
  """
687
+ session_payload = await get_session_payload(session_id)
688
+ session_keys = session_payload.get("api_keys", {})
689
  try:
690
+ return _client.explain(
691
+ query=req.query,
692
+ budget_mode=req.budget_mode,
693
+ alpha=req.alpha,
694
+ beta=req.beta,
695
+ gamma=req.gamma,
696
+ compression_enabled=req.compression_enabled,
697
+ exclude_providers=req.exclude_providers,
698
+ only_providers=req.only_providers,
699
+ api_keys=session_keys,
700
+ )
701
  except Exception as e:
702
  logger.exception("explain() failed")
703
  raise HTTPException(status_code=500, detail=str(e))
704
 
705
 
706
+
707
  @app.post("/stream")
708
+ async def stream_generate(req: GenerateRequest, session_id: str = Depends(get_session_id_from_request)):
709
  """Server-sent stream of response tokens."""
710
+ await check_rate_limit(session_id)
711
+ session_payload = await get_session_payload(session_id)
712
+ session_keys = session_payload.get("api_keys", {})
713
+ if not req.api_keys:
714
+ req.api_keys = {}
715
+ req.api_keys.update(session_keys)
716
+
717
  def token_generator():
718
  try:
719
  for chunk in _client.stream(
llmopt/api/crud.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sqlalchemy.orm import Session
2
+ from passlib.context import CryptContext
3
+ from llmopt.db import models
4
+ from pydantic import BaseModel
5
+
6
# Shared passlib context used by get_password_hash() and verify_password();
# bcrypt is the only configured scheme.
pwd_context = CryptContext(deprecated="auto", schemes=["bcrypt"])
7
+
8
class UserCreate(BaseModel):
    """Request body for registering an account: an email and a plaintext password."""

    # Plain strings: no format validation is applied by this schema.
    email: str
    password: str
11
+
12
class UserLogin(BaseModel):
    """Request body for signing in: the account email and its plaintext password."""

    # Same two fields as UserCreate, kept as a separate schema.
    email: str
    password: str
15
+
16
def get_password_hash(password):
    """Hash a plaintext password with the module's passlib context and return the hash."""
    hashed = pwd_context.hash(password)
    return hashed
18
+
19
def verify_password(plain_password, hashed_password):
    """Return the passlib verdict on whether ``plain_password`` matches ``hashed_password``."""
    is_match = pwd_context.verify(plain_password, hashed_password)
    return is_match
21
+
22
def get_user_by_email(db: Session, email: str):
    """Return the first ``User`` row whose email equals ``email`` (``None`` when there is none)."""
    email_matches = models.User.email == email
    user_query = db.query(models.User).filter(email_matches)
    return user_query.first()
24
+
25
def create_user(db: Session, user: UserCreate):
    """Persist a new user with a hashed password and return the refreshed row.

    Args:
        db: Open SQLAlchemy session; the insert is committed on it.
        user: Registration payload carrying ``email`` and plaintext ``password``.

    Returns:
        The newly stored ``models.User`` instance, refreshed from the database.

    Raises:
        Exception: Whatever the commit raises (for example a constraint
            violation) is re-raised after the session has been rolled back.
    """
    hashed_password = get_password_hash(user.password)
    db_user = models.User(email=user.email, hashed_password=hashed_password)
    db.add(db_user)
    try:
        db.commit()
    except Exception:
        # A failed commit leaves the session unusable until it is rolled back;
        # reset it here so the caller can keep using the same session.
        db.rollback()
        raise
    db.refresh(db_user)
    return db_user
32
+
33
def get_user_by_id(db: Session, user_id: int):
    """Return the first ``User`` row with primary key ``user_id`` (``None`` when there is none)."""
    id_matches = models.User.id == user_id
    user_query = db.query(models.User).filter(id_matches)
    return user_query.first()
35
+
36
def update_user_api_keys(db: Session, user_id: int, provider_keys: dict):
    """Insert or overwrite the stored (already encrypted) API key for each provider.

    Args:
        db: Open SQLAlchemy session; all changes are committed together.
        user_id: Owner of the keys.
        provider_keys: Mapping of provider name to encrypted key text.

    Raises:
        Exception: Whatever the commit raises is re-raised after the session
            has been rolled back.
    """
    # Load the user's existing rows once instead of issuing one SELECT per provider.
    existing_by_provider = {}
    if provider_keys:
        rows = db.query(models.UserAPIKey).filter(models.UserAPIKey.user_id == user_id).all()
        for row in rows:
            # Keep the first row seen per provider, mirroring a `.first()` lookup.
            existing_by_provider.setdefault(row.provider, row)

    for provider, encrypted_key in provider_keys.items():
        record = existing_by_provider.get(provider)
        if record is not None:
            record.encrypted_key = encrypted_key
        else:
            db.add(models.UserAPIKey(user_id=user_id, provider=provider, encrypted_key=encrypted_key))

    try:
        db.commit()
    except Exception:
        # Leave the session usable for the caller after a failed commit.
        db.rollback()
        raise
48
+
49
def get_user_api_keys(db: Session, user_id: int) -> dict:
    """Return ``{provider: encrypted_key}`` for every API-key row owned by ``user_id``."""
    keys_by_provider = {}
    owned_rows = db.query(models.UserAPIKey).filter(models.UserAPIKey.user_id == user_id).all()
    for row in owned_rows:
        # A later row for the same provider overwrites an earlier one.
        keys_by_provider[row.provider] = row.encrypted_key
    return keys_by_provider
52
+
53
def delete_user_api_key(db: Session, user_id: int, provider: str) -> None:
    """Delete the stored API key(s) of ``user_id`` for ``provider`` and commit."""
    owned_by_user = models.UserAPIKey.user_id == user_id
    for_provider = models.UserAPIKey.provider == provider
    matching_rows = db.query(models.UserAPIKey).filter(owned_by_user, for_provider)
    matching_rows.delete()
    db.commit()
59
+
llmopt/api/security.py ADDED
@@ -0,0 +1,186 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import uuid
4
+ import logging
5
+ from typing import Dict, Optional
6
+ from cryptography.fernet import Fernet
7
+ from fastapi import Request, HTTPException, status
8
+ from llmopt.cache.redis_client import get_redis
9
+ import jwt
10
+ import datetime
11
+ from llmopt.db.session import SessionLocal
12
+ from llmopt.api import crud
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
# Master secret for this module. It is used three ways below: as the Fernet key
# for session payloads cached in Redis, as the Fernet key for API keys read
# back from the database, and as the HS256 signing secret of the session JWT.
# In production, this MUST be set via environment variable: the fallback key
# only exists for the lifetime of this process.
_SECRET_KEY = os.environ.get("SESSION_SECRET_KEY") or None
if _SECRET_KEY is None:
    logger.warning("SESSION_SECRET_KEY not set. Generating a temporary one for this process.")
    _SECRET_KEY = Fernet.generate_key().decode("utf-8")

fernet = Fernet(_SECRET_KEY.encode("utf-8"))

# Session lifetime in seconds; defaults to 2 hours unless SESSION_TTL overrides it.
SESSION_TTL = int(os.getenv("SESSION_TTL", 7200))
27
+
28
+
29
def encrypt_payload(payload: dict) -> str:
    """Serialize ``payload`` to JSON and return it as a Fernet token string."""
    plaintext = json.dumps(payload).encode("utf-8")
    token = fernet.encrypt(plaintext)
    return token.decode("utf-8")
33
+
34
+
35
def decrypt_payload(encrypted_data: str) -> dict:
    """Decrypt a Fernet token string and parse the JSON inside it into a dict."""
    token = encrypted_data.encode("utf-8")
    plaintext = fernet.decrypt(token).decode("utf-8")
    return json.loads(plaintext)
39
+
40
+
41
def encrypt_string(data: str) -> str:
    """Encrypt a text value with the module Fernet key and return the token as text."""
    token = fernet.encrypt(data.encode("utf-8"))
    return token.decode("utf-8")
43
+
44
+
45
def decrypt_string(encrypted_data: str) -> str:
    """Decrypt a Fernet token string back into the original text."""
    plaintext = fernet.decrypt(encrypted_data.encode("utf-8"))
    return plaintext.decode("utf-8")
47
+
48
+
49
async def create_session(api_keys: Dict[str, str], user_id: Optional[int] = None) -> str:
    """Create a session: sign a JWT as the session ID and cache the encrypted payload in Redis.

    Args:
        api_keys: Provider-to-key mapping to keep in the session.
        user_id: Optional account ID; when given it is embedded in the JWT as well.

    Returns:
        The signed JWT, which doubles as the Redis key suffix.

    Raises:
        HTTPException: 503 when no Redis client is available.
    """
    redis = await get_redis()
    if not redis:
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            detail="Redis cache is unavailable. Cannot create session."
        )

    # The token carries a random jti plus, when known, the user ID.
    claims = {"jti": str(uuid.uuid4())}
    if user_id is not None:
        claims["user_id"] = user_id
    session_id = jwt.encode(claims, _SECRET_KEY, algorithm="HS256")

    encrypted_payload = encrypt_payload({"api_keys": api_keys, "user_id": user_id})

    # SETEX stores the value together with its time-to-live.
    await redis.setex(f"session:{session_id}", SESSION_TTL, encrypted_payload)
    return session_id
72
+
73
+
74
async def update_session_payload(session_id: str, payload: dict) -> None:
    """Overwrite the cached payload of an existing session and restart its TTL.

    Raises:
        HTTPException: 503 when no Redis client is available.
    """
    redis = await get_redis()
    if redis:
        sealed = encrypt_payload(payload)
        await redis.setex(f"session:{session_id}", SESSION_TTL, sealed)
        return
    raise HTTPException(
        status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
        detail="Redis cache is unavailable. Cannot update session."
    )
84
+
85
+
86
async def get_session_payload(session_id: str) -> dict:
    """Return the decrypted session payload, rebuilding it from the database on a cache miss.

    On a cache hit the Redis entry's TTL is refreshed and the payload decrypted.
    On a miss the session ID is verified as a JWT, the user's stored API keys
    are loaded and decrypted, and the payload is written back to Redis.

    NOTE(review): because a cache miss is recoverable from any validly signed
    token carrying a ``user_id``, deleting the Redis entry (see delete_session)
    does not by itself invalidate the token -- confirm this is intended.

    Args:
        session_id: The session JWT taken from the cookie or Bearer header.

    Returns:
        A dict with ``api_keys`` and ``user_id``.

    Raises:
        HTTPException: 503 when Redis is unavailable; 401 when the token is
            invalid or has no ``user_id``; 500 when decryption fails.
    """
    redis = await get_redis()
    if not redis:
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            detail="Redis cache is unavailable."
        )

    redis_key = f"session:{session_id}"
    encrypted_keys = await redis.get(redis_key)

    # Cache hit: refresh the TTL, then decrypt.
    if encrypted_keys:
        await redis.expire(redis_key, SESSION_TTL)
        try:
            return decrypt_payload(encrypted_keys)
        except Exception as e:
            logger.error(f"Failed to decrypt session keys: {e}")
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail="Failed to decrypt session."
            )

    # Cache miss: the only thing that can raise InvalidTokenError is the decode
    # itself, so keep the try block down to that call.
    try:
        jwt_payload = jwt.decode(session_id, _SECRET_KEY, algorithms=["HS256"])
    except jwt.InvalidTokenError:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Session expired or invalid."
        )

    user_id = jwt_payload.get("user_id")
    if not user_id:
        raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="Session expired. No user context.")

    # Fetch the encrypted keys with a short-lived session that is always closed.
    db = SessionLocal()
    try:
        user_keys_encrypted = crud.get_user_api_keys(db, user_id)
    finally:
        db.close()

    # Mirror the cache-hit path: a decryption failure becomes a logged,
    # well-formed 500 instead of an unhandled exception.
    try:
        api_keys = {p: decrypt_string(k) for p, k in user_keys_encrypted.items()}
    except Exception as e:
        logger.error(f"Failed to decrypt stored API keys: {e}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Failed to decrypt stored API keys."
        ) from e

    payload = {"api_keys": api_keys, "user_id": user_id}

    # Repopulate Redis so the next request is a cache hit.
    await redis.setex(redis_key, SESSION_TTL, encrypt_payload(payload))
    return payload
140
+
141
+
142
async def delete_session(session_id: str) -> bool:
    """Delete the session's Redis entry; return False only when no Redis client is available."""
    redis = await get_redis()
    if redis:
        await redis.delete(f"session:{session_id}")
        return True
    return False
149
+
150
+
151
async def check_rate_limit(session_id: str) -> None:
    """
    Basic rate limiting: max 20 requests per minute per session.

    A per-session counter is incremented on every call and given a 60-second
    expiry when it is first created. When no Redis client is available the
    check is skipped.

    Raises:
        HTTPException: 429 once the counter exceeds the limit within the window.
    """
    max_requests = 20
    window_seconds = 60

    redis = await get_redis()
    if not redis:
        return

    key = f"ratelimit:{session_id}"
    requests = await redis.incr(key)

    if requests == 1:
        await redis.expire(key, window_seconds)

    if requests > max_requests:
        # INCR and EXPIRE are two separate commands. If the EXPIRE after the
        # first INCR was lost (dropped connection, process killed), the counter
        # would never reset and the session would be blocked permanently.
        # TTL reports -1 for a key without an expiry; restore the window here
        # so the block always lifts. Only the over-limit path pays for the check.
        if await redis.ttl(key) == -1:
            await redis.expire(key, window_seconds)
        raise HTTPException(
            status_code=status.HTTP_429_TOO_MANY_REQUESTS,
            detail="Rate limit exceeded. Please try again later."
        )
170
+
171
def get_session_id_from_request(request: Request) -> str:
    """Extract the session ID from the ``session_id`` cookie or a Bearer Authorization header.

    The cookie wins when both are present. The authorization scheme is matched
    case-insensitively, and a header with no token after the scheme is treated
    as missing rather than returned as an empty session ID.

    Raises:
        HTTPException: 401 when neither source yields a session ID.
    """
    # First try the cookie.
    session_id = request.cookies.get("session_id")
    if session_id:
        return session_id

    # Then try the Authorization header. Splitting on any run of whitespace
    # tolerates "Bearer  <token>" and drops a bare "Bearer " with no token.
    auth_header = request.headers.get("Authorization")
    if auth_header:
        parts = auth_header.split()
        if len(parts) >= 2 and parts[0].lower() == "bearer":
            return parts[1]

    raise HTTPException(
        status_code=status.HTTP_401_UNAUTHORIZED,
        detail="Missing session_id cookie or Bearer token."
    )
llmopt/cache/redis_client.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import logging
3
+ from typing import Optional
4
+ from redis import asyncio as aioredis
5
+
6
+ logger = logging.getLogger(__name__)
7
+
8
class RedisManager:
    """Owns the process's asyncio Redis client.

    ``redis`` is ``None`` until ``connect()`` succeeds, and again after
    ``close()`` or a failed connection attempt.
    """

    def __init__(self):
        self.redis: Optional[aioredis.Redis] = None

    async def connect(self):
        """Connect using ``REDIS_URL`` and publish the client only after a successful ping.

        A missing URL or a failed connection is logged and leaves ``redis`` as
        ``None``; no exception is raised to the caller.
        """
        redis_url = os.getenv("REDIS_URL")
        if not redis_url:
            logger.warning("REDIS_URL environment variable is not set. Redis features will be disabled.")
            return

        client = None
        try:
            client = aioredis.from_url(
                redis_url,
                encoding="utf-8",
                decode_responses=True,
                socket_timeout=5.0,
                socket_connect_timeout=5.0,
                retry_on_timeout=True,
                max_connections=10
            )
            await client.ping()
        except Exception as e:
            logger.error(f"Failed to connect to Redis: {e}")
            self.redis = None
            if client is not None:
                # Release the half-open client instead of abandoning it.
                try:
                    await self._shutdown(client)
                except Exception as close_err:
                    logger.debug(f"Ignoring error while closing failed Redis client: {close_err}")
            return

        # Assign only after the ping so callers never see an unverified client.
        self.redis = client
        logger.info("Successfully connected to Redis.")

    async def close(self):
        """Close the client, if any, and reset ``redis`` to ``None``."""
        # Detach first so no caller can pick up a client that is being closed.
        client, self.redis = self.redis, None
        if client is not None:
            await self._shutdown(client)

    @staticmethod
    async def _shutdown(client):
        """Close ``client`` via ``aclose()`` when it exists, else the older ``close()``."""
        closer = getattr(client, "aclose", None) or client.close
        await closer()
37
+
38
# Module-level manager instance shared by everything that imports this module.
redis_manager = RedisManager()


async def get_redis():
    """Return the client currently held by ``redis_manager`` (``None`` when there is none)."""
    client = redis_manager.redis
    return client
llmopt/core.py CHANGED
@@ -18,17 +18,18 @@ import time
18
  import os
19
  from dataclasses import dataclass
20
  from pathlib import Path
21
- from typing import Optional
22
 
23
  from llmopt.analyzer.query_analyzer import QueryAnalyzer, QueryFeatures
24
  from llmopt.estimator.complexity_estimator import ComplexityEstimator, ComplexityResult
25
  from llmopt.engine.optimization_engine import OptimizationEngine, OptimizationResult, UserConstraints
 
 
26
  from llmopt.optimizer.prompt_optimizer import PromptOptimizer, OptimizedPrompt
27
  from llmopt.router.model_router import ModelRouter, RoutedResponse
28
  from llmopt.registry.model_registry import ModelRegistry
29
  from llmopt.cache.semantic_cache import SemanticCache
30
  from llmopt.evaluation.evaluator import LLMJudge, EvaluationResult
31
- import os
32
 
33
  logger = logging.getLogger(__name__)
34
 
@@ -136,30 +137,40 @@ class LLMOpt:
136
  registry_path: Optional[Path] = None,
137
  ollama_base_url: Optional[str] = None,
138
  log_level: str = "WARNING",
 
139
  ):
140
  logging.basicConfig(level=getattr(logging, log_level.upper(), logging.WARNING))
141
 
142
  self.registry = ModelRegistry(registry_path)
143
  self.analyzer = QueryAnalyzer()
144
  self.estimator = ComplexityEstimator()
145
- self.engine = OptimizationEngine(self.registry)
146
  self.optimizer = PromptOptimizer()
147
  self.router = ModelRouter(ollama_base_url=ollama_base_url)
148
-
149
  # Initialize Semantic Cache (reads REDIS_URL from env if available)
150
- # Using python-dotenv to ensure .env is loaded
151
  try:
152
  from dotenv import load_dotenv # type: ignore
153
- # Attempt to load from both the root and config/.env
154
  load_dotenv()
155
  load_dotenv("config/.env")
156
  except ImportError:
157
  pass
158
-
159
  redis_url = os.environ.get("REDIS_URL")
160
  self.cache = SemanticCache(redis_url=redis_url)
161
  self.judge = LLMJudge(judge_model="gpt-4o-mini")
162
 
 
 
 
 
 
 
 
 
 
 
 
163
  # ------------------------------------------------------------------
164
  # Primary API
165
  # ------------------------------------------------------------------
@@ -178,6 +189,10 @@ class LLMOpt:
178
  dry_run: bool = False,
179
  evaluate: bool = False,
180
  api_keys: Optional[Dict[str, str]] = None,
 
 
 
 
181
  ) -> GenerateResult:
182
  """
183
  Full pipeline: analyze β†’ estimate β†’ optimize β†’ compress β†’ route β†’ return.
@@ -212,11 +227,17 @@ class LLMOpt:
212
  latency_ms = (time.perf_counter() - t0) * 1000
213
  logger.info("Returning cached response directly.")
214
 
215
- constraints = UserConstraints(budget_mode=budget_mode)
 
 
 
216
  optimization = self.engine.optimize(
217
  complexity=complexity,
218
  output_length_bucket=features.estimated_output_length,
219
  constraints=constraints,
 
 
 
220
  )
221
  optimized_prompt = self.optimizer.optimize(
222
  query=query,
@@ -257,16 +278,40 @@ class LLMOpt:
257
  exclude_providers=exclude_providers or [],
258
  only_providers=only_providers or [],
259
  prefer_local=prefer_local,
 
260
  )
261
  if prefer_local:
262
  constraints.only_providers = ["ollama"]
263
 
264
  # 4. Optimize (select model + config)
265
- optimization = self.engine.optimize(
266
- complexity=complexity,
267
- output_length_bucket=features.estimated_output_length,
268
- constraints=constraints,
269
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
270
  logger.debug(f"Selected: {optimization.selected_model}")
271
 
272
  # 5. Optimize prompt
@@ -284,16 +329,26 @@ class LLMOpt:
284
  if dry_run:
285
  routed = self._mock_response(optimization)
286
  else:
287
- model_spec = self.registry.get(optimization.selected_model)
 
 
 
 
 
 
 
 
 
 
288
  routed = self.router.route(
289
  model_name=optimization.selected_model,
290
  provider=optimization.provider,
291
  messages=messages,
292
  max_tokens=optimization.max_tokens,
293
  temperature=temperature,
294
- input_cost_per_1k=model_spec.input_cost_per_1k,
295
- output_cost_per_1k=model_spec.output_cost_per_1k,
296
- api_keys=api_keys, # Pass BYOK keys
297
  )
298
  latency_ms = (time.perf_counter() - t0) * 1000
299
 
@@ -308,11 +363,21 @@ class LLMOpt:
308
  )
309
  cost_saved = max(0.0, baseline_cost - routed.estimated_cost)
310
 
311
- # 9. Evaluate (if requested) and feed Bayesian optimizer
312
  evaluation = None
313
  if evaluate and not dry_run:
314
  evaluation = self.judge.evaluate(query, routed.content)
315
- if evaluation:
 
 
 
 
 
 
 
 
 
 
316
  Ξ±, Ξ², Ξ³ = self.engine.bayes.get_weights(constraints.budget_mode)
317
  self.engine.bayes.record_outcome(
318
  budget_mode=constraints.budget_mode,
@@ -351,15 +416,26 @@ class LLMOpt:
351
  api_keys: Optional[Dict[str, str]] = None,
352
  **kwargs,
353
  ):
354
- """Yields text chunks. Pipeline still runs fully before streaming."""
355
- features = self.analyzer.analyze(query)
356
- complexity = self.estimator.estimate(features)
357
- constraints = UserConstraints(budget_mode=budget_mode)
358
- optimization = self.engine.optimize(
359
- complexity=complexity,
360
- output_length_bucket=features.estimated_output_length,
361
- constraints=constraints,
362
- )
 
 
 
 
 
 
 
 
 
 
 
363
  optimized_prompt = self.optimizer.optimize(
364
  query=query,
365
  system_prompt_style=optimization.system_prompt_style,
@@ -370,27 +446,62 @@ class LLMOpt:
370
  model_name=optimization.selected_model,
371
  messages=messages,
372
  max_tokens=optimization.max_tokens,
373
- provider=optimization.provider, # Pass provider
374
- api_keys=api_keys, # Pass BYOK keys
375
  )
376
 
377
  # ------------------------------------------------------------------
378
  # Explainability (standalone)
379
  # ------------------------------------------------------------------
380
 
381
- def explain(self, query: str, budget_mode: str = "balanced") -> dict:
 
 
 
 
 
 
 
 
 
 
 
382
  """
383
  Returns a structured explanation of what LLMOpt would do for a query,
384
  without making an actual API call.
385
  """
386
  features = self.analyzer.analyze(query)
387
  complexity = self.estimator.estimate(features)
388
- constraints = UserConstraints(budget_mode=budget_mode)
389
- optimization = self.engine.optimize(
390
- complexity=complexity,
391
- output_length_bucket=features.estimated_output_length,
392
- constraints=constraints,
393
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
394
  optimized_prompt = self.optimizer.optimize(
395
  query=query,
396
  system_prompt_style=optimization.system_prompt_style,
@@ -404,6 +515,7 @@ class LLMOpt:
404
  "optimized_prompt": optimized_prompt.to_dict(),
405
  }
406
 
 
407
  # ------------------------------------------------------------------
408
  # Helpers
409
  # ------------------------------------------------------------------
@@ -433,3 +545,62 @@ class LLMOpt:
433
  latency_ms=0.0,
434
  estimated_cost=optimization.estimated_cost,
435
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  import os
19
  from dataclasses import dataclass
20
  from pathlib import Path
21
+ from typing import Optional, Dict, List
22
 
23
  from llmopt.analyzer.query_analyzer import QueryAnalyzer, QueryFeatures
24
  from llmopt.estimator.complexity_estimator import ComplexityEstimator, ComplexityResult
25
  from llmopt.engine.optimization_engine import OptimizationEngine, OptimizationResult, UserConstraints
26
+ from llmopt.engine.llmopt_engine import LLMOptEngine
27
+ from llmopt.engine.utility_engine import RoutingDecision
28
  from llmopt.optimizer.prompt_optimizer import PromptOptimizer, OptimizedPrompt
29
  from llmopt.router.model_router import ModelRouter, RoutedResponse
30
  from llmopt.registry.model_registry import ModelRegistry
31
  from llmopt.cache.semantic_cache import SemanticCache
32
  from llmopt.evaluation.evaluator import LLMJudge, EvaluationResult
 
33
 
34
  logger = logging.getLogger(__name__)
35
 
 
137
  registry_path: Optional[Path] = None,
138
  ollama_base_url: Optional[str] = None,
139
  log_level: str = "WARNING",
140
+ use_v2_engine: bool = True,
141
  ):
142
  logging.basicConfig(level=getattr(logging, log_level.upper(), logging.WARNING))
143
 
144
  self.registry = ModelRegistry(registry_path)
145
  self.analyzer = QueryAnalyzer()
146
  self.estimator = ComplexityEstimator()
147
+ self.engine = OptimizationEngine(self.registry) # V1 β€” kept for fallback
148
  self.optimizer = PromptOptimizer()
149
  self.router = ModelRouter(ollama_base_url=ollama_base_url)
150
+
151
  # Initialize Semantic Cache (reads REDIS_URL from env if available)
 
152
  try:
153
  from dotenv import load_dotenv # type: ignore
 
154
  load_dotenv()
155
  load_dotenv("config/.env")
156
  except ImportError:
157
  pass
158
+
159
  redis_url = os.environ.get("REDIS_URL")
160
  self.cache = SemanticCache(redis_url=redis_url)
161
  self.judge = LLMJudge(judge_model="gpt-4o-mini")
162
 
163
+ # V2 Utility Engine β€” default active
164
+ self._use_v2 = use_v2_engine
165
+ self._v2_engine: Optional[LLMOptEngine] = None
166
+ if use_v2_engine:
167
+ self._v2_engine = LLMOptEngine(
168
+ available_keys={}, # populated per-request via update_keys()
169
+ include_ollama=True,
170
+ log_level=logging.WARNING,
171
+ )
172
+ logger.info("[LLMOpt] V2 utility engine active.")
173
+
174
  # ------------------------------------------------------------------
175
  # Primary API
176
  # ------------------------------------------------------------------
 
189
  dry_run: bool = False,
190
  evaluate: bool = False,
191
  api_keys: Optional[Dict[str, str]] = None,
192
+ alpha: Optional[float] = None,
193
+ beta: Optional[float] = None,
194
+ gamma: Optional[float] = None,
195
+ compression_enabled: Optional[bool] = None,
196
  ) -> GenerateResult:
197
  """
198
  Full pipeline: analyze β†’ estimate β†’ optimize β†’ compress β†’ route β†’ return.
 
227
  latency_ms = (time.perf_counter() - t0) * 1000
228
  logger.info("Returning cached response directly.")
229
 
230
+ constraints = UserConstraints(
231
+ budget_mode=budget_mode,
232
+ compression_enabled=compression_enabled,
233
+ )
234
  optimization = self.engine.optimize(
235
  complexity=complexity,
236
  output_length_bucket=features.estimated_output_length,
237
  constraints=constraints,
238
+ alpha=alpha,
239
+ beta=beta,
240
+ gamma=gamma,
241
  )
242
  optimized_prompt = self.optimizer.optimize(
243
  query=query,
 
278
  exclude_providers=exclude_providers or [],
279
  only_providers=only_providers or [],
280
  prefer_local=prefer_local,
281
+ compression_enabled=compression_enabled,
282
  )
283
  if prefer_local:
284
  constraints.only_providers = ["ollama"]
285
 
286
  # 4. Optimize (select model + config)
287
+ if self._use_v2 and self._v2_engine is not None:
288
+ # Update BYOK keys for this request
289
+ if api_keys:
290
+ self._v2_engine.update_keys(api_keys)
291
+ # Build constraints dict for V2 engine
292
+ v2_constraints = {
293
+ "exclude_providers": exclude_providers or [],
294
+ "only_providers": only_providers or [],
295
+ }
296
+ if max_cost_per_request is not None:
297
+ v2_constraints["max_cost_per_request"] = max_cost_per_request
298
+ if prefer_local:
299
+ v2_constraints["only_providers"] = ["ollama"]
300
+ decision = self._v2_engine.route(
301
+ query_features=features,
302
+ budget_mode=budget_mode,
303
+ constraints=v2_constraints,
304
+ )
305
+ optimization = self._v2_to_optimization_result(decision, complexity, features)
306
+ else:
307
+ optimization = self.engine.optimize(
308
+ complexity=complexity,
309
+ output_length_bucket=features.estimated_output_length,
310
+ constraints=constraints,
311
+ alpha=alpha,
312
+ beta=beta,
313
+ gamma=gamma,
314
+ )
315
  logger.debug(f"Selected: {optimization.selected_model}")
316
 
317
  # 5. Optimize prompt
 
329
  if dry_run:
330
  routed = self._mock_response(optimization)
331
  else:
332
+ # Fetch model spec from appropriate registry
333
+ if self._use_v2 and self._v2_engine is not None:
334
+ # V2: look up from the merged V2 registry (knows all new model IDs)
335
+ v2_spec = self._v2_engine._registry.get_model(optimization.selected_model)
336
+ in_cost = v2_spec["input_cost_per_1k"] if v2_spec else optimization.estimated_cost / 2
337
+ out_cost = v2_spec["output_cost_per_1k"] if v2_spec else optimization.estimated_cost / 2
338
+ else:
339
+ # V1: look up from the old ModelRegistry
340
+ model_spec = self.registry.get(optimization.selected_model)
341
+ in_cost = model_spec.input_cost_per_1k
342
+ out_cost = model_spec.output_cost_per_1k
343
  routed = self.router.route(
344
  model_name=optimization.selected_model,
345
  provider=optimization.provider,
346
  messages=messages,
347
  max_tokens=optimization.max_tokens,
348
  temperature=temperature,
349
+ input_cost_per_1k=in_cost,
350
+ output_cost_per_1k=out_cost,
351
+ api_keys=api_keys,
352
  )
353
  latency_ms = (time.perf_counter() - t0) * 1000
354
 
 
363
  )
364
  cost_saved = max(0.0, baseline_cost - routed.estimated_cost)
365
 
366
+ # 9. Evaluate (if requested) and feed optimizer
367
  evaluation = None
368
  if evaluate and not dry_run:
369
  evaluation = self.judge.evaluate(query, routed.content)
370
+ if self._use_v2 and self._v2_engine is not None:
371
+ # Feed outcome back into adaptive EMA updater
372
+ self._v2_engine.record_outcome(
373
+ model_id=routed.model_used,
374
+ latency_ms=routed.latency_ms,
375
+ success=True,
376
+ quality_score=evaluation.overall if evaluation else None,
377
+ cost_usd=routed.estimated_cost,
378
+ )
379
+ elif evaluation:
380
+ # V1 path: feed Bayesian optimizer
381
  Ξ±, Ξ², Ξ³ = self.engine.bayes.get_weights(constraints.budget_mode)
382
  self.engine.bayes.record_outcome(
383
  budget_mode=constraints.budget_mode,
 
416
  api_keys: Optional[Dict[str, str]] = None,
417
  **kwargs,
418
  ):
419
+ """Yields text chunks. Pipeline still runs fully before streaming."""
420
+ features = self.analyzer.analyze(query)
421
+ complexity = self.estimator.estimate(features)
422
+
423
+ if self._use_v2 and self._v2_engine is not None:
424
+ if api_keys:
425
+ self._v2_engine.update_keys(api_keys)
426
+ decision = self._v2_engine.route(
427
+ query_features=features,
428
+ budget_mode=budget_mode,
429
+ )
430
+ optimization = self._v2_to_optimization_result(decision, complexity, features)
431
+ else:
432
+ constraints = UserConstraints(budget_mode=budget_mode)
433
+ optimization = self.engine.optimize(
434
+ complexity=complexity,
435
+ output_length_bucket=features.estimated_output_length,
436
+ constraints=constraints,
437
+ )
438
+
439
  optimized_prompt = self.optimizer.optimize(
440
  query=query,
441
  system_prompt_style=optimization.system_prompt_style,
 
446
  model_name=optimization.selected_model,
447
  messages=messages,
448
  max_tokens=optimization.max_tokens,
449
+ provider=optimization.provider,
450
+ api_keys=api_keys,
451
  )
452
 
453
  # ------------------------------------------------------------------
454
  # Explainability (standalone)
455
  # ------------------------------------------------------------------
456
 
457
+ def explain(
458
+ self,
459
+ query: str,
460
+ budget_mode: str = "balanced",
461
+ alpha: Optional[float] = None,
462
+ beta: Optional[float] = None,
463
+ gamma: Optional[float] = None,
464
+ compression_enabled: Optional[bool] = None,
465
+ exclude_providers: Optional[list[str]] = None,
466
+ only_providers: Optional[list[str]] = None,
467
+ api_keys: Optional[Dict[str, str]] = None,
468
+ ) -> dict:
469
  """
470
  Returns a structured explanation of what LLMOpt would do for a query,
471
  without making an actual API call.
472
  """
473
  features = self.analyzer.analyze(query)
474
  complexity = self.estimator.estimate(features)
475
+
476
+ if self._use_v2 and self._v2_engine is not None:
477
+ if api_keys:
478
+ self._v2_engine.update_keys(api_keys)
479
+ v2_constraints = {
480
+ "exclude_providers": exclude_providers or [],
481
+ "only_providers": only_providers or [],
482
+ }
483
+ decision = self._v2_engine.route(
484
+ query_features=features,
485
+ budget_mode=budget_mode,
486
+ constraints=v2_constraints,
487
+ )
488
+ optimization = self._v2_to_optimization_result(decision, complexity, features)
489
+ else:
490
+ constraints = UserConstraints(
491
+ budget_mode=budget_mode,
492
+ compression_enabled=compression_enabled,
493
+ exclude_providers=exclude_providers or [],
494
+ only_providers=only_providers or [],
495
+ )
496
+ optimization = self.engine.optimize(
497
+ complexity=complexity,
498
+ output_length_bucket=features.estimated_output_length,
499
+ constraints=constraints,
500
+ alpha=alpha,
501
+ beta=beta,
502
+ gamma=gamma,
503
+ )
504
+
505
  optimized_prompt = self.optimizer.optimize(
506
  query=query,
507
  system_prompt_style=optimization.system_prompt_style,
 
515
  "optimized_prompt": optimized_prompt.to_dict(),
516
  }
517
 
518
+
519
  # ------------------------------------------------------------------
520
  # Helpers
521
  # ------------------------------------------------------------------
 
545
  latency_ms=0.0,
546
  estimated_cost=optimization.estimated_cost,
547
  )
548
+
549
@staticmethod
def _v2_to_optimization_result(
    decision: RoutingDecision,
    complexity: ComplexityResult,
    features: QueryFeatures,
) -> OptimizationResult:
    """
    Adapt a V2 RoutingDecision to the V1 OptimizationResult shape.

    The rest of the pipeline keeps consuming OptimizationResult, so this
    shim fills every V1 field from the routing decision, its explanation
    dict and the query features.  ``complexity`` is accepted but not read
    by this function.

    Field derivation:
        selected_model, provider, estimated_cost, fallback_model
            copied straight from ``decision``.
        estimated_output_tokens
            lookup on ``features.estimated_output_length`` (short=300,
            medium=700, long=1500, very_long=3000); 700 when the bucket is
            missing or not one of those names.
        estimated_input_tokens
            ``features.token_count`` (100 if absent), never below 100.
        max_tokens
            estimated output tokens + 200, capped at 4096.
        compression_enabled, system_prompt_style
            taken from ``explanation["budget_mode"]``: "cheap" turns on
            compression and the "minimal" style; any other mode gives the
            "standard" style without compression.
        objective_score
            ``1.0 - utility_score`` (V1 treats lower as better).
    """
    details = decision.explanation

    # Human-readable trail of how the V2 engine reached its choice.
    leading_dims = list(details.get('query_dimensions', {}).keys())[:3]
    notes = [
        f"engine=utility_v2 domain={details.get('primary_domain', 'general')}",
        f"utility_score={decision.utility_score:.4f} budget_lambda={details.get('lambda', '?')}",
        f"top_dims={leading_dims}",
        f"candidates_evaluated={details.get('candidates_evaluated', '?')}",
        f"registry_source={details.get('registry_source', 'baseline')}",
    ]
    if decision.fallback_model_id:
        notes.append(f"fallback={decision.fallback_model_id} ({decision.fallback_provider})")

    # Token estimates: output from the length bucket, input from the features.
    tokens_by_bucket = {"short": 300, "medium": 700, "long": 1500, "very_long": 3000}
    bucket = str(getattr(features, 'estimated_output_length', 'medium')).lower()
    expected_output = tokens_by_bucket.get(bucket, 700)
    expected_input = max(getattr(features, 'token_count', 100), 100)

    # Only the "cheap" budget mode switches on compression / minimal prompts.
    is_cheap = details.get("budget_mode", "balanced") == "cheap"
    if is_cheap:
        prompt_style = "minimal"
    else:
        prompt_style = "standard"

    return OptimizationResult(
        selected_model=decision.model_id,
        provider=decision.provider,
        estimated_cost=decision.estimated_cost,
        estimated_input_tokens=expected_input,
        estimated_output_tokens=expected_output,
        max_tokens=min(expected_output + 200, 4096),
        compression_enabled=is_cheap,
        system_prompt_style=prompt_style,
        rationale=notes,
        fallback_model=decision.fallback_model_id,
        objective_score=1.0 - decision.utility_score,  # invert: lower is better (V1 convention)
    )
606
+
llmopt/db/models.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sqlalchemy import Column, Integer, String, DateTime, ForeignKey, Float
2
+ from sqlalchemy.orm import relationship
3
+ from datetime import datetime
4
+ from llmopt.db.session import Base
5
+
6
class User(Base):
    """ORM model for the ``users`` table; parent of API keys and generation logs."""

    __tablename__ = "users"

    id = Column(Integer, primary_key=True, index=True)
    email = Column(String, unique=True, index=True)
    hashed_password = Column(String)
    # Callable default: evaluated at insert time, producing a naive UTC datetime.
    created_at = Column(DateTime, default=datetime.utcnow)

    # Both child collections use "all, delete-orphan", so rows are removed
    # together with the user or when detached from the collection.
    api_keys = relationship(
        "UserAPIKey",
        back_populates="user",
        cascade="all, delete-orphan",
    )
    generation_logs = relationship(
        "GenerationLog",
        back_populates="user",
        cascade="all, delete-orphan",
    )
16
+
17
class UserAPIKey(Base):
    """ORM model for the ``user_api_keys`` table: one stored key per row, tied to a user."""

    __tablename__ = "user_api_keys"

    id = Column(Integer, primary_key=True, index=True)
    user_id = Column(Integer, ForeignKey("users.id"))
    provider = Column(String, index=True)
    # NOTE(review): presumably ciphertext produced elsewhere; nothing in this
    # model encrypts or decrypts the value - confirm against the writer.
    encrypted_key = Column(String)

    # Reverse side of User.api_keys.
    user = relationship("User", back_populates="api_keys")
26
+
27
class GenerationLog(Base):
    """ORM model for the ``generation_logs`` table: one row per logged generation."""

    __tablename__ = "generation_logs"

    id = Column(Integer, primary_key=True, index=True)
    # Nullable: a log row may exist without an owning user.
    user_id = Column(Integer, ForeignKey("users.id"), nullable=True)

    # Request / response text and the model that served it.
    query = Column(String)
    response = Column(String, nullable=True)
    model_used = Column(String)
    provider = Column(String)

    # Token accounting.
    input_tokens = Column(Integer)
    output_tokens = Column(Integer)
    total_tokens = Column(Integer)

    # Cost, savings and timing figures recorded for the request.
    estimated_cost = Column(Float)
    tokens_saved = Column(Integer)
    cost_saved = Column(Float)
    latency_ms = Column(Float)

    # Complexity estimate attached to the query.
    complexity_score = Column(Float)
    complexity_tier = Column(String)

    # Callable default: evaluated at insert time, producing a naive UTC datetime.
    created_at = Column(DateTime, default=datetime.utcnow)

    # Reverse side of User.generation_logs.
    user = relationship("User", back_populates="generation_logs")
llmopt/db/session.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from sqlalchemy import create_engine
3
+ from sqlalchemy.orm import sessionmaker, declarative_base
4
+
5
# Default to local SQLite when DATABASE_URL is unset, empty or whitespace-only.
# A variable that is present but blank would otherwise hand an empty URL to
# create_engine() and fail at import time.
SQLALCHEMY_DATABASE_URL = (os.getenv("DATABASE_URL") or "").strip() or "sqlite:///./llmopt.db"

if SQLALCHEMY_DATABASE_URL.startswith("sqlite"):
    # SQLite connections are thread-bound by default; disable that check so a
    # session can be used from a different thread than the one that created it.
    engine = create_engine(
        SQLALCHEMY_DATABASE_URL,
        connect_args={"check_same_thread": False},
    )
else:
    # For Postgres (e.g. Neon, Supabase).
    # SQLAlchemy requires `postgresql://` instead of `postgres://`; rewrite
    # only the scheme prefix (count=1) and leave the rest of the URL untouched.
    if SQLALCHEMY_DATABASE_URL.startswith("postgres://"):
        SQLALCHEMY_DATABASE_URL = SQLALCHEMY_DATABASE_URL.replace(
            "postgres://", "postgresql://", 1
        )
    engine = create_engine(
        SQLALCHEMY_DATABASE_URL,
        pool_pre_ping=True,   # test connections on checkout
        pool_recycle=300,     # recycle connections older than 300 seconds
    )

# Session factory bound to the engine above; commits and flushes are explicit.
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)

# Declarative base class that the ORM models inherit from.
Base = declarative_base()
27
+
28
+ # Dependency for FastAPI
29
def get_db():
    """
    Yield a database session and close it once the consumer is finished.

    Written as a generator so it can be used as a dependency: the session is
    created on entry, handed to the caller, and closed when the generator is
    resumed or torn down, whether or not the caller raised.
    """
    # Leaving the Session context manager closes the session, matching an
    # explicit try/finally around the yield.
    with SessionLocal() as db:
        yield db
llmopt/engine/__init__.py CHANGED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """LLMOpt engine package."""
2
+ # V1 (legacy, kept for compatibility)
3
+ from llmopt.engine.optimization_engine import OptimizationEngine, OptimizationResult, UserConstraints
4
+
5
+ # V2 β€” utility-based routing
6
+ from llmopt.engine.utility_engine import UtilityOptimizationEngine, RoutingDecision, QueryUtilityProfile
7
+ from llmopt.engine.llmopt_engine import LLMOptEngine
8
+
9
+ __all__ = [
10
+ # V1
11
+ "OptimizationEngine", "OptimizationResult", "UserConstraints",
12
+ # V2
13
+ "UtilityOptimizationEngine", "RoutingDecision", "QueryUtilityProfile",
14
+ "LLMOptEngine",
15
+ ]
llmopt/engine/llmopt_engine.py ADDED
@@ -0,0 +1,275 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ LLMOpt — Main Facade (V2 Engine Entry Point)
3
+ =============================================
4
+ Wires the utility engine into the existing LLMOpt pipeline.
5
+
6
+ Replaces: OptimizationEngine
7
+ Keeps intact: QueryAnalyzer, ComplexityEstimator, SemanticCache,
8
+ PromptOptimizer, ModelRouter, LLMJudge
9
+
10
+ Usage (identical to old OptimizationEngine interface):
11
+ ------------------------------------------------------
12
+ from llmopt.engine.llmopt_engine import LLMOptEngine
13
+
14
+ engine = LLMOptEngine(
15
+ available_keys={
16
+ "openai": "sk-...",
17
+ "anthropic": "sk-ant-...",
18
+ }
19
+ )
20
+
21
+ decision = engine.route(query_features, budget_mode="balanced")
22
+ # decision.model_id → "claude-sonnet-4-5"
23
+ # decision.provider → "anthropic"
24
+ # decision.utility_score → 0.8241
25
+ # decision.estimated_cost → 0.00312
26
+ # decision.explanation → {...full reasoning...}
27
+ # decision.fallback_model_id → "gpt-4.1-mini"
28
+
29
+ # After getting a response, record outcome for adaptive updating:
30
+ engine.record_outcome(
31
+ model_id="claude-sonnet-4-5",
32
+ latency_ms=1340,
33
+ success=True,
34
+ quality_score=8.5,
35
+ cost_usd=0.00312,
36
+ )
37
+ """
38
+
39
+ import logging
40
+ import os
41
+ from typing import Optional
42
+
43
+ from llmopt.registry.hybrid_updater import HybridRegistryUpdater
44
+ from llmopt.engine.utility_engine import UtilityOptimizationEngine, RoutingDecision
45
+ from llmopt.updater.adaptive_updater import AdaptiveRuntimeUpdater
46
+
47
+ logger = logging.getLogger(__name__)
48
+
49
+
50
class LLMOptEngine:
    """
    Main entry point for the utility-based routing engine.

    Pipeline position: slots in between QueryAnalyzer/ComplexityEstimator
    and PromptOptimizer/ModelRouter.

    The facade wires three collaborators together:
        * HybridRegistryUpdater      - the model registry
        * UtilityOptimizationEngine  - routing over providers with usable keys
        * AdaptiveRuntimeUpdater     - statistics from recorded outcomes

    Args:
        available_keys: Dict of provider -> API key. Missing providers fall
            back to their environment variable (see ``_resolve_keys``).
        openrouter_api_key: Optional OpenRouter key handed to the registry
            updater; falls back to the OPENROUTER_API_KEY environment variable.
        include_ollama: Whether local Ollama may be offered as a routing option.
        log_level: Level passed to ``logging.basicConfig``.
    """

    def __init__(
        self,
        available_keys: Optional[dict] = None,
        openrouter_api_key: Optional[str] = None,
        include_ollama: bool = True,
        log_level: int = logging.WARNING,
    ):
        # NOTE: configures the root logger; it is a no-op when the host
        # application has already installed handlers.
        logging.basicConfig(level=log_level)

        # Resolve API keys: constructor args > environment variables
        resolved_keys = self._resolve_keys(available_keys or {})

        # Layer 1: Registry
        or_key = openrouter_api_key or os.getenv("OPENROUTER_API_KEY", "")
        self._registry = HybridRegistryUpdater(openrouter_api_key=or_key)

        # Layer 2: Utility Engine (BYOK-aware routing)
        self._engine = UtilityOptimizationEngine(
            registry_updater=self._registry,
            available_keys=resolved_keys,
            include_ollama=include_ollama,
        )

        # Layer 3: Adaptive runtime statistics
        self._runtime = AdaptiveRuntimeUpdater()

        logger.info(
            "[LLMOptEngine] Initialized. Providers: %s",
            list(resolved_keys.keys()),
        )

    # -- Main routing method ------------------------------------------------

    def route(
        self,
        query_features,
        budget_mode: str = "balanced",
        constraints: Optional[dict] = None,
        force_refresh_registry: bool = False,
    ) -> "RoutingDecision":
        """
        Route a query to the best available model.

        Args:
            query_features: QueryFeatures dataclass (from QueryAnalyzer) or dict.
            budget_mode: "cheap" | "balanced" | "quality"
            constraints: Optional hard overrides. See UtilityOptimizationEngine.route().
            force_refresh_registry: Force a registry refresh before routing.

        Returns:
            The RoutingDecision produced by the utility engine. Its
            ``utility_score`` has the runtime adjustment for the chosen model
            added, and ``explanation`` may gain "runtime_adjustment" and
            "observed_latency_ms" entries.
        """
        decision = self._engine.route(
            query_features=query_features,
            budget_mode=budget_mode,
            constraints=constraints or {},
            force_refresh_registry=force_refresh_registry,
        )

        # The adjustment is applied to the already-selected model, so it
        # changes the reported score but not which model was picked.
        adj = self._runtime.get_utility_adjustment(decision.model_id)
        if adj != 0.0:
            decision.utility_score = round(decision.utility_score + adj, 4)
            decision.explanation["runtime_adjustment"] = adj
            logger.debug(
                "[LLMOptEngine] Runtime adj for %s: %+.4f",
                decision.model_id,
                adj,
            )

        # Surface the observed latency when runtime data exists for the model.
        runtime_lat = self._runtime.get_latency_estimate(decision.model_id)
        if runtime_lat:
            decision.explanation["observed_latency_ms"] = round(runtime_lat, 0)

        return decision

    # -- Outcome recording - call after each LLM API response ----------------

    def record_outcome(
        self,
        model_id: str,
        latency_ms: Optional[float] = None,
        success: bool = True,
        quality_score: Optional[float] = None,
        cost_usd: Optional[float] = None,
    ):
        """
        Record the outcome of a routing decision for adaptive updating.

        All arguments are forwarded unchanged to the runtime updater.

        Args:
            model_id: The model that was used.
            latency_ms: Actual end-to-end latency.
            success: Whether the API call succeeded.
            quality_score: Optional quality score (e.g. from LLMJudge).
            cost_usd: Actual cost of the request.
        """
        self._runtime.record_outcome(
            model_id=model_id,
            latency_ms=latency_ms,
            success=success,
            quality_score=quality_score,
            cost_usd=cost_usd,
        )

    # -- Key management -----------------------------------------------------

    def update_keys(self, keys: dict):
        """
        Update available API keys mid-session.

        The given keys are merged with environment variables and then replace
        the routing engine's provider set, so the change stays in effect for
        later ``route()`` calls on this instance.
        """
        resolved = self._resolve_keys(keys)
        self._engine.update_available_keys(resolved)

    # -- Observability ------------------------------------------------------

    def explain(
        self,
        query_features,
        budget_mode: str = "balanced",
        constraints: Optional[dict] = None,
    ) -> str:
        """
        Dry-run routing: route the query and return the decision formatted
        as a human-readable report. No LLM API call is made here.
        """
        decision = self.route(query_features, budget_mode, constraints)
        return self._format_explanation(decision)

    def get_registry_info(self) -> dict:
        """Return the registry's "_runtime_meta" entry ({} when absent)."""
        return self._registry.get_registry().get("_runtime_meta", {})

    def get_model_stats(self, model_id: str) -> dict:
        """Return the runtime stats summary for a specific model."""
        return self._runtime.get_stats_summary(model_id)

    def save_runtime_stats(self):
        """Ask the runtime updater to save its stats (call on shutdown)."""
        self._runtime.save()

    # -- Internal helpers ---------------------------------------------------

    @staticmethod
    def _resolve_keys(keys: dict) -> dict:
        """
        Merge provided keys with environment variables.

        For each known provider the explicit key wins when it is usable;
        otherwise the provider's environment variable is consulted. A value
        is unusable when it is empty, starts with "your_", or is literally
        the name of the provider's environment variable (template
        placeholders). Keys for providers outside the known set are passed
        through when non-empty and not a "your_" placeholder.

        Returns:
            Dict of provider -> key containing only usable entries.
        """
        env_map = {
            "openai": "OPENAI_API_KEY",
            "anthropic": "ANTHROPIC_API_KEY",
            "google": "GEMINI_API_KEY",
            "mistral": "MISTRAL_API_KEY",
            "deepseek": "DEEPSEEK_API_KEY",
        }

        def usable(value, placeholder_name=None) -> bool:
            # str() keeps the check safe for non-string values.
            if not value:
                return False
            text = str(value)
            return not text.startswith("your_") and text != placeholder_name

        resolved = {}
        for provider, env_var in env_map.items():
            explicit = keys.get(provider)
            if usable(explicit, env_var):
                resolved[provider] = explicit
                continue
            # A missing or placeholder explicit value must not hide a real
            # key supplied through the environment.
            from_env = os.getenv(env_var, "")
            if usable(from_env, env_var):
                resolved[provider] = from_env

        # Pass through extra keys (custom providers). Known providers were
        # decided above and are skipped so a rejected placeholder cannot be
        # re-admitted here.
        for provider, value in keys.items():
            if provider in env_map:
                continue
            if usable(value):
                resolved[provider] = value

        return resolved

    @staticmethod
    def _format_explanation(decision: "RoutingDecision") -> str:
        """Format a RoutingDecision as a human-readable explanation string."""
        ex = decision.explanation
        dims = ex.get("query_dimensions", {})
        shortlist = ex.get("shortlist", [])

        rule = "=" * 55
        lines = [
            rule,
            "LLMOpt — Routing Decision",
            rule,
            f"Selected model : {decision.model_id} ({decision.provider})",
            f"Utility score : {decision.utility_score:.4f}",
            f"Estimated cost : ${decision.estimated_cost:.6f}",
            f"Budget mode : {ex.get('budget_mode')} (λ={ex.get('lambda')})",
            f"Primary domain : {ex.get('primary_domain')}",
            "",
            "Query dimensions (active weights):",
        ]
        lines.extend(
            f" {dim:<25} weight={weight:.2f}" for dim, weight in dims.items()
        )

        if shortlist:
            lines += ["", "Top candidates:"]
            lines.extend(
                f" {item['model_id']:<35} U={item['utility_score']:.4f} "
                f"cap={item['capability']:.3f} ${item['est_cost_usd']:.6f}"
                for item in shortlist
            )

        if decision.fallback_model_id:
            lines.append(f"\nFallback model : {decision.fallback_model_id}")

        # Only shown when route() recorded a non-zero adjustment.
        adj = ex.get("runtime_adjustment")
        if adj:
            lines.append(f"Runtime adj : {adj:+.4f} (from observed outcomes)")

        lines.append(rule)
        return "\n".join(lines)
llmopt/engine/optimization_engine.py CHANGED
@@ -240,12 +240,19 @@ class OptimizationEngine:
240
  complexity: ComplexityResult,
241
  output_length_bucket: str,
242
  constraints: Optional[UserConstraints] = None,
 
 
 
243
  ) -> OptimizationResult:
244
  if constraints is None:
245
  constraints = UserConstraints()
246
 
247
- Ξ±, Ξ², Ξ³ = self.bayes.get_weights(constraints.budget_mode)
248
- logger.debug(f"Using weights Ξ±={Ξ±:.3f} Ξ²={Ξ²:.3f} Ξ³={Ξ³:.3f} for mode '{constraints.budget_mode}'")
 
 
 
 
249
 
250
  # --- 1. Build candidate set ---
251
  candidates = self.registry.capable_of(
 
240
  complexity: ComplexityResult,
241
  output_length_bucket: str,
242
  constraints: Optional[UserConstraints] = None,
243
+ alpha: Optional[float] = None,
244
+ beta: Optional[float] = None,
245
+ gamma: Optional[float] = None,
246
  ) -> OptimizationResult:
247
  if constraints is None:
248
  constraints = UserConstraints()
249
 
250
+ if alpha is not None and beta is not None and gamma is not None:
251
+ Ξ±, Ξ², Ξ³ = alpha, beta, gamma
252
+ logger.debug(f"Using custom weights Ξ±={Ξ±:.3f} Ξ²={Ξ²:.3f} Ξ³={Ξ³:.3f}")
253
+ else:
254
+ Ξ±, Ξ², Ξ³ = self.bayes.get_weights(constraints.budget_mode)
255
+ logger.debug(f"Using weights Ξ±={Ξ±:.3f} Ξ²={Ξ²:.3f} Ξ³={Ξ³:.3f} for mode '{constraints.budget_mode}'")
256
 
257
  # --- 1. Build candidate set ---
258
  candidates = self.registry.capable_of(
llmopt/engine/utility_engine.py ADDED
@@ -0,0 +1,665 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ LLMOpt — Utility-Based Optimization Engine (V2 Drop-in Replacement)
3
+ ====================================================================
4
+
5
+ Replaces the old J(x) = α·Cost + β·Tokens − γ·Quality complexity-routing engine.
6
+
7
+ NEW APPROACH — Utility-Constrained Routing:
8
+ --------------------------------------------
9
+ Instead of routing on query complexity alone, the engine:
10
+
11
+ 1. Resolves available models from the user's actual API keys (BYOK)
12
+ 2. Applies hard constraints (context window, required features, cost cap)
13
+ 3. Builds a query utility profile — what dimensions matter FOR THIS QUERY
14
+ 4. Scores each candidate: U(m, q) = Σ wᵢ · capabilityᵢ(m) − λ · cost_norm(m)
15
+ where weights wᵢ come from the query profile, not global defaults
16
+ 5. Returns the best model + fallback + full explanation
17
+
18
+ Key differences from old engine:
19
+ - Routing is driven by WHAT THE QUERY NEEDS, not a global complexity score
20
+ - Only models with available API keys are considered (BYOK)
21
+ - Weights are query-derived, not budget-mode static
22
+ - Budget mode adjusts λ (cost penalty), not the capability weights
23
+ - No Bayesian/Optuna dependency — deterministic, debuggable, stable
24
+
25
+ Drop-in interface:
26
+ engine = UtilityOptimizationEngine(registry_updater, available_keys={"openai": "sk-...", ...})
27
+ result = engine.route(query_features, budget_mode="balanced", constraints={})
28
+ # result.model_id, result.utility_score, result.explanation, result.fallback_model_id
29
+ """
30
+
31
+ import math
32
+ import logging
33
+ from dataclasses import dataclass, field
34
+ from typing import Optional
35
+
36
+ logger = logging.getLogger(__name__)
37
+
38
+
39
+ # ── Data contracts (same shape as old engine output) ─────────────────────────
40
+
41
@dataclass
class QueryUtilityProfile:
    """
    Per-query description of which model capabilities matter and what the
    query strictly requires.

    The float fields are importance signals in [0.0, 1.0], one per capability
    dimension. They are independent of each other and are not expected to sum
    to 1. Every dimension starts at 0.0 except ``instruction_following``,
    which starts at 0.5.
    """

    # --- capability importance weights -----------------------------------
    reasoning: float = 0.0
    coding: float = 0.0
    math: float = 0.0
    creativity: float = 0.0
    factuality: float = 0.0
    instruction_following: float = 0.5  # non-zero baseline for every query
    long_context: float = 0.0
    multilingual: float = 0.0
    tool_use: float = 0.0
    summarization: float = 0.0
    conversation: float = 0.0

    # --- hard requirements -----------------------------------------------
    requires_tool_calling: bool = False
    requires_image_input: bool = False
    requires_json_mode: bool = False
    min_context_tokens: int = 0

    # --- token budget estimate for the request ---------------------------
    estimated_input_tokens: int = 500
    estimated_output_tokens: int = 500

    # --- label used for logging / explanations ---------------------------
    primary_domain: str = "general"
73
+
74
+
75
@dataclass
class RoutingDecision:
    """
    Result of one routing call: the chosen model, its pricing and context
    data, an optional fallback, and the explanation of how it was chosen.
    """

    # --- selected model ---------------------------------------------------
    model_id: str
    provider: str
    utility_score: float          # U(m, q); higher is better
    estimated_cost: float         # USD for this request
    input_cost_per_1k: float
    output_cost_per_1k: float
    context_window: int

    # --- runner-up, when one exists ----------------------------------------
    fallback_model_id: Optional[str] = None
    fallback_provider: Optional[str] = None

    # --- structured reasoning behind the choice ----------------------------
    # default_factory gives every instance its own dict.
    explanation: dict = field(default_factory=dict)

    # --- fields kept for compatibility with the older pipeline -------------
    capability_score: float = 0.0   # overall capability of the selected model
    complexity_score: float = 0.0   # pass-through from QueryFeatures if available
    tokens_saved: int = 0
    compression_ratio: float = 0.0
99
+
100
+
101
+ # ── Budget mode β†’ cost penalty Ξ» ─────────────────────────────────────────────
102
+
103
# Cost-penalty multiplier λ per budget mode. λ scales how strongly cost counts
# against a model's utility score: the larger it is, the more often cheaper
# models win.
BUDGET_LAMBDA = dict(cheap=3.0, balanced=1.2, quality=0.3)

# Minimum acceptable utility score for a model to be considered.
# NOTE(review): not referenced by the routing code visible next to this
# constant; confirm where the threshold is enforced.
MIN_UTILITY_THRESHOLD = 0.25
114
+
115
+
116
+ # ── Core Engine ───────────────────────────────────────────────────────────────
117
+
118
+ class UtilityOptimizationEngine:
119
+ """
120
+ Drop-in replacement for the old OptimizationEngine.
121
+
122
+ Instantiate once per request session (or per server lifecycle if keys are static).
123
+ Keys can be updated between requests via update_available_keys().
124
+ """
125
+
126
def __init__(
    self,
    registry_updater,
    available_keys: dict,
    include_ollama: bool = True
):
    """
    Store the collaborators and compute the initial provider set.

    Args:
        registry_updater: HybridRegistryUpdater instance used to fetch the
            model registry.
        available_keys: Provider -> API key mapping, e.g.
            {"openai": "sk-...", "anthropic": "sk-ant-..."}.
        include_ollama: Whether local Ollama may count as an available
            provider.
    """
    self._updater = registry_updater
    self._include_ollama = include_ollama
    # Start empty; update_available_keys() builds the real mapping and needs
    # _include_ollama to be set first.
    self._available_keys = {}
    self.update_available_keys(available_keys)
136
+
137
def update_available_keys(self, keys: dict):
    """
    Replace the set of providers the engine may route to.

    Args:
        keys: Provider -> key mapping, e.g.
            {"openai": "sk-...", "anthropic": "...", "ollama": "local"}.
            Provider names are lower-cased. Entries whose value is empty,
            None or whitespace-only are treated as unavailable. ``None`` is
            accepted and treated like an empty mapping.

    Ollama handling (only when the engine was built with
    include_ollama=True): "ollama" is added with the marker value
    "__local__" when
        - an "ollama" entry is present in ``keys`` (whatever its value), or
        - none of the known cloud providers (openai, anthropic, google,
          mistral, deepseek) ended up with a usable key.
    This prevents Ollama from silently joining the pool when the user only
    provided cloud API keys. An explicit, non-empty "ollama" value is kept
    as given.
    """
    keys = keys or {}

    self._available_keys = {
        provider.lower(): key
        for provider, key in keys.items()
        if key and str(key).strip()
    }

    # Include Ollama only when explicitly requested or as last-resort fallback
    ollama_listed = any(provider.lower() == "ollama" for provider in keys)
    has_cloud_key = any(
        provider in self._available_keys
        for provider in ("openai", "anthropic", "google", "mistral", "deepseek")
    )

    if self._include_ollama and (ollama_listed or not has_cloud_key):
        # setdefault keeps an explicit Ollama value if one was supplied.
        self._available_keys.setdefault("ollama", "__local__")

    logger.info("[Engine] Available providers: %s", list(self._available_keys.keys()))
166
+
167
+ # ── Main routing method ───────────────────────────────────────────────────
168
+
169
def route(
    self,
    query_features,
    budget_mode: str = "balanced",
    constraints: dict = None,
    force_refresh_registry: bool = False,
) -> RoutingDecision:
    """
    Pick the best available model for a query, plus a runner-up fallback.

    Args:
        query_features: Output of QueryAnalyzer (QueryFeatures dataclass or dict)
        budget_mode: "cheap" | "balanced" | "quality"; any other value is
            treated as "balanced".
        constraints: Optional hard overrides:
            max_cost_per_request: float (USD)
            max_latency_ms: int
            min_context_tokens: int
            exclude_providers: list[str]
            only_providers: list[str]
            require_tool_calling: bool
            require_image_input: bool
            require_json_mode: bool
        force_refresh_registry: Force live registry refresh

    Returns:
        RoutingDecision for the highest-scoring model.

    Raises:
        RuntimeError: when no model is available for the configured keys,
            or when scoring returns nothing.
    """
    constraints = constraints or {}
    if budget_mode not in BUDGET_LAMBDA:
        budget_mode = "balanced"

    # 1. Registry lookup
    model_catalog = self._updater.get_registry(
        force_refresh=force_refresh_registry
    ).get("models", {})

    # 2. What this query needs
    profile = self._build_utility_profile(query_features, constraints)

    # 3. Models reachable with the configured keys
    pool = self._filter_by_availability(model_catalog, constraints)
    if not pool:
        raise RuntimeError(
            "No models available. Please provide at least one valid API key "
            "(OpenAI, Anthropic, Google, Mistral, DeepSeek) or run Ollama locally."
        )

    # 4. Hard constraints, relaxed in two stages if nothing survives
    eligible = self._apply_hard_constraints(pool, profile, constraints)

    if not eligible:
        # Stage one: drop only the cost and latency caps.
        logger.warning(
            "[Engine] No models passed hard constraints. "
            "Relaxing cost/latency caps and retrying."
        )
        soft_caps = ("max_cost_per_request", "max_latency_ms")
        without_caps = {
            name: value
            for name, value in constraints.items()
            if name not in soft_caps
        }
        eligible = self._apply_hard_constraints(pool, profile, without_caps)

    if not eligible:
        # Stage two: give up on constraints and consider the whole pool.
        logger.warning("[Engine] Using all available candidates (no constraints).")
        eligible = pool

    # 5. Utility scoring, best first
    scored = self._score_candidates(eligible, profile, budget_mode)
    if not scored:
        raise RuntimeError("Scoring produced no results. Check model registry integrity.")

    ranking = sorted(scored, key=lambda pair: pair[1], reverse=True)

    winner_id, winner_score = ranking[0]
    winner_spec = eligible[winner_id]

    # The second-ranked model, if any, becomes the fallback.
    if len(ranking) > 1:
        runner_up_id = ranking[1][0]
        runner_up_provider = eligible[runner_up_id]["provider"]
    else:
        runner_up_id = None
        runner_up_provider = None

    # 6. Cost estimate for the winner
    request_cost = self._estimate_cost(winner_spec, profile)

    # 7. Explanation payload
    explanation = self._build_explanation(
        ranking, eligible, profile, budget_mode, constraints, winner_id
    )

    return RoutingDecision(
        model_id=winner_id,
        provider=winner_spec["provider"],
        utility_score=round(winner_score, 4),
        estimated_cost=round(request_cost, 8),
        input_cost_per_1k=winner_spec["input_cost_per_1k"],
        output_cost_per_1k=winner_spec["output_cost_per_1k"],
        context_window=winner_spec["context_window"],
        fallback_model_id=runner_up_id,
        fallback_provider=runner_up_provider,
        explanation=explanation,
        capability_score=self._overall_capability(winner_spec),
    )
274
+
275
+ # ── Step 2: Build Query Utility Profile ───────────────────────────────────
276
+
277
def _build_utility_profile(self, qf, constraints: dict) -> QueryUtilityProfile:
    """
    Convert query features into a QueryUtilityProfile.

    Args:
        qf: Query features, either a plain dict or an object exposing the
            same names as attributes (e.g. a QueryFeatures dataclass).
            Flags that are absent are treated as unset.
        constraints: Caller-supplied routing constraints. Only
            ``require_tool_calling``, ``require_image_input``,
            ``require_json_mode`` and ``min_context_tokens`` are read here.

    Returns:
        A profile carrying per-dimension importance weights, hard feature
        requirements, token estimates and a primary domain label.
    """

    def g(attr, default=False):
        # Uniform accessor so the rest of the method does not care whether
        # qf is a dict or an attribute-style object.
        if isinstance(qf, dict):
            return qf.get(attr, default)
        return getattr(qf, attr, default)

    profile = QueryUtilityProfile()

    # Dimension weights from domain flags.
    # These are NOT boolean: they express HOW IMPORTANT each dimension is.
    # Several domains can be active at once, hence the max() merges below.
    # NOTE(review): the max()/"> 0" logic assumes a fresh profile starts
    # with numeric weights (presumably 0.0) — confirm in QueryUtilityProfile.

    if g("domain_reasoning") or g("requires_analysis") or g("requires_debate"):
        profile.reasoning = 0.85
        profile.factuality = 0.70

    if g("domain_code") or g("domain_coding"):
        profile.coding = 0.90
        profile.reasoning = max(profile.reasoning, 0.60)
        profile.instruction_following = max(profile.instruction_following, 0.70)

    if g("domain_math"):
        profile.math = 0.90
        profile.reasoning = max(profile.reasoning, 0.70)

    if g("domain_creative") or g("domain_creative_writing"):
        profile.creativity = 0.88
        profile.instruction_following = max(profile.instruction_following, 0.60)

    if g("domain_factual") or g("domain_science"):
        profile.factuality = max(profile.factuality, 0.80)
        profile.reasoning = max(profile.reasoning, 0.55)

    if g("domain_summarization"):
        profile.summarization = 0.85
        profile.long_context = 0.60

    if g("domain_translation") or g("domain_multilingual"):
        profile.multilingual = 0.90
        profile.factuality = max(profile.factuality, 0.60)

    if g("domain_conversational") or g("domain_factual"):
        profile.conversation = 0.70

    # Multi-step / complex reasoning boost (capped at 1.0).
    if g("multi_step") or g("requires_comparison"):
        profile.reasoning = min(1.0, profile.reasoning + 0.15)

    # Expert-level signal: raise the bar on the reasoning, coding and math
    # dimensions, but only on those that are already active.
    if g("_expert_signal") or g("expert_signal"):
        for dim in ("reasoning", "coding", "math"):
            val = getattr(profile, dim)
            if val > 0:
                setattr(profile, dim, min(1.0, val + 0.10))

    # Tool use requirement
    if g("requires_tool_use") or g("has_tool_calls") or constraints.get("require_tool_calling"):
        profile.tool_use = 0.80
        profile.requires_tool_calling = True

    # Image input requirement
    if g("has_image") or constraints.get("require_image_input"):
        profile.requires_image_input = True

    # JSON mode requirement
    if g("requires_json") or constraints.get("require_json_mode"):
        profile.requires_json_mode = True

    # Context window requirement. "or 0" guards against an explicit None
    # (e.g. an unset optional field), which would otherwise raise TypeError.
    token_count = g("token_count", 0) or 0
    min_ctx = constraints.get("min_context_tokens", 0) or 0
    profile.min_context_tokens = max(
        int(min_ctx),
        int(token_count * 3),  # conservative: input tokens x 3 headroom
    )
    # Never estimate fewer than 100 input tokens.
    profile.estimated_input_tokens = max(int(token_count), 100)

    # Estimate output length; unknown labels fall back to the "medium" size.
    output_len_map = {
        "short": 300,
        "medium": 700,
        "long": 1500,
        "very_long": 3000,
    }
    est_output = g("estimated_output_length", "medium")
    profile.estimated_output_tokens = output_len_map.get(
        str(est_output).lower(), 700
    )

    # Primary domain label: first matching flag wins. Alias flags sit next
    # to their siblings so every flag that drives a weight above can also
    # produce a label.
    domain_priority = [
        ("domain_code", "coding"),
        ("domain_coding", "coding"),
        ("domain_math", "math"),
        ("domain_reasoning", "reasoning"),
        ("domain_creative", "creative"),
        ("domain_creative_writing", "creative"),
        ("domain_science", "science"),
        ("domain_summarization", "summarization"),
        ("domain_translation", "translation"),
        ("domain_multilingual", "translation"),
        ("domain_factual", "factual"),
    ]
    for flag, label in domain_priority:
        if g(flag):
            profile.primary_domain = label
            break

    return profile
391
+
392
+ # ── Step 3: BYOK Provider Filter ─────────────────────────────────────────
393
+
394
def _filter_by_availability(self, all_models: dict, constraints: dict) -> dict:
    """
    Filter models to only those whose provider has an available API key.

    Respects:
    - self._available_keys (BYOK): the provider must be present in it
    - constraints["exclude_providers"]: providers to drop
    - constraints["only_providers"]: if non-empty, the only providers kept

    Provider names are compared lower-cased. A missing, empty or None
    value for either constraint list means "no restriction".

    Returns:
        A new dict mapping model id to spec for the surviving models.
    """
    # "or []" tolerates an explicit None as well as a missing key.
    exclude = {p.lower() for p in (constraints.get("exclude_providers") or [])}
    only_raw = constraints.get("only_providers")
    only = {p.lower() for p in only_raw} if only_raw else None

    available = {}
    for mid, spec in all_models.items():
        # A spec without a provider (or with provider=None) is compared as "".
        provider = (spec.get("provider") or "").lower()

        # Must have a key for this provider
        if provider not in self._available_keys:
            continue

        # Respect exclude list
        if provider in exclude:
            continue

        # Respect only list
        if only and provider not in only:
            continue

        available[mid] = spec

    # .get() keeps the log line from raising on a spec without "provider".
    providers = {s.get("provider") for s in available.values()}
    logger.debug(
        f"[Engine] Available candidate pool: {len(available)} models "
        f"from providers: {providers}"
    )
    return available
430
+
431
+ # ── Step 4: Hard Constraints Filter ──────────────────────────────────────
432
+
433
def _apply_hard_constraints(
    self, candidates: dict, profile: QueryUtilityProfile, constraints: dict
) -> dict:
    """
    Filter candidates by hard constraints that are non-negotiable.

    Checks, in order: context window, tool calling, image input, JSON mode,
    per-request cost cap (``max_cost_per_request``, USD) and latency cap
    (``max_latency_ms``). The two caps are applied only when present in
    ``constraints``.

    Returns:
        A dict of the surviving model specs keyed by model id. It may be
        empty; the caller handles that case.
    """
    viable = {}
    max_cost = constraints.get("max_cost_per_request")   # USD
    max_latency = constraints.get("max_latency_ms")      # ms

    for mid, spec in candidates.items():
        # Context window check. A missing or null window counts as 0, and
        # the same value is used in the log line so it cannot raise KeyError.
        context_window = spec.get("context_window") or 0
        if context_window < profile.min_context_tokens:
            logger.debug(f"[Filter] {mid}: context too small "
                         f"({context_window} < {profile.min_context_tokens})")
            continue

        # A missing or null feature table means "supports nothing".
        features = spec.get("features") or {}

        # Feature: tool calling
        if profile.requires_tool_calling and not features.get("tool_calling"):
            logger.debug(f"[Filter] {mid}: no tool_calling support")
            continue

        # Feature: image input
        if profile.requires_image_input and not features.get("image_input"):
            logger.debug(f"[Filter] {mid}: no image_input support")
            continue

        # Feature: json mode
        if profile.requires_json_mode and not features.get("json_mode"):
            logger.debug(f"[Filter] {mid}: no json_mode support")
            continue

        # Cost cap
        if max_cost is not None:
            est = self._estimate_cost(spec, profile)
            if est > max_cost:
                logger.debug(f"[Filter] {mid}: cost {est:.6f} > cap {max_cost}")
                continue

        # Latency cap: unknown latency is treated as very slow (99999 ms),
        # so such models fail any realistic cap.
        if max_latency is not None:
            latency = spec.get("avg_latency_ms")
            if latency is None:
                latency = 99999
            if latency > max_latency:
                logger.debug(f"[Filter] {mid}: latency too high")
                continue

        viable[mid] = spec

    return viable
482
+
483
+ # ── Step 5: Utility Scoring ───────────────────────────────────────────────
484
+
485
def _score_candidates(
    self, candidates: dict, profile: QueryUtilityProfile, budget_mode: str
) -> list:
    """
    Score each candidate model with:

        U(m, q) = (Σ wᵢ · capᵢ(m)) / (Σ wᵢ) − (λ / (1 + λ)) · cost_norm(m)

    where:
        wᵢ        = importance weight for capability dimension i (from profile)
        capᵢ(m)   = model m's score on dimension i (read from the model's
                    "capabilities" table; a missing dimension counts as 0.0)
        λ         = budget penalty, BUDGET_LAMBDA[budget_mode]
        cost_norm = the model's estimated request cost, min-max normalized
                    on a log scale across the candidates (0 = cheapest)

    Models whose provider is "ollama" additionally lose 0.12 / (1 + λ).

    A model is kept only if its utility reaches MIN_UTILITY_THRESHOLD, except
    when there are at most two candidates, in which case all are kept.

    Returns:
        List of (model_id, utility_score) tuples in candidate order; empty
        when ``candidates`` is empty.

    Raises:
        KeyError: if ``budget_mode`` is not a key of BUDGET_LAMBDA.
    """
    lam = BUDGET_LAMBDA[budget_mode]

    # Nothing to score. Returning early avoids max()/min() raising
    # ValueError on an empty sequence further down.
    if not candidates:
        return []

    # Dimension weights from profile
    dimension_weights = {
        "reasoning": profile.reasoning,
        "coding": profile.coding,
        "math": profile.math,
        "creativity": profile.creativity,
        "factuality": profile.factuality,
        "instruction_following": profile.instruction_following,
        "long_context": profile.long_context,
        "multilingual": profile.multilingual,
        "tool_use": profile.tool_use,
        "summarization": profile.summarization,
        "conversation": profile.conversation,
    }

    # Only keep dimensions with non-zero weight
    active_dims = {k: w for k, w in dimension_weights.items() if w > 0}
    total_weight = sum(active_dims.values())
    if total_weight == 0:
        # Pathological case: no signals, so use instruction_following
        # (plus a lighter conversation weight) as the baseline.
        active_dims = {"instruction_following": 1.0, "conversation": 0.5}
        total_weight = 1.5

    # Compute raw costs for normalization
    costs = {
        mid: self._estimate_cost(spec, profile)
        for mid, spec in candidates.items()
    }

    # Log-scale normalization: separates $0.0001 from $0.003 from $0.020
    # meaningfully, whereas a linear scale collapses these differences when
    # one expensive model anchors the range.
    # Zero-cost models always land at cost_norm = 0 because they are the
    # minimum of the range.
    LOG_EPS = 1e-7  # prevents log(0)
    log_costs = {mid: math.log(c + LOG_EPS) for mid, c in costs.items()}
    log_max = max(log_costs.values())
    log_min = min(log_costs.values())
    # Floor the range so identical costs do not divide by zero.
    log_range = max(log_max - log_min, 1e-9)

    scored = []
    for mid, spec in candidates.items():
        caps = spec.get("capabilities", {})

        # Weighted capability sum
        cap_sum = sum(
            w * caps.get(dim, 0.0)
            for dim, w in active_dims.items()
        )
        cap_score = cap_sum / total_weight  # weighted mean of the capability scores

        # Cost normalization on log scale (0 = cheapest, 1 = most expensive)
        cost_norm = (log_costs[mid] - log_min) / log_range

        # Final utility. Dividing by (1 + lam) keeps the cost penalty
        # below 1 for any non-negative lam, so a large lam cannot swamp
        # the capability term without bound.
        utility = cap_score - (lam * cost_norm) / (1 + lam)

        # Provider-tier adjustment: Ollama models are penalized more as
        # lam gets smaller (i.e. as the mode leans toward quality).
        provider = spec.get("provider", "")
        if provider == "ollama":
            # 0.12 / (1 + lam): lam=3 -> 0.030, lam=1.2 -> ~0.055, lam=0.3 -> ~0.092
            ollama_penalty = 0.12 / (1 + lam)
            utility -= ollama_penalty

        if utility >= MIN_UTILITY_THRESHOLD or len(candidates) <= 2:
            scored.append((mid, utility))

        logger.debug(
            f"[Score] {mid}: cap={cap_score:.3f} cost_norm={cost_norm:.3f} "
            f"U={utility:.4f} (λ={lam})"
        )

    return scored
580
+
581
+ # ── Helpers ───────────────────────────────────────────────────────────────
582
+
583
def _estimate_cost(self, spec: dict, profile: QueryUtilityProfile) -> float:
    """
    Estimate the USD cost of a single request served by this model.

    Multiplies the spec's per-1k-token input and output rates (each
    defaulting to 0 when absent) by the profile's estimated input and
    output token counts and returns the sum.
    """
    input_rate = spec.get("input_cost_per_1k", 0)
    output_rate = spec.get("output_cost_per_1k", 0)

    prompt_usd = input_rate * profile.estimated_input_tokens / 1000
    completion_usd = output_rate * profile.estimated_output_tokens / 1000
    return prompt_usd + completion_usd
588
+
589
+ def _overall_capability(self, spec: dict) -> float:
590
+ """Single overall capability score for a model (for legacy field compatibility)."""
591
+ caps = spec.get("capabilities", {})
592
+ weights = {"reasoning": 0.30, "coding": 0.25, "math": 0.15,
593
+ "instruction_following": 0.15, "factuality": 0.15}
594
+ return round(
595
+ sum(caps.get(k, 0) * w for k, w in weights.items()), 4
596
+ )
597
+
598
def _build_explanation(
    self,
    scored: list,
    viable: dict,
    profile: QueryUtilityProfile,
    budget_mode: str,
    constraints: dict,
    winner_id: str,
) -> dict:
    """
    Assemble the explainability payload for a routing decision.

    The payload names the winning model, lists the (up to four) heaviest
    query dimensions, echoes the cost/latency/context constraints that were
    supplied, reports the feature requirements and token estimates, and
    includes a shortlist of the first five entries of ``scored``.
    """
    chosen = viable[winner_id]

    # Collect the dimensions that carry weight, in a fixed order so that
    # the stable sort below breaks ties the same way every time.
    dimension_names = (
        "reasoning",
        "coding",
        "math",
        "creativity",
        "factuality",
        "instruction_following",
        "long_context",
        "multilingual",
        "tool_use",
        "summarization",
        "conversation",
    )
    weighted = [(name, getattr(profile, name)) for name in dimension_names]
    active = [pair for pair in weighted if pair[1] > 0]
    active.sort(key=lambda pair: pair[1], reverse=True)
    leading = active[:4]

    # Shortlist: first five scored models with their headline numbers.
    shortlist = []
    for model_id, utility in scored[:5]:
        entry_spec = viable[model_id]
        shortlist.append(
            {
                "model_id": model_id,
                "provider": entry_spec["provider"],
                "utility_score": round(utility, 4),
                "capability": self._overall_capability(entry_spec),
                "est_cost_usd": round(self._estimate_cost(entry_spec, profile), 8),
            }
        )

    reported_keys = ("max_cost_per_request", "max_latency_ms", "min_context_tokens")
    applied = {}
    for key, value in constraints.items():
        if key in reported_keys:
            applied[key] = value

    return {
        "selected_model": winner_id,
        "provider": chosen["provider"],
        "budget_mode": budget_mode,
        "lambda": BUDGET_LAMBDA[budget_mode],
        "primary_domain": profile.primary_domain,
        "query_dimensions": {name: round(weight, 2) for name, weight in leading},
        "hard_constraints_applied": applied,
        "feature_requirements": {
            "tool_calling": profile.requires_tool_calling,
            "image_input": profile.requires_image_input,
            "json_mode": profile.requires_json_mode,
            "min_context": profile.min_context_tokens,
        },
        "estimated_tokens": {
            "input": profile.estimated_input_tokens,
            "output": profile.estimated_output_tokens,
        },
        "shortlist": shortlist,
        "candidates_evaluated": len(scored),
        "registry_source": chosen.get("live_patch", {}).get("source", "baseline"),
    }
llmopt/registry/__init__.py CHANGED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ """LLMOpt registry package — hybrid model registry."""
2
+ from llmopt.registry.hybrid_updater import HybridRegistryUpdater
3
+
4
+ __all__ = ["HybridRegistryUpdater"]
llmopt/registry/hybrid_updater.py ADDED
@@ -0,0 +1,267 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ LLMOpt — Hybrid Registry Updater
3
+ =================================
4
+ Strategy:
5
+ 1. Load data/model_registry_v2.json as the authoritative capability baseline
6
+ (benchmark scores, context windows, feature support)
7
+ 2. Fetch live data from OpenRouter API to patch:
8
+ - Current pricing (input/output cost per 1k)
9
+ - Model availability (is it still listed?)
10
+ - Any new models to flag for manual addition
11
+ 3. Merge: registry baseline + live patch → runtime model pool
12
+ 4. Cache the merged result for TTL minutes to avoid hammering the API
13
+
14
+ This runs at startup and on a background refresh cycle.
15
+ """
16
+
17
+ import json
18
+ import time
19
+ import logging
20
+ import os
21
+ import copy
22
+ from datetime import datetime, timezone
23
+ from pathlib import Path
24
+ from typing import Optional
25
+
26
+ try:
27
+ import requests
28
+ REQUESTS_AVAILABLE = True
29
+ except ImportError:
30
+ REQUESTS_AVAILABLE = False
31
+
32
+ logger = logging.getLogger(__name__)
33
+
34
+ # ── Constants ────────────────────────────────────────────────────────────────
35
+
36
+ # V2 registry β€” benchmark-derived capability vectors
37
+ REGISTRY_PATH = Path(__file__).parent.parent.parent / "data" / "model_registry_v2.json"
38
+
39
+ OPENROUTER_MODELS_URL = "https://openrouter.ai/api/v1/models"
40
+ CACHE_TTL_SECONDS = 1800 # 30 minutes β€” pricing changes infrequently
41
+
42
+
43
+ # ── Provider β†’ OpenRouter prefix map ─────────────────────────────────────────
44
+
45
+ # OpenRouter model IDs follow the pattern: "provider/model-name"
46
+ # This maps our registry provider names to OpenRouter's prefix scheme
47
+ PROVIDER_PREFIX_MAP = {
48
+ "openai": "openai/",
49
+ "anthropic": "anthropic/",
50
+ "google": "google/",
51
+ "mistral": "mistral/",
52
+ "deepseek": "deepseek/",
53
+ "meta": "meta-llama/",
54
+ }
55
+
56
+ # Maps our registry model_id β†’ OpenRouter model id (where they differ)
57
+ MODEL_ID_ALIASES = {
58
+ "gpt-4o": "openai/gpt-4o",
59
+ "gpt-4o-mini": "openai/gpt-4o-mini",
60
+ "gpt-4.1": "openai/gpt-4.1",
61
+ "gpt-4.1-mini": "openai/gpt-4.1-mini",
62
+ "claude-opus-4-5": "anthropic/claude-opus-4-5",
63
+ "claude-sonnet-4-5": "anthropic/claude-sonnet-4-5",
64
+ "claude-haiku-3-5": "anthropic/claude-3-5-haiku",
65
+ "gemini-2.5-pro": "google/gemini-2.5-pro",
66
+ "gemini-2.5-flash": "google/gemini-2.5-flash",
67
+ "gemini-1.5-flash": "google/gemini-2.5-flash",
68
+ "mistral-large-latest": "mistral/mistral-large",
69
+ "mistral-small-latest": "mistral/mistral-small",
70
+ "deepseek-chat": "deepseek/deepseek-chat",
71
+ "deepseek-reasoner": "deepseek/deepseek-r1",
72
+ # Ollama is local β€” no OpenRouter equivalent
73
+ }
74
+
75
+
76
+ # ── Main Updater Class ────────────────────────────────────────────────────────
77
+
78
class HybridRegistryUpdater:
    """
    Loads the registry JSON baseline and patches it with live OpenRouter data.

    Usage:
        updater = HybridRegistryUpdater()
        registry = updater.get_registry()   # always returns a valid registry
    """

    def __init__(self, openrouter_api_key: Optional[str] = None):
        """
        Args:
            openrouter_api_key: Key sent as a Bearer token on the live fetch.
                Falls back to the OPENROUTER_API_KEY environment variable;
                when neither is set the request is sent unauthenticated.

        Raises:
            FileNotFoundError: if the baseline registry file is missing.
        """
        self._baseline: dict = {}
        self._live_patch: dict = {}       # openrouter_model_id → pricing dict
        self._merged: dict = {}           # final merged runtime registry
        self._cache_timestamp: float = 0.0
        self._openrouter_key = openrouter_api_key or os.getenv("OPENROUTER_API_KEY", "")

        # Load baseline immediately (synchronous, always available)
        self._load_baseline()

    # ── Public API ────────────────────────────────────────────────────────────

    def get_registry(self, force_refresh: bool = False) -> dict:
        """
        Return the merged registry dict.

        Refreshes the live patch and rebuilds the merge when the cache is
        older than CACHE_TTL_SECONDS, when nothing has been merged yet, or
        when ``force_refresh`` is True. A failed live fetch still resets the
        cache timer, so the API is retried at most once per TTL window.
        """
        now = time.time()
        cache_expired = (now - self._cache_timestamp) > CACHE_TTL_SECONDS

        if force_refresh or cache_expired or not self._merged:
            self._refresh_live_patch()
            self._build_merged()
            self._cache_timestamp = now

        return self._merged

    def get_model(self, model_id: str) -> Optional[dict]:
        """Return a single model's merged spec, or None if not found."""
        registry = self.get_registry()
        return registry.get("models", {}).get(model_id)

    def list_available_for_providers(self, available_providers: set) -> dict:
        """
        Return only the models whose provider is in ``available_providers``.

        No provider is special-cased: a local provider such as 'ollama' is
        included only when it is listed in ``available_providers``. The
        comparison is exact (no case folding).
        """
        registry = self.get_registry()
        return {
            mid: spec
            for mid, spec in registry.get("models", {}).items()
            if spec.get("provider") in available_providers
        }

    def get_last_updated(self) -> str:
        """Return the last refresh time as a UTC ISO-8601 string, or "never"."""
        if not self._cache_timestamp:
            return "never"
        return datetime.fromtimestamp(self._cache_timestamp, tz=timezone.utc).isoformat()

    # ── Internal: Load Baseline ───────────────────────────────────────────────

    def _load_baseline(self):
        """Load registry JSON from disk. Dies loudly if missing, since it is required."""
        if not REGISTRY_PATH.exists():
            raise FileNotFoundError(
                f"Model registry not found at {REGISTRY_PATH}. "
                "This file is required for LLMOpt to function."
            )
        # Explicit UTF-8 so the result does not depend on the platform's
        # default text encoding.
        with open(REGISTRY_PATH, "r", encoding="utf-8") as f:
            self._baseline = json.load(f)
        logger.info(
            f"[Registry] Loaded baseline: {len(self._baseline.get('models', {}))} models"
        )

    # ── Internal: Live Patch from OpenRouter ─────────────────────────────────

    @staticmethod
    def _parse_price(raw) -> Optional[float]:
        """
        Convert a per-token price value into a per-1k-token float.

        Returns None when the value is missing, empty, unparseable, negative
        or not finite, so the caller can tell "no usable price" apart from a
        genuine price of 0 (a free model).
        """
        if raw is None or raw == "":
            return None
        try:
            per_token = float(raw)
        except (ValueError, TypeError):
            return None
        # Rejects negatives, infinities and NaN (NaN fails every comparison).
        if not 0 <= per_token < float("inf"):
            return None
        return round(per_token * 1000, 8)

    def _refresh_live_patch(self):
        """
        Fetch the current model list and pricing from OpenRouter.

        Stores the result in self._live_patch keyed by OpenRouter model id.
        On any fetch error or unexpected payload shape a warning is logged
        and the previous patch is left untouched, so the baseline (or the
        last good patch) remains the fallback.
        """
        if not REQUESTS_AVAILABLE:
            logger.warning("[Registry] 'requests' not installed. Skipping live patch.")
            return

        headers = {"Content-Type": "application/json"}
        if self._openrouter_key:
            headers["Authorization"] = f"Bearer {self._openrouter_key}"

        try:
            resp = requests.get(
                OPENROUTER_MODELS_URL,
                headers=headers,
                timeout=8
            )
            resp.raise_for_status()
            data = resp.json()
        except Exception as e:
            # Deliberately broad: the live patch is best-effort.
            logger.warning(f"[Registry] Live fetch failed: {e}. Using baseline only.")
            return

        entries = data.get("data", []) if isinstance(data, dict) else None
        if not isinstance(entries, list):
            logger.warning("[Registry] Unexpected live payload shape. Using baseline only.")
            return

        fetched_at = datetime.now(timezone.utc).isoformat()
        patch = {}
        for model in entries:
            if not isinstance(model, dict):
                continue
            model_id = model.get("id", "")
            if not model_id:
                continue

            pricing = model.get("pricing")
            if not isinstance(pricing, dict):
                pricing = {}

            # Prices are normalized from per-token to per-1k-token floats.
            # A price that is missing or unusable becomes None so that the
            # merge keeps the baseline price instead of overwriting it with 0.
            patch[model_id] = {
                "input_cost_per_1k": self._parse_price(pricing.get("prompt")),
                "output_cost_per_1k": self._parse_price(pricing.get("completion")),
                "context_window": model.get("context_length"),
                "available_on_openrouter": True,
                "fetched_at": fetched_at,
            }

        self._live_patch = patch
        logger.info(f"[Registry] Live patch: {len(patch)} models from OpenRouter")

        # Flag new models we don't have in registry (for manual review)
        known_or_ids = set(MODEL_ID_ALIASES.values())
        for or_id in patch:
            if or_id not in known_or_ids:
                logger.debug(f"[Registry] Unknown OpenRouter model (not in registry): {or_id}")

    # ── Internal: Build Merged Registry ──────────────────────────────────────

    def _build_merged(self):
        """
        Merge baseline + live_patch into self._merged.

        Merge rules:
        - Capability scores:   always from baseline
        - Feature support:     always from baseline
        - Pricing (cost/1k):   live_patch wins if it has a usable value, else baseline
        - Context window:      live_patch wins if truthy, else baseline
        - live_patch metadata: stored in model["live_patch"] for observability
        """
        # Deep copy so patching never mutates the on-disk baseline snapshot.
        merged = copy.deepcopy(self._baseline)
        models = merged.get("models", {})

        for our_model_id, spec in models.items():
            # Find the OpenRouter ID for this model
            or_id = MODEL_ID_ALIASES.get(our_model_id)
            if or_id and or_id in self._live_patch:
                patch = self._live_patch[or_id]

                # Price override (None means "no usable live price")
                if patch.get("input_cost_per_1k") is not None:
                    spec["input_cost_per_1k"] = patch["input_cost_per_1k"]
                if patch.get("output_cost_per_1k") is not None:
                    spec["output_cost_per_1k"] = patch["output_cost_per_1k"]

                # Context window override
                if patch.get("context_window"):
                    spec["context_window"] = patch["context_window"]

                # Store patch metadata for explainability
                spec["live_patch"] = {
                    "source": "openrouter",
                    "fetched_at": patch.get("fetched_at"),
                    "input_cost_per_1k": patch.get("input_cost_per_1k"),
                    "output_cost_per_1k": patch.get("output_cost_per_1k"),
                }
            else:
                spec["live_patch"] = {"source": "baseline_only"}

        merged["_runtime_meta"] = {
            # NOTE(review): this is the time of the merge, and is written
            # even when the preceding live fetch failed.
            "last_live_fetch": datetime.now(timezone.utc).isoformat(),
            "live_models_patched": sum(
                1 for s in models.values()
                if s.get("live_patch", {}).get("source") == "openrouter"
            ),
            "total_models": len(models),
        }

        self._merged = merged
        logger.info(
            f"[Registry] Merged registry ready: "
            f"{merged['_runtime_meta']['live_models_patched']} live-patched, "
            f"{merged['_runtime_meta']['total_models']} total"
        )
llmopt/router/model_router.py CHANGED
@@ -53,26 +53,44 @@ class RoutedResponse:
53
 
54
  # LiteLLM uses "provider/model" strings for non-OpenAI providers
55
  _LITELLM_MODEL_MAP = {
56
- # OpenAI β€” no prefix needed
57
- "gpt-4o": "gpt-4o",
58
- "gpt-4o-mini": "gpt-4o-mini",
59
- "gpt-3.5-turbo": "gpt-3.5-turbo",
60
- # Anthropic
61
- "claude-3-5-haiku-20241022": "claude-3-5-haiku-20241022",
62
- "claude-3-5-sonnet-20241022": "claude-3-5-sonnet-20241022",
63
- "claude-3-haiku-20240307": "claude-3-haiku-20240307",
64
- # Google
65
- "gemini-1.5-flash": "gemini/gemini-1.5-flash",
66
- "gemini-1.5-pro": "gemini/gemini-1.5-pro",
67
- # Mistral
68
- "mistral-small-latest": "mistral/mistral-small-latest",
 
 
 
 
 
 
 
 
 
 
 
69
  "mistral-large-latest": "mistral/mistral-large-latest",
70
- # DeepSeek
71
- "deepseek-chat": "deepseek/deepseek-chat",
72
- # Ollama β€” handled separately
73
- "llama3.2:3b": "ollama/llama3.2:3b",
74
- "llama3.1:8b": "ollama/llama3.1:8b",
75
- "llama3.1:70b": "ollama/llama3.1:70b",
 
 
 
 
 
 
 
76
  }
77
 
78
  _OLLAMA_PROVIDER = "ollama"
 
53
 
54
  # LiteLLM uses "provider/model" strings for non-OpenAI providers
55
  _LITELLM_MODEL_MAP = {
56
+ # ── OpenAI ───────────────────────────────────────────────────────────────
57
+ # no prefix needed for OpenAI models
58
+ "gpt-4o": "gpt-4o",
59
+ "gpt-4o-mini": "gpt-4o-mini",
60
+ "gpt-4.1": "gpt-4.1",
61
+ "gpt-4.1-mini": "gpt-4.1-mini",
62
+ "gpt-3.5-turbo": "gpt-3.5-turbo",
63
+
64
+ # ── Anthropic ────────────────────────────────────────────────────────────
65
+ "claude-opus-4-5": "anthropic/claude-opus-4-5",
66
+ "claude-sonnet-4-5": "anthropic/claude-sonnet-4-5",
67
+ "claude-haiku-3-5": "anthropic/claude-3-5-haiku-20241022",
68
+ # Legacy Anthropic IDs (V1 registry)
69
+ "claude-3-5-haiku-20241022": "claude-3-5-haiku-20241022",
70
+ "claude-3-5-sonnet-20241022": "claude-3-5-sonnet-20241022",
71
+ "claude-3-haiku-20240307": "claude-3-haiku-20240307",
72
+
73
+ # ── Google ───────────────────────────────────────────────────────────────
74
+ "gemini-2.5-pro": "gemini/gemini-2.5-pro",
75
+ "gemini-2.5-flash": "gemini/gemini-2.5-flash",
76
+ "gemini-1.5-flash": "gemini/gemini-2.5-flash",
77
+ "gemini-1.5-pro": "gemini/gemini-2.5-pro",
78
+
79
+ # ── Mistral ──────────────────────────────────────────────────────────────
80
  "mistral-large-latest": "mistral/mistral-large-latest",
81
+ "mistral-small-latest": "mistral/mistral-small-latest",
82
+
83
+ # ── DeepSeek ─────────────────────────────────────────────────────────────
84
+ "deepseek-chat": "deepseek/deepseek-chat",
85
+ "deepseek-reasoner": "deepseek/deepseek-reasoner",
86
+
87
+ # ── Ollama (local) ───────────────────────────────────────────────────────
88
+ "llama3.3-70b": "ollama/llama3.3:70b",
89
+ "llama3.2-vision": "ollama/llama3.2-vision",
90
+ # Legacy Ollama IDs (V1 registry)
91
+ "llama3.2:3b": "ollama/llama3.2:3b",
92
+ "llama3.1:8b": "ollama/llama3.1:8b",
93
+ "llama3.1:70b": "ollama/llama3.1:70b",
94
  }
95
 
96
  _OLLAMA_PROVIDER = "ollama"
llmopt/updater/__init__.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ """LLMOpt updater package — adaptive runtime statistics."""
2
+ from llmopt.updater.adaptive_updater import AdaptiveRuntimeUpdater
3
+
4
+ __all__ = ["AdaptiveRuntimeUpdater"]
llmopt/updater/adaptive_updater.py ADDED
@@ -0,0 +1,268 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ LLMOpt — Adaptive Runtime Statistics (EMA-Based)
3
+ =================================================
4
+ Lightweight online learning — NO RL, NO Optuna, NO GPU.
5
+
6
+ What this updates at runtime:
7
+ - avg_latency_ms (per model, exponential moving average)
8
+ - provider_reliability (rolling success rate)
9
+ - observed_utility (quality × cost-efficiency product, EMA)
10
+
11
+ These stats are combined with registry capability scores at routing time
12
+ to produce small dynamic adjustments. They do NOT overwrite benchmark scores.
13
+
14
+ Formula:
15
+ EMA update: s_new = α · s_old + (1 − α) · x_observed
16
+ where α = momentum (0.85–0.95 for stability)
17
+
18
+ Confidence decay:
19
+ If a model hasn't been observed recently, its runtime adjustment
20
+ fades back toward 0 (no adjustment), so baseline registry scores take over.
21
+
22
+ Storage: Simple JSON file (no DB needed for MVP).
23
+ Can be swapped for Redis or SQLite later.
24
+ """
25
+
26
+ import json
27
+ import math
28
+ import logging
29
+ import os
30
+ import time
31
+ from dataclasses import dataclass, field, asdict
32
+ from datetime import datetime, timezone
33
+ from pathlib import Path
34
+ from typing import Optional
35
+
36
# Module-level logger; all messages from this file are prefixed "[Runtime]".
logger = logging.getLogger(__name__)

# Runtime stats persisted to data/ at project root
# (three directory levels above this file).
STATS_PATH = Path(__file__).parent.parent.parent / "data" / "runtime_stats.json"

# EMA momentum — higher = slower to update (more stable)
# 0.90 means new obs counts for 10% of the new value
LATENCY_ALPHA = 0.90
RELIABILITY_ALPHA = 0.92
UTILITY_ALPHA = 0.88

# Half-life of the confidence time decay: after this many seconds without an
# observation the decay factor has dropped to 0.5. The decay is exponential,
# so it approaches 0 asymptotically rather than reaching it.
CONFIDENCE_DECAY_HALF_LIFE_SECONDS = 3 * 24 * 3600 # 3 days

# Minimum observations before runtime stats influence routing
MIN_OBS_FOR_INFLUENCE = 5
52
+
53
+
54
@dataclass
class ModelRuntimeStats:
    """
    Per-model runtime statistics record.

    One instance is kept per model id by AdaptiveRuntimeUpdater and is
    serialized to / restored from JSON via ``asdict`` and ``**kwargs``, so
    field names double as the on-disk keys.
    """

    model_id: str
    obs_count: int = 0                # number of recorded outcomes
    ema_latency_ms: float = 0.0       # 0 = no data yet
    ema_reliability: float = 1.0      # starts optimistic
    ema_utility: float = 0.0          # 0 = no feedback yet
    last_observed_ts: float = 0.0     # unix timestamp
    confidence: float = 0.0           # 0–1, grows with observations

    # Raw accumulators for logging
    total_successes: int = 0
    total_failures: int = 0
    total_requests: int = 0
68
+
69
+
70
class AdaptiveRuntimeUpdater:
    """
    Tracks per-model runtime statistics and provides small adjustments
    to utility scores during routing.

    Statistics live in memory as ``ModelRuntimeStats`` records keyed by model
    id and are persisted to a JSON file (``STATS_PATH`` unless overridden).
    Loading and saving are best-effort: failures are logged, never raised.

    Usage:
        updater = AdaptiveRuntimeUpdater()
        updater.record_outcome(model_id, latency_ms=1200, success=True, quality_score=8.5)
        adjustment = updater.get_utility_adjustment(model_id)
    """

    def __init__(self, stats_path: Optional[Path] = None):
        """
        Args:
            stats_path: JSON file used for persistence. Falls back to
                ``STATS_PATH`` when omitted.
        """
        self._path = stats_path or STATS_PATH
        self._stats: dict[str, ModelRuntimeStats] = {}
        self._load()

    # ── Public API ────────────────────────────────────────────────────────────

    def record_outcome(
        self,
        model_id: str,
        latency_ms: Optional[float] = None,
        success: bool = True,
        quality_score: Optional[float] = None,  # 1–10 from LLMJudge, or None
        cost_usd: Optional[float] = None,
    ) -> None:
        """
        Record a single routing outcome for a model.
        Called after each LLM API response.

        Args:
            model_id: Model the outcome belongs to; a stats record is created
                on first use.
            latency_ms: Observed latency. Ignored when None or <= 0.
            success: Whether the call succeeded; feeds the reliability EMA.
            quality_score: Optional 1–10 score (from LLMJudge or user
                feedback). Without it the utility EMA is left untouched.
            cost_usd: Optional request cost. Only used together with
                ``quality_score`` and only when > 0.
        """
        stats = self._get_or_create(model_id)

        stats.obs_count += 1
        stats.total_requests += 1
        stats.last_observed_ts = time.time()

        # ── Latency EMA ──────────────────────────────────────────────────────
        if latency_ms is not None and latency_ms > 0:
            stats.ema_latency_ms = self._ema_update(
                stats.ema_latency_ms, latency_ms, LATENCY_ALPHA
            )

        # ── Reliability EMA ──────────────────────────────────────────────────
        # No cold-start special case here: the EMA starts at the optimistic
        # default of 1.0 and is pulled down by failures.
        if success:
            stats.total_successes += 1
        else:
            stats.total_failures += 1
        outcome_val = 1.0 if success else 0.0
        stats.ema_reliability = (
            RELIABILITY_ALPHA * stats.ema_reliability +
            (1 - RELIABILITY_ALPHA) * outcome_val
        )

        # ── Utility EMA (from quality + cost efficiency) ─────────────────────
        if quality_score is not None:
            # Quality normalized to 0–1.
            obs_util = quality_score / 10.0
            if cost_usd is not None and cost_usd > 0:
                # cost_eff falls from 1 toward 0 as cost grows; it only scales
                # quality between 70% and 100%, so quality stays dominant.
                cost_eff = 1.0 / (1.0 + cost_usd * 100)
                obs_util = obs_util * (0.7 + 0.3 * cost_eff)
            stats.ema_utility = self._ema_update(
                stats.ema_utility, obs_util, UTILITY_ALPHA
            )

        # ── Confidence ───────────────────────────────────────────────────────
        # Snapshot for logging/persistence. Readers recompute it so that the
        # time decay is applied at query time, not frozen at write time.
        stats.confidence = self._current_confidence(stats)

        logger.debug(
            f"[Runtime] {model_id}: lat={stats.ema_latency_ms:.0f}ms "
            f"rel={stats.ema_reliability:.3f} util={stats.ema_utility:.3f} "
            f"conf={stats.confidence:.3f} n={stats.obs_count}"
        )

        # Persist every 10 observations to avoid too many writes
        if stats.obs_count % 10 == 0:
            self._save()

    def get_utility_adjustment(self, model_id: str) -> float:
        """
        Returns a small adjustment ∈ [-0.15, +0.15] to add to the
        utility score during routing.

        Returns 0.0 if we don't have enough observations yet
        (< MIN_OBS_FOR_INFLUENCE), ensuring cold start doesn't distort routing,
        and also when the time-decayed confidence has dropped below 0.1.

        The adjustment is intentionally small — runtime observations refine
        the routing, they don't override benchmark-based capability scores.
        """
        stats = self._stats.get(model_id)
        if not stats or stats.obs_count < MIN_OBS_FOR_INFLUENCE:
            return 0.0

        # Recompute instead of reading stats.confidence: the stored value was
        # taken at the moment of the last observation (decay factor ~1.0), so
        # using it would never let a stale model fade back to the baseline.
        conf = self._current_confidence(stats)
        if conf < 0.1:
            return 0.0

        # Reliability penalty (poor reliability → negative adjustment)
        # e.g. 90% reliability → (0.90 - 0.95) * 0.5 = -0.025
        reliability_adj = (stats.ema_reliability - 0.95) * 0.5

        # Utility signal (if we have quality feedback)
        # e.g. avg quality 8/10 = 0.8 → (0.8 - 0.7) * 0.2 = +0.02
        utility_adj = 0.0
        if stats.ema_utility > 0:
            utility_adj = (stats.ema_utility - 0.7) * 0.2

        total_adj = (reliability_adj + utility_adj) * conf
        return max(-0.15, min(0.15, total_adj))

    def get_latency_estimate(self, model_id: str) -> Optional[float]:
        """Returns EMA latency estimate if available (>= 3 observations), else None."""
        stats = self._stats.get(model_id)
        if stats and stats.ema_latency_ms > 0 and stats.obs_count >= 3:
            return stats.ema_latency_ms
        return None

    def get_stats_summary(self, model_id: str) -> dict:
        """
        Returns full stats dict for observability / logging.

        The dict holds every ``ModelRuntimeStats`` field plus ``success_rate``
        (None when no requests were recorded). ``confidence`` is the current
        time-decayed value, i.e. the one routing actually uses.
        """
        stats = self._stats.get(model_id)
        if not stats:
            return {"model_id": model_id, "obs_count": 0, "status": "no_data"}
        return {
            **asdict(stats),
            "confidence": self._current_confidence(stats),
            "success_rate": (
                stats.total_successes / stats.total_requests
                if stats.total_requests > 0 else None
            ),
        }

    def save(self) -> None:
        """Explicitly save stats to disk."""
        self._save()

    # ── Internal ──────────────────────────────────────────────────────────────

    def _get_or_create(self, model_id: str) -> ModelRuntimeStats:
        """Return the stats record for ``model_id``, creating it on first use."""
        if model_id not in self._stats:
            self._stats[model_id] = ModelRuntimeStats(model_id=model_id)
        return self._stats[model_id]

    @staticmethod
    def _ema_update(previous: float, observed: float, alpha: float) -> float:
        """
        One EMA step: ``alpha * previous + (1 - alpha) * observed``.

        A ``previous`` of exactly 0.0 is the "no data yet" sentinel, in which
        case the first observation is adopted as-is (cold start).
        """
        if previous == 0.0:
            return observed
        return alpha * previous + (1 - alpha) * observed

    def _current_confidence(self, stats: ModelRuntimeStats) -> float:
        """
        Confidence in ``stats`` as of now, in [0, 1].

        Ramps linearly with the observation count, saturating at 1.0 once
        MIN_OBS_FOR_INFLUENCE observations exist, then is scaled by the time
        decay since the last observation.
        """
        ramp = min(1.0, stats.obs_count / MIN_OBS_FOR_INFLUENCE)
        return ramp * self._time_decay_factor(stats.last_observed_ts)

    def _time_decay_factor(self, last_ts: float) -> float:
        """
        Returns 1.0 if recently observed, decays toward 0 if stale.
        Uses exponential decay with CONFIDENCE_DECAY_HALF_LIFE_SECONDS.
        A ``last_ts`` of 0 means "never observed" and yields 0.0.
        """
        if last_ts == 0:
            return 0.0
        # Clamp at 0 so a timestamp in the future (clock moved backwards)
        # cannot produce a factor above 1.0.
        elapsed = max(0.0, time.time() - last_ts)
        half_life = CONFIDENCE_DECAY_HALF_LIFE_SECONDS
        return math.exp(-math.log(2) * elapsed / half_life)

    def _load(self) -> None:
        """
        Populate the in-memory stats from the JSON file, if it exists.

        Best-effort: an unreadable or corrupt file is logged and ignored.
        Entries are restored one by one, so a single malformed record is
        skipped rather than discarding everything; keys that are not
        ``ModelRuntimeStats`` fields are dropped.
        """
        # Local import keeps the module-level import block untouched.
        from dataclasses import fields

        if not self._path.exists():
            logger.info("[Runtime] No existing stats file. Starting fresh.")
            return
        try:
            with open(self._path, "r", encoding="utf-8") as f:
                raw = json.load(f)
        except Exception as e:
            # Deliberately broad: a bad stats file must never block startup.
            logger.warning(f"[Runtime] Failed to load stats: {e}. Starting fresh.")
            return
        if not isinstance(raw, dict):
            logger.warning(
                "[Runtime] Failed to load stats: top-level JSON value is not "
                "an object. Starting fresh."
            )
            return

        known = {f.name for f in fields(ModelRuntimeStats)}
        for mid, data in raw.items():
            try:
                self._stats[mid] = ModelRuntimeStats(
                    **{key: value for key, value in data.items() if key in known}
                )
            except (AttributeError, TypeError) as e:
                # AttributeError: entry is not a mapping.
                # TypeError: a required field such as model_id is missing.
                logger.warning(f"[Runtime] Skipping malformed stats entry {mid!r}: {e}")
        logger.info(f"[Runtime] Loaded stats for {len(self._stats)} models.")

    def _save(self) -> None:
        """
        Write all stats to the JSON file.

        The data goes to a sibling ``.tmp`` file first and is then moved over
        the target with ``os.replace``, so an interrupted write cannot leave a
        truncated stats file behind. Best-effort: failures are logged only.
        """
        tmp_path = self._path.with_name(self._path.name + ".tmp")
        try:
            self._path.parent.mkdir(parents=True, exist_ok=True)
            with open(tmp_path, "w", encoding="utf-8") as f:
                json.dump(
                    {mid: asdict(s) for mid, s in self._stats.items()},
                    f, indent=2
                )
            os.replace(tmp_path, self._path)
        except Exception as e:
            # Deliberately broad: persistence must never break request handling.
            logger.warning(f"[Runtime] Failed to save stats: {e}")