Eric Xu committed on
Commit
25a1f95
·
unverified ·
1 Parent(s): a641d8c

Add web interface for SGO pipeline

Browse files

FastAPI backend + single-page frontend that wraps the full SGO pipeline:
- Entity input with templates (product/resume/pitch)
- LLM-generated cohort builder with configurable segments
- Streaming evaluation with real-time progress via SSE
- Counterfactual gradient computation with visual table

Run with: uv run --extra web python web/app.py

Files changed (3) hide show
  1. pyproject.toml +7 -0
  2. web/app.py +342 -0
  3. web/static/index.html +879 -0
pyproject.toml CHANGED
@@ -12,6 +12,13 @@ dependencies = [
12
  "python-dotenv>=1.0.0",
13
  ]
14
 
 
 
 
 
 
 
 
15
  [build-system]
16
  requires = ["hatchling"]
17
  build-backend = "hatchling.build"
 
12
  "python-dotenv>=1.0.0",
13
  ]
14
 
15
+ [project.optional-dependencies]
16
+ web = [
17
+ "fastapi>=0.115.0",
18
+ "uvicorn>=0.32.0",
19
+ "sse-starlette>=2.0.0",
20
+ ]
21
+
22
  [build-system]
23
  requires = ["hatchling"]
24
  build-backend = "hatchling.build"
web/app.py ADDED
@@ -0,0 +1,342 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ SGO Web Interface — FastAPI backend wrapping the SGO pipeline.
3
+
4
+ Provides a browser UI for:
5
+ 1. Describing an entity to evaluate
6
+ 2. Generating an evaluator cohort (LLM-generated or uploaded)
7
+ 3. Running evaluation against the cohort
8
+ 4. Running counterfactual probes to get the semantic gradient
9
+
10
+ Usage:
11
+ uv run --extra web python web/app.py
12
+ # Opens at http://localhost:8000
13
+ """
14
+
15
+ import json
16
+ import os
17
+ import re
18
+ import asyncio
19
+ import time
20
+ import uuid
21
+ import concurrent.futures
22
+ from datetime import datetime
23
+ from pathlib import Path
24
+
25
+ from dotenv import load_dotenv
26
+ from fastapi import FastAPI, HTTPException
27
+ from fastapi.staticfiles import StaticFiles
28
+ from fastapi.responses import FileResponse
29
+ from pydantic import BaseModel
30
+ from sse_starlette.sse import EventSourceResponse
31
+
32
+ PROJECT_ROOT = Path(__file__).resolve().parent.parent
33
+ load_dotenv(PROJECT_ROOT / ".env")
34
+
35
+ from openai import OpenAI
36
+
37
+ # Import core functions from existing scripts
38
+ import sys
39
+ sys.path.insert(0, str(PROJECT_ROOT / "scripts"))
40
+ from evaluate import evaluate_one, analyze as analyze_eval
41
+ from counterfactual import probe_one, analyze_gradient, build_changes_block
42
+ from generate_cohort import generate_segment
43
+
44
+ app = FastAPI(title="SGO — Semantic Gradient Optimization")
45
+ app.mount("/static", StaticFiles(directory=Path(__file__).parent / "static"), name="static")
46
+
47
+ # In-memory store for active sessions
48
+ sessions: dict = {}
49
+
50
+
51
def get_client():
    """Build an OpenAI client from the LLM_API_KEY and LLM_BASE_URL env vars."""
    api_key = os.getenv("LLM_API_KEY")
    base_url = os.getenv("LLM_BASE_URL")
    return OpenAI(api_key=api_key, base_url=base_url)
56
+
57
+
58
def get_model():
    """Return the model name from LLM_MODEL_NAME, or "openai/gpt-4o-mini" if unset."""
    return os.environ.get("LLM_MODEL_NAME", "openai/gpt-4o-mini")
60
+
61
+
62
+ # ── Models ────────────────────────────────────────────────────────────────
63
+
64
class EntityInput(BaseModel):
    """Request body for POST /api/session."""

    # Free-form text of the entity to be evaluated.
    entity_text: str
66
+
67
+
68
class CohortConfig(BaseModel):
    """Request body for POST /api/cohort/generate."""

    # Audience context handed to the persona generator for every segment.
    description: str
    # One entry per segment: [{"label": "...", "count": N}, ...]
    segments: list[dict]
    # Number of worker threads used to generate segments concurrently.
    parallel: int = 3
72
+
73
+
74
class EvalConfig(BaseModel):
    """Evaluation settings keyed by session.

    NOTE(review): no route in this module takes this model; the streaming
    evaluation endpoint reads its settings from the URL instead.
    """

    session_id: str
    # Number of concurrent evaluation workers.
    parallel: int = 5
77
+
78
+
79
class CounterfactualConfig(BaseModel):
    """Counterfactual-probe settings keyed by session.

    NOTE(review): no route in this module takes this model; the streaming
    counterfactual endpoint reads its settings from the URL instead.
    """

    session_id: str
    # One entry per change: [{"id": "...", "label": "...", "description": "..."}, ...]
    changes: list[dict]
    # Inclusive score window selecting which evaluators get re-probed.
    min_score: int = 4
    max_score: int = 7
    # Number of concurrent probe workers.
    parallel: int = 5
85
+
86
+
87
+ # ── Routes ────────────────────────────────────────────────────────────────
88
+
89
@app.get("/")
async def index():
    """Serve the single-page frontend from the static directory."""
    static_dir = Path(__file__).parent / "static"
    return FileResponse(static_dir / "index.html")
92
+
93
+
94
@app.get("/api/config")
async def get_config():
    """Report the active model, whether an API key is set, and the base URL."""
    model_name = get_model()
    key_present = bool(os.getenv("LLM_API_KEY"))
    endpoint = os.getenv("LLM_BASE_URL", "")
    return {"model": model_name, "has_api_key": key_present, "base_url": endpoint}
102
+
103
+
104
@app.post("/api/session")
async def create_session(entity: EntityInput):
    """Register a fresh in-memory session holding the submitted entity text."""
    session_id = uuid.uuid4().hex[:12]
    record = {"id": session_id, "entity_text": entity.entity_text}
    # Pipeline stages fill these in later; None means "not run yet".
    record.update(cohort=None, eval_results=None, gradient=None)
    record["created"] = datetime.now().isoformat()
    sessions[session_id] = record
    return {"session_id": session_id}
117
+
118
+
119
@app.get("/api/session/{sid}")
async def get_session(sid: str):
    """Summarise which pipeline stages a session has completed.

    Raises:
        HTTPException: 404 when ``sid`` is unknown.
    """
    stored = sessions.get(sid)
    if stored is None:
        raise HTTPException(404, "Session not found")
    cohort = stored["cohort"]
    summary = {"id": stored["id"]}
    summary["has_cohort"] = cohort is not None
    # A missing or empty cohort both count as size 0.
    summary["cohort_size"] = len(cohort) if cohort else 0
    summary["has_eval"] = stored["eval_results"] is not None
    summary["has_gradient"] = stored["gradient"] is not None
    return summary
131
+
132
+
133
@app.post("/api/cohort/generate")
async def generate_cohort_endpoint(config: CohortConfig):
    """Generate an LLM cohort and store it in a newly created session.

    Each segment is generated on a worker thread; personas are concatenated in
    segment order and numbered with a sequential ``user_id``.

    Returns:
        dict with ``session_id``, ``cohort_size`` and the full ``cohort``.

    Raises:
        HTTPException: 400 when a segment lacks ``label`` or ``count``;
            502 when persona generation fails for any segment.
    """
    for seg in config.segments:
        if "label" not in seg or "count" not in seg:
            raise HTTPException(400, "Each segment needs 'label' and 'count'")

    sid = uuid.uuid4().hex[:12]
    client = get_client()
    model = get_model()

    # ThreadPoolExecutor rejects max_workers <= 0, and an unbounded value lets
    # one request spawn arbitrarily many threads, so clamp the client value.
    workers = max(1, min(config.parallel, 32))
    loop = asyncio.get_running_loop()

    with concurrent.futures.ThreadPoolExecutor(max_workers=workers) as pool:
        tasks = [
            loop.run_in_executor(
                pool, generate_segment, client, model,
                seg["label"], seg["count"], config.description,
            )
            for seg in config.segments
        ]
        # Awaiting (rather than blocking on as_completed) keeps the event loop
        # free for other requests; return_exceptions lets every worker finish
        # before the pool is torn down.
        outcomes = await asyncio.gather(*tasks, return_exceptions=True)

    all_personas = []
    for outcome in outcomes:
        if isinstance(outcome, BaseException):
            raise HTTPException(502, f"Cohort generation failed: {outcome}") from outcome
        all_personas.extend(outcome)

    for i, p in enumerate(all_personas):
        p["user_id"] = i

    # NOTE(review): this creates a fresh session whose entity_text is the
    # audience description, not the entity text posted to /api/session.
    # Confirm how the frontend ties the cohort back to the entity session.
    sessions[sid] = {
        "id": sid,
        "entity_text": config.description,
        "cohort": all_personas,
        "eval_results": None,
        "gradient": None,
        "created": datetime.now().isoformat(),
    }

    return {"session_id": sid, "cohort_size": len(all_personas), "cohort": all_personas}
165
+
166
+
167
@app.post("/api/cohort/upload/{sid}")
async def upload_cohort(sid: str, cohort: list[dict]):
    """Replace a session's cohort with a caller-supplied list of evaluators.

    Raises:
        HTTPException: 404 when ``sid`` is unknown.
    """
    target = sessions.get(sid)
    if target is None:
        raise HTTPException(404, "Session not found")
    target["cohort"] = cohort
    return {"cohort_size": len(cohort)}
173
+
174
+
175
+ # ── SSE streaming endpoints ──────────────────────────────────────────────
176
+
177
@app.get("/api/evaluate/stream/{sid}")
async def evaluate_stream(sid: str, parallel: int = 5):
    """Run evaluation with Server-Sent Events for real-time progress.

    Emits a ``start`` event, one ``progress`` event per finished evaluator and
    a final ``complete`` event carrying the summary, analysis and raw results.
    Results are also stored on the session under ``eval_results``.

    Raises:
        HTTPException: 404 for an unknown session; 400 when the session has
            no cohort yet.
    """
    if sid not in sessions:
        raise HTTPException(404, "Session not found")
    session = sessions[sid]
    if not session["cohort"]:
        raise HTTPException(400, "No cohort — generate or upload one first")

    # ThreadPoolExecutor rejects max_workers <= 0; clamp the query parameter.
    workers = max(1, min(parallel, 32))

    async def event_generator():
        client = get_client()
        model = get_model()
        cohort = session["cohort"]
        entity_text = session["entity_text"]
        total = len(cohort)

        yield {"event": "start", "data": json.dumps({"total": total, "model": model})}

        def run_one(idx, evaluator):
            # Boundary catch: one failing evaluator must not abort the stream.
            # The fallback mirrors the "error" / "_evaluator" keys read below.
            try:
                return idx, evaluate_one(client, model, evaluator, entity_text)
            except Exception as exc:
                return idx, {"error": str(exc), "_evaluator": evaluator}

        results = [None] * total
        done = 0
        t0 = time.monotonic()
        loop = asyncio.get_running_loop()
        pool = concurrent.futures.ThreadPoolExecutor(max_workers=workers)
        try:
            pending = [
                loop.run_in_executor(pool, run_one, i, ev)
                for i, ev in enumerate(cohort)
            ]
            # asyncio.as_completed yields awaitables, so the event loop stays
            # responsive while waiting for worker threads.
            for finished in asyncio.as_completed(pending):
                idx, result = await finished
                results[idx] = result
                done += 1

                ev = result.get("_evaluator", {})
                progress = {
                    "done": done,
                    "total": total,
                    "name": ev.get("name", "?"),
                    "score": result.get("score"),
                    "action": result.get("action"),
                    "error": result.get("error"),
                }
                yield {"event": "progress", "data": json.dumps(progress)}
        finally:
            # On client disconnect, drop queued work instead of blocking on it.
            pool.shutdown(wait=False, cancel_futures=True)

        elapsed = time.monotonic() - t0
        session["eval_results"] = results

        analysis = analyze_eval(results)
        valid = [r for r in results if "score" in r]
        scores = [r["score"] for r in valid]
        avg = sum(scores) / len(scores) if scores else 0
        actions = [r.get("action") for r in valid]

        summary = {
            "elapsed": round(elapsed, 1),
            "total": len(valid),
            "avg_score": round(avg, 1),
            "positive": actions.count("positive"),
            "neutral": actions.count("neutral"),
            "negative": actions.count("negative"),
            "analysis": analysis,
            "results": results,
        }
        yield {"event": "complete", "data": json.dumps(summary)}

    return EventSourceResponse(event_generator())
244
+
245
+
246
@app.get("/api/counterfactual/stream/{sid}")
async def counterfactual_stream(
    sid: str, changes_json: str, min_score: int = 4,
    max_score: int = 7, parallel: int = 5
):
    """Run counterfactual probes with SSE progress.

    Evaluators whose score lies in ``[min_score, max_score]`` are re-probed
    against every change in ``changes_json`` (a JSON array of change objects).
    Emits ``start``, per-evaluator ``progress`` and a final ``complete`` event;
    the gradient text is also stored on the session under ``gradient``.

    Raises:
        HTTPException: 404 for an unknown session; 400 when evaluation has not
            run yet or ``changes_json`` is not a JSON array.
    """
    if sid not in sessions:
        raise HTTPException(404, "Session not found")
    session = sessions[sid]
    if not session["eval_results"]:
        raise HTTPException(400, "Run evaluation first")

    # Malformed client input is a 400, not an unhandled 500.
    try:
        all_changes = json.loads(changes_json)
    except json.JSONDecodeError as exc:
        raise HTTPException(400, f"changes_json is not valid JSON: {exc}") from exc
    if not isinstance(all_changes, list):
        raise HTTPException(400, "changes_json must be a JSON array")

    # ThreadPoolExecutor rejects max_workers <= 0; clamp the query parameter.
    workers = max(1, min(parallel, 32))

    async def event_generator():
        client = get_client()
        model = get_model()
        cohort = session["cohort"]
        eval_results = session["eval_results"]
        # Uploaded cohorts are arbitrary dicts; skip entries without a name.
        cohort_map = {p["name"]: p for p in cohort if "name" in p}

        movable = [r for r in eval_results
                   if "score" in r and min_score <= r["score"] <= max_score]

        total = len(movable)
        yield {"event": "start", "data": json.dumps({
            "total": total, "changes": len(all_changes), "model": model
        })}

        if total == 0:
            yield {"event": "complete", "data": json.dumps({
                "error": "No evaluators in movable middle",
                "gradient": "",
                "results": [],
            })}
            return

        def run_one(idx, eval_result):
            # Boundary catch: one failing probe must not abort the stream.
            # NOTE(review): the fallback shape follows the keys read below;
            # confirm analyze_gradient tolerates such error entries.
            try:
                return idx, probe_one(client, model, eval_result, cohort_map, all_changes)
            except Exception as exc:
                return idx, {
                    "error": str(exc),
                    "_evaluator": eval_result.get("_evaluator", {}),
                    "original_score": eval_result.get("score"),
                    "counterfactuals": [],
                }

        results = [None] * total
        done = 0
        t0 = time.monotonic()
        loop = asyncio.get_running_loop()
        pool = concurrent.futures.ThreadPoolExecutor(max_workers=workers)
        try:
            pending = [
                loop.run_in_executor(pool, run_one, i, r)
                for i, r in enumerate(movable)
            ]
            # Await completions so the event loop is not blocked between events.
            for finished in asyncio.as_completed(pending):
                idx, result = await finished
                results[idx] = result
                done += 1

                ev = result.get("_evaluator", {})
                cfs = result.get("counterfactuals", [])
                top = max(cfs, key=lambda c: c.get("delta", 0)) if cfs else {}
                progress = {
                    "done": done,
                    "total": total,
                    "name": ev.get("name", "?"),
                    "original_score": result.get("original_score"),
                    "best_delta": top.get("delta", 0),
                    "best_change": top.get("change_id", "?"),
                    "error": result.get("error"),
                }
                yield {"event": "progress", "data": json.dumps(progress)}
        finally:
            # On client disconnect, drop queued work instead of blocking on it.
            pool.shutdown(wait=False, cancel_futures=True)

        elapsed = time.monotonic() - t0
        gradient_text = analyze_gradient(results, all_changes)
        session["gradient"] = gradient_text

        yield {"event": "complete", "data": json.dumps({
            "elapsed": round(elapsed, 1),
            "gradient": gradient_text,
            "results": results,
        })}

    return EventSourceResponse(event_generator())
323
+
324
+
325
@app.get("/api/results/{sid}")
async def get_results(sid: str):
    """Return the stored evaluation results, gradient and cohort of a session.

    Raises:
        HTTPException: 404 when ``sid`` is unknown.
    """
    try:
        stored = sessions[sid]
    except KeyError:
        raise HTTPException(404, "Session not found") from None
    return {field: stored[field] for field in ("eval_results", "gradient", "cohort")}
336
+
337
+
338
if __name__ == "__main__":
    import uvicorn

    # Plain strings: these banners have no placeholders, so no f-prefix.
    print("\n SGO Web Interface")
    print(" http://localhost:8000\n")
    # NOTE(review): 0.0.0.0 listens on every network interface although the
    # banner advertises localhost, and the API has no authentication.
    # Consider binding to 127.0.0.1 unless remote access is intended.
    uvicorn.run(app, host="0.0.0.0", port=8000)
web/static/index.html ADDED
@@ -0,0 +1,879 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>SGO — Semantic Gradient Optimization</title>
7
+ <style>
8
+ :root {
9
+ --bg: #0a0a0f;
10
+ --surface: #12121a;
11
+ --surface2: #1a1a26;
12
+ --border: #2a2a3a;
13
+ --text: #e0e0e8;
14
+ --text2: #8888a0;
15
+ --accent: #6c5ce7;
16
+ --accent2: #a29bfe;
17
+ --green: #00b894;
18
+ --yellow: #fdcb6e;
19
+ --red: #e17055;
20
+ --orange: #e67e22;
21
+ --radius: 12px;
22
+ }
23
+ * { margin: 0; padding: 0; box-sizing: border-box; }
24
+ body {
25
+ font-family: 'Inter', -apple-system, BlinkMacSystemFont, sans-serif;
26
+ background: var(--bg);
27
+ color: var(--text);
28
+ line-height: 1.6;
29
+ min-height: 100vh;
30
+ }
31
+ .container { max-width: 900px; margin: 0 auto; padding: 24px 20px; }
32
+
33
+ /* Header */
34
+ header {
35
+ text-align: center;
36
+ padding: 48px 0 32px;
37
+ border-bottom: 1px solid var(--border);
38
+ margin-bottom: 32px;
39
+ }
40
+ header h1 {
41
+ font-size: 2rem;
42
+ font-weight: 700;
43
+ letter-spacing: -0.03em;
44
+ background: linear-gradient(135deg, var(--accent2), var(--accent));
45
+ -webkit-background-clip: text;
46
+ -webkit-text-fill-color: transparent;
47
+ }
48
+ header p { color: var(--text2); margin-top: 8px; font-size: 1rem; }
49
+ .config-badge {
50
+ display: inline-block;
51
+ margin-top: 12px;
52
+ padding: 4px 12px;
53
+ background: var(--surface2);
54
+ border: 1px solid var(--border);
55
+ border-radius: 20px;
56
+ font-size: 0.8rem;
57
+ color: var(--text2);
58
+ }
59
+ .config-badge.ok { border-color: var(--green); color: var(--green); }
60
+ .config-badge.warn { border-color: var(--yellow); color: var(--yellow); }
61
+
62
+ /* Steps */
63
+ .step {
64
+ background: var(--surface);
65
+ border: 1px solid var(--border);
66
+ border-radius: var(--radius);
67
+ padding: 28px;
68
+ margin-bottom: 20px;
69
+ transition: border-color 0.2s;
70
+ }
71
+ .step.active { border-color: var(--accent); }
72
+ .step.done { border-color: var(--green); }
73
+ .step-header {
74
+ display: flex;
75
+ align-items: center;
76
+ gap: 12px;
77
+ margin-bottom: 16px;
78
+ }
79
+ .step-num {
80
+ width: 32px; height: 32px;
81
+ border-radius: 50%;
82
+ background: var(--surface2);
83
+ border: 2px solid var(--border);
84
+ display: flex;
85
+ align-items: center;
86
+ justify-content: center;
87
+ font-size: 0.85rem;
88
+ font-weight: 600;
89
+ flex-shrink: 0;
90
+ }
91
+ .step.active .step-num { border-color: var(--accent); color: var(--accent); }
92
+ .step.done .step-num { border-color: var(--green); background: var(--green); color: var(--bg); }
93
+ .step-title { font-size: 1.1rem; font-weight: 600; }
94
+ .step-desc { color: var(--text2); font-size: 0.9rem; margin-bottom: 16px; }
95
+
96
+ /* Forms */
97
+ textarea, input, select {
98
+ width: 100%;
99
+ background: var(--surface2);
100
+ border: 1px solid var(--border);
101
+ border-radius: 8px;
102
+ color: var(--text);
103
+ padding: 12px;
104
+ font-family: inherit;
105
+ font-size: 0.9rem;
106
+ resize: vertical;
107
+ transition: border-color 0.2s;
108
+ }
109
+ textarea:focus, input:focus, select:focus {
110
+ outline: none;
111
+ border-color: var(--accent);
112
+ }
113
+ textarea { min-height: 160px; }
114
+ label {
115
+ display: block;
116
+ font-size: 0.85rem;
117
+ font-weight: 500;
118
+ margin-bottom: 6px;
119
+ color: var(--text2);
120
+ }
121
+ .field { margin-bottom: 16px; }
122
+
123
+ /* Buttons */
124
+ button {
125
+ background: var(--accent);
126
+ color: white;
127
+ border: none;
128
+ border-radius: 8px;
129
+ padding: 10px 24px;
130
+ font-size: 0.9rem;
131
+ font-weight: 600;
132
+ cursor: pointer;
133
+ transition: opacity 0.2s, transform 0.1s;
134
+ }
135
+ button:hover { opacity: 0.9; }
136
+ button:active { transform: scale(0.98); }
137
+ button:disabled { opacity: 0.4; cursor: not-allowed; }
138
+ button.secondary {
139
+ background: var(--surface2);
140
+ border: 1px solid var(--border);
141
+ color: var(--text);
142
+ }
143
+ .btn-row { display: flex; gap: 10px; flex-wrap: wrap; }
144
+
145
+ /* Segments editor */
146
+ .segments-list { display: flex; flex-direction: column; gap: 8px; margin-bottom: 12px; }
147
+ .seg-row {
148
+ display: flex; gap: 8px; align-items: center;
149
+ }
150
+ .seg-row input:first-child { flex: 3; }
151
+ .seg-row input:nth-child(2) { flex: 1; max-width: 80px; text-align: center; }
152
+ .seg-row button { padding: 8px 12px; background: var(--surface2); border: 1px solid var(--border); }
153
+
154
+ /* Progress */
155
+ .progress-bar {
156
+ width: 100%;
157
+ height: 6px;
158
+ background: var(--surface2);
159
+ border-radius: 3px;
160
+ overflow: hidden;
161
+ margin: 12px 0;
162
+ }
163
+ .progress-fill {
164
+ height: 100%;
165
+ background: linear-gradient(90deg, var(--accent), var(--accent2));
166
+ border-radius: 3px;
167
+ transition: width 0.3s;
168
+ width: 0%;
169
+ }
170
+ .progress-text {
171
+ font-size: 0.85rem;
172
+ color: var(--text2);
173
+ margin-bottom: 8px;
174
+ }
175
+ .eval-log {
176
+ max-height: 200px;
177
+ overflow-y: auto;
178
+ font-family: 'JetBrains Mono', 'Fira Code', monospace;
179
+ font-size: 0.8rem;
180
+ background: var(--bg);
181
+ border-radius: 8px;
182
+ padding: 12px;
183
+ margin-top: 12px;
184
+ }
185
+ .eval-log div { padding: 2px 0; }
186
+ .eval-log .pos { color: var(--green); }
187
+ .eval-log .neu { color: var(--yellow); }
188
+ .eval-log .neg { color: var(--red); }
189
+ .eval-log .err { color: var(--red); opacity: 0.7; }
190
+
191
+ /* Results */
192
+ .score-big {
193
+ font-size: 3rem;
194
+ font-weight: 700;
195
+ text-align: center;
196
+ padding: 24px;
197
+ }
198
+ .score-big span { font-size: 1.2rem; color: var(--text2); font-weight: 400; }
199
+ .stats-row {
200
+ display: flex;
201
+ justify-content: center;
202
+ gap: 32px;
203
+ margin: 16px 0;
204
+ }
205
+ .stat {
206
+ text-align: center;
207
+ }
208
+ .stat-val { font-size: 1.5rem; font-weight: 600; }
209
+ .stat-label { font-size: 0.8rem; color: var(--text2); }
210
+ .stat.pos .stat-val { color: var(--green); }
211
+ .stat.neu .stat-val { color: var(--yellow); }
212
+ .stat.neg .stat-val { color: var(--red); }
213
+
214
+ .results-details {
215
+ background: var(--bg);
216
+ border-radius: 8px;
217
+ padding: 16px;
218
+ margin-top: 16px;
219
+ white-space: pre-wrap;
220
+ font-family: 'JetBrains Mono', 'Fira Code', monospace;
221
+ font-size: 0.8rem;
222
+ max-height: 400px;
223
+ overflow-y: auto;
224
+ line-height: 1.5;
225
+ }
226
+
227
+ /* Changes editor */
228
+ .change-card {
229
+ background: var(--surface2);
230
+ border: 1px solid var(--border);
231
+ border-radius: 8px;
232
+ padding: 16px;
233
+ margin-bottom: 10px;
234
+ }
235
+ .change-card .field { margin-bottom: 10px; }
236
+ .change-card input, .change-card textarea { background: var(--bg); }
237
+ .change-card textarea { min-height: 60px; }
238
+ .change-header { display: flex; justify-content: space-between; align-items: center; margin-bottom: 10px; }
239
+ .change-header span { font-weight: 600; font-size: 0.9rem; }
240
+
241
+ /* Gradient */
242
+ .gradient-table {
243
+ width: 100%;
244
+ border-collapse: collapse;
245
+ margin: 16px 0;
246
+ font-size: 0.85rem;
247
+ }
248
+ .gradient-table th {
249
+ text-align: left;
250
+ padding: 10px 12px;
251
+ border-bottom: 2px solid var(--border);
252
+ color: var(--text2);
253
+ font-weight: 500;
254
+ }
255
+ .gradient-table td {
256
+ padding: 10px 12px;
257
+ border-bottom: 1px solid var(--border);
258
+ }
259
+ .gradient-table tr:hover { background: var(--surface2); }
260
+ .delta-pos { color: var(--green); font-weight: 600; }
261
+ .delta-neg { color: var(--red); font-weight: 600; }
262
+ .delta-bar {
263
+ height: 8px;
264
+ border-radius: 4px;
265
+ display: inline-block;
266
+ vertical-align: middle;
267
+ }
268
+
269
+ /* Templates */
270
+ .template-chips {
271
+ display: flex;
272
+ gap: 8px;
273
+ margin-bottom: 12px;
274
+ flex-wrap: wrap;
275
+ }
276
+ .template-chip {
277
+ padding: 6px 14px;
278
+ background: var(--surface2);
279
+ border: 1px solid var(--border);
280
+ border-radius: 20px;
281
+ font-size: 0.8rem;
282
+ cursor: pointer;
283
+ color: var(--text2);
284
+ transition: all 0.2s;
285
+ }
286
+ .template-chip:hover { border-color: var(--accent); color: var(--text); }
287
+
288
+ /* Responsive */
289
+ @media (max-width: 600px) {
290
+ .container { padding: 16px 12px; }
291
+ .step { padding: 20px; }
292
+ .stats-row { gap: 20px; }
293
+ header h1 { font-size: 1.5rem; }
294
+ }
295
+
296
+ /* Utility */
297
+ .hidden { display: none !important; }
298
+ .mt-12 { margin-top: 12px; }
299
+ .mt-16 { margin-top: 16px; }
300
+ .mb-8 { margin-bottom: 8px; }
301
+ .text-center { text-align: center; }
302
+ </style>
303
+ </head>
304
+ <body>
305
+
306
+ <div class="container">
307
+ <header>
308
+ <h1>Semantic Gradient Optimization</h1>
309
+ <p>Evaluate anything against a synthetic panel. Find what to change first.</p>
310
+ <div id="configBadge" class="config-badge">checking...</div>
311
+ </header>
312
+
313
+ <!-- STEP 1: Entity -->
314
+ <div class="step active" id="step1">
315
+ <div class="step-header">
316
+ <div class="step-num">1</div>
317
+ <div class="step-title">Describe what you're optimizing</div>
318
+ </div>
319
+ <p class="step-desc">Paste your landing page copy, resume, pitch deck text, policy document, or any other entity.</p>
320
+
321
+ <div class="template-chips">
322
+ <span class="template-chip" onclick="loadTemplate('product')">Product</span>
323
+ <span class="template-chip" onclick="loadTemplate('resume')">Resume</span>
324
+ <span class="template-chip" onclick="loadTemplate('pitch')">Pitch</span>
325
+ </div>
326
+
327
+ <div class="field">
328
+ <textarea id="entityText" placeholder="Paste your entity here... (landing page, resume, pitch, policy, etc.)"></textarea>
329
+ </div>
330
+ <button onclick="saveEntity()">Continue</button>
331
+ </div>
332
+
333
+ <!-- STEP 2: Cohort -->
334
+ <div class="step hidden" id="step2">
335
+ <div class="step-header">
336
+ <div class="step-num">2</div>
337
+ <div class="step-title">Build your evaluator panel</div>
338
+ </div>
339
+ <p class="step-desc">Define audience segments. The LLM generates realistic personas for each.</p>
340
+
341
+ <div class="field">
342
+ <label>Audience context</label>
343
+ <input type="text" id="cohortDesc" placeholder="e.g. 'Potential customers for a B2B SaaS analytics tool'">
344
+ </div>
345
+
346
+ <label>Segments</label>
347
+ <div id="segmentsList" class="segments-list"></div>
348
+ <div class="btn-row">
349
+ <button class="secondary" onclick="addSegment()">+ Add segment</button>
350
+ <button onclick="generateCohort()" id="genCohortBtn">Generate cohort</button>
351
+ </div>
352
+
353
+ <div id="cohortProgress" class="hidden mt-16">
354
+ <div class="progress-text" id="cohortProgressText">Generating personas...</div>
355
+ <div class="progress-bar"><div class="progress-fill" id="cohortProgressBar"></div></div>
356
+ </div>
357
+
358
+ <div id="cohortResult" class="hidden mt-16">
359
+ <div class="stat text-center">
360
+ <div class="stat-val" id="cohortCount">0</div>
361
+ <div class="stat-label">evaluators generated</div>
362
+ </div>
363
+ <button class="mt-12" onclick="goToStep(3)">Continue to evaluation</button>
364
+ </div>
365
+ </div>
366
+
367
+ <!-- STEP 3: Evaluate -->
368
+ <div class="step hidden" id="step3">
369
+ <div class="step-header">
370
+ <div class="step-num">3</div>
371
+ <div class="step-title">Run evaluation</div>
372
+ </div>
373
+ <p class="step-desc">Each evaluator scores your entity 1-10 with reasoning.</p>
374
+
375
+ <div class="btn-row">
376
+ <button onclick="runEval()" id="evalBtn">Evaluate</button>
377
+ <div style="flex:1"></div>
378
+ <label style="display:flex;align-items:center;gap:6px;margin:0">
379
+ <span style="font-size:0.8rem;color:var(--text2)">Parallel:</span>
380
+ <input type="number" id="evalParallel" value="5" min="1" max="20"
381
+ style="width:60px;padding:6px;text-align:center">
382
+ </label>
383
+ </div>
384
+
385
+ <div id="evalProgress" class="hidden mt-16">
386
+ <div class="progress-text" id="evalProgressText">Evaluating...</div>
387
+ <div class="progress-bar"><div class="progress-fill" id="evalProgressBar"></div></div>
388
+ <div class="eval-log" id="evalLog"></div>
389
+ </div>
390
+
391
+ <div id="evalResults" class="hidden mt-16">
392
+ <div class="score-big" id="avgScore">0<span>/10</span></div>
393
+ <div class="stats-row">
394
+ <div class="stat pos"><div class="stat-val" id="posCount">0</div><div class="stat-label">positive</div></div>
395
+ <div class="stat neu"><div class="stat-val" id="neuCount">0</div><div class="stat-label">neutral</div></div>
396
+ <div class="stat neg"><div class="stat-val" id="negCount">0</div><div class="stat-label">negative</div></div>
397
+ </div>
398
+ <details>
399
+ <summary style="cursor:pointer;color:var(--text2);font-size:0.9rem">Full analysis</summary>
400
+ <div class="results-details" id="evalAnalysis"></div>
401
+ </details>
402
+ <button class="mt-16" onclick="goToStep(4)">Continue to gradient</button>
403
+ </div>
404
+ </div>
405
+
406
+ <!-- STEP 4: Counterfactual -->
407
+ <div class="step hidden" id="step4">
408
+ <div class="step-header">
409
+ <div class="step-num">4</div>
410
+ <div class="step-title">Find the highest-impact changes</div>
411
+ </div>
412
+ <p class="step-desc">List hypothetical changes. Undecided evaluators (scores 4-7) re-evaluate each one.</p>
413
+
414
+ <div id="changesList"></div>
415
+ <div class="btn-row mb-8">
416
+ <button class="secondary" onclick="addChange()">+ Add change</button>
417
+ </div>
418
+ <div class="btn-row">
419
+ <button onclick="runCounterfactual()" id="cfBtn">Compute gradient</button>
420
+ <div style="flex:1"></div>
421
+ <label style="display:flex;align-items:center;gap:6px;margin:0">
422
+ <span style="font-size:0.8rem;color:var(--text2)">Score range:</span>
423
+ <input type="number" id="cfMin" value="4" min="1" max="10" style="width:50px;padding:6px;text-align:center">
424
+ <span style="color:var(--text2)">-</span>
425
+ <input type="number" id="cfMax" value="7" min="1" max="10" style="width:50px;padding:6px;text-align:center">
426
+ </label>
427
+ </div>
428
+
429
+ <div id="cfProgress" class="hidden mt-16">
430
+ <div class="progress-text" id="cfProgressText">Probing...</div>
431
+ <div class="progress-bar"><div class="progress-fill" id="cfProgressBar"></div></div>
432
+ <div class="eval-log" id="cfLog"></div>
433
+ </div>
434
+
435
+ <div id="cfResults" class="hidden mt-16">
436
+ <h3 style="margin-bottom:12px">Semantic Gradient</h3>
437
+ <table class="gradient-table" id="gradientTable">
438
+ <thead>
439
+ <tr><th>#</th><th>Change</th><th>Avg Impact</th><th>Range</th><th>Helps</th><th>Hurts</th></tr>
440
+ </thead>
441
+ <tbody></tbody>
442
+ </table>
443
+ <details class="mt-12">
444
+ <summary style="cursor:pointer;color:var(--text2);font-size:0.9rem">Full gradient output</summary>
445
+ <div class="results-details" id="gradientText"></div>
446
+ </details>
447
+ </div>
448
+ </div>
449
+ </div>
450
+
451
+ <script>
452
+ const TEMPLATES = {
453
+ product: `# [Product Name]
454
+
455
+ ## One-liner
456
+ <!-- What it does in one sentence -->
457
+
458
+ ## Key features
459
+ - Feature 1
460
+ - Feature 2
461
+ - Feature 3
462
+
463
+ ## Pricing
464
+ <!-- Tiers, free plan, usage-based, etc. -->
465
+
466
+ ## Trust signals
467
+ <!-- SOC2, customer count, funding, team size, etc. -->
468
+
469
+ ## Target user
470
+ <!-- Who is this for? -->
471
+
472
+ ## What's NOT included
473
+ <!-- Known limitations, missing features, roadmap items -->`,
474
+ resume: `# [Your Name]
475
+
476
+ ## Target role
477
+ <!-- What position you're applying for -->
478
+
479
+ ## Summary
480
+ <!-- 2-3 sentence professional summary -->
481
+
482
+ ## Experience
483
+ - **[Company]** — [Role] (dates)
484
+ Key achievements...
485
+
486
+ ## Education
487
+ - [Degree], [School], [Year]
488
+
489
+ ## Skills
490
+ - Skill 1, Skill 2, Skill 3`,
491
+ pitch: `# [Company Name]
492
+
493
+ ## Problem
494
+ <!-- What pain point do you solve? -->
495
+
496
+ ## Solution
497
+ <!-- Your product/approach in 2-3 sentences -->
498
+
499
+ ## Market
500
+ <!-- TAM/SAM/SOM or market description -->
501
+
502
+ ## Traction
503
+ <!-- Revenue, users, growth rate -->
504
+
505
+ ## Team
506
+ <!-- Key founders and their backgrounds -->
507
+
508
+ ## Ask
509
+ <!-- Funding amount and use of funds -->`
510
+ };
511
+
512
+ let sessionId = null;
513
+ let evalResultsData = null;
514
+
515
+ // ── Init ──
516
+
517
// Populate the config badge and seed the default segments and changes.
// A failed /api/config request only affects the badge: the defaults below
// are still added so the page remains usable.
async function init() {
  const badge = document.getElementById('configBadge');
  try {
    const resp = await fetch('/api/config');
    if (!resp.ok) throw new Error(`HTTP ${resp.status}`);
    const cfg = await resp.json();
    if (cfg.has_api_key) {
      badge.textContent = cfg.model;
      badge.className = 'config-badge ok';
    } else {
      badge.textContent = 'No API key — set LLM_API_KEY in .env';
      badge.className = 'config-badge warn';
    }
  } catch (err) {
    badge.textContent = 'Could not load config from server';
    badge.className = 'config-badge warn';
    console.error('Failed to load /api/config:', err);
  }

  // Default segments
  addSegment('Early adopter, tech-savvy', 8);
  addSegment('Mainstream user, non-technical', 8);
  addSegment('Budget-conscious comparison shopper', 8);
  addSegment('Enterprise decision-maker', 8);

  // Default changes
  addChange('Add free tier', 'Introduce a generous free plan that lets users try core features with no credit card required.');
  addChange('Add social proof', 'Display customer logos, case studies, and specific metrics (e.g., "Used by 5,000 teams") prominently on the page.');
  addChange('Lower price by 40%', 'Reduce all paid plan prices by 40% across the board.');
}
540
+
541
+ // ── Templates ──
542
+
543
/**
 * Fill the entity textarea with the template registered under `name`;
 * an unknown name clears the field.
 */
function loadTemplate(name) {
  const field = document.getElementById('entityText');
  const template = TEMPLATES[name];
  field.value = template ? template : '';
}
546
+
547
+ // ── Step navigation ──
548
+
549
/**
 * Update the four step panels (`#step1` .. `#step4`) for the wizard position
 * `n`: earlier steps are marked "done", step `n` is "active", and later
 * steps are hidden.
 */
function goToStep(n) {
  for (const step of [1, 2, 3, 4]) {
    const classes = document.getElementById(`step${step}`).classList;
    const isPast = step < n;
    const isCurrent = step === n;

    if (!isPast && !isCurrent) {
      // Not reached yet: hide and clear any state left from earlier navigation.
      classes.add('hidden');
      classes.remove('active', 'done');
      continue;
    }

    classes.remove('hidden', isPast ? 'active' : 'done');
    classes.add(isPast ? 'done' : 'active');
  }
}
564
+
565
+ // ── Step 1: Entity ──
566
+
567
/**
 * Step 1: send the entity text to `/api/session`, remember the returned
 * session id, pre-fill the cohort description, and advance to step 2.
 *
 * Network failures, non-OK responses, and responses without a `session_id`
 * are reported with an alert and leave the wizard on step 1.
 */
async function saveEntity() {
  const text = document.getElementById('entityText').value.trim();
  if (!text) return alert('Please enter your entity text.');

  try {
    const resp = await fetch('/api/session', {
      method: 'POST',
      headers: {'Content-Type': 'application/json'},
      body: JSON.stringify({entity_text: text}),
    });
    if (!resp.ok) throw new Error(`server responded with HTTP ${resp.status}`);
    const data = await resp.json();
    if (!data.session_id) throw new Error('response did not contain a session id');
    sessionId = data.session_id;
  } catch (e) {
    return alert('Error saving entity: ' + e.message);
  }

  // Pre-fill cohort description from the first line with more than two
  // visible characters, with any leading markdown heading marker stripped.
  const firstLine = text.split('\n').find(l => l.trim().length > 2) || '';
  const desc = document.getElementById('cohortDesc');
  if (!desc.value) desc.value = `People evaluating: ${firstLine.replace(/^#+\s*/, '').trim()}`;

  goToStep(2);
}
586
+
587
+ // ── Step 2: Cohort ──
588
+
589
/**
 * Append an editable segment row (description, evaluator count, remove
 * button) to `#segmentsList`.
 *
 * @param {string} label  Initial segment description.
 * @param {number} count  Initial number of evaluators for the segment.
 */
function addSegment(label = '', count = 8) {
  const row = document.createElement('div');
  row.className = 'seg-row';
  // Only static markup goes through innerHTML; the caller-supplied values are
  // set as attributes afterwards so quotes or angle brackets in `label`
  // cannot break out of the attribute or inject markup.
  row.innerHTML = `
    <input type="text" placeholder="Segment description">
    <input type="number" min="1" max="20">
    <button onclick="this.parentElement.remove()" title="Remove">&times;</button>
  `;
  const [labelInput, countInput] = row.querySelectorAll('input');
  labelInput.setAttribute('value', label);
  countInput.setAttribute('value', count);

  document.getElementById('segmentsList').appendChild(row);
}
600
+
601
/**
 * Step 2: collect the segment rows, ask the backend to generate a cohort,
 * and attach that cohort to a session holding the current entity text.
 *
 * Every request is checked for an OK HTTP status; any failure is surfaced
 * through an alert and the progress text instead of being treated as a
 * successful (but empty) response.
 */
async function generateCohort() {
  const desc = document.getElementById('cohortDesc').value.trim();
  if (!desc) return alert('Please add audience context.');

  const segments = [];
  document.querySelectorAll('#segmentsList .seg-row').forEach(row => {
    const [labelInput, countInput] = row.querySelectorAll('input');
    const label = labelInput.value.trim();
    // Blank or non-numeric counts fall back to 5.
    const count = parseInt(countInput.value, 10) || 5;
    if (label) segments.push({label, count});
  });

  if (segments.length === 0) return alert('Add at least one segment.');

  // POST a JSON payload and reject on any non-2xx status.
  const postJson = async (url, payload) => {
    const resp = await fetch(url, {
      method: 'POST',
      headers: {'Content-Type': 'application/json'},
      body: JSON.stringify(payload),
    });
    if (!resp.ok) throw new Error(`${url} failed with HTTP ${resp.status}`);
    return resp;
  };

  const btn = document.getElementById('genCohortBtn');
  btn.disabled = true;
  const progress = document.getElementById('cohortProgress');
  progress.classList.remove('hidden');
  document.getElementById('cohortProgressBar').style.width = '30%';
  document.getElementById('cohortProgressText').textContent = 'Generating personas (this may take 30-60s)...';

  try {
    const generated = await postJson('/api/cohort/generate', {description: desc, segments, parallel: 3});
    const data = await generated.json();
    sessionId = data.session_id;

    // If entity text is present, request a session for it and upload the
    // generated cohort to the session id that comes back.
    const entityText = document.getElementById('entityText').value.trim();
    if (entityText) {
      const created = await postJson('/api/session', {entity_text: entityText});
      sessionId = (await created.json()).session_id;
      await postJson(`/api/cohort/upload/${sessionId}`, data.cohort);
    }

    document.getElementById('cohortProgressBar').style.width = '100%';
    document.getElementById('cohortCount').textContent = data.cohort_size;
    document.getElementById('cohortResult').classList.remove('hidden');
    document.getElementById('cohortProgressText').textContent = 'Done!';
  } catch (e) {
    alert('Error generating cohort: ' + e.message);
    document.getElementById('cohortProgressText').textContent = 'Failed';
  } finally {
    btn.disabled = false;
  }
}
659
+
660
+ // ── Step 3: Evaluate ──
661
+
662
/**
 * Step 3: stream the evaluation of the current session over Server-Sent
 * Events, updating the progress bar and log per evaluator and showing the
 * summary when the "complete" event arrives.
 *
 * Log lines and the score are built with DOM nodes and `textContent`, so
 * evaluator names coming from the server are never parsed as HTML.
 */
function runEval() {
  if (!sessionId) return alert('No session.');
  const parallel = parseInt(document.getElementById('evalParallel').value, 10) || 5;
  const btn = document.getElementById('evalBtn');
  btn.disabled = true;

  document.getElementById('evalProgress').classList.remove('hidden');
  document.getElementById('evalResults').classList.add('hidden');
  document.getElementById('evalLog').textContent = '';

  const es = new EventSource(`/api/evaluate/stream/${encodeURIComponent(sessionId)}?parallel=${parallel}`);

  es.addEventListener('start', (e) => {
    const d = JSON.parse(e.data);
    document.getElementById('evalProgressText').textContent =
      `Evaluating ${d.total} evaluators (${d.model})...`;
  });

  es.addEventListener('progress', (e) => {
    const d = JSON.parse(e.data);
    // Guard against total === 0, which would otherwise yield "NaN%".
    const pct = d.total ? Math.round(d.done / d.total * 100) : 0;
    document.getElementById('evalProgressBar').style.width = pct + '%';
    document.getElementById('evalProgressText').textContent =
      `${d.done}/${d.total} evaluated`;

    const log = document.getElementById('evalLog');
    const cls = d.error ? 'err' : d.action === 'positive' ? 'pos' : d.action === 'negative' ? 'neg' : 'neu';
    const icon = d.error ? 'ERR' : d.action === 'positive' ? '+' : d.action === 'negative' ? '-' : '~';
    const score = d.score != null ? `${d.score}/10` : '';

    // Append one node per event rather than re-parsing the whole log.
    const entry = document.createElement('div');
    entry.className = cls;
    entry.textContent = `[${d.done}/${d.total}] ${d.name}: ${icon} ${score}`;
    log.appendChild(entry);
    log.scrollTop = log.scrollHeight;
  });

  es.addEventListener('complete', (e) => {
    es.close();
    const d = JSON.parse(e.data);
    evalResultsData = d.results;

    document.getElementById('evalProgressBar').style.width = '100%';
    document.getElementById('evalProgressText').textContent =
      `Done in ${d.elapsed}s`;

    const avg = document.getElementById('avgScore');
    avg.textContent = String(d.avg_score);
    const unit = document.createElement('span');
    unit.textContent = '/10';
    avg.appendChild(unit);

    document.getElementById('posCount').textContent = d.positive;
    document.getElementById('neuCount').textContent = d.neutral;
    document.getElementById('negCount').textContent = d.negative;
    document.getElementById('evalAnalysis').textContent = d.analysis;
    document.getElementById('evalResults').classList.remove('hidden');
    btn.disabled = false;
  });

  es.onerror = () => {
    // Close explicitly so the browser does not auto-reconnect and restart the run.
    es.close();
    document.getElementById('evalProgressText').textContent = 'Connection lost';
    btn.disabled = false;
  };
}
719
+
720
+ // ── Step 4: Counterfactual ──
721
+
722
// Monotonic counter used to number change cards and derive their ids; it is
// never decremented, so ids stay unique after cards are removed.
let changeCounter = 0;

/**
 * Append a counterfactual change card (label + description + remove button)
 * to `#changesList`.
 *
 * @param {string} label        Initial short label for the change.
 * @param {string} description  Initial detailed description of the change.
 */
function addChange(label = '', description = '') {
  changeCounter++;
  const card = document.createElement('div');
  card.className = 'change-card';
  card.dataset.id = `change_${changeCounter}`;
  // Only static markup (plus the numeric counter) goes through innerHTML.
  // Caller-supplied text is assigned afterwards so it cannot break out of
  // the attribute/textarea or inject markup.
  card.innerHTML = `
    <div class="change-header">
      <span>Change ${changeCounter}</span>
      <button class="secondary" style="padding:4px 10px;font-size:0.8rem" onclick="this.closest('.change-card').remove()">Remove</button>
    </div>
    <div class="field">
      <label>Label</label>
      <input type="text" class="change-label" placeholder="e.g. Add free tier">
    </div>
    <div class="field">
      <label>Description</label>
      <textarea class="change-desc" placeholder="What specifically changes? Be detailed."></textarea>
    </div>
  `;
  card.querySelector('.change-label').setAttribute('value', label);
  card.querySelector('.change-desc').textContent = description;

  document.getElementById('changesList').appendChild(card);
}
746
+
747
/**
 * Read every `.change-card` on the page and return the filled-in ones as
 * `{id, label, description}` objects. Cards with an empty label or an empty
 * description (after trimming) are left out.
 */
function getChanges() {
  return Array.from(document.querySelectorAll('.change-card'))
    .map((card) => ({
      id: card.dataset.id,
      label: card.querySelector('.change-label').value.trim(),
      description: card.querySelector('.change-desc').value.trim(),
    }))
    .filter((change) => change.label && change.description);
}
759
+
760
/**
 * Step 4: stream counterfactual probing of the current session over
 * Server-Sent Events for the changes entered on the page, then render the
 * gradient table and text when the "complete" event arrives.
 *
 * Score bounds fall back to 4 / 7 only when the field is blank or not a
 * number, so an explicit 0 is respected. Log lines are written with
 * `textContent`, so server-supplied names and labels are never parsed as HTML.
 */
function runCounterfactual() {
  const changes = getChanges();
  if (changes.length === 0) return alert('Add at least one change.');
  if (!sessionId) return alert('No session.');

  // Parse an integer field, using `fallback` only when the value is not a number.
  const readInt = (id, fallback) => {
    const parsed = parseInt(document.getElementById(id).value, 10);
    return Number.isNaN(parsed) ? fallback : parsed;
  };
  const minScore = readInt('cfMin', 4);
  const maxScore = readInt('cfMax', 7);
  const btn = document.getElementById('cfBtn');
  btn.disabled = true;

  document.getElementById('cfProgress').classList.remove('hidden');
  document.getElementById('cfResults').classList.add('hidden');
  document.getElementById('cfLog').textContent = '';

  const params = new URLSearchParams({
    changes_json: JSON.stringify(changes),
    min_score: minScore,
    max_score: maxScore,
    parallel: 5,
  });

  const es = new EventSource(`/api/counterfactual/stream/${encodeURIComponent(sessionId)}?${params}`);

  es.addEventListener('start', (e) => {
    const d = JSON.parse(e.data);
    document.getElementById('cfProgressText').textContent =
      `Probing ${d.total} evaluators across ${d.changes} changes...`;
  });

  es.addEventListener('progress', (e) => {
    const d = JSON.parse(e.data);
    // Guard against total === 0, which would otherwise yield "NaN%".
    const pct = d.total ? Math.round(d.done / d.total * 100) : 0;
    document.getElementById('cfProgressBar').style.width = pct + '%';
    document.getElementById('cfProgressText').textContent = `${d.done}/${d.total} probed`;

    const log = document.getElementById('cfLog');
    const delta = d.best_delta > 0 ? `+${d.best_delta}` : d.best_delta;
    // Append one node per event rather than re-parsing the whole log.
    const entry = document.createElement('div');
    entry.textContent = `${d.name} (orig ${d.original_score}): best ${delta} from "${d.best_change}"`;
    log.appendChild(entry);
    log.scrollTop = log.scrollHeight;
  });

  es.addEventListener('complete', (e) => {
    es.close();
    const d = JSON.parse(e.data);

    document.getElementById('cfProgressBar').style.width = '100%';
    document.getElementById('cfProgressText').textContent = d.elapsed ? `Done in ${d.elapsed}s` : 'Done';

    if (d.error) {
      // The server reports failures inside the "complete" payload.
      document.getElementById('cfProgressText').textContent = d.error;
      btn.disabled = false;
      return;
    }

    // Parse gradient into table
    renderGradientTable(d.results, changes);
    document.getElementById('gradientText').textContent = d.gradient;
    document.getElementById('cfResults').classList.remove('hidden');
    btn.disabled = false;
  });

  es.onerror = () => {
    // Close explicitly so the browser does not auto-reconnect and restart the run.
    es.close();
    document.getElementById('cfProgressText').textContent = 'Connection lost';
    btn.disabled = false;
  };
}
827
+
828
/**
 * Aggregate per-evaluator counterfactual deltas by change and render them,
 * ranked by average delta (highest first), into `#gradientTable tbody`.
 *
 * @param {Array<object>} results  Per-evaluator entries; those with a
 *   `counterfactuals` array of `{change_id, delta}` items are used, the rest
 *   (including null entries) are ignored.
 * @param {Array<{id: string, label: string}>} changes  Changes that were
 *   probed; used to map each `change_id` to its display label.
 *
 * Columns: rank, label, average delta with bar, min-to-max range, number of
 * positive deltas, number of negative deltas. Rows are built from DOM nodes
 * with `textContent`, so labels are never parsed as HTML.
 */
function renderGradientTable(results, changes) {
  // Prefix non-negative values with "+"; negatives already carry their sign.
  const signed = (value, text = String(value)) => (value >= 0 ? `+${text}` : text);

  const labels = new Map(changes.map((c) => [c.id, c.label]));

  // Aggregate
  const byChange = new Map();
  for (const r of results || []) {
    if (!r || !Array.isArray(r.counterfactuals)) continue;
    for (const cf of r.counterfactuals) {
      const delta = cf.delta || 0;  // a missing delta counts as "no change"
      let stats = byChange.get(cf.change_id);
      if (!stats) {
        stats = {deltas: [], pos: 0, neg: 0};
        byChange.set(cf.change_id, stats);
      }
      stats.deltas.push(delta);
      if (delta > 0) stats.pos++;
      if (delta < 0) stats.neg++;
    }
  }

  const ranked = Array.from(byChange, ([cid, d]) => ({
    id: cid,
    label: labels.get(cid) || cid,
    avg: d.deltas.reduce((a, b) => a + b, 0) / d.deltas.length,
    min: Math.min(...d.deltas),
    max: Math.max(...d.deltas),
    pos: d.pos,
    neg: d.neg,
  }));
  ranked.sort((a, b) => b.avg - a.avg);

  const tbody = document.querySelector('#gradientTable tbody');
  tbody.innerHTML = '';
  ranked.forEach((r, i) => {
    const tr = document.createElement('tr');
    const addCell = (text, color) => {
      const td = document.createElement('td');
      td.textContent = text;
      if (color) td.style.color = color;
      tr.appendChild(td);
      return td;
    };
    const positive = r.avg >= 0;

    addCell(String(i + 1));
    addCell(r.label);

    const avgCell = addCell(signed(r.avg, r.avg.toFixed(1)));
    avgCell.className = positive ? 'delta-pos' : 'delta-neg';
    const bar = document.createElement('span');
    bar.className = 'delta-bar';
    // 30px per point of average delta, capped at 120px.
    bar.style.width = `${Math.min(Math.abs(r.avg) * 30, 120)}px`;
    bar.style.background = positive ? 'var(--green)' : 'var(--red)';
    bar.style.marginLeft = '8px';
    avgCell.appendChild(bar);

    // Both ends get their own sign, so a negative max reads "-1", not "+-1".
    addCell(`${signed(r.min)} to ${signed(r.max)}`, 'var(--text2)');
    addCell(String(r.pos), 'var(--green)');
    addCell(String(r.neg), 'var(--red)');

    tbody.appendChild(tr);
  });
}
874
+
875
+ // Boot
876
+ init();
877
+ </script>
878
+ </body>
879
+ </html>