Claude committed on
Commit
a8d5d4c
·
unverified ·
1 Parent(s): 0a66d61

Add optional metric calibration to web UI

Browse files

After evaluation, users can optionally anchor SGO scores to a real-world
metric (CTR, conversion rate, revenue, etc.) by entering the current
known value. The gradient table then shows predicted metric deltas
alongside score deltas.

- Single anchor: linear scaling (metric = k * score)
- Multiple anchors: Platt scaling via Newton's method
- Collapsible UI panel in step 1 after evaluation results
- Calibration data included in downloaded reports
- Generic naming ("metric"), not CTR-specific

https://claude.ai/code/session_0141cbZmdz7ziFkNsQbq7z5Y

Files changed (2) hide show
  1. web/app.py +137 -0
  2. web/static/index.html +168 -6
web/app.py CHANGED
@@ -39,6 +39,7 @@ import sys
39
  sys.path.insert(0, str(PROJECT_ROOT / "scripts"))
40
  from evaluate import evaluate_one, analyze as analyze_eval, SYSTEM_PROMPT, BIAS_CALIBRATION_ADDENDUM
41
  from counterfactual import probe_one, analyze_gradient, build_changes_block, compute_goal_weights
 
42
  from generate_cohort import generate_segment
43
  from bias_audit import (
44
  reframe_entity, add_authority_signals, reorder_entity,
@@ -218,6 +219,15 @@ class CounterfactualConfig(BaseModel):
218
  parallel: int = 5
219
 
220
 
 
 
 
 
 
 
 
 
 
221
  class SuggestSegmentsInput(BaseModel):
222
  entity_text: str
223
  audience_context: str
@@ -298,7 +308,9 @@ async def create_session(entity: EntityInput):
298
  "cohort": None,
299
  "eval_results": None,
300
  "gradient": None,
 
301
  "bias_audit": None,
 
302
  "created": datetime.now().isoformat(),
303
  }
304
  return {"session_id": sid}
@@ -321,6 +333,101 @@ async def update_session_meta(sid: str, meta: SessionMetaUpdate):
321
  return {"ok": True}
322
 
323
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
324
  @app.get("/api/session/{sid}")
325
  async def get_session(sid: str):
326
  if sid not in sessions:
@@ -797,6 +904,10 @@ async def counterfactual_stream(sid: str, ticket: str, request: Request):
797
  gradient_text, ranked_data = analyze_gradient(results, all_changes,
798
  goal_weights=goal_weights)
799
  session["gradient"] = gradient_text
 
 
 
 
800
 
801
  yield {"event": "complete", "data": json.dumps({
802
  "elapsed": round(elapsed, 1),
@@ -804,6 +915,8 @@ async def counterfactual_stream(sid: str, ticket: str, request: Request):
804
  "ranked": ranked_data,
805
  "results": results,
806
  "goal": goal if has_goal else None,
 
 
807
  })}
808
 
809
  return EventSourceResponse(event_generator(), ping=15)
@@ -995,6 +1108,30 @@ async def download_report(sid: str):
995
  lines.append(s["gradient"])
996
  lines.append("")
997
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
998
  # Bias audit
999
  if s.get("bias_audit"):
1000
  audit = s["bias_audit"]
 
39
  sys.path.insert(0, str(PROJECT_ROOT / "scripts"))
40
  from evaluate import evaluate_one, analyze as analyze_eval, SYSTEM_PROMPT, BIAS_CALIBRATION_ADDENDUM
41
  from counterfactual import probe_one, analyze_gradient, build_changes_block, compute_goal_weights
42
+ from ctr_calibrate import sigmoid, fit_platt_scaling, predict_ctr, ctr_derivative
43
  from generate_cohort import generate_segment
44
  from bias_audit import (
45
  reframe_entity, add_authority_signals, reorder_entity,
 
219
  parallel: int = 5
220
 
221
 
222
+ class CalibrationAnchor(BaseModel):
223
+ mean_score: float
224
+ metric_value: float
225
+
226
+ class CalibrationInput(BaseModel):
227
+ metric_name: str = "conversion rate"
228
+ metric_unit: str = "%"
229
+ anchors: list[CalibrationAnchor] # At least 1; first is "current entity"
230
+
231
  class SuggestSegmentsInput(BaseModel):
232
  entity_text: str
233
  audience_context: str
 
308
  "cohort": None,
309
  "eval_results": None,
310
  "gradient": None,
311
+ "gradient_ranked": None,
312
  "bias_audit": None,
313
+ "calibration": None,
314
  "created": datetime.now().isoformat(),
315
  }
316
  return {"session_id": sid}
 
333
  return {"ok": True}
334
 
335
 
336
+ @app.post("/api/calibrate/{sid}")
337
+ async def set_calibration(sid: str, cal: CalibrationInput):
338
+ """Set metric calibration for a session. Requires eval results."""
339
+ if sid not in sessions:
340
+ raise HTTPException(404, "Session not found")
341
+ session = sessions[sid]
342
+ if not session["eval_results"]:
343
+ raise HTTPException(400, "Run evaluation first")
344
+
345
+ anchors = [{"mean_score": a.mean_score, "metric_value": a.metric_value}
346
+ for a in cal.anchors if a.metric_value > 0]
347
+ if not anchors:
348
+ raise HTTPException(400, "Need at least one anchor with metric_value > 0")
349
+
350
+ if len(anchors) == 1:
351
+ # Single anchor: linear scaling. metric = k * mean_score
352
+ k = anchors[0]["metric_value"] / anchors[0]["mean_score"]
353
+ session["calibration"] = {
354
+ "metric_name": cal.metric_name,
355
+ "metric_unit": cal.metric_unit,
356
+ "method": "linear",
357
+ "k": k,
358
+ "anchors": anchors,
359
+ }
360
+ else:
361
+ # 2+ anchors: Platt scaling
362
+ platt_anchors = [{"mean_score": a["mean_score"], "real_ctr": a["metric_value"]}
363
+ for a in anchors]
364
+ a, b = fit_platt_scaling(platt_anchors)
365
+ session["calibration"] = {
366
+ "metric_name": cal.metric_name,
367
+ "metric_unit": cal.metric_unit,
368
+ "method": "platt",
369
+ "a": a, "b": b,
370
+ "anchors": anchors,
371
+ }
372
+
373
+ # Re-calibrate existing gradient if available
374
+ result = _apply_calibration(session)
375
+ return {"ok": True, "calibration": session["calibration"], "calibrated_gradient": result}
376
+
377
+
378
+ @app.delete("/api/calibrate/{sid}")
379
+ async def clear_calibration(sid: str):
380
+ """Remove metric calibration from a session."""
381
+ if sid not in sessions:
382
+ raise HTTPException(404, "Session not found")
383
+ sessions[sid]["calibration"] = None
384
+ return {"ok": True}
385
+
386
+
387
+ def _apply_calibration(session):
388
+ """Apply calibration to existing gradient data. Returns calibrated ranked list or None."""
389
+ cal = session.get("calibration")
390
+ ranked = session.get("gradient_ranked")
391
+ if not cal or not ranked:
392
+ return None
393
+
394
+ valid = [r for r in (session.get("eval_results") or []) if r and "score" in r]
395
+ if not valid:
396
+ return None
397
+ mean_score = sum(r["score"] for r in valid) / len(valid)
398
+
399
+ if cal["method"] == "linear":
400
+ k = cal["k"]
401
+ current_metric = k * mean_score
402
+ result = []
403
+ for r in ranked:
404
+ metric_delta = r["avg_delta"] * k
405
+ result.append({
406
+ "id": r["id"],
407
+ "label": r["label"],
408
+ "avg_delta": r["avg_delta"],
409
+ "metric_delta": round(metric_delta, 4),
410
+ "predicted_metric": round(current_metric + metric_delta, 4),
411
+ })
412
+ return {"current_metric": round(current_metric, 4), "items": result}
413
+ elif cal["method"] == "platt":
414
+ a, b = cal["a"], cal["b"]
415
+ current_metric = predict_ctr(a, b, mean_score)
416
+ deriv = ctr_derivative(a, b, mean_score)
417
+ result = []
418
+ for r in ranked:
419
+ metric_delta = r["avg_delta"] * deriv
420
+ result.append({
421
+ "id": r["id"],
422
+ "label": r["label"],
423
+ "avg_delta": r["avg_delta"],
424
+ "metric_delta": round(metric_delta, 4),
425
+ "predicted_metric": round(current_metric + metric_delta, 4),
426
+ })
427
+ return {"current_metric": round(current_metric, 4), "items": result}
428
+ return None
429
+
430
+
431
  @app.get("/api/session/{sid}")
432
  async def get_session(sid: str):
433
  if sid not in sessions:
 
904
  gradient_text, ranked_data = analyze_gradient(results, all_changes,
905
  goal_weights=goal_weights)
906
  session["gradient"] = gradient_text
907
+ session["gradient_ranked"] = ranked_data
908
+
909
+ # Apply metric calibration if set
910
+ calibrated = _apply_calibration(session)
911
 
912
  yield {"event": "complete", "data": json.dumps({
913
  "elapsed": round(elapsed, 1),
 
915
  "ranked": ranked_data,
916
  "results": results,
917
  "goal": goal if has_goal else None,
918
+ "calibrated": calibrated,
919
+ "calibration": session.get("calibration"),
920
  })}
921
 
922
  return EventSourceResponse(event_generator(), ping=15)
 
1108
  lines.append(s["gradient"])
1109
  lines.append("")
1110
 
1111
+ # Metric calibration
1112
+ if s.get("calibration"):
1113
+ cal = s["calibration"]
1114
+ lines.append("---\n")
1115
+ lines.append(f"## Metric Calibration ({cal['metric_name']})\n")
1116
+ lines.append(f"- **Method:** {cal['method']}")
1117
+ lines.append(f"- **Unit:** {cal['metric_unit']}")
1118
+ for anc in cal.get("anchors", []):
1119
+ lines.append(f"- Anchor: score {anc['mean_score']:.1f} = {anc['metric_value']}{cal['metric_unit']}")
1120
+
1121
+ calibrated = _apply_calibration(s)
1122
+ if calibrated:
1123
+ lines.append(f"\n**Current predicted {cal['metric_name']}:** "
1124
+ f"{calibrated['current_metric']}{cal['metric_unit']}\n")
1125
+ lines.append(f"| Change | Score Delta | {cal['metric_name']} Delta | Predicted |")
1126
+ lines.append("|--------|-----------|-------------|-----------|")
1127
+ for item in calibrated["items"]:
1128
+ lines.append(
1129
+ f"| {item['label']} | {item['avg_delta']:+.1f} | "
1130
+ f"{item['metric_delta']:+.4f}{cal['metric_unit']} | "
1131
+ f"{item['predicted_metric']}{cal['metric_unit']} |"
1132
+ )
1133
+ lines.append("")
1134
+
1135
  # Bias audit
1136
  if s.get("bias_audit"):
1137
  audit = s["bias_audit"]
web/static/index.html CHANGED
@@ -456,6 +456,44 @@
456
  <summary style="cursor:pointer;color:var(--text2);font-size:0.9rem">Full analysis</summary>
457
  <div class="results-details" id="evalAnalysis"></div>
458
  </details>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
459
  <div class="btn-row mt-16">
460
  <button onclick="runDirections()">Test what to change next</button>
461
  <button class="secondary" onclick="goToStep(3)">Check panel realism</button>
@@ -1241,7 +1279,8 @@ async function runDirections() {
1241
  return;
1242
  }
1243
 
1244
- renderGradientTable(d.results, suggestedChanges, d.ranked);
 
1245
  document.getElementById('gradientText').textContent = d.gradient;
1246
  document.getElementById('changesTested').textContent =
1247
  suggestedChanges.map(c => `${c.label}: ${c.description}`).join('\n');
@@ -1257,7 +1296,7 @@ async function runDirections() {
1257
  }
1258
  }
1259
 
1260
- function renderGradientTable(results, changes, ranked) {
1261
  // Use backend-provided ranked data (respects goal weights / VJP) when available,
1262
  // falling back to client-side aggregation only for legacy responses.
1263
  if (!ranked || !ranked.length) {
@@ -1299,6 +1338,37 @@ function renderGradientTable(results, changes, ranked) {
1299
  ranked.forEach(r => { if (!r.desc) r.desc = descs[r.id] || ''; });
1300
  }
1301
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1302
  const tbody = document.querySelector('#gradientTable tbody');
1303
  tbody.innerHTML = '';
1304
  ranked.forEach((r, i) => {
@@ -1308,6 +1378,25 @@ function renderGradientTable(results, changes, ranked) {
1308
  const barColor = avg >= 0 ? 'var(--green)' : 'var(--red)';
1309
  const rowId = `gradient-detail-${i}`;
1310
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1311
  // Summary row (clickable)
1312
  tbody.innerHTML += `
1313
  <tr onclick="document.getElementById('${rowId}').classList.toggle('hidden')" style="cursor:pointer">
@@ -1320,9 +1409,7 @@ function renderGradientTable(results, changes, ranked) {
1320
  ${avg >= 0 ? '+' : ''}${avg.toFixed(1)}
1321
  <span class="delta-bar" style="width:${barWidth}px;background:${barColor};margin-left:8px"></span>
1322
  </td>
1323
- <td style="color:var(--text2)">${r.min_delta >= 0 ? '+' : ''}${r.min_delta} to +${r.max_delta}</td>
1324
- <td style="color:var(--green)">${r.positive}</td>
1325
- <td style="color:var(--red)">${r.negative}</td>
1326
  </tr>
1327
  `;
1328
 
@@ -1352,7 +1439,7 @@ function renderGradientTable(results, changes, ranked) {
1352
 
1353
  tbody.innerHTML += `
1354
  <tr id="${rowId}" class="hidden">
1355
- <td colspan="6" style="padding:0;background:var(--bg);border-bottom:2px solid var(--border)">${detailHtml}</td>
1356
  </tr>
1357
  `;
1358
  });
@@ -1459,6 +1546,81 @@ function runBiasAudit() {
1459
  };
1460
  }
1461
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1462
  // ── Download report ──
1463
 
1464
  function downloadReport() {
 
456
  <summary style="cursor:pointer;color:var(--text2);font-size:0.9rem">Full analysis</summary>
457
  <div class="results-details" id="evalAnalysis"></div>
458
  </details>
459
+ <details class="mt-16">
460
+ <summary style="cursor:pointer;color:var(--text2);font-size:0.9rem">Anchor to a real metric (optional)</summary>
461
+ <div style="padding:12px 0">
462
+ <p style="font-size:0.8rem;color:var(--text2);margin-bottom:12px">
463
+ If you know the actual performance of this entity (e.g. CTR, conversion rate, revenue),
464
+ SGO can translate score changes into predicted metric changes.
465
+ </p>
466
+ <div style="display:flex;gap:10px;flex-wrap:wrap;align-items:flex-end">
467
+ <div class="field" style="flex:2;min-width:140px;margin-bottom:0">
468
+ <label>Metric name</label>
469
+ <input type="text" id="calMetricName" placeholder="e.g. CTR, conversion rate" value="CTR">
470
+ </div>
471
+ <div class="field" style="flex:1;min-width:80px;margin-bottom:0">
472
+ <label>Current value</label>
473
+ <input type="number" id="calMetricValue" step="any" placeholder="e.g. 2.1">
474
+ </div>
475
+ <div class="field" style="flex:1;min-width:60px;margin-bottom:0">
476
+ <label>Unit</label>
477
+ <input type="text" id="calMetricUnit" value="%" style="width:60px">
478
+ </div>
479
+ <button class="secondary" onclick="applyCalibration()" style="margin-bottom:0;white-space:nowrap">Apply</button>
480
+ <button class="secondary" onclick="clearCalibration()" id="calClearBtn" style="margin-bottom:0;display:none;padding:10px 12px;color:var(--red);border-color:var(--red)">Clear</button>
481
+ </div>
482
+ <div id="calStatus" class="hidden mt-12" style="font-size:0.85rem"></div>
483
+ <details id="calMultiAnchor" class="mt-12">
484
+ <summary style="cursor:pointer;color:var(--text2);font-size:0.8rem">Add more anchors for better calibration</summary>
485
+ <div style="padding:8px 0">
486
+ <p style="font-size:0.75rem;color:var(--text2);margin-bottom:8px">
487
+ With 2+ anchors (from other SGO runs with known metrics), calibration uses
488
+ Platt scaling instead of linear scaling for better accuracy.
489
+ </p>
490
+ <div id="extraAnchors"></div>
491
+ <button class="secondary" onclick="addAnchorRow()" style="padding:4px 12px;font-size:0.75rem">+ Add anchor</button>
492
+ </div>
493
+ </details>
494
+ </div>
495
+ </details>
496
+
497
  <div class="btn-row mt-16">
498
  <button onclick="runDirections()">Test what to change next</button>
499
  <button class="secondary" onclick="goToStep(3)">Check panel realism</button>
 
1279
  return;
1280
  }
1281
 
1282
+ if (d.calibration) currentCalibration = d.calibration;
1283
+ renderGradientTable(d.results, suggestedChanges, d.ranked, d.calibrated);
1284
  document.getElementById('gradientText').textContent = d.gradient;
1285
  document.getElementById('changesTested').textContent =
1286
  suggestedChanges.map(c => `${c.label}: ${c.description}`).join('\n');
 
1296
  }
1297
  }
1298
 
1299
+ function renderGradientTable(results, changes, ranked, calibrated) {
1300
  // Use backend-provided ranked data (respects goal weights / VJP) when available,
1301
  // falling back to client-side aggregation only for legacy responses.
1302
  if (!ranked || !ranked.length) {
 
1338
  ranked.forEach(r => { if (!r.desc) r.desc = descs[r.id] || ''; });
1339
  }
1340
 
1341
+ // Build calibration lookup if available
1342
+ const calLookup = {};
1343
+ const hasCal = calibrated && calibrated.items && calibrated.items.length > 0;
1344
+ if (hasCal) {
1345
+ calibrated.items.forEach(item => { calLookup[item.id] = item; });
1346
+ }
1347
+
1348
+ // Update table header
1349
+ const thead = document.querySelector('#gradientTable thead tr');
1350
+ thead.innerHTML = hasCal
1351
+ ? '<th>#</th><th>Change</th><th>Score Impact</th><th>Metric Impact</th><th>Predicted</th><th>Range</th>'
1352
+ : '<th>#</th><th>Change</th><th>Avg Impact</th><th>Range</th><th>Helps</th><th>Hurts</th>';
1353
+
1354
+ // Show calibration summary above table
1355
+ let calSummaryEl = document.getElementById('calSummary');
1356
+ if (!calSummaryEl) {
1357
+ calSummaryEl = document.createElement('div');
1358
+ calSummaryEl.id = 'calSummary';
1359
+ calSummaryEl.style.cssText = 'font-size:0.85rem;margin-bottom:12px';
1360
+ document.getElementById('gradientTable').parentElement.insertBefore(
1361
+ calSummaryEl, document.getElementById('gradientTable'));
1362
+ }
1363
+ if (hasCal && currentCalibration) {
1364
+ const mn = currentCalibration.metric_name || 'metric';
1365
+ const mu = currentCalibration.metric_unit || '';
1366
+ calSummaryEl.innerHTML = `<span style="color:var(--accent2)">Calibrated to ${esc(mn)}</span> — current: <strong>${calibrated.current_metric}${esc(mu)}</strong>`;
1367
+ calSummaryEl.classList.remove('hidden');
1368
+ } else {
1369
+ calSummaryEl.classList.add('hidden');
1370
+ }
1371
+
1372
  const tbody = document.querySelector('#gradientTable tbody');
1373
  tbody.innerHTML = '';
1374
  ranked.forEach((r, i) => {
 
1378
  const barColor = avg >= 0 ? 'var(--green)' : 'var(--red)';
1379
  const rowId = `gradient-detail-${i}`;
1380
 
1381
+ const calItem = calLookup[r.id];
1382
+ let calCols = '';
1383
+ if (hasCal && calItem) {
1384
+ const mu = (currentCalibration && currentCalibration.metric_unit) || '';
1385
+ const md = calItem.metric_delta;
1386
+ const mdCls = md >= 0 ? 'delta-pos' : 'delta-neg';
1387
+ calCols = `
1388
+ <td class="${mdCls}">${md >= 0 ? '+' : ''}${formatMetric(md, mu)}</td>
1389
+ <td style="font-weight:600">${formatMetric(calItem.predicted_metric, mu)}</td>
1390
+ `;
1391
+ } else if (hasCal) {
1392
+ calCols = '<td>—</td><td>—</td>';
1393
+ }
1394
+
1395
+ const rangeCols = hasCal ? '' : `
1396
+ <td style="color:var(--text2)">${r.min_delta >= 0 ? '+' : ''}${r.min_delta} to +${r.max_delta}</td>
1397
+ <td style="color:var(--green)">${r.positive}</td>
1398
+ <td style="color:var(--red)">${r.negative}</td>`;
1399
+
1400
  // Summary row (clickable)
1401
  tbody.innerHTML += `
1402
  <tr onclick="document.getElementById('${rowId}').classList.toggle('hidden')" style="cursor:pointer">
 
1409
  ${avg >= 0 ? '+' : ''}${avg.toFixed(1)}
1410
  <span class="delta-bar" style="width:${barWidth}px;background:${barColor};margin-left:8px"></span>
1411
  </td>
1412
+ ${calCols}${rangeCols}
 
 
1413
  </tr>
1414
  `;
1415
 
 
1439
 
1440
  tbody.innerHTML += `
1441
  <tr id="${rowId}" class="hidden">
1442
+ <td colspan="${hasCal ? 6 : 6}" style="padding:0;background:var(--bg);border-bottom:2px solid var(--border)">${detailHtml}</td>
1443
  </tr>
1444
  `;
1445
  });
 
1546
  };
1547
  }
1548
 
1549
+ // ── Metric Calibration ──
1550
+
1551
+ let currentCalibration = null;
1552
+
1553
+ function formatMetric(value, unit) {
1554
+ if (unit === '%') return value.toFixed(2) + '%';
1555
+ if (unit === '$') return '$' + value.toFixed(2);
1556
+ return value.toFixed(4) + (unit ? ' ' + unit : '');
1557
+ }
1558
+
1559
+ function addAnchorRow() {
1560
+ const container = document.getElementById('extraAnchors');
1561
+ const idx = container.children.length;
1562
+ const row = document.createElement('div');
1563
+ row.style.cssText = 'display:flex;gap:8px;align-items:center;margin-bottom:6px';
1564
+ row.innerHTML = `
1565
+ <input type="number" step="any" placeholder="Mean score" style="flex:1;padding:6px;font-size:0.8rem" class="anchor-score">
1566
+ <span style="font-size:0.8rem;color:var(--text2)">=</span>
1567
+ <input type="number" step="any" placeholder="Metric value" style="flex:1;padding:6px;font-size:0.8rem" class="anchor-value">
1568
+ <button class="secondary" onclick="this.parentElement.remove()" style="padding:4px 8px;font-size:0.75rem">x</button>
1569
+ `;
1570
+ container.appendChild(row);
1571
+ }
1572
+
1573
+ async function applyCalibration() {
1574
+ if (!sessionId) return alert('Run evaluation first.');
1575
+ const metricName = document.getElementById('calMetricName').value.trim() || 'metric';
1576
+ const metricValue = parseFloat(document.getElementById('calMetricValue').value);
1577
+ const metricUnit = document.getElementById('calMetricUnit').value.trim() || '';
1578
+
1579
+ if (!metricValue || metricValue <= 0) return alert('Enter a positive metric value.');
1580
+
1581
+ // Get the current mean score from eval results
1582
+ const valid = (evalResultsData || []).filter(r => r && r.score);
1583
+ if (!valid.length) return alert('No evaluation data.');
1584
+ const meanScore = valid.reduce((s, r) => s + r.score, 0) / valid.length;
1585
+
1586
+ // Build anchors: current entity + any extra
1587
+ const anchors = [{mean_score: meanScore, metric_value: metricValue}];
1588
+ document.querySelectorAll('#extraAnchors > div').forEach(row => {
1589
+ const score = parseFloat(row.querySelector('.anchor-score').value);
1590
+ const value = parseFloat(row.querySelector('.anchor-value').value);
1591
+ if (score > 0 && value > 0) anchors.push({mean_score: score, metric_value: value});
1592
+ });
1593
+
1594
+ try {
1595
+ const resp = await fetch(`/api/calibrate/${sessionId}`, {
1596
+ method: 'POST',
1597
+ headers: llmHeaders(),
1598
+ body: JSON.stringify({metric_name: metricName, metric_unit: metricUnit, anchors}),
1599
+ });
1600
+ const data = await resp.json();
1601
+ if (!resp.ok) throw new Error(data.detail || 'Calibration failed');
1602
+
1603
+ currentCalibration = data.calibration;
1604
+ const status = document.getElementById('calStatus');
1605
+ const method = anchors.length === 1 ? 'linear scaling' : 'Platt scaling';
1606
+ status.innerHTML = `<span style="color:var(--green)">Calibrated (${esc(method)})</span> — gradient will show ${esc(metricName)} deltas`;
1607
+ status.classList.remove('hidden');
1608
+ document.getElementById('calClearBtn').style.display = '';
1609
+ } catch (e) {
1610
+ const status = document.getElementById('calStatus');
1611
+ status.innerHTML = `<span style="color:var(--red)">Error: ${esc(e.message)}</span>`;
1612
+ status.classList.remove('hidden');
1613
+ }
1614
+ }
1615
+
1616
+ async function clearCalibration() {
1617
+ if (!sessionId) return;
1618
+ await fetch(`/api/calibrate/${sessionId}`, {method: 'DELETE', headers: llmHeaders()});
1619
+ currentCalibration = null;
1620
+ document.getElementById('calStatus').classList.add('hidden');
1621
+ document.getElementById('calClearBtn').style.display = 'none';
1622
+ }
1623
+
1624
  // ── Download report ──
1625
 
1626
  function downloadReport() {