Eric Xu committed on
Commit
9166125
·
unverified ·
1 Parent(s): a8d5d4c

Redesign CTR calibration for marketer workflow — metric input upfront, not buried

Browse files

Move "Anchor to a real metric" from a hidden details section inside eval results
to a first-class input field next to the goal. Default to CTR with a dropdown
for other metrics (conversion rate, open rate, revenue, custom).

- Auto-apply calibration when eval completes if metric value is set
- Debounced re-apply on metric value/unit changes
- Table headers use actual metric name ("CTR Impact", not "Metric Impact")
- Keep Helps/Hurts columns alongside metric columns (previously the metric columns replaced them)
- Clearing calibration re-renders the gradient table in its score-only view
- Fix score:0 truthiness bug in frontend mean calculation (results with a score of exactly 0 were being filtered out)
- Guard against division by zero in linear scaling (mean_score > 0)
- Tighten backend score filtering to an isinstance(score, (int, float)) check

Files changed (2) hide show
  1. web/app.py +3 -1
  2. web/static/index.html +94 -74
web/app.py CHANGED
@@ -346,6 +346,8 @@ async def set_calibration(sid: str, cal: CalibrationInput):
346
  for a in cal.anchors if a.metric_value > 0]
347
  if not anchors:
348
  raise HTTPException(400, "Need at least one anchor with metric_value > 0")
 
 
349
 
350
  if len(anchors) == 1:
351
  # Single anchor: linear scaling. metric = k * mean_score
@@ -391,7 +393,7 @@ def _apply_calibration(session):
391
  if not cal or not ranked:
392
  return None
393
 
394
- valid = [r for r in (session.get("eval_results") or []) if r and "score" in r]
395
  if not valid:
396
  return None
397
  mean_score = sum(r["score"] for r in valid) / len(valid)
 
346
  for a in cal.anchors if a.metric_value > 0]
347
  if not anchors:
348
  raise HTTPException(400, "Need at least one anchor with metric_value > 0")
349
+ if any(a["mean_score"] <= 0 for a in anchors):
350
+ raise HTTPException(400, "Mean score must be positive")
351
 
352
  if len(anchors) == 1:
353
  # Single anchor: linear scaling. metric = k * mean_score
 
393
  if not cal or not ranked:
394
  return None
395
 
396
+ valid = [r for r in (session.get("eval_results") or []) if r and isinstance(r.get("score"), (int, float))]
397
  if not valid:
398
  return None
399
  mean_score = sum(r["score"] for r in valid) / len(valid)
web/static/index.html CHANGED
@@ -422,6 +422,28 @@
422
  <input type="text" id="cohortDesc" placeholder="e.g. 'Engineering managers at mid-stage startups' or 'US consumers aged 25-45'">
423
  </div>
424
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
425
  <details class="mb-8">
426
  <summary style="cursor:pointer;color:var(--text2);font-size:0.85rem">Panel settings</summary>
427
  <div style="padding:12px 0">
@@ -456,44 +478,6 @@
456
  <summary style="cursor:pointer;color:var(--text2);font-size:0.9rem">Full analysis</summary>
457
  <div class="results-details" id="evalAnalysis"></div>
458
  </details>
459
- <details class="mt-16">
460
- <summary style="cursor:pointer;color:var(--text2);font-size:0.9rem">Anchor to a real metric (optional)</summary>
461
- <div style="padding:12px 0">
462
- <p style="font-size:0.8rem;color:var(--text2);margin-bottom:12px">
463
- If you know the actual performance of this entity (e.g. CTR, conversion rate, revenue),
464
- SGO can translate score changes into predicted metric changes.
465
- </p>
466
- <div style="display:flex;gap:10px;flex-wrap:wrap;align-items:flex-end">
467
- <div class="field" style="flex:2;min-width:140px;margin-bottom:0">
468
- <label>Metric name</label>
469
- <input type="text" id="calMetricName" placeholder="e.g. CTR, conversion rate" value="CTR">
470
- </div>
471
- <div class="field" style="flex:1;min-width:80px;margin-bottom:0">
472
- <label>Current value</label>
473
- <input type="number" id="calMetricValue" step="any" placeholder="e.g. 2.1">
474
- </div>
475
- <div class="field" style="flex:1;min-width:60px;margin-bottom:0">
476
- <label>Unit</label>
477
- <input type="text" id="calMetricUnit" value="%" style="width:60px">
478
- </div>
479
- <button class="secondary" onclick="applyCalibration()" style="margin-bottom:0;white-space:nowrap">Apply</button>
480
- <button class="secondary" onclick="clearCalibration()" id="calClearBtn" style="margin-bottom:0;display:none;padding:10px 12px;color:var(--red);border-color:var(--red)">Clear</button>
481
- </div>
482
- <div id="calStatus" class="hidden mt-12" style="font-size:0.85rem"></div>
483
- <details id="calMultiAnchor" class="mt-12">
484
- <summary style="cursor:pointer;color:var(--text2);font-size:0.8rem">Add more anchors for better calibration</summary>
485
- <div style="padding:8px 0">
486
- <p style="font-size:0.75rem;color:var(--text2);margin-bottom:8px">
487
- With 2+ anchors (from other SGO runs with known metrics), calibration uses
488
- Platt scaling instead of linear scaling for better accuracy.
489
- </p>
490
- <div id="extraAnchors"></div>
491
- <button class="secondary" onclick="addAnchorRow()" style="padding:4px 12px;font-size:0.75rem">+ Add anchor</button>
492
- </div>
493
- </details>
494
- </div>
495
- </details>
496
-
497
  <div class="btn-row mt-16">
498
  <button onclick="runDirections()">Test what to change next</button>
499
  <button class="secondary" onclick="goToStep(3)">Check panel realism</button>
@@ -771,6 +755,7 @@ This only works because we have a strong product manager who keeps the backlog p
771
 
772
  let sessionId = null;
773
  let evalResultsData = null;
 
774
 
775
  // LLM credentials — stored only in browser JS memory, never persisted
776
  let llmApiKey = '';
@@ -1149,6 +1134,11 @@ async function runFullPipeline() {
1149
  document.getElementById('negCount').textContent = d.negative;
1150
  document.getElementById('evalAnalysis').textContent = d.analysis;
1151
  document.getElementById('evalResults').classList.remove('hidden');
 
 
 
 
 
1152
  resolve();
1153
  });
1154
 
@@ -1280,6 +1270,7 @@ async function runDirections() {
1280
  }
1281
 
1282
  if (d.calibration) currentCalibration = d.calibration;
 
1283
  renderGradientTable(d.results, suggestedChanges, d.ranked, d.calibrated);
1284
  document.getElementById('gradientText').textContent = d.gradient;
1285
  document.getElementById('changesTested').textContent =
@@ -1347,8 +1338,9 @@ function renderGradientTable(results, changes, ranked, calibrated) {
1347
 
1348
  // Update table header
1349
  const thead = document.querySelector('#gradientTable thead tr');
 
1350
  thead.innerHTML = hasCal
1351
- ? '<th>#</th><th>Change</th><th>Score Impact</th><th>Metric Impact</th><th>Predicted</th><th>Range</th>'
1352
  : '<th>#</th><th>Change</th><th>Avg Impact</th><th>Range</th><th>Helps</th><th>Hurts</th>';
1353
 
1354
  // Show calibration summary above table
@@ -1392,10 +1384,12 @@ function renderGradientTable(results, changes, ranked, calibrated) {
1392
  calCols = '<td>—</td><td>—</td>';
1393
  }
1394
 
1395
- const rangeCols = hasCal ? '' : `
1396
- <td style="color:var(--text2)">${r.min_delta >= 0 ? '+' : ''}${r.min_delta} to +${r.max_delta}</td>
1397
- <td style="color:var(--green)">${r.positive}</td>
1398
- <td style="color:var(--red)">${r.negative}</td>`;
 
 
1399
 
1400
  // Summary row (clickable)
1401
  tbody.innerHTML += `
@@ -1439,7 +1433,7 @@ function renderGradientTable(results, changes, ranked, calibrated) {
1439
 
1440
  tbody.innerHTML += `
1441
  <tr id="${rowId}" class="hidden">
1442
- <td colspan="${hasCal ? 6 : 6}" style="padding:0;background:var(--bg);border-bottom:2px solid var(--border)">${detailHtml}</td>
1443
  </tr>
1444
  `;
1445
  });
@@ -1556,40 +1550,55 @@ function formatMetric(value, unit) {
1556
  return value.toFixed(4) + (unit ? ' ' + unit : '');
1557
  }
1558
 
1559
- function addAnchorRow() {
1560
- const container = document.getElementById('extraAnchors');
1561
- const idx = container.children.length;
1562
- const row = document.createElement('div');
1563
- row.style.cssText = 'display:flex;gap:8px;align-items:center;margin-bottom:6px';
1564
- row.innerHTML = `
1565
- <input type="number" step="any" placeholder="Mean score" style="flex:1;padding:6px;font-size:0.8rem" class="anchor-score">
1566
- <span style="font-size:0.8rem;color:var(--text2)">=</span>
1567
- <input type="number" step="any" placeholder="Metric value" style="flex:1;padding:6px;font-size:0.8rem" class="anchor-value">
1568
- <button class="secondary" onclick="this.parentElement.remove()" style="padding:4px 8px;font-size:0.75rem">x</button>
1569
- `;
1570
- container.appendChild(row);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1571
  }
1572
 
1573
  async function applyCalibration() {
1574
- if (!sessionId) return alert('Run evaluation first.');
1575
- const metricName = document.getElementById('calMetricName').value.trim() || 'metric';
1576
  const metricValue = parseFloat(document.getElementById('calMetricValue').value);
1577
  const metricUnit = document.getElementById('calMetricUnit').value.trim() || '';
1578
 
1579
- if (!metricValue || metricValue <= 0) return alert('Enter a positive metric value.');
1580
 
1581
- // Get the current mean score from eval results
1582
- const valid = (evalResultsData || []).filter(r => r && r.score);
1583
- if (!valid.length) return alert('No evaluation data.');
1584
- const meanScore = valid.reduce((s, r) => s + r.score, 0) / valid.length;
1585
 
1586
- // Build anchors: current entity + any extra
1587
  const anchors = [{mean_score: meanScore, metric_value: metricValue}];
1588
- document.querySelectorAll('#extraAnchors > div').forEach(row => {
1589
- const score = parseFloat(row.querySelector('.anchor-score').value);
1590
- const value = parseFloat(row.querySelector('.anchor-value').value);
1591
- if (score > 0 && value > 0) anchors.push({mean_score: score, metric_value: value});
1592
- });
1593
 
1594
  try {
1595
  const resp = await fetch(`/api/calibrate/${sessionId}`, {
@@ -1602,13 +1611,16 @@ async function applyCalibration() {
1602
 
1603
  currentCalibration = data.calibration;
1604
  const status = document.getElementById('calStatus');
1605
- const method = anchors.length === 1 ? 'linear scaling' : 'Platt scaling';
1606
- status.innerHTML = `<span style="color:var(--green)">Calibrated (${esc(method)})</span> — gradient will show ${esc(metricName)} deltas`;
1607
  status.classList.remove('hidden');
1608
- document.getElementById('calClearBtn').style.display = '';
 
 
 
 
1609
  } catch (e) {
1610
  const status = document.getElementById('calStatus');
1611
- status.innerHTML = `<span style="color:var(--red)">Error: ${esc(e.message)}</span>`;
1612
  status.classList.remove('hidden');
1613
  }
1614
  }
@@ -1618,7 +1630,15 @@ async function clearCalibration() {
1618
  await fetch(`/api/calibrate/${sessionId}`, {method: 'DELETE', headers: llmHeaders()});
1619
  currentCalibration = null;
1620
  document.getElementById('calStatus').classList.add('hidden');
1621
- document.getElementById('calClearBtn').style.display = 'none';
 
 
 
 
 
 
 
 
1622
  }
1623
 
1624
  // ── Download report ──
 
422
  <input type="text" id="cohortDesc" placeholder="e.g. 'Engineering managers at mid-stage startups' or 'US consumers aged 25-45'">
423
  </div>
424
 
425
+ <div class="field" id="metricAnchorField">
426
+ <label>Know the current performance?</label>
427
+ <div style="display:flex;gap:8px;align-items:center;flex-wrap:wrap">
428
+ <select id="calMetricName" style="width:auto;min-width:100px;padding:8px 10px">
429
+ <option value="CTR">CTR</option>
430
+ <option value="conversion rate">Conversion rate</option>
431
+ <option value="open rate">Open rate</option>
432
+ <option value="revenue">Revenue</option>
433
+ <option value="">Custom...</option>
434
+ </select>
435
+ <input type="text" id="calMetricNameCustom" class="hidden" placeholder="Metric name"
436
+ style="width:120px;padding:8px 10px">
437
+ <input type="number" id="calMetricValue" step="any" placeholder="e.g. 2.1"
438
+ style="width:100px;padding:8px 10px">
439
+ <input type="text" id="calMetricUnit" value="%" style="width:50px;padding:8px 10px;text-align:center">
440
+ <div id="calStatus" class="hidden" style="font-size:0.85rem;margin-left:4px"></div>
441
+ </div>
442
+ <p style="font-size:0.75rem;color:var(--text2);margin-top:4px">
443
+ Optional — if set, SGO translates score changes into predicted metric changes.
444
+ </p>
445
+ </div>
446
+
447
  <details class="mb-8">
448
  <summary style="cursor:pointer;color:var(--text2);font-size:0.85rem">Panel settings</summary>
449
  <div style="padding:12px 0">
 
478
  <summary style="cursor:pointer;color:var(--text2);font-size:0.9rem">Full analysis</summary>
479
  <div class="results-details" id="evalAnalysis"></div>
480
  </details>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
481
  <div class="btn-row mt-16">
482
  <button onclick="runDirections()">Test what to change next</button>
483
  <button class="secondary" onclick="goToStep(3)">Check panel realism</button>
 
755
 
756
  let sessionId = null;
757
  let evalResultsData = null;
758
+ let lastGradientData = null;
759
 
760
  // LLM credentials — stored only in browser JS memory, never persisted
761
  let llmApiKey = '';
 
1134
  document.getElementById('negCount').textContent = d.negative;
1135
  document.getElementById('evalAnalysis').textContent = d.analysis;
1136
  document.getElementById('evalResults').classList.remove('hidden');
1137
+
1138
+ // Auto-apply calibration if user entered a metric value before running eval
1139
+ const calVal = parseFloat(document.getElementById('calMetricValue').value);
1140
+ if (calVal > 0) applyCalibration();
1141
+
1142
  resolve();
1143
  });
1144
 
 
1270
  }
1271
 
1272
  if (d.calibration) currentCalibration = d.calibration;
1273
+ lastGradientData = {results: d.results, changes: suggestedChanges, ranked: d.ranked};
1274
  renderGradientTable(d.results, suggestedChanges, d.ranked, d.calibrated);
1275
  document.getElementById('gradientText').textContent = d.gradient;
1276
  document.getElementById('changesTested').textContent =
 
1338
 
1339
  // Update table header
1340
  const thead = document.querySelector('#gradientTable thead tr');
1341
+ const mn = (currentCalibration && currentCalibration.metric_name) || 'Metric';
1342
  thead.innerHTML = hasCal
1343
+ ? `<th>#</th><th>Change</th><th>Score</th><th>${esc(mn)} Impact</th><th>Predicted ${esc(mn)}</th><th>Helps</th><th>Hurts</th>`
1344
  : '<th>#</th><th>Change</th><th>Avg Impact</th><th>Range</th><th>Helps</th><th>Hurts</th>';
1345
 
1346
  // Show calibration summary above table
 
1384
  calCols = '<td>—</td><td>—</td>';
1385
  }
1386
 
1387
+ const rangeCols = hasCal
1388
+ ? `<td style="color:var(--green)">${r.positive}</td>
1389
+ <td style="color:var(--red)">${r.negative}</td>`
1390
+ : `<td style="color:var(--text2)">${r.min_delta >= 0 ? '+' : ''}${r.min_delta} to +${r.max_delta}</td>
1391
+ <td style="color:var(--green)">${r.positive}</td>
1392
+ <td style="color:var(--red)">${r.negative}</td>`;
1393
 
1394
  // Summary row (clickable)
1395
  tbody.innerHTML += `
 
1433
 
1434
  tbody.innerHTML += `
1435
  <tr id="${rowId}" class="hidden">
1436
+ <td colspan="${hasCal ? 7 : 6}" style="padding:0;background:var(--bg);border-bottom:2px solid var(--border)">${detailHtml}</td>
1437
  </tr>
1438
  `;
1439
  });
 
1550
  return value.toFixed(4) + (unit ? ' ' + unit : '');
1551
  }
1552
 
1553
+ // Show/hide custom metric name input based on dropdown
1554
+ document.getElementById('calMetricName').addEventListener('change', function() {
1555
+ const custom = document.getElementById('calMetricNameCustom');
1556
+ if (this.value === '') {
1557
+ custom.classList.remove('hidden');
1558
+ custom.focus();
1559
+ } else {
1560
+ custom.classList.add('hidden');
1561
+ }
1562
+ applyCalibration();
1563
+ });
1564
+
1565
+ // Re-apply calibration when value or unit changes (debounced)
1566
+ let _calDebounce = null;
1567
+ function debouncedApplyCalibration() {
1568
+ clearTimeout(_calDebounce);
1569
+ _calDebounce = setTimeout(() => {
1570
+ const v = parseFloat(document.getElementById('calMetricValue').value);
1571
+ if (v > 0) applyCalibration();
1572
+ else if (currentCalibration) clearCalibration();
1573
+ }, 600);
1574
+ }
1575
+ document.getElementById('calMetricValue').addEventListener('input', debouncedApplyCalibration);
1576
+ document.getElementById('calMetricUnit').addEventListener('input', debouncedApplyCalibration);
1577
+
1578
+ function getMetricName() {
1579
+ const sel = document.getElementById('calMetricName').value;
1580
+ if (sel === '') return document.getElementById('calMetricNameCustom').value.trim() || 'metric';
1581
+ return sel;
1582
+ }
1583
+
1584
+ function getMeanScore() {
1585
+ const valid = (evalResultsData || []).filter(r => r && typeof r.score === 'number');
1586
+ if (!valid.length) return null;
1587
+ return valid.reduce((s, r) => s + r.score, 0) / valid.length;
1588
  }
1589
 
1590
  async function applyCalibration() {
1591
+ if (!sessionId) return;
1592
+ const metricName = getMetricName();
1593
  const metricValue = parseFloat(document.getElementById('calMetricValue').value);
1594
  const metricUnit = document.getElementById('calMetricUnit').value.trim() || '';
1595
 
1596
+ if (!metricValue || metricValue <= 0) return;
1597
 
1598
+ const meanScore = getMeanScore();
1599
+ if (!meanScore || meanScore <= 0) return;
 
 
1600
 
 
1601
  const anchors = [{mean_score: meanScore, metric_value: metricValue}];
 
 
 
 
 
1602
 
1603
  try {
1604
  const resp = await fetch(`/api/calibrate/${sessionId}`, {
 
1611
 
1612
  currentCalibration = data.calibration;
1613
  const status = document.getElementById('calStatus');
1614
+ status.innerHTML = `<span style="color:var(--green)">Anchored: score ${meanScore.toFixed(1)} = ${metricValue}${esc(metricUnit)} ${esc(metricName)}</span>`;
 
1615
  status.classList.remove('hidden');
1616
+
1617
+ // Re-render gradient table with calibration if it exists
1618
+ if (data.calibrated_gradient && lastGradientData) {
1619
+ renderGradientTable(lastGradientData.results, lastGradientData.changes, lastGradientData.ranked, data.calibrated_gradient);
1620
+ }
1621
  } catch (e) {
1622
  const status = document.getElementById('calStatus');
1623
+ status.innerHTML = `<span style="color:var(--red)">${esc(e.message)}</span>`;
1624
  status.classList.remove('hidden');
1625
  }
1626
  }
 
1630
  await fetch(`/api/calibrate/${sessionId}`, {method: 'DELETE', headers: llmHeaders()});
1631
  currentCalibration = null;
1632
  document.getElementById('calStatus').classList.add('hidden');
1633
+ document.getElementById('calMetricValue').value = '';
1634
+
1635
+ // Re-render gradient table without calibration
1636
+ if (lastGradientData) {
1637
+ renderGradientTable(lastGradientData.results, lastGradientData.changes, lastGradientData.ranked, null);
1638
+ }
1639
+ // Clear summary above gradient table
1640
+ const calSummaryEl = document.getElementById('calSummary');
1641
+ if (calSummaryEl) calSummaryEl.classList.add('hidden');
1642
  }
1643
 
1644
  // ── Download report ──