Nomearod Claude Opus 4.6 (1M context) committed on
Commit
b9c2c17
·
1 Parent(s): feb0afe

fix: address 5 review issues in dashboard frontend

Browse files

1. XSS: replace all innerHTML with textContent/DOM construction
where server data is interpolated (retrieval items, refusal
display, iteration nodes, running-on label)
2. Wire showRetrievalRefusal: orchestrator now emits refused flag
and refusal_threshold in retrieval stage done event; frontend
detects and renders grounded refusal display
3. Provider toggle: now display-only, reflects server config from
meta event instead of pretending to switch providers
4. PII badge: pii_redactions_count threaded through orchestrator
_orchestrator_done -> route handler done event -> frontend
5. Cache HTML: index.html read once at first request, not on
every / hit

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

agent_bench/agents/orchestrator.py CHANGED
@@ -198,6 +198,7 @@ class Orchestrator:
198
  tools = self.registry.get_definitions()
199
  all_sources: list[str] = []
200
  all_source_chunks: list[str] = []
 
201
  total_cost = 0.0
202
  total_input_tokens = 0
203
  total_output_tokens = 0
@@ -260,17 +261,31 @@ class Orchestrator:
260
 
261
  if tc.name == "search_documents":
262
  pre_rerank = result.metadata.get("pre_rerank_count", 0)
 
263
 
264
  # --- Retrieval stage: done ---
265
- yield StreamEvent(type="stage", metadata={
266
- "stage": "retrieval", "status": "done", "iteration": iteration,
 
267
  "chunks_pre_rerank": pre_rerank,
268
- })
 
 
 
 
 
 
 
 
 
 
 
269
 
270
  # --- Reranking stage (already completed inside tool execution) ---
271
- if pre_rerank > 0:
272
  yield StreamEvent(type="stage", metadata={
273
- "stage": "reranking", "status": "done", "iteration": iteration,
 
274
  "chunks": result.metadata.get("chunks", []),
275
  })
276
 
@@ -280,6 +295,9 @@ class Orchestrator:
280
  all_source_chunks.extend(
281
  result.metadata["source_chunks"]
282
  )
 
 
 
283
 
284
  # Max iterations hit — force text answer without tools
285
  # (same pattern as run(): explicit call after loop)
@@ -320,6 +338,7 @@ class Orchestrator:
320
  "tokens_out": total_output_tokens,
321
  "iterations": iteration if iteration else 1,
322
  "source_chunks": all_source_chunks,
 
323
  },
324
  )
325
 
 
198
  tools = self.registry.get_definitions()
199
  all_sources: list[str] = []
200
  all_source_chunks: list[str] = []
201
+ total_pii_redactions = 0
202
  total_cost = 0.0
203
  total_input_tokens = 0
204
  total_output_tokens = 0
 
261
 
262
  if tc.name == "search_documents":
263
  pre_rerank = result.metadata.get("pre_rerank_count", 0)
264
+ refused = result.metadata.get("refused", False)
265
 
266
  # --- Retrieval stage: done ---
267
+ retrieval_done_meta: dict = {
268
+ "stage": "retrieval", "status": "done",
269
+ "iteration": iteration,
270
  "chunks_pre_rerank": pre_rerank,
271
+ }
272
+ if refused:
273
+ retrieval_done_meta["refused"] = True
274
+ retrieval_done_meta["refusal_threshold"] = (
275
+ result.metadata.get("refusal_threshold", 0)
276
+ )
277
+ retrieval_done_meta["chunks"] = (
278
+ result.metadata.get("chunks", [])
279
+ )
280
+ yield StreamEvent(
281
+ type="stage", metadata=retrieval_done_meta,
282
+ )
283
 
284
  # --- Reranking stage (already completed inside tool execution) ---
285
+ if pre_rerank > 0 and not refused:
286
  yield StreamEvent(type="stage", metadata={
287
+ "stage": "reranking", "status": "done",
288
+ "iteration": iteration,
289
  "chunks": result.metadata.get("chunks", []),
290
  })
291
 
 
295
  all_source_chunks.extend(
296
  result.metadata["source_chunks"]
297
  )
298
+ total_pii_redactions += result.metadata.get(
299
+ "pii_redactions_count", 0,
300
+ )
301
 
302
  # Max iterations hit — force text answer without tools
303
  # (same pattern as run(): explicit call after loop)
 
338
  "tokens_out": total_output_tokens,
339
  "iterations": iteration if iteration else 1,
340
  "source_chunks": all_source_chunks,
341
+ "pii_redactions_count": total_pii_redactions,
342
  },
343
  )
344
 
agent_bench/serving/routes.py CHANGED
@@ -21,15 +21,26 @@ from agent_bench.serving.schemas import (
21
  router = APIRouter()
22
 
23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  @router.get("/")
25
  async def root() -> Response:
26
  """Showcase landing page with live RAG dashboard."""
27
- from pathlib import Path
28
-
29
  from starlette.responses import HTMLResponse
30
 
31
- html_path = Path(__file__).parent / "static" / "index.html"
32
- return HTMLResponse(content=html_path.read_text())
33
 
34
 
35
  @router.post("/ask", response_model=AskResponse)
@@ -283,6 +294,9 @@ async def ask_stream(body: AskRequest, request: Request) -> StreamingResponse:
283
  "tokens_out": done_meta.get("tokens_out", 0),
284
  "cost": done_meta.get("estimated_cost_usd", 0.0),
285
  "iterations": done_meta.get("iterations", 1),
 
 
 
286
  }).to_sse()
287
 
288
  # Record metrics and persist session
 
21
  router = APIRouter()
22
 
23
 
24
+ _LANDING_HTML: str | None = None
25
+
26
+
27
+ def _get_landing_html() -> str:
28
+ """Read and cache index.html on first call."""
29
+ global _LANDING_HTML # noqa: PLW0603
30
+ if _LANDING_HTML is None:
31
+ from pathlib import Path
32
+
33
+ html_path = Path(__file__).parent / "static" / "index.html"
34
+ _LANDING_HTML = html_path.read_text()
35
+ return _LANDING_HTML
36
+
37
+
38
  @router.get("/")
39
  async def root() -> Response:
40
  """Showcase landing page with live RAG dashboard."""
 
 
41
  from starlette.responses import HTMLResponse
42
 
43
+ return HTMLResponse(content=_get_landing_html())
 
44
 
45
 
46
  @router.post("/ask", response_model=AskResponse)
 
294
  "tokens_out": done_meta.get("tokens_out", 0),
295
  "cost": done_meta.get("estimated_cost_usd", 0.0),
296
  "iterations": done_meta.get("iterations", 1),
297
+ "pii_redactions_count": done_meta.get(
298
+ "pii_redactions_count", 0,
299
+ ),
300
  }).to_sse()
301
 
302
  # Record metrics and persist session
agent_bench/serving/static/index.html CHANGED
@@ -246,9 +246,9 @@ code{background:var(--code-bg);padding:2px 6px;border-radius:3px;font-size:0.9em
246
 
247
  <!-- Right: Pipeline + Retrieval + Security -->
248
  <div class="right-panel">
249
- <div class="provider-toggle">
250
- <button class="active" data-provider="openai" onclick="setProvider('openai')">OpenAI</button>
251
- <button data-provider="anthropic" onclick="setProvider('anthropic')">Anthropic</button>
252
  <span class="disabled-provider" title="See benchmark report">Mistral-7B</span>
253
  </div>
254
 
@@ -369,11 +369,11 @@ const state = {
369
  maxIterationSeen: 1,
370
  };
371
 
372
- /* ── Provider toggle ─── */
373
- function setProvider(p) {
374
- state.provider = p;
375
  document.querySelectorAll('.provider-toggle button').forEach(b => {
376
- b.classList.toggle('active', b.dataset.provider === p);
377
  });
378
  }
379
 
@@ -489,7 +489,13 @@ function updateStage(stage, status, meta) {
489
  updateInjectionBadge(v);
490
  }
491
  if (stage === 'retrieval' && status === 'done') {
492
- detail.textContent = meta.chunks_pre_rerank ? `${meta.chunks_pre_rerank} candidates` : 'done';
 
 
 
 
 
 
493
  }
494
  if (stage === 'reranking' && status === 'done') {
495
  const chunks = meta.chunks || [];
@@ -522,7 +528,20 @@ function addIterationNodes(iteration) {
522
  row.className = 'stage-row';
523
  row.dataset.stage = s;
524
  row.dataset.iteration = iteration;
525
- row.innerHTML = `<div class="stage-dot"></div><div class="stage-connector"></div><div class="stage-info"><div class="stage-name">${s === 'llm' ? 'LLM Synthesis' : s.charAt(0).toUpperCase() + s.slice(1)}</div><div class="stage-detail" data-detail="${s}"></div></div>`;
 
 
 
 
 
 
 
 
 
 
 
 
 
526
  stages.insertBefore(row, outputRow);
527
  });
528
  }
@@ -585,7 +604,16 @@ function updateRetrievalResults(chunks, meta) {
585
  const pct = topScore > 0 ? Math.max(20, (c.score / topScore) * 95) : 20;
586
  const item = document.createElement('div');
587
  item.className = 'retrieval-item';
588
- item.innerHTML = `<div class="bar-bg" style="width:${pct}%"></div><span class="source">${c.source}</span><span class="score">${c.score.toFixed(3)}</span>`;
 
 
 
 
 
 
 
 
 
589
  item.addEventListener('click', () => {
590
  item.classList.toggle('expanded');
591
  });
@@ -605,12 +633,22 @@ function showRetrievalRefusal(meta) {
605
  badge.className = 'badge badge-refusal';
606
  const chunks = meta.chunks || [];
607
  const top = chunks[0] || {};
608
- list.innerHTML = `<div class="retrieval-refusal">
609
- <div class="threshold-detail">Top candidate: ${top.source || 'none'} &mdash; ${(top.score||0).toFixed(3)}</div>
610
- <div class="threshold-detail">Threshold: ${meta.refusal_threshold || '0.02'}</div>
611
- <div>Decision: refuse &mdash; no chunk clears threshold</div>
612
- <div style="margin-top:8px;font-size:0.8rem;font-style:italic">This is the mechanism that keeps citation accuracy at 1.00.</div>
613
- </div>`;
 
 
 
 
 
 
 
 
 
 
614
  }
615
 
616
  function showRetrievalBlocked() {
@@ -634,7 +672,6 @@ async function streamAnswer(question) {
634
  let assistantEl = null;
635
  let answerText = '';
636
  let wasBlocked = false;
637
- let piiCount = 0;
638
 
639
  try {
640
  const resp = await fetch('/ask/stream', {
@@ -677,8 +714,13 @@ async function streamAnswer(question) {
677
  switch (event.type) {
678
  case 'meta': {
679
  const m = event.metadata || {};
680
- document.getElementById('runningOn').innerHTML =
681
- `Running on: <strong>${m.provider || '?'}</strong> ${m.model || ''}`;
 
 
 
 
 
682
  break;
683
  }
684
  case 'stage': {
@@ -703,8 +745,7 @@ async function streamAnswer(question) {
703
  case 'done': {
704
  const m = event.metadata || {};
705
  showStats(m);
706
- // Update PII badge from metadata if available
707
- updatePiiBadge(piiCount);
708
  break;
709
  }
710
  }
 
246
 
247
  <!-- Right: Pipeline + Retrieval + Security -->
248
  <div class="right-panel">
249
+ <div class="provider-toggle" id="providerToggle">
250
+ <button class="active" data-provider="openai">OpenAI</button>
251
+ <button data-provider="anthropic">Anthropic</button>
252
  <span class="disabled-provider" title="See benchmark report">Mistral-7B</span>
253
  </div>
254
 
 
369
  maxIterationSeen: 1,
370
  };
371
 
372
+ /* ── Provider toggle (display-only, reflects server config) ─── */
373
+ function showActiveProvider(provider) {
374
+ const p = (provider || '').toLowerCase();
375
  document.querySelectorAll('.provider-toggle button').forEach(b => {
376
+ b.classList.toggle('active', p.includes(b.dataset.provider));
377
  });
378
  }
379
 
 
489
  updateInjectionBadge(v);
490
  }
491
  if (stage === 'retrieval' && status === 'done') {
492
+ if (meta.refused) {
493
+ detail.textContent = 'refused (below threshold)';
494
+ dot.className = 'stage-dot done';
495
+ showRetrievalRefusal(meta);
496
+ } else {
497
+ detail.textContent = meta.chunks_pre_rerank ? `${meta.chunks_pre_rerank} candidates` : 'done';
498
+ }
499
  }
500
  if (stage === 'reranking' && status === 'done') {
501
  const chunks = meta.chunks || [];
 
528
  row.className = 'stage-row';
529
  row.dataset.stage = s;
530
  row.dataset.iteration = iteration;
531
+ const dot = document.createElement('div');
532
+ dot.className = 'stage-dot';
533
+ const conn = document.createElement('div');
534
+ conn.className = 'stage-connector';
535
+ const info = document.createElement('div');
536
+ info.className = 'stage-info';
537
+ const name = document.createElement('div');
538
+ name.className = 'stage-name';
539
+ name.textContent = s === 'llm' ? 'LLM Synthesis' : s.charAt(0).toUpperCase() + s.slice(1);
540
+ const detail = document.createElement('div');
541
+ detail.className = 'stage-detail';
542
+ detail.dataset.detail = s;
543
+ info.append(name, detail);
544
+ row.append(dot, conn, info);
545
  stages.insertBefore(row, outputRow);
546
  });
547
  }
 
604
  const pct = topScore > 0 ? Math.max(20, (c.score / topScore) * 95) : 20;
605
  const item = document.createElement('div');
606
  item.className = 'retrieval-item';
607
+ const bar = document.createElement('div');
608
+ bar.className = 'bar-bg';
609
+ bar.style.width = pct + '%';
610
+ const src = document.createElement('span');
611
+ src.className = 'source';
612
+ src.textContent = c.source;
613
+ const sc = document.createElement('span');
614
+ sc.className = 'score';
615
+ sc.textContent = c.score.toFixed(3);
616
+ item.append(bar, src, sc);
617
  item.addEventListener('click', () => {
618
  item.classList.toggle('expanded');
619
  });
 
633
  badge.className = 'badge badge-refusal';
634
  const chunks = meta.chunks || [];
635
  const top = chunks[0] || {};
636
+ const container = document.createElement('div');
637
+ container.className = 'retrieval-refusal';
638
+ const d1 = document.createElement('div');
639
+ d1.className = 'threshold-detail';
640
+ d1.textContent = `Top candidate: ${top.source || 'none'} \u2014 ${(top.score||0).toFixed(3)}`;
641
+ const d2 = document.createElement('div');
642
+ d2.className = 'threshold-detail';
643
+ d2.textContent = `Threshold: ${meta.refusal_threshold || '0.02'}`;
644
+ const d3 = document.createElement('div');
645
+ d3.textContent = 'Decision: refuse \u2014 no chunk clears threshold';
646
+ const d4 = document.createElement('div');
647
+ d4.style.cssText = 'margin-top:8px;font-size:0.8rem;font-style:italic';
648
+ d4.textContent = 'This is the mechanism that keeps citation accuracy at 1.00.';
649
+ container.append(d1, d2, d3, d4);
650
+ list.innerHTML = '';
651
+ list.appendChild(container);
652
  }
653
 
654
  function showRetrievalBlocked() {
 
672
  let assistantEl = null;
673
  let answerText = '';
674
  let wasBlocked = false;
 
675
 
676
  try {
677
  const resp = await fetch('/ask/stream', {
 
714
  switch (event.type) {
715
  case 'meta': {
716
  const m = event.metadata || {};
717
+ const ro = document.getElementById('runningOn');
718
+ ro.textContent = '';
719
+ ro.append('Running on: ');
720
+ const strong = document.createElement('strong');
721
+ strong.textContent = m.provider || '?';
722
+ ro.append(strong, ' ' + (m.model || ''));
723
+ showActiveProvider(m.provider);
724
  break;
725
  }
726
  case 'stage': {
 
745
  case 'done': {
746
  const m = event.metadata || {};
747
  showStats(m);
748
+ updatePiiBadge(m.pii_redactions_count || 0);
 
749
  break;
750
  }
751
  }