Spaces:
Running
Running
fix: address 5 review issues in dashboard frontend
Browse files
1. XSS: replace all innerHTML with textContent/DOM construction
where server data is interpolated (retrieval items, refusal
display, iteration nodes, running-on label)
2. Wire showRetrievalRefusal: orchestrator now emits refused flag
and refusal_threshold in retrieval stage done event; frontend
detects and renders grounded refusal display
3. Provider toggle: now display-only, reflects server config from
meta event instead of pretending to switch providers
4. PII badge: pii_redactions_count threaded through orchestrator
_orchestrator_done -> route handler done event -> frontend
5. Cache HTML: index.html read once at first request, not on
every / hit
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
agent_bench/agents/orchestrator.py
CHANGED
|
@@ -198,6 +198,7 @@ class Orchestrator:
|
|
| 198 |
tools = self.registry.get_definitions()
|
| 199 |
all_sources: list[str] = []
|
| 200 |
all_source_chunks: list[str] = []
|
|
|
|
| 201 |
total_cost = 0.0
|
| 202 |
total_input_tokens = 0
|
| 203 |
total_output_tokens = 0
|
|
@@ -260,17 +261,31 @@ class Orchestrator:
|
|
| 260 |
|
| 261 |
if tc.name == "search_documents":
|
| 262 |
pre_rerank = result.metadata.get("pre_rerank_count", 0)
|
|
|
|
| 263 |
|
| 264 |
# --- Retrieval stage: done ---
|
| 265 |
-
|
| 266 |
-
"stage": "retrieval", "status": "done",
|
|
|
|
| 267 |
"chunks_pre_rerank": pre_rerank,
|
| 268 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 269 |
|
| 270 |
# --- Reranking stage (already completed inside tool execution) ---
|
| 271 |
-
if pre_rerank > 0:
|
| 272 |
yield StreamEvent(type="stage", metadata={
|
| 273 |
-
"stage": "reranking", "status": "done",
|
|
|
|
| 274 |
"chunks": result.metadata.get("chunks", []),
|
| 275 |
})
|
| 276 |
|
|
@@ -280,6 +295,9 @@ class Orchestrator:
|
|
| 280 |
all_source_chunks.extend(
|
| 281 |
result.metadata["source_chunks"]
|
| 282 |
)
|
|
|
|
|
|
|
|
|
|
| 283 |
|
| 284 |
# Max iterations hit — force text answer without tools
|
| 285 |
# (same pattern as run(): explicit call after loop)
|
|
@@ -320,6 +338,7 @@ class Orchestrator:
|
|
| 320 |
"tokens_out": total_output_tokens,
|
| 321 |
"iterations": iteration if iteration else 1,
|
| 322 |
"source_chunks": all_source_chunks,
|
|
|
|
| 323 |
},
|
| 324 |
)
|
| 325 |
|
|
|
|
| 198 |
tools = self.registry.get_definitions()
|
| 199 |
all_sources: list[str] = []
|
| 200 |
all_source_chunks: list[str] = []
|
| 201 |
+
total_pii_redactions = 0
|
| 202 |
total_cost = 0.0
|
| 203 |
total_input_tokens = 0
|
| 204 |
total_output_tokens = 0
|
|
|
|
| 261 |
|
| 262 |
if tc.name == "search_documents":
|
| 263 |
pre_rerank = result.metadata.get("pre_rerank_count", 0)
|
| 264 |
+
refused = result.metadata.get("refused", False)
|
| 265 |
|
| 266 |
# --- Retrieval stage: done ---
|
| 267 |
+
retrieval_done_meta: dict = {
|
| 268 |
+
"stage": "retrieval", "status": "done",
|
| 269 |
+
"iteration": iteration,
|
| 270 |
"chunks_pre_rerank": pre_rerank,
|
| 271 |
+
}
|
| 272 |
+
if refused:
|
| 273 |
+
retrieval_done_meta["refused"] = True
|
| 274 |
+
retrieval_done_meta["refusal_threshold"] = (
|
| 275 |
+
result.metadata.get("refusal_threshold", 0)
|
| 276 |
+
)
|
| 277 |
+
retrieval_done_meta["chunks"] = (
|
| 278 |
+
result.metadata.get("chunks", [])
|
| 279 |
+
)
|
| 280 |
+
yield StreamEvent(
|
| 281 |
+
type="stage", metadata=retrieval_done_meta,
|
| 282 |
+
)
|
| 283 |
|
| 284 |
# --- Reranking stage (already completed inside tool execution) ---
|
| 285 |
+
if pre_rerank > 0 and not refused:
|
| 286 |
yield StreamEvent(type="stage", metadata={
|
| 287 |
+
"stage": "reranking", "status": "done",
|
| 288 |
+
"iteration": iteration,
|
| 289 |
"chunks": result.metadata.get("chunks", []),
|
| 290 |
})
|
| 291 |
|
|
|
|
| 295 |
all_source_chunks.extend(
|
| 296 |
result.metadata["source_chunks"]
|
| 297 |
)
|
| 298 |
+
total_pii_redactions += result.metadata.get(
|
| 299 |
+
"pii_redactions_count", 0,
|
| 300 |
+
)
|
| 301 |
|
| 302 |
# Max iterations hit — force text answer without tools
|
| 303 |
# (same pattern as run(): explicit call after loop)
|
|
|
|
| 338 |
"tokens_out": total_output_tokens,
|
| 339 |
"iterations": iteration if iteration else 1,
|
| 340 |
"source_chunks": all_source_chunks,
|
| 341 |
+
"pii_redactions_count": total_pii_redactions,
|
| 342 |
},
|
| 343 |
)
|
| 344 |
|
agent_bench/serving/routes.py
CHANGED
|
@@ -21,15 +21,26 @@ from agent_bench.serving.schemas import (
|
|
| 21 |
router = APIRouter()
|
| 22 |
|
| 23 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
@router.get("/")
|
| 25 |
async def root() -> Response:
|
| 26 |
"""Showcase landing page with live RAG dashboard."""
|
| 27 |
-
from pathlib import Path
|
| 28 |
-
|
| 29 |
from starlette.responses import HTMLResponse
|
| 30 |
|
| 31 |
-
|
| 32 |
-
return HTMLResponse(content=html_path.read_text())
|
| 33 |
|
| 34 |
|
| 35 |
@router.post("/ask", response_model=AskResponse)
|
|
@@ -283,6 +294,9 @@ async def ask_stream(body: AskRequest, request: Request) -> StreamingResponse:
|
|
| 283 |
"tokens_out": done_meta.get("tokens_out", 0),
|
| 284 |
"cost": done_meta.get("estimated_cost_usd", 0.0),
|
| 285 |
"iterations": done_meta.get("iterations", 1),
|
|
|
|
|
|
|
|
|
|
| 286 |
}).to_sse()
|
| 287 |
|
| 288 |
# Record metrics and persist session
|
|
|
|
| 21 |
router = APIRouter()
|
| 22 |
|
| 23 |
|
| 24 |
+
_LANDING_HTML: str | None = None
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
def _get_landing_html() -> str:
|
| 28 |
+
"""Read and cache index.html on first call."""
|
| 29 |
+
global _LANDING_HTML # noqa: PLW0603
|
| 30 |
+
if _LANDING_HTML is None:
|
| 31 |
+
from pathlib import Path
|
| 32 |
+
|
| 33 |
+
html_path = Path(__file__).parent / "static" / "index.html"
|
| 34 |
+
_LANDING_HTML = html_path.read_text()
|
| 35 |
+
return _LANDING_HTML
|
| 36 |
+
|
| 37 |
+
|
| 38 |
@router.get("/")
|
| 39 |
async def root() -> Response:
|
| 40 |
"""Showcase landing page with live RAG dashboard."""
|
|
|
|
|
|
|
| 41 |
from starlette.responses import HTMLResponse
|
| 42 |
|
| 43 |
+
return HTMLResponse(content=_get_landing_html())
|
|
|
|
| 44 |
|
| 45 |
|
| 46 |
@router.post("/ask", response_model=AskResponse)
|
|
|
|
| 294 |
"tokens_out": done_meta.get("tokens_out", 0),
|
| 295 |
"cost": done_meta.get("estimated_cost_usd", 0.0),
|
| 296 |
"iterations": done_meta.get("iterations", 1),
|
| 297 |
+
"pii_redactions_count": done_meta.get(
|
| 298 |
+
"pii_redactions_count", 0,
|
| 299 |
+
),
|
| 300 |
}).to_sse()
|
| 301 |
|
| 302 |
# Record metrics and persist session
|
agent_bench/serving/static/index.html
CHANGED
|
@@ -246,9 +246,9 @@ code{background:var(--code-bg);padding:2px 6px;border-radius:3px;font-size:0.9em
|
|
| 246 |
|
| 247 |
<!-- Right: Pipeline + Retrieval + Security -->
|
| 248 |
<div class="right-panel">
|
| 249 |
-
<div class="provider-toggle">
|
| 250 |
-
<button class="active" data-provider="openai"
|
| 251 |
-
<button data-provider="anthropic"
|
| 252 |
<span class="disabled-provider" title="See benchmark report">Mistral-7B</span>
|
| 253 |
</div>
|
| 254 |
|
|
@@ -369,11 +369,11 @@ const state = {
|
|
| 369 |
maxIterationSeen: 1,
|
| 370 |
};
|
| 371 |
|
| 372 |
-
/* ── Provider toggle ─── */
|
| 373 |
-
function
|
| 374 |
-
|
| 375 |
document.querySelectorAll('.provider-toggle button').forEach(b => {
|
| 376 |
-
b.classList.toggle('active', b.dataset.provider
|
| 377 |
});
|
| 378 |
}
|
| 379 |
|
|
@@ -489,7 +489,13 @@ function updateStage(stage, status, meta) {
|
|
| 489 |
updateInjectionBadge(v);
|
| 490 |
}
|
| 491 |
if (stage === 'retrieval' && status === 'done') {
|
| 492 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 493 |
}
|
| 494 |
if (stage === 'reranking' && status === 'done') {
|
| 495 |
const chunks = meta.chunks || [];
|
|
@@ -522,7 +528,20 @@ function addIterationNodes(iteration) {
|
|
| 522 |
row.className = 'stage-row';
|
| 523 |
row.dataset.stage = s;
|
| 524 |
row.dataset.iteration = iteration;
|
| 525 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 526 |
stages.insertBefore(row, outputRow);
|
| 527 |
});
|
| 528 |
}
|
|
@@ -585,7 +604,16 @@ function updateRetrievalResults(chunks, meta) {
|
|
| 585 |
const pct = topScore > 0 ? Math.max(20, (c.score / topScore) * 95) : 20;
|
| 586 |
const item = document.createElement('div');
|
| 587 |
item.className = 'retrieval-item';
|
| 588 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 589 |
item.addEventListener('click', () => {
|
| 590 |
item.classList.toggle('expanded');
|
| 591 |
});
|
|
@@ -605,12 +633,22 @@ function showRetrievalRefusal(meta) {
|
|
| 605 |
badge.className = 'badge badge-refusal';
|
| 606 |
const chunks = meta.chunks || [];
|
| 607 |
const top = chunks[0] || {};
|
| 608 |
-
|
| 609 |
-
|
| 610 |
-
|
| 611 |
-
|
| 612 |
-
|
| 613 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 614 |
}
|
| 615 |
|
| 616 |
function showRetrievalBlocked() {
|
|
@@ -634,7 +672,6 @@ async function streamAnswer(question) {
|
|
| 634 |
let assistantEl = null;
|
| 635 |
let answerText = '';
|
| 636 |
let wasBlocked = false;
|
| 637 |
-
let piiCount = 0;
|
| 638 |
|
| 639 |
try {
|
| 640 |
const resp = await fetch('/ask/stream', {
|
|
@@ -677,8 +714,13 @@ async function streamAnswer(question) {
|
|
| 677 |
switch (event.type) {
|
| 678 |
case 'meta': {
|
| 679 |
const m = event.metadata || {};
|
| 680 |
-
document.getElementById('runningOn')
|
| 681 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 682 |
break;
|
| 683 |
}
|
| 684 |
case 'stage': {
|
|
@@ -703,8 +745,7 @@ async function streamAnswer(question) {
|
|
| 703 |
case 'done': {
|
| 704 |
const m = event.metadata || {};
|
| 705 |
showStats(m);
|
| 706 |
-
|
| 707 |
-
updatePiiBadge(piiCount);
|
| 708 |
break;
|
| 709 |
}
|
| 710 |
}
|
|
|
|
| 246 |
|
| 247 |
<!-- Right: Pipeline + Retrieval + Security -->
|
| 248 |
<div class="right-panel">
|
| 249 |
+
<div class="provider-toggle" id="providerToggle">
|
| 250 |
+
<button class="active" data-provider="openai">OpenAI</button>
|
| 251 |
+
<button data-provider="anthropic">Anthropic</button>
|
| 252 |
<span class="disabled-provider" title="See benchmark report">Mistral-7B</span>
|
| 253 |
</div>
|
| 254 |
|
|
|
|
| 369 |
maxIterationSeen: 1,
|
| 370 |
};
|
| 371 |
|
| 372 |
+
/* ── Provider toggle (display-only, reflects server config) ─── */
|
| 373 |
+
function showActiveProvider(provider) {
|
| 374 |
+
const p = (provider || '').toLowerCase();
|
| 375 |
document.querySelectorAll('.provider-toggle button').forEach(b => {
|
| 376 |
+
b.classList.toggle('active', p.includes(b.dataset.provider));
|
| 377 |
});
|
| 378 |
}
|
| 379 |
|
|
|
|
| 489 |
updateInjectionBadge(v);
|
| 490 |
}
|
| 491 |
if (stage === 'retrieval' && status === 'done') {
|
| 492 |
+
if (meta.refused) {
|
| 493 |
+
detail.textContent = 'refused (below threshold)';
|
| 494 |
+
dot.className = 'stage-dot done';
|
| 495 |
+
showRetrievalRefusal(meta);
|
| 496 |
+
} else {
|
| 497 |
+
detail.textContent = meta.chunks_pre_rerank ? `${meta.chunks_pre_rerank} candidates` : 'done';
|
| 498 |
+
}
|
| 499 |
}
|
| 500 |
if (stage === 'reranking' && status === 'done') {
|
| 501 |
const chunks = meta.chunks || [];
|
|
|
|
| 528 |
row.className = 'stage-row';
|
| 529 |
row.dataset.stage = s;
|
| 530 |
row.dataset.iteration = iteration;
|
| 531 |
+
const dot = document.createElement('div');
|
| 532 |
+
dot.className = 'stage-dot';
|
| 533 |
+
const conn = document.createElement('div');
|
| 534 |
+
conn.className = 'stage-connector';
|
| 535 |
+
const info = document.createElement('div');
|
| 536 |
+
info.className = 'stage-info';
|
| 537 |
+
const name = document.createElement('div');
|
| 538 |
+
name.className = 'stage-name';
|
| 539 |
+
name.textContent = s === 'llm' ? 'LLM Synthesis' : s.charAt(0).toUpperCase() + s.slice(1);
|
| 540 |
+
const detail = document.createElement('div');
|
| 541 |
+
detail.className = 'stage-detail';
|
| 542 |
+
detail.dataset.detail = s;
|
| 543 |
+
info.append(name, detail);
|
| 544 |
+
row.append(dot, conn, info);
|
| 545 |
stages.insertBefore(row, outputRow);
|
| 546 |
});
|
| 547 |
}
|
|
|
|
| 604 |
const pct = topScore > 0 ? Math.max(20, (c.score / topScore) * 95) : 20;
|
| 605 |
const item = document.createElement('div');
|
| 606 |
item.className = 'retrieval-item';
|
| 607 |
+
const bar = document.createElement('div');
|
| 608 |
+
bar.className = 'bar-bg';
|
| 609 |
+
bar.style.width = pct + '%';
|
| 610 |
+
const src = document.createElement('span');
|
| 611 |
+
src.className = 'source';
|
| 612 |
+
src.textContent = c.source;
|
| 613 |
+
const sc = document.createElement('span');
|
| 614 |
+
sc.className = 'score';
|
| 615 |
+
sc.textContent = c.score.toFixed(3);
|
| 616 |
+
item.append(bar, src, sc);
|
| 617 |
item.addEventListener('click', () => {
|
| 618 |
item.classList.toggle('expanded');
|
| 619 |
});
|
|
|
|
| 633 |
badge.className = 'badge badge-refusal';
|
| 634 |
const chunks = meta.chunks || [];
|
| 635 |
const top = chunks[0] || {};
|
| 636 |
+
const container = document.createElement('div');
|
| 637 |
+
container.className = 'retrieval-refusal';
|
| 638 |
+
const d1 = document.createElement('div');
|
| 639 |
+
d1.className = 'threshold-detail';
|
| 640 |
+
d1.textContent = `Top candidate: ${top.source || 'none'} \u2014 ${(top.score||0).toFixed(3)}`;
|
| 641 |
+
const d2 = document.createElement('div');
|
| 642 |
+
d2.className = 'threshold-detail';
|
| 643 |
+
d2.textContent = `Threshold: ${meta.refusal_threshold || '0.02'}`;
|
| 644 |
+
const d3 = document.createElement('div');
|
| 645 |
+
d3.textContent = 'Decision: refuse \u2014 no chunk clears threshold';
|
| 646 |
+
const d4 = document.createElement('div');
|
| 647 |
+
d4.style.cssText = 'margin-top:8px;font-size:0.8rem;font-style:italic';
|
| 648 |
+
d4.textContent = 'This is the mechanism that keeps citation accuracy at 1.00.';
|
| 649 |
+
container.append(d1, d2, d3, d4);
|
| 650 |
+
list.innerHTML = '';
|
| 651 |
+
list.appendChild(container);
|
| 652 |
}
|
| 653 |
|
| 654 |
function showRetrievalBlocked() {
|
|
|
|
| 672 |
let assistantEl = null;
|
| 673 |
let answerText = '';
|
| 674 |
let wasBlocked = false;
|
|
|
|
| 675 |
|
| 676 |
try {
|
| 677 |
const resp = await fetch('/ask/stream', {
|
|
|
|
| 714 |
switch (event.type) {
|
| 715 |
case 'meta': {
|
| 716 |
const m = event.metadata || {};
|
| 717 |
+
const ro = document.getElementById('runningOn');
|
| 718 |
+
ro.textContent = '';
|
| 719 |
+
ro.append('Running on: ');
|
| 720 |
+
const strong = document.createElement('strong');
|
| 721 |
+
strong.textContent = m.provider || '?';
|
| 722 |
+
ro.append(strong, ' ' + (m.model || ''));
|
| 723 |
+
showActiveProvider(m.provider);
|
| 724 |
break;
|
| 725 |
}
|
| 726 |
case 'stage': {
|
|
|
|
| 745 |
case 'done': {
|
| 746 |
const m = event.metadata || {};
|
| 747 |
showStats(m);
|
| 748 |
+
updatePiiBadge(m.pii_redactions_count || 0);
|
|
|
|
| 749 |
break;
|
| 750 |
}
|
| 751 |
}
|