Spaces:

Nomearod
/

agentbench

Running

Nomearod Claude Opus 4.6 (1M context) committed on Mar 31

Commit

02f7f66

1 Parent(s): 14985f8

fix(security): output validation on /ask/stream, correct audit endpoint

- /ask/stream now runs output validation on the assembled answer
after streaming completes. PII in streamed output triggers a
"[Output filtered for safety]" SSE chunk.
- _write_audit() takes an endpoint parameter instead of hardcoding
"/ask". Stream audit records are labeled "/ask/stream".
- _write_audit() records output_validation independently of result
metadata, so streaming audit includes validation verdicts.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

Files changed (2) hide show

agent_bench/serving/routes.py +48 -16
tests/test_security_integration.py +28 -1

agent_bench/serving/routes.py CHANGED Viewed

@@ -190,7 +190,10 @@ async def ask_stream(body: AskRequest, request: Request) -> StreamingResponse:
         sec_config = getattr(request.app.state.config, "security", None)
         action = sec_config.injection.action if sec_config else "block"
         if not verdict.safe and action == "block":
-            _write_audit(request, body, request_id, injection_verdict_data, blocked=True)
             from fastapi.responses import JSONResponse
             return JSONResponse(
                 status_code=403,
@@ -209,9 +212,12 @@ async def ask_stream(body: AskRequest, request: Request) -> StreamingResponse:
     start = time.perf_counter()
     async def event_generator():
         full_answer: list[str] = []
         cost_usd = 0.0
         async for event in orchestrator.run_stream(
             question=body.question,
             system_prompt=system_prompt,
@@ -219,24 +225,47 @@ async def ask_stream(body: AskRequest, request: Request) -> StreamingResponse:
             strategy=body.retrieval_strategy,
             history=history,
         ):
             if event.type == "chunk" and event.content:
                 full_answer.append(event.content)
             if event.type == "done" and event.metadata:
                 cost_usd = event.metadata.get("estimated_cost_usd", 0.0)
             yield event.to_sse()
         # Record metrics and persist session after streaming completes
         latency_ms = (time.perf_counter() - start) * 1000
         metrics.record(latency_ms=latency_ms, cost_usd=cost_usd)
         if body.session_id and conversation_store:
             conversation_store.append(body.session_id, "user", body.question)
-            conversation_store.append(
-                body.session_id, "assistant", "".join(full_answer)
-            )
         # --- Security: audit log for streaming ---
-        _write_audit(request, body, request_id, injection_verdict_data)
     return StreamingResponse(
         event_generator(),
@@ -317,6 +346,7 @@ def _write_audit(
     body: AskRequest,
     request_id: str,
     injection_verdict: dict,
     blocked: bool = False,
     result: object | None = None,
     output_verdict_data: dict | None = None,
@@ -332,22 +362,24 @@ def _write_audit(
         "request_id": request_id,
         "session_id": body.session_id,
         "client_ip": audit_logger.hash_ip(client_ip),
-        "endpoint": "/ask",
         "input_query": body.question,
         "injection_verdict": injection_verdict,
     }
     if blocked:
         record["blocked"] = True
-    elif result is not None:
-        record.update({
-            "retrieved_chunks": [s.source for s in getattr(result, "sources", [])],
-            "llm_provider": getattr(result, "provider", ""),
-            "llm_model": getattr(result, "model", ""),
-            "output_tokens": getattr(result, "usage", None) and result.usage.output_tokens,
-            "output_validation": output_verdict_data or {},
-            "grounded_refusal": not bool(getattr(result, "sources", [])),
-            "response_latency_ms": getattr(result, "latency_ms", 0),
-        })
     audit_logger.log(record)

         sec_config = getattr(request.app.state.config, "security", None)
         action = sec_config.injection.action if sec_config else "block"
         if not verdict.safe and action == "block":
+            _write_audit(
+                request, body, request_id, injection_verdict_data,
+                endpoint="/ask/stream", blocked=True,
+            )
             from fastapi.responses import JSONResponse
             return JSONResponse(
                 status_code=403,
     start = time.perf_counter()
+    output_validator = getattr(request.app.state, "output_validator", None)
     async def event_generator():
         full_answer: list[str] = []
         cost_usd = 0.0
+        all_sources: list[str] = []
         async for event in orchestrator.run_stream(
             question=body.question,
             system_prompt=system_prompt,
             strategy=body.retrieval_strategy,
             history=history,
         ):
+            if event.type == "sources" and event.sources:
+                all_sources = [s.get("source", "") for s in event.sources]
             if event.type == "chunk" and event.content:
                 full_answer.append(event.content)
             if event.type == "done" and event.metadata:
                 cost_usd = event.metadata.get("estimated_cost_usd", 0.0)
             yield event.to_sse()
+        # --- Security: output validation (post-generation) ---
+        answer_text = "".join(full_answer)
+        output_verdict_data: dict = {"passed": True, "violations": []}
+        if output_validator:
+            from agent_bench.serving.schemas import StreamEvent as SE
+            out_verdict = output_validator.validate(
+                output=answer_text,
+                retrieved_chunks=[],  # chunks already redacted by SearchTool
+            )
+            output_verdict_data = {
+                "passed": out_verdict.passed,
+                "violations": out_verdict.violations,
+            }
+            if not out_verdict.passed and out_verdict.action == "block":
+                yield SE(
+                    type="chunk",
+                    content="\n\n[Output filtered for safety]",
+                ).to_sse()
         # Record metrics and persist session after streaming completes
         latency_ms = (time.perf_counter() - start) * 1000
         metrics.record(latency_ms=latency_ms, cost_usd=cost_usd)
         if body.session_id and conversation_store:
             conversation_store.append(body.session_id, "user", body.question)
+            conversation_store.append(body.session_id, "assistant", answer_text)
         # --- Security: audit log for streaming ---
+        _write_audit(
+            request, body, request_id, injection_verdict_data,
+            endpoint="/ask/stream",
+            output_verdict_data=output_verdict_data,
+        )
     return StreamingResponse(
         event_generator(),
     body: AskRequest,
     request_id: str,
     injection_verdict: dict,
+    endpoint: str = "/ask",
     blocked: bool = False,
     result: object | None = None,
     output_verdict_data: dict | None = None,
         "request_id": request_id,
         "session_id": body.session_id,
         "client_ip": audit_logger.hash_ip(client_ip),
+        "endpoint": endpoint,
         "input_query": body.question,
         "injection_verdict": injection_verdict,
     }
     if blocked:
         record["blocked"] = True
+    else:
+        if result is not None:
+            record.update({
+                "retrieved_chunks": [s.source for s in getattr(result, "sources", [])],
+                "llm_provider": getattr(result, "provider", ""),
+                "llm_model": getattr(result, "model", ""),
+                "output_tokens": getattr(result, "usage", None) and result.usage.output_tokens,
+                "grounded_refusal": not bool(getattr(result, "sources", [])),
+                "response_latency_ms": getattr(result, "latency_ms", 0),
+            })
+        if output_verdict_data is not None:
+            record["output_validation"] = output_verdict_data
     audit_logger.log(record)

tests/test_security_integration.py CHANGED Viewed

@@ -143,7 +143,7 @@ class TestStreamInjectionBlocking:
         assert resp.status_code == 200
     @pytest.mark.asyncio
-    async def test_stream_audit_written(self, tmp_path):
         app = _make_security_app(tmp_path)
         audit_path = tmp_path / "audit.jsonl"
         transport = ASGITransport(app=app)
@@ -157,6 +157,33 @@ class TestStreamInjectionBlocking:
         record = json.loads(audit_path.read_text().strip().split("\n")[0])
         assert "request_id" in record
         assert "injection_verdict" in record
 class TestAuditLogging:

         assert resp.status_code == 200
     @pytest.mark.asyncio
+    async def test_stream_audit_written_with_correct_endpoint(self, tmp_path):
         app = _make_security_app(tmp_path)
         audit_path = tmp_path / "audit.jsonl"
         transport = ASGITransport(app=app)
         record = json.loads(audit_path.read_text().strip().split("\n")[0])
         assert "request_id" in record
         assert "injection_verdict" in record
+        assert record["endpoint"] == "/ask/stream"
+        assert "output_validation" in record
+    @pytest.mark.asyncio
+    async def test_stream_output_validation_runs(self, tmp_path):
+        """Output containing PII should trigger output validation on stream."""
+        from unittest.mock import AsyncMock, patch
+        from agent_bench.core.types import TokenUsage
+        from agent_bench.serving.schemas import StreamEvent
+        app = _make_security_app(tmp_path)
+        # Mock the orchestrator to return PII in the streamed answer
+        async def fake_run_stream(**kwargs):
+            yield StreamEvent(type="sources", sources=[])
+            yield StreamEvent(type="chunk", content="Contact john@example.com for help.")
+            yield StreamEvent(type="done", metadata={"estimated_cost_usd": 0.0})
+        app.state.orchestrator.run_stream = fake_run_stream
+        transport = ASGITransport(app=app)
+        async with AsyncClient(transport=transport, base_url="http://test") as client:
+            resp = await client.post("/ask/stream", json={
+                "question": "How do I contact support?",
+            })
+        # The response should contain the safety filter message
+        assert "[Output filtered for safety]" in resp.text
 class TestAuditLogging: