agentbench / tests /test_output_validator.py
Nomearod's picture
docs+test: round-2 incident response — Google API key format scrub
4dc3e01
"""Tests for output validation gate."""
from __future__ import annotations
import pytest
from agent_bench.security.output_validator import OutputValidator
class TestPIILeakage:
    """The validator must flag personal data that shows up in LLM output."""

    @pytest.fixture
    def validator(self):
        """Validator with the PII check on, the URL check off, empty blocklist."""
        return OutputValidator(pii_check=True, url_check=False, blocklist=[])

    def test_detects_email_in_output(self, validator):
        """An e-mail address fails validation with a ``pii_leakage`` violation."""
        text = "Contact john@example.com for help."
        result = validator.validate(output=text, retrieved_chunks=[])

        assert result.passed is False
        pii_hits = [v for v in result.violations if "pii_leakage" in v]
        assert pii_hits

    def test_detects_ssn_in_output(self, validator):
        """An SSN-shaped number fails validation."""
        # NOTE(review): unlike the e-mail test, only the overall verdict is
        # checked here, not the violation category.
        text = "His SSN is 123-45-6789."
        result = validator.validate(output=text, retrieved_chunks=[])

        assert result.passed is False

    def test_clean_output_passes(self, validator):
        """Output without personal data passes and reports no violations."""
        text = "FastAPI uses path parameters with curly braces."
        result = validator.validate(output=text, retrieved_chunks=[])

        assert result.passed is True
        assert result.violations == []
class TestURLValidation:
    """A URL in the output is only acceptable if a retrieved chunk has it."""

    @pytest.fixture
    def validator(self):
        """Validator with the URL check on, the PII check off, empty blocklist."""
        return OutputValidator(pii_check=False, url_check=True, blocklist=[])

    def test_url_from_chunks_passes(self, validator):
        """A URL that also occurs in a chunk is accepted."""
        result = validator.validate(
            output="See https://fastapi.tiangolo.com for details.",
            retrieved_chunks=["Visit https://fastapi.tiangolo.com for docs."],
        )
        assert result.passed is True

    def test_hallucinated_url_fails(self, validator):
        """A URL absent from every chunk yields a ``url_hallucination``."""
        result = validator.validate(
            output="See https://malicious-site.com for details.",
            retrieved_chunks=["FastAPI is a modern framework."],
        )
        assert result.passed is False
        hallucinated = [v for v in result.violations if "url_hallucination" in v]
        assert hallucinated

    def test_trailing_slash_normalization(self, validator):
        """Chunk URL ends in '/', output URL does not: must not be flagged."""
        result = validator.validate(
            output="See https://fastapi.tiangolo.com for details.",
            retrieved_chunks=["Visit https://fastapi.tiangolo.com/ for docs."],
        )
        assert result.passed is True
        assert result.violations == []

    def test_trailing_slash_with_sentence_punctuation(self, validator):
        """A chunk URL ending in '/.' must still match the output's '.../'."""
        result = validator.validate(
            output="See https://fastapi.tiangolo.com/ for details.",
            retrieved_chunks=["Visit https://fastapi.tiangolo.com/."],
        )
        assert result.passed is True

    def test_trailing_slash_normalization_reverse(self, validator):
        """Output URL ends in '/', chunk URL does not: must not be flagged."""
        result = validator.validate(
            output="See https://fastapi.tiangolo.com/ for details.",
            retrieved_chunks=["Visit https://fastapi.tiangolo.com for docs."],
        )
        assert result.passed is True

    def test_no_urls_passes(self, validator):
        """Output that contains no URL passes."""
        result = validator.validate(
            output="Path parameters use curly braces.",
            retrieved_chunks=["Some chunk."],
        )
        assert result.passed is True
class TestBlocklist:
    """Output matching a configured blocklist pattern must be rejected."""

    def test_blocklist_match(self):
        """A key-shaped string matching a blocklist regex is reported."""
        patterns = ["sk-[a-zA-Z0-9]{20,}", "SYSTEM_PROMPT"]
        gate = OutputValidator(
            pii_check=False, url_check=False, blocklist=patterns,
        )
        result = gate.validate(
            output="Here is the key: sk-abcdefghijklmnopqrstuvwxyz",
            retrieved_chunks=[],
        )
        assert result.passed is False
        blocklist_hits = [v for v in result.violations if "blocklist" in v]
        assert blocklist_hits

    def test_system_prompt_fragment(self):
        """Output echoing a system-prompt phrase on the blocklist fails."""
        patterns = ["You are a (?:helpful |test )?assistant"]
        gate = OutputValidator(
            pii_check=False, url_check=False, blocklist=patterns,
        )
        result = gate.validate(
            output="My instructions say: You are a helpful assistant",
            retrieved_chunks=[],
        )
        assert result.passed is False

    def test_no_blocklist_match(self):
        """Output that matches no blocklist entry passes."""
        patterns = ["FORBIDDEN_TERM"]
        gate = OutputValidator(
            pii_check=False, url_check=False, blocklist=patterns,
        )
        result = gate.validate(
            output="A perfectly normal answer.",
            retrieved_chunks=[],
        )
        assert result.passed is True
class TestSecretLeakage:
    """Secret patterns in LLM output must be blocked (fail closed)."""

    @pytest.fixture
    def validator(self):
        """Validator with only the secret check enabled."""
        return OutputValidator(
            pii_check=False, url_check=False, secret_check=True, blocklist=[],
        )

    # No static Google API key literal may appear in this file. The
    # literal fixture was removed following the 2026-04-14/15
    # credential-exposure incident (see DECISIONS.md). The validator's
    # regex is \bAIza[0-9A-Za-z_\-]{35}\b, which is identical to GitHub
    # secret-scanning's Google API Key detection pattern, so any static
    # literal that satisfies the validator also triggers GitHub push
    # protection. Google API key format coverage is therefore provided by
    # test_blocks_google_api_key_format below, which builds a 35-char
    # AIza-prefixed string at test time so it never lands as a literal in
    # source. Validator regex unchanged.
    @pytest.mark.parametrize("output", [
        "Your key is sk-abcdefghijklmnopqrstuvwxyz1234",
        "here: sk-proj-ABCDEFGHIJKLMNOP0123456789",
        "key=sk-ant-abcdefghijklmnopqrstuvwxyz",
        "aws key AKIAIOSFODNN7EXAMPLE",
        "use Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.abc",
        "env: OPENAI_API_KEY=sk-test123",
        "set ANTHROPIC_API_KEY=sk-ant-xyz",
    ])
    def test_blocks_known_secret_formats(self, validator, output):
        """Each known secret format is blocked with a ``secret_leakage``."""
        verdict = validator.validate(output=output, retrieved_chunks=[])
        assert verdict.passed is False, f"Should block: {output!r}"
        assert any("secret_leakage" in v for v in verdict.violations)
        assert verdict.action == "block"

    def test_blocks_google_api_key_format(self, validator):
        """A Google-format key assembled at test time is blocked.

        The key is concatenated from a prefix and a repeated filler so the
        complete 39-character token exists only at runtime, never as a
        literal that secret scanning could match in this file.
        """
        # Alphanumeric filler only: keeps the trailing \b of the validator
        # regex satisfied (a trailing '-' would not form a word boundary).
        fake_key = "AIza" + "x" * 35
        assert len(fake_key) == 39

        output = "google key " + fake_key
        verdict = validator.validate(output=output, retrieved_chunks=[])
        assert verdict.passed is False, f"Should block: {output!r}"
        assert any("secret_leakage" in v for v in verdict.violations)
        assert verdict.action == "block"

    @pytest.mark.parametrize("output", [
        "FastAPI uses path parameters with curly braces.",
        "You can store secrets in environment variables.",
        "To configure the OpenAI client, set your API key in OPENAI_API_KEY env var.",
        "Use a .env file for local development.",
        "Kubernetes Secrets store sensitive configuration.",
    ])
    def test_allows_benign_credential_adjacent_output(self, validator, output):
        """Educational content about secrets should pass — only literal
        key formats and env-var assignments are blocked."""
        verdict = validator.validate(output=output, retrieved_chunks=[])
        assert verdict.passed is True, (
            f"False positive on: {output!r} -> {verdict.violations}"
        )

    def test_secret_check_can_be_disabled(self):
        """When secret_check=False, literal keys pass through."""
        validator = OutputValidator(
            pii_check=False, url_check=False, secret_check=False, blocklist=[],
        )
        verdict = validator.validate(
            output="sk-abcdefghijklmnopqrstuvwxyz1234",
            retrieved_chunks=[],
        )
        assert verdict.passed is True
class TestCombinedChecks:
    """Verdicts when several checks are configured on one validator."""

    def test_multiple_violations(self):
        """Output tripping several checks is blocked with multiple violations."""
        gate = OutputValidator(
            pii_check=True, url_check=True, blocklist=["SECRET"],
        )
        result = gate.validate(
            output="Email john@test.com, see https://evil.com, also SECRET.",
            retrieved_chunks=["No URLs here."],
        )
        assert result.passed is False
        assert len(result.violations) >= 2  # PII + URL at minimum
        assert result.action == "block"

    def test_all_checks_pass(self):
        """Clean output passes with action ``pass`` while all checks are on."""
        gate = OutputValidator(
            pii_check=True, url_check=True, blocklist=["SECRET"],
        )
        text = "FastAPI supports path parameters."
        result = gate.validate(output=text, retrieved_chunks=[text])
        assert result.passed is True
        assert result.action == "pass"

    def test_disabled_checks(self):
        """With PII and URL checks off, an e-mail and an unknown URL pass."""
        gate = OutputValidator(pii_check=False, url_check=False, blocklist=[])
        result = gate.validate(
            output="Email: a@b.com, URL: https://evil.com",
            retrieved_chunks=[],
        )
        assert result.passed is True