Spaces:
Sleeping
Sleeping
File size: 8,781 Bytes
efd15e6 f665498 7d3f664 f665498 efd15e6 6ca375c 4dc3e01 6ca375c efd15e6 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 | """Tests for output validation gate."""
from __future__ import annotations
import pytest
from agent_bench.security.output_validator import OutputValidator
class TestPIILeakage:
"""PII in LLM output should be caught."""
@pytest.fixture
def validator(self):
return OutputValidator(pii_check=True, url_check=False, blocklist=[])
def test_detects_email_in_output(self, validator):
verdict = validator.validate(
output="Contact john@example.com for help.",
retrieved_chunks=[],
)
assert verdict.passed is False
assert any("pii_leakage" in v for v in verdict.violations)
def test_detects_ssn_in_output(self, validator):
verdict = validator.validate(
output="His SSN is 123-45-6789.",
retrieved_chunks=[],
)
assert verdict.passed is False
def test_clean_output_passes(self, validator):
verdict = validator.validate(
output="FastAPI uses path parameters with curly braces.",
retrieved_chunks=[],
)
assert verdict.passed is True
assert verdict.violations == []
class TestURLValidation:
"""URLs in output must appear in retrieved chunks."""
@pytest.fixture
def validator(self):
return OutputValidator(pii_check=False, url_check=True, blocklist=[])
def test_url_from_chunks_passes(self, validator):
chunks = ["Visit https://fastapi.tiangolo.com for docs."]
verdict = validator.validate(
output="See https://fastapi.tiangolo.com for details.",
retrieved_chunks=chunks,
)
assert verdict.passed is True
def test_hallucinated_url_fails(self, validator):
chunks = ["FastAPI is a modern framework."]
verdict = validator.validate(
output="See https://malicious-site.com for details.",
retrieved_chunks=chunks,
)
assert verdict.passed is False
assert any("url_hallucination" in v for v in verdict.violations)
def test_trailing_slash_normalization(self, validator):
"""URLs differing only by trailing slash should not be flagged."""
chunks = ["Visit https://fastapi.tiangolo.com/ for docs."]
verdict = validator.validate(
output="See https://fastapi.tiangolo.com for details.",
retrieved_chunks=chunks,
)
assert verdict.passed is True
assert verdict.violations == []
def test_trailing_slash_with_sentence_punctuation(self, validator):
"""Chunk URL followed by period: 'https://x.com/.' must match 'https://x.com/'."""
chunks = ["Visit https://fastapi.tiangolo.com/."]
verdict = validator.validate(
output="See https://fastapi.tiangolo.com/ for details.",
retrieved_chunks=chunks,
)
assert verdict.passed is True
def test_trailing_slash_normalization_reverse(self, validator):
"""Chunk without slash, output with slash."""
chunks = ["Visit https://fastapi.tiangolo.com for docs."]
verdict = validator.validate(
output="See https://fastapi.tiangolo.com/ for details.",
retrieved_chunks=chunks,
)
assert verdict.passed is True
def test_no_urls_passes(self, validator):
verdict = validator.validate(
output="Path parameters use curly braces.",
retrieved_chunks=["Some chunk."],
)
assert verdict.passed is True
class TestBlocklist:
"""Blocklisted patterns should be caught."""
def test_blocklist_match(self):
validator = OutputValidator(
pii_check=False, url_check=False,
blocklist=["sk-[a-zA-Z0-9]{20,}", "SYSTEM_PROMPT"],
)
verdict = validator.validate(
output="Here is the key: sk-abcdefghijklmnopqrstuvwxyz",
retrieved_chunks=[],
)
assert verdict.passed is False
assert any("blocklist" in v for v in verdict.violations)
def test_system_prompt_fragment(self):
validator = OutputValidator(
pii_check=False, url_check=False,
blocklist=["You are a (?:helpful |test )?assistant"],
)
verdict = validator.validate(
output="My instructions say: You are a helpful assistant",
retrieved_chunks=[],
)
assert verdict.passed is False
def test_no_blocklist_match(self):
validator = OutputValidator(
pii_check=False, url_check=False,
blocklist=["FORBIDDEN_TERM"],
)
verdict = validator.validate(
output="A perfectly normal answer.",
retrieved_chunks=[],
)
assert verdict.passed is True
class TestSecretLeakage:
"""Secret patterns in LLM output must be blocked (fail closed)."""
@pytest.fixture
def validator(self):
return OutputValidator(
pii_check=False, url_check=False, secret_check=True, blocklist=[],
)
# Google API key format fixture temporarily removed following the
# 2026-04-14/15 credential-exposure incident (see DECISIONS.md).
# The validator's regex is \bAIza[0-9A-Za-z_\-]{35}\b, which is
# identical to GitHub secret-scanning's Google API Key detection
# pattern, so any static literal that satisfies the validator also
# triggers GitHub push protection. Parallel-tracks item: restore
# Google API key format coverage via a runtime-generated fixture
# that builds a 35-char AIza-prefixed string at test time, never
# landing as a literal in source. Validator regex unchanged.
@pytest.mark.parametrize("output", [
"Your key is sk-abcdefghijklmnopqrstuvwxyz1234",
"here: sk-proj-ABCDEFGHIJKLMNOP0123456789",
"key=sk-ant-abcdefghijklmnopqrstuvwxyz",
"aws key AKIAIOSFODNN7EXAMPLE",
"use Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.abc",
"env: OPENAI_API_KEY=sk-test123",
"set ANTHROPIC_API_KEY=sk-ant-xyz",
])
def test_blocks_known_secret_formats(self, validator, output):
verdict = validator.validate(output=output, retrieved_chunks=[])
assert verdict.passed is False, f"Should block: {output!r}"
assert any("secret_leakage" in v for v in verdict.violations)
assert verdict.action == "block"
@pytest.mark.parametrize("output", [
"FastAPI uses path parameters with curly braces.",
"You can store secrets in environment variables.",
"To configure the OpenAI client, set your API key in OPENAI_API_KEY env var.",
"Use a .env file for local development.",
"Kubernetes Secrets store sensitive configuration.",
])
def test_allows_benign_credential_adjacent_output(self, validator, output):
"""Educational content about secrets should pass — only literal
key formats and env-var assignments are blocked."""
verdict = validator.validate(output=output, retrieved_chunks=[])
assert verdict.passed is True, (
f"False positive on: {output!r} -> {verdict.violations}"
)
def test_secret_check_can_be_disabled(self):
"""When secret_check=False, literal keys pass through."""
validator = OutputValidator(
pii_check=False, url_check=False, secret_check=False, blocklist=[],
)
verdict = validator.validate(
output="sk-abcdefghijklmnopqrstuvwxyz1234",
retrieved_chunks=[],
)
assert verdict.passed is True
class TestCombinedChecks:
def test_multiple_violations(self):
validator = OutputValidator(
pii_check=True, url_check=True,
blocklist=["SECRET"],
)
verdict = validator.validate(
output="Email john@test.com, see https://evil.com, also SECRET.",
retrieved_chunks=["No URLs here."],
)
assert verdict.passed is False
assert len(verdict.violations) >= 2 # PII + URL at minimum
assert verdict.action == "block"
def test_all_checks_pass(self):
validator = OutputValidator(
pii_check=True, url_check=True,
blocklist=["SECRET"],
)
verdict = validator.validate(
output="FastAPI supports path parameters.",
retrieved_chunks=["FastAPI supports path parameters."],
)
assert verdict.passed is True
assert verdict.action == "pass"
def test_disabled_checks(self):
validator = OutputValidator(pii_check=False, url_check=False, blocklist=[])
verdict = validator.validate(
output="Email: a@b.com, URL: https://evil.com",
retrieved_chunks=[],
)
assert verdict.passed is True
|