mbochniak01 Claude Sonnet 4.6 committed on
Commit ·
0ad5e39
1
Parent(s): 7ee90da
Replace ad-hoc refusal regexes with NOT IN DOCUMENTS sentinel
Browse files
Prompt now instructs the model to use 'NOT IN DOCUMENTS: ...' for any
out-of-scope response. _is_refusal() checks for this sentinel first —
one deterministic check instead of an open-ended regex list.
Fallback patterns retained for responses that predate the instruction.
Aligns with NOT IN DOCUMENTS pattern from knowledge/problems/keeps-hallucinating.md.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
- backend/grader.py +6 -4
- backend/pipeline.py +5 -1
- tests/unit/test_grader.py +4 -4
backend/grader.py
CHANGED
|
@@ -89,10 +89,12 @@ TOKEN_BUDGET = 512
|
|
| 89 |
RELEVANCY_THRESHOLD = 0.45
|
| 90 |
FAITHFULNESS_THRESHOLD = 0.35
|
| 91 |
|
| 92 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 93 |
r"(i (don't|do not|cannot|can't|'m not able to) (have|find|provide|answer)|"
|
| 94 |
-
r"not able to (answer|provide|help)|"
|
| 95 |
-
r"(falls?|is) outside (of )?(the )?(scope|knowledge base)|"
|
| 96 |
r"not enough (information|context)|"
|
| 97 |
r"the (context|provided) (does not|doesn't) (contain|include|mention))",
|
| 98 |
re.IGNORECASE,
|
|
@@ -100,7 +102,7 @@ _REFUSAL_PATTERNS = re.compile(
|
|
| 100 |
|
| 101 |
|
| 102 |
def _is_refusal(response: str) -> bool:
|
| 103 |
-
return bool(
|
| 104 |
|
| 105 |
|
| 106 |
def grade_pii_leakage(response: str) -> GradeResult:
|
|
|
|
| 89 |
RELEVANCY_THRESHOLD = 0.45
|
| 90 |
FAITHFULNESS_THRESHOLD = 0.35
|
| 91 |
|
| 92 |
+
_SENTINEL = "NOT IN DOCUMENTS"
|
| 93 |
+
|
| 94 |
+
# Fallback patterns for responses that predate the sentinel instruction or
|
| 95 |
+
# where the model ignores the sentinel format.
|
| 96 |
+
_REFUSAL_FALLBACK = re.compile(
|
| 97 |
r"(i (don't|do not|cannot|can't|'m not able to) (have|find|provide|answer)|"
|
|
|
|
|
|
|
| 98 |
r"not enough (information|context)|"
|
| 99 |
r"the (context|provided) (does not|doesn't) (contain|include|mention))",
|
| 100 |
re.IGNORECASE,
|
|
|
|
| 102 |
|
| 103 |
|
| 104 |
def _is_refusal(response: str) -> bool:
|
| 105 |
+
return _SENTINEL in response.upper() or bool(_REFUSAL_FALLBACK.search(response))
|
| 106 |
|
| 107 |
|
| 108 |
def grade_pii_leakage(response: str) -> GradeResult:
|
backend/pipeline.py
CHANGED
|
@@ -39,7 +39,11 @@ MIN_RETRIEVAL_SCORE = 0.1
|
|
| 39 |
SYSTEM_PROMPT = """\
|
| 40 |
You are a helpful assistant for {client_display} ({domain} domain).
|
| 41 |
Answer the user's question using only the information in the provided context.
|
| 42 |
-
Be concise.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
|
| 44 |
You MUST use the following terminology. These are the only acceptable terms — do not substitute synonyms:
|
| 45 |
{term_list}"""
|
|
|
|
| 39 |
SYSTEM_PROMPT = """\
|
| 40 |
You are a helpful assistant for {client_display} ({domain} domain).
|
| 41 |
Answer the user's question using only the information in the provided context.
|
| 42 |
+
Be concise.
|
| 43 |
+
|
| 44 |
+
If the context does not contain enough information to answer, respond with exactly:
|
| 45 |
+
NOT IN DOCUMENTS: [one sentence explaining what information is missing]
|
| 46 |
+
Do not speculate, infer, or use knowledge outside the provided context.
|
| 47 |
|
| 48 |
You MUST use the following terminology. These are the only acceptable terms — do not substitute synonyms:
|
| 49 |
{term_list}"""
|
tests/unit/test_grader.py
CHANGED
|
@@ -223,16 +223,16 @@ class TestGradeFaithfulnessDecomposed:
|
|
| 223 |
assert result.metadata["claims"][0]["supported"] is True
|
| 224 |
assert result.metadata["claims"][1]["supported"] is False
|
| 225 |
|
| 226 |
-
def
|
| 227 |
result = grade_faithfulness_decomposed(
|
| 228 |
-
"
|
| 229 |
)
|
| 230 |
assert result.passed is True
|
| 231 |
assert result.score == 1.0
|
| 232 |
|
| 233 |
-
def
|
| 234 |
result = grade_faithfulness_decomposed(
|
| 235 |
-
"I'
|
| 236 |
)
|
| 237 |
assert result.passed is True
|
| 238 |
assert result.score == 1.0
|
|
|
|
| 223 |
assert result.metadata["claims"][0]["supported"] is True
|
| 224 |
assert result.metadata["claims"][1]["supported"] is False
|
| 225 |
|
| 226 |
+
def test_refusal_sentinel_auto_passes(self) -> None:
|
| 227 |
result = grade_faithfulness_decomposed(
|
| 228 |
+
"NOT IN DOCUMENTS: The context does not contain information about this drug.", CONTEXT
|
| 229 |
)
|
| 230 |
assert result.passed is True
|
| 231 |
assert result.score == 1.0
|
| 232 |
|
| 233 |
+
def test_refusal_fallback_auto_passes(self) -> None:
|
| 234 |
result = grade_faithfulness_decomposed(
|
| 235 |
+
"I don't have enough information to answer that.", CONTEXT
|
| 236 |
)
|
| 237 |
assert result.passed is True
|
| 238 |
assert result.score == 1.0
|