mbochniak01 Claude Sonnet 4.6 committed on
Commit ·
7ee90da
1
Parent(s): e6d6240
Fix refusal detector missing 'not able to' and 'outside scope' phrases
Browse files
Responses like "I'm not able to answer ... falls outside the knowledge base"
were scoring faithfulness 0/1 instead of auto-passing. Added two new branches
to _REFUSAL_PATTERNS to cover these phrasings.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
- backend/grader.py +3 -1
- tests/unit/test_grader.py +7 -0
backend/grader.py
CHANGED
|
@@ -90,7 +90,9 @@ RELEVANCY_THRESHOLD = 0.45
|
|
| 90 |
FAITHFULNESS_THRESHOLD = 0.35
|
| 91 |
|
| 92 |
_REFUSAL_PATTERNS = re.compile(
|
| 93 |
-
r"(i (don't|do not|cannot|can't) (have|find|provide|answer)|"
|
|
|
|
|
|
|
| 94 |
r"not enough (information|context)|"
|
| 95 |
r"the (context|provided) (does not|doesn't) (contain|include|mention))",
|
| 96 |
re.IGNORECASE,
|
|
|
|
| 90 |
FAITHFULNESS_THRESHOLD = 0.35
|
| 91 |
|
| 92 |
_REFUSAL_PATTERNS = re.compile(
|
| 93 |
+
r"(i (don't|do not|cannot|can't|'m not able to) (have|find|provide|answer)|"
|
| 94 |
+
r"not able to (answer|provide|help)|"
|
| 95 |
+
r"(falls?|is) outside (of )?(the )?(scope|knowledge base)|"
|
| 96 |
r"not enough (information|context)|"
|
| 97 |
r"the (context|provided) (does not|doesn't) (contain|include|mention))",
|
| 98 |
re.IGNORECASE,
|
tests/unit/test_grader.py
CHANGED
|
@@ -230,6 +230,13 @@ class TestGradeFaithfulnessDecomposed:
|
|
| 230 |
assert result.passed is True
|
| 231 |
assert result.score == 1.0
|
| 232 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 233 |
def test_empty_context_fails(self) -> None:
|
| 234 |
with patch("grader.get_nli_model"):
|
| 235 |
result = grade_faithfulness_decomposed("The product costs five dollars.", "")
|
|
|
|
| 230 |
assert result.passed is True
|
| 231 |
assert result.score == 1.0
|
| 232 |
|
| 233 |
+
def test_refusal_not_able_to_auto_passes(self) -> None:
|
| 234 |
+
result = grade_faithfulness_decomposed(
|
| 235 |
+
"I'm not able to answer that as it falls outside the knowledge base.", CONTEXT
|
| 236 |
+
)
|
| 237 |
+
assert result.passed is True
|
| 238 |
+
assert result.score == 1.0
|
| 239 |
+
|
| 240 |
def test_empty_context_fails(self) -> None:
|
| 241 |
with patch("grader.get_nli_model"):
|
| 242 |
result = grade_faithfulness_decomposed("The product costs five dollars.", "")
|