mbochniak01 Claude Sonnet 4.6 committed on
Commit ·
29f3273
1
Parent(s): c79d967
Switch faithfulness to text_pair encoding, promote score logging to INFO
Browse files
text_pair passes both sequences to T5Tokenizer separately — it inserts
the </s> separator between them, matching T5's pre-training format.
Concatenated string skipped that separator, likely causing the model to
receive malformed input and score faithful responses at ~0.14.
INFO log shows (label, score) per chunk — visible in HF Spaces logs
for threshold calibration without a debug flag.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
- backend/grader.py +4 -5
backend/grader.py
CHANGED
|
@@ -163,11 +163,10 @@ def grade_faithfulness(response: str, context: str) -> GradeResult:
|
|
| 163 |
if not raw_chunks:
|
| 164 |
return GradeResult(metric="faithfulness", passed=False, score=0.0, detail="No context")
|
| 165 |
chunks = [_strip_chunk_title(c) for c in raw_chunks]
|
| 166 |
-
#
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
log.debug("Vectara raw results: %s", results)
|
| 171 |
scores = [
|
| 172 |
r["score"] if r["label"].lower().startswith("factually consistent") else 1.0 - r["score"]
|
| 173 |
for r in results
|
|
|
|
| 163 |
if not raw_chunks:
|
| 164 |
return GradeResult(metric="faithfulness", passed=False, score=0.0, detail="No context")
|
| 165 |
chunks = [_strip_chunk_title(c) for c in raw_chunks]
|
| 166 |
+
# text_pair encodes sequences with T5 </s> separator — correct for T5-based models.
|
| 167 |
+
pairs = [{"text": chunk, "text_pair": response} for chunk in chunks]
|
| 168 |
+
results = model(pairs)
|
| 169 |
+
log.info("Vectara raw: %s", [(r["label"], round(r["score"], 3)) for r in results])
|
|
|
|
| 170 |
scores = [
|
| 171 |
r["score"] if r["label"].lower().startswith("factually consistent") else 1.0 - r["score"]
|
| 172 |
for r in results
|