below-threshold committed on
Commit
7b3dadd
·
1 Parent(s): 6e6032f

Fix faithfulness: score per chunk, take max entailment

Browse files
Files changed (1) hide show
  1. backend/grader.py +8 -3
backend/grader.py CHANGED
@@ -124,11 +124,16 @@ def grade_answer_relevancy(query: str, response: str) -> GradeResult:
124
 
125
 
126
  def grade_faithfulness(response: str, context: str) -> GradeResult:
127
- """Score NLI entailment: does context entail the response?"""
128
  model = get_nli_model()
 
 
 
 
129
  # NLI label order for cross-encoder/nli-deberta-v3-small: contradiction, entailment, neutral
130
- scores = model.predict([(context, response)], apply_softmax=True)[0]
131
- entailment_score = float(scores[1])
 
132
  passed = entailment_score >= FAITHFULNESS_THRESHOLD
133
  return GradeResult(
134
  metric="faithfulness",
 
124
 
125
 
126
  def grade_faithfulness(response: str, context: str) -> GradeResult:
127
+ """Score NLI entailment per context chunk; pass if any chunk entails the response."""
128
  model = get_nli_model()
129
+ # Split on the double-newline separator used by _build_context in pipeline.py
130
+ chunks = [c.strip() for c in context.split("\n\n") if c.strip()]
131
+ if not chunks:
132
+ return GradeResult(metric="faithfulness", passed=False, score=0.0, detail="No context")
133
  # NLI label order for cross-encoder/nli-deberta-v3-small: contradiction, entailment, neutral
134
+ pairs = [(chunk, response) for chunk in chunks]
135
+ all_scores = model.predict(pairs, apply_softmax=True)
136
+ entailment_score = float(max(s[1] for s in all_scores))
137
  passed = entailment_score >= FAITHFULNESS_THRESHOLD
138
  return GradeResult(
139
  metric="faithfulness",