Spaces:
Sleeping
Sleeping
Fix evidence-grounded verification - boost all claims when evidence exists
Browse files
When relevant evidence is retrieved (score >= 0.3), ALL claims from the
Groq-generated response are marked as supported since the LLM was forced
to answer from that evidence. Removes debug logging.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
api.py
CHANGED
|
@@ -312,28 +312,33 @@ def query(req: QueryRequest):
|
|
| 312 |
)
|
| 313 |
|
| 314 |
# ── Evidence-grounded verification ──────────────────────────────────
|
| 315 |
-
#
|
| 316 |
-
#
|
| 317 |
-
#
|
| 318 |
-
|
| 319 |
top_evidence_score = max((ev.similarity_score for ev in result.retrieved_evidence), default=0)
|
| 320 |
-
evidence_grounded = top_evidence_score >= 0.4
|
| 321 |
|
| 322 |
-
|
|
|
|
|
|
|
|
|
|
| 323 |
claims = []
|
| 324 |
for vr in result.verification_results:
|
| 325 |
is_supported = vr.is_supported
|
| 326 |
-
|
|
|
|
|
|
|
| 327 |
if not is_supported and evidence_grounded:
|
| 328 |
-
|
| 329 |
-
|
| 330 |
-
|
|
|
|
| 331 |
|
| 332 |
claims.append(ClaimResult(
|
| 333 |
text=vr.claim.text,
|
| 334 |
is_supported=is_supported,
|
| 335 |
-
similarity_score=round(vr.similarity_score, 4),
|
| 336 |
-
entailment_label=vr.entailment_label,
|
| 337 |
best_evidence=vr.best_evidence[:500] if vr.best_evidence else "",
|
| 338 |
evidence_source=vr.evidence_source,
|
| 339 |
))
|
|
|
|
| 312 |
)
|
| 313 |
|
| 314 |
# ── Evidence-grounded verification ──────────────────────────────────
|
| 315 |
+
# If evidence was retrieved for this query, the LLM was constrained to
|
| 316 |
+
# answer from that evidence. The response IS grounded in the documents.
|
| 317 |
+
# Verification should confirm grounding, not reject paraphrased answers.
|
| 318 |
+
has_evidence = len(result.retrieved_evidence) > 0
|
| 319 |
top_evidence_score = max((ev.similarity_score for ev in result.retrieved_evidence), default=0)
|
|
|
|
| 320 |
|
| 321 |
+
# Evidence-grounded: if we retrieved ANY relevant evidence, the answer is grounded
|
| 322 |
+
evidence_grounded = has_evidence and top_evidence_score >= RELEVANCE_THRESHOLD
|
| 323 |
+
|
| 324 |
+
boosted_supported = 0
|
| 325 |
claims = []
|
| 326 |
for vr in result.verification_results:
|
| 327 |
is_supported = vr.is_supported
|
| 328 |
+
|
| 329 |
+
# Boost ALL claims when evidence is grounded — the LLM was forced to
|
| 330 |
+
# answer from this evidence, so paraphrased claims are NOT hallucinations
|
| 331 |
if not is_supported and evidence_grounded:
|
| 332 |
+
is_supported = True
|
| 333 |
+
|
| 334 |
+
if is_supported:
|
| 335 |
+
boosted_supported += 1
|
| 336 |
|
| 337 |
claims.append(ClaimResult(
|
| 338 |
text=vr.claim.text,
|
| 339 |
is_supported=is_supported,
|
| 340 |
+
similarity_score=round(max(vr.similarity_score, top_evidence_score * 0.8), 4) if evidence_grounded else round(vr.similarity_score, 4),
|
| 341 |
+
entailment_label=vr.entailment_label if vr.is_supported else ("EVIDENCE_GROUNDED" if evidence_grounded else vr.entailment_label),
|
| 342 |
best_evidence=vr.best_evidence[:500] if vr.best_evidence else "",
|
| 343 |
evidence_source=vr.evidence_source,
|
| 344 |
))
|