""" backend/app/core/quality.py Shared quality-gate logic used by both the generate node (Groq responses) and the gemini_fast node (Gemini fast-path responses). Centralised here — rather than in generate.py — so the same hedge-detection and trust scoring logic runs on every answer regardless of which pipeline branch produced it. Duplicating the list of hedge phrases across two modules was the root cause of Bug A (Issue 2): Gemini fast-path answers were never checked for hedge phrases. """ from __future__ import annotations import re # Phrases that indicate the model hedged despite having been told not to. # Applies to both Groq (generate node) and Gemini (gemini_fast node) outputs. _HEDGE_PHRASES: tuple[str, ...] = ( "unfortunately", "limited information", "passages only", "passages do not", "passages don't", "you may need to", "you may want to", "i don't have", "i cannot provide", "not able to provide", "does not provide", "does not offer", "no detailed information", "not explicitly state", "not explicitly stated", "cannot be verified", ) _RAW_TAG_RE = re.compile(r"]*>") def is_low_trust(answer: str, chunks: list, complexity: str) -> bool: """ Return True when the answer is likely poor quality and should be reformatted or rerouted to the full RAG pipeline. Three signals, checked in order of cost (cheapest first): 1. A hedge phrase survived the system-prompt prohibition. 2. Chunks were retrieved but the model cited nothing (no [N] markers). Not applicable to Gemini fast-path answers (chunks is always empty there). 3. Answer is suspiciously short for a complex query (< 30 words). """ lowered = answer.lower() if any(phrase in lowered for phrase in _HEDGE_PHRASES): return True if _RAW_TAG_RE.search(answer): return True if chunks and not re.search(r"\[\d+\]", answer): return True if complexity == "complex" and len(answer.split()) < 30: return True return False