import re
from typing import Dict, List, Tuple

# Compiled once at import time: tokenize() runs per question/answer/chunk,
# so hoisting skips the re-module cache lookup on every call.
_WORD_RE = re.compile(r"\w+")


def tokenize(text: str) -> List[str]:
    """Tokenize a string into lowercase words >3 chars.

    Args:
        text: Arbitrary input text.

    Returns:
        Lowercased word tokens (``\\w+`` runs) whose original length
        exceeds 3 characters, in order of appearance (duplicates kept).
    """
    # Length filter runs on the raw match; lowercasing happens after,
    # which cannot change the length for \w characters.
    return [w.lower() for w in _WORD_RE.findall(text) if len(w) > 3]


def compute_quality_scores(
    question: str,
    answer: str,
    sources: List[Dict],
) -> Tuple[float, float]:
    """Compute retrieval quality metrics (coverage & grounding).

    Coverage is the fraction of question tokens that appear anywhere in
    the retrieved chunks; grounding is the same fraction for answer
    tokens. Duplicate question/answer tokens are counted once per
    occurrence (intentional: repeated terms weigh more).

    Args:
        question: User's question text.
        answer: Model-generated answer text.
        sources: Retrieved documents/chunks, each with a 'content' field
            (missing 'content' is treated as empty text).

    Returns:
        (coverage, grounding) as floats in [0.0, 1.0]. Both are 0.0 when
        the chunks yield no tokens, or when the respective input yields
        no tokens.
    """
    all_chunk_text = " ".join(s.get("content", "") for s in sources)

    q_tokens = tokenize(question)
    a_tokens = tokenize(answer)
    # Set for O(1) membership tests inside the ratio computations below.
    c_tokens = set(tokenize(all_chunk_text))

    # No chunk vocabulary at all -> nothing can be covered or grounded.
    if not c_tokens:
        return 0.0, 0.0

    coverage = (
        sum(1 for t in q_tokens if t in c_tokens) / len(q_tokens)
        if q_tokens
        else 0.0
    )
    grounding = (
        sum(1 for t in a_tokens if t in c_tokens) / len(a_tokens)
        if a_tokens
        else 0.0
    )
    return coverage, grounding