import re
from typing import List, Dict, Tuple


def tokenize(text: str) -> List[str]:
    """Tokenize a string into lowercase words >3 chars."""
    return [w.lower() for w in re.findall(r"\w+", text) if len(w) > 3]
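
# Illustrative example (not from the original module): tokenize lowercases word
# tokens and drops anything of 3 characters or fewer, e.g.
#   tokenize("The Quick brown fox!") -> ["quick", "brown"]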


def compute_quality_scores(
    question: str,
    answer: str,
    sources: List[Dict],
) -> Tuple[float, float]:
    """Compute retrieval quality metrics (coverage & grounding).

    Args:
        question: User's question text.
        answer: Model-generated answer text.
        sources: Retrieved documents/chunks, each with a 'content' field.

    Returns:
        (coverage, grounding) as floats in [0.0, 1.0].
    """
    # Pool all retrieved text so both metrics compare against the same token set.
    all_chunk_text = " ".join(s.get("content", "") for s in sources)
    q_tokens = tokenize(question)
    a_tokens = tokenize(answer)
    c_tokens = set(tokenize(all_chunk_text))

    # No usable retrieved text: neither metric can be supported.
    if not c_tokens:
        return 0.0, 0.0

    # Coverage: fraction of question tokens that appear in the retrieved chunks.
    coverage = (
        sum(1 for t in q_tokens if t in c_tokens) / len(q_tokens)
        if q_tokens
        else 0.0
    )
    # Grounding: fraction of answer tokens that appear in the retrieved chunks.
    grounding = (
        sum(1 for t in a_tokens if t in c_tokens) / len(a_tokens)
        if a_tokens
        else 0.0
    )
    return coverage, grounding
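

if __name__ == "__main__":
    # Minimal usage sketch with hypothetical sample data (not part of the
    # original module). Coverage reflects how much of the question is present
    # in the retrieved text; grounding reflects how much of the answer is.
    demo_sources = [
        {"content": "Paris is the capital and most populous city of France."},
        {"content": "France is a country in Western Europe."},
    ]
    cov, grd = compute_quality_scores(
        question="What is the capital of France?",
        answer="The capital of France is Paris.",
        sources=demo_sources,
    )
    print(f"coverage={cov:.2f}, grounding={grd:.2f}")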