cmd0160's picture
Updating package file structure
18ef2cd
import re
from typing import List, Dict, Tuple
def tokenize(text: str) -> List[str]:
    """Split text into lowercase word tokens longer than three characters.

    Words are the maximal runs of regex word characters found in ``text``.
    The length filter is applied to each word as it appears in the input,
    before lowercasing.

    Args:
        text: Input string to split.

    Returns:
        The kept words, lowercased, in their order of appearance
        (duplicates are preserved).
    """
    tokens: List[str] = []
    for word in re.findall(r"\w+", text):
        # Drop short words (3 characters or fewer).
        if len(word) <= 3:
            continue
        tokens.append(word.lower())
    return tokens
def compute_quality_scores(
    question: str,
    answer: str,
    sources: List[Dict],
) -> Tuple[float, float]:
    """Compute retrieval quality metrics (coverage & grounding).

    Coverage is the fraction of question tokens that also occur in the
    combined source text; grounding is the same fraction computed over the
    answer tokens. Tokens are produced by ``tokenize``. Repeated tokens in
    the question or answer are counted once per occurrence, while the
    source tokens are collapsed into a set.

    Args:
        question: User's question text.
        answer: Model-generated answer text.
        sources: Retrieved documents/chunks, each with a 'content' field.
            A missing or ``None`` 'content' is treated as empty text.

    Returns:
        (coverage, grounding) as floats in [0.0, 1.0]. Both are 0.0 when
        the sources yield no tokens; each score is 0.0 when its own text
        (question or answer) yields no tokens.
    """
    # `or ""` also covers a 'content' key that is present but None, which
    # would otherwise make str.join raise TypeError.
    all_chunk_text = " ".join(s.get("content") or "" for s in sources)

    c_tokens = set(tokenize(all_chunk_text))
    if not c_tokens:
        # Nothing retrieved to compare against.
        return 0.0, 0.0

    def _hit_ratio(tokens: List[str]) -> float:
        # Share of `tokens` found in the source token set; 0.0 for no tokens
        # so we never divide by zero.
        if not tokens:
            return 0.0
        return sum(1 for t in tokens if t in c_tokens) / len(tokens)

    coverage = _hit_ratio(tokenize(question))
    grounding = _hit_ratio(tokenize(answer))
    return coverage, grounding