Spaces:
Running
Running
File size: 1,155 Bytes
414dc55 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 | """Selecting which anchored lie is salient for the current question.
The model is handed every anchored lie, but we surface the most topically relevant
one so a small model reliably reaches for the right pre-authored claim.
"""
from __future__ import annotations
from ..projections.suspect_brief import LieBrief
# Common function/question words that _tokens discards, so they never count
# toward the overlap between a question and a lie.
_STOPWORDS = frozenset(
    (
        "the a an you your were was did do where when "
        "what who how why at in on to of and is are"
    ).split()
)
def _tokens(text: str) -> set[str]:
    """Return the distinct lowercase words in *text*, minus stopwords.

    Every non-alphanumeric character is treated as a word separator, so
    punctuation never ends up inside a token.
    """
    # Alphanumeric characters are lowercased; anything else becomes a space
    # so that the whitespace split below breaks words apart there.
    normalized = "".join(ch.lower() if ch.isalnum() else " " for ch in text)
    # split() with no arguments never yields empty strings, so removing the
    # stopwords is the only filtering required.
    return set(normalized.split()) - _STOPWORDS
def most_relevant_lie(question: str, lies: tuple[LieBrief, ...]) -> LieBrief | None:
    """Pick the anchored lie that shares the most words with the question.

    A lie's score is the number of question tokens that also appear among
    the tokens of its ``topic`` or ``claimed`` text.

    Returns:
        The highest-scoring lie; when several lies tie, the one that comes
        first in ``lies``. ``None`` if the question has no tokens left after
        stopword removal, if ``lies`` is empty, or if no lie shares a token
        with the question.
    """
    question_words = _tokens(question)
    if not (question_words and lies):
        return None

    scored = [
        (len(question_words & (_tokens(lie.topic) | _tokens(lie.claimed))), lie)
        for lie in lies
    ]
    # max() returns the first maximal item, so ties go to the earliest lie.
    top_score, top_lie = max(scored, key=lambda pair: pair[0])
    # A zero score means nothing overlapped; report that as "no relevant lie".
    return top_lie if top_score > 0 else None
|