File size: 1,155 Bytes
414dc55
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
"""Selecting which anchored lie is salient for the current question.

The model is handed every anchored lie, but we surface the most topically relevant
one so a small model reliably reaches for the right pre-authored claim.
"""

from __future__ import annotations

from ..projections.suspect_brief import LieBrief

# Common function and question words that _tokens() discards, so that only
# content-bearing words count toward the overlap between a question and a lie.
_STOPWORDS = frozenset(
    "the a an you your were was did do where when "
    "what who how why at in on to of and is are".split()
)


def _tokens(text: str) -> set[str]:
    """Reduce *text* to its set of lowercase alphanumeric words, minus stopwords.

    Every character that is not alphanumeric acts as a word separator, so
    punctuation never ends up inside a word. Words listed in ``_STOPWORDS``
    are removed from the result.
    """
    normalized = "".join(ch.lower() if ch.isalnum() else " " for ch in text)
    # split() with no separator collapses whitespace runs and never yields "".
    return set(normalized.split()) - _STOPWORDS


def most_relevant_lie(question: str, lies: tuple[LieBrief, ...]) -> LieBrief | None:
    """Pick the anchored lie that shares the most words with the question.

    The question and each lie's ``topic`` and ``claimed`` text are reduced to
    word sets by ``_tokens``; a lie's score is the number of question words
    found in either of those two fields.

    Returns ``None`` when the question yields no words, when ``lies`` is
    empty, or when no lie shares a word with the question. On equal scores
    the lie that appears earliest in ``lies`` wins.
    """
    question_words = _tokens(question)
    if not (question_words and lies):
        return None

    scored = [
        (
            len(question_words & (_tokens(candidate.topic) | _tokens(candidate.claimed))),
            candidate,
        )
        for candidate in lies
    ]
    # max() keeps the first of several equal maxima, so ties go to the earliest lie.
    top_score, top_lie = max(scored, key=lambda pair: pair[0], default=(0, None))
    return top_lie if top_score > 0 else None