Spaces:

build-small-hackathon
/

case0

Running

File size: 1,155 Bytes

414dc55

"""Selecting which anchored lie is salient for the current question.

The model is handed every anchored lie, but we surface the most topically relevant
one so a small model reliably reaches for the right pre-authored claim.
"""

from __future__ import annotations

from ..projections.suspect_brief import LieBrief

# Common function words that are filtered out of tokenised text in this module.
_STOPWORDS = frozenset((
    # articles
    "the", "a", "an",
    # pronouns
    "you", "your",
    # auxiliaries / copulas
    "were", "was", "did", "do", "is", "are",
    # interrogatives
    "where", "when", "what", "who", "how", "why",
    # prepositions / conjunctions
    "at", "in", "on", "to", "of", "and",
))


def _tokens(text: str) -> set[str]:
    """Return the distinct lowercase alphanumeric words in *text*, minus stopwords."""
    # Replace every non-alphanumeric character with a space so punctuation acts
    # as a word separator; characters that are kept are lowercased.
    normalised = "".join(ch.lower() if ch.isalnum() else " " for ch in text)
    # str.split() with no argument never yields empty strings, so the word set
    # only needs the stopwords removed.
    return set(normalised.split()) - _STOPWORDS


def most_relevant_lie(question: str, lies: tuple[LieBrief, ...]) -> LieBrief | None:
    """Pick the lie that shares the most tokens with *question*.

    Each lie is scored by the number of question tokens that also occur in the
    tokens of its ``topic`` or ``claimed`` text. The earliest lie with the
    highest score wins. ``None`` is returned when the question yields no
    tokens, when ``lies`` is empty, or when no lie shares a token with the
    question.
    """
    question_words = _tokens(question)
    if not (question_words and lies):
        return None

    scored = [
        (len(question_words & (_tokens(lie.topic) | _tokens(lie.claimed))), lie)
        for lie in lies
    ]
    # max() returns the first maximal element, so ties go to the earlier lie.
    top_score, winner = max(scored, key=lambda pair: pair[0], default=(0, None))
    # A zero overlap means nothing is topically relevant.
    return winner if top_score > 0 else None