case0 / src /case_zero /suspects /deception.py
HusseinEid's picture
Case Zero - initial public release (fully local: Qwen2.5-1.5B via llama.cpp + Supertonic, custom pixel-noir SPA via gradio.Server)
414dc55
raw
history blame
1.16 kB
"""Selecting which anchored lie is salient for the current question.
The model is handed every anchored lie, but we surface the most topically relevant
one so a small model reliably reaches for the right pre-authored claim.
"""
from __future__ import annotations
from ..projections.suspect_brief import LieBrief
# Function words dropped by the tokenizer before overlap is measured.
_STOPWORDS = frozenset(
    (
        # articles and pronouns
        "the", "a", "an", "you", "your",
        # auxiliary / linking verbs
        "were", "was", "did", "do", "is", "are",
        # question words
        "where", "when", "what", "who", "how", "why",
        # prepositions and conjunctions
        "at", "in", "on", "to", "of", "and",
    )
)
def _tokens(text: str) -> set[str]:
    """Break *text* into its set of lowercase, non-stopword words.

    Alphanumeric characters are lowercased; every other character is treated
    as a word separator. Words listed in ``_STOPWORDS`` are discarded.
    """
    normalized_chars = []
    for ch in text:
        normalized_chars.append(ch.lower() if ch.isalnum() else " ")
    # split() with no argument never yields empty strings, so no extra
    # emptiness filter is needed before removing the stopwords.
    words = "".join(normalized_chars).split()
    return set(words) - _STOPWORDS
def most_relevant_lie(question: str, lies: tuple[LieBrief, ...]) -> LieBrief | None:
    """Pick the anchored lie that shares the most words with the question.

    Each lie is scored by how many of the question's tokens also appear among
    the tokens of its ``topic`` and ``claimed`` text combined. The lie with
    the highest score is returned; when several lies tie, the earliest one in
    ``lies`` wins.

    Returns ``None`` when the question yields no tokens, when ``lies`` is
    empty, or when no lie shares a token with the question.
    """
    question_words = _tokens(question)
    if not (question_words and lies):
        return None

    def overlap(lie: LieBrief) -> int:
        lie_words = _tokens(lie.topic).union(_tokens(lie.claimed))
        return len(question_words.intersection(lie_words))

    scored = [(overlap(lie), lie) for lie in lies]
    # max() keeps the first maximal item, which preserves first-wins ties.
    top_score, top_lie = max(scored, key=lambda pair: pair[0], default=(0, None))
    return top_lie if top_score > 0 else None