f-id / src /id /engine /extractor.py
marcodsn's picture
Initial Gradio Space
0423b99
Raw
History Blame Contribute Delete
3.27 kB
"""Utterance -> structured claims (Section 7).
The extractor (cheap tier) turns a character utterance into structured
propositions. It is given engine-only ground truth so it can also stamp each
claim's ``engine_truth_value`` (true/false/unknown) — this powers confrontation
and the guard. The player never sees these values.
"""
from __future__ import annotations

import logging

from ..llm.client import LLMClient
from ..llm.prompts import PromptRegistry
from ..models import Claim
_logger = logging.getLogger(__name__)


class ClaimExtractor:
    """Turn character utterances into structured claims using the extractor tier.

    Both public methods render a prompt template, ask the LLM client for a
    JSON reply, and then defensively validate that reply. The model output is
    treated as untrusted: anything malformed is dropped or replaced by a
    default rather than raised to the caller.
    """

    # Closed vocabularies accepted for the two enumerated claim fields.
    _POLARITIES = frozenset({"affirm", "deny", "neutral"})
    _TRUTH_VALUES = frozenset({"true", "false", "unknown"})

    def __init__(self, client: LLMClient, prompts: PromptRegistry) -> None:
        """Store the LLM client and the prompt registry used by both methods."""
        self.client = client
        self.prompts = prompts

    @staticmethod
    def _text(value: object) -> str:
        """Return *value* as stripped text, mapping ``None`` to ``""``.

        A JSON ``null`` must not become the literal string ``"None"``.
        """
        return "" if value is None else str(value).strip()

    @staticmethod
    def _choice(value: object, allowed: frozenset[str], default: str) -> str:
        """Return *value* lowercased if it names a member of *allowed*, else *default*.

        Going through ``str`` makes the check case-insensitive and lets JSON
        booleans (``True`` -> ``"true"``) match the string vocabulary.
        """
        token = str(value).strip().lower()
        return token if token in allowed else default

    def extract(
        self,
        *,
        character: str,
        utterance: str,
        turn: int,
        truth_context: str,
    ) -> list[Claim]:
        """Extract the structured claims made in one utterance.

        Args:
            character: Speaker name; lowercased with spaces replaced by
                underscores to prefix each ``claim_id``.
            utterance: The text to analyse (passed to the prompt template).
            turn: Turn number, stored on each claim and embedded in its id.
            truth_context: Ground-truth text passed to the prompt template.

        Returns:
            One ``Claim`` per reply row that is a dict with a non-empty
            proposition. Returns ``[]`` if the LLM call raises or the reply
            is not a list (optionally wrapped as ``{"claims": [...]}``).
        """
        prompt = self.prompts.render(
            "extractor/claims.md.j2",
            character=character,
            utterance=utterance,
            truth_context=truth_context,
        )
        try:
            data, _ = self.client.complete_json(
                tier="extractor", task="claim_extract", user=prompt,
            )
        except Exception:
            # Best-effort: a failed extraction yields no claims, but leave a
            # trace so the failure is diagnosable.
            _logger.warning(
                "claim extraction failed; returning no claims", exc_info=True
            )
            return []

        # Accept either a bare list of rows or {"claims": [...]}.
        rows = data.get("claims", data) if isinstance(data, dict) else data
        if not isinstance(rows, list):
            return []

        id_prefix = f"{character.lower().replace(' ', '_')}_t{turn}"
        claims: list[Claim] = []
        # The index counts every row, including skipped ones, so ids stay
        # aligned with positions in the model's reply.
        for index, row in enumerate(rows):
            if not isinstance(row, dict):
                continue
            proposition = self._text(row.get("proposition"))
            if not proposition:
                continue
            truth = row.get(
                "engine_truth_value", row.get("truth_value", "unknown")
            )
            claims.append(
                Claim(
                    claim_id=f"{id_prefix}_{index}",
                    # Missing, null, or blank topics fall back to "general".
                    topic=self._text(row.get("topic")).lower() or "general",
                    proposition=proposition,
                    turn=turn,
                    polarity=self._choice(
                        row.get("polarity"), self._POLARITIES, "neutral"
                    ),
                    engine_truth_value=self._choice(
                        truth, self._TRUTH_VALUES, "unknown"
                    ),
                )
            )
        return claims

    def confirmed_testimony(
        self, *, question: str, reply: str, candidates: list[dict[str, str]]
    ) -> list[str]:
        """Of the candidate facts, which does this reply genuinely substantiate?

        Uses the cheap extractor tier for robust paraphrase-tolerant matching
        (names/times reworded). The engine still owns *whether* a clue is
        unlocked; this only judges whether the witness spoke to it.

        Args:
            question: The question that was asked (passed to the template).
            reply: The reply being judged (passed to the template).
            candidates: Candidate facts; each dict must have an ``"id"`` key.

        Returns:
            The ids listed under ``"confirmed"`` in the model reply that match
            a candidate id, in reply order with duplicates removed. Returns
            ``[]`` when there are no candidates, the LLM call raises, or the
            reply is not a dict holding a list under ``"confirmed"``.

        Raises:
            KeyError: If a candidate dict has no ``"id"`` key.
        """
        if not candidates:
            return []
        # Read ids before the LLM call so a malformed candidate fails fast.
        known_ids = [candidate["id"] for candidate in candidates]
        prompt = self.prompts.render(
            "extractor/testimony.md.j2",
            question=question, reply=reply, candidates=candidates,
        )
        try:
            data, _ = self.client.complete_json(
                tier="extractor", task="testimony_detect", user=prompt,
            )
        except Exception:
            _logger.warning(
                "testimony detection failed; confirming nothing", exc_info=True
            )
            return []

        reported = data.get("confirmed") if isinstance(data, dict) else None
        # A string would be iterated per character and None/ints are not
        # iterable at all, so only a real list is accepted.
        if not isinstance(reported, list):
            return []

        confirmed: list[str] = []
        for cid in reported:
            # List membership compares with ==, so unhashable junk from the
            # model (nested lists/dicts) is rejected instead of raising.
            if cid in known_ids and cid not in confirmed:
                confirmed.append(cid)
        return confirmed