Spaces:

build-small-hackathon
/

f-id

Sleeping

App Files Files Community

f-id / src /id /engine /extractor.py

marcodsn

Initial Gradio Space

0423b99 16 days ago

Raw

History Blame Contribute Delete

3.27 kB

	"""Utterance -> structured claims (Section 7).

	The extractor (cheap tier) turns a character utterance into structured
	propositions. It is given engine-only ground truth so it can also stamp each
	claim's ``engine_truth_value`` (true/false/unknown) — this powers confrontation
	and the guard. The player never sees these values.
	"""

	from __future__ import annotations

	import logging

	from ..llm.client import LLMClient
	from ..llm.prompts import PromptRegistry
	from ..models import Claim


	class ClaimExtractor:
	    """Cheap-tier LLM helper that turns character speech into structured data.

	    Both public methods are best-effort: a failing LLM call or a malformed
	    response yields an empty list instead of an exception, so callers never
	    need to guard against extractor errors.
	    """

	    # Closed vocabularies accepted from the model. Anything outside them is
	    # replaced by the neutral default ("neutral" / "unknown").
	    _POLARITIES = ("affirm", "deny", "neutral")
	    _TRUTH_VALUES = ("true", "false", "unknown")

	    def __init__(self, client: LLMClient, prompts: PromptRegistry) -> None:
	        """Store the LLM client and the prompt registry used by both methods."""
	        self.client = client
	        self.prompts = prompts

	    @staticmethod
	    def _clean_text(value: object) -> str:
	        """Return ``value`` as stripped text, mapping ``None`` to ``""``.

	        JSON ``null`` must not be stringified into the literal ``"None"``.
	        """
	        return "" if value is None else str(value).strip()

	    @staticmethod
	    def _label(value: object, allowed: tuple[str, ...], default: str) -> str:
	        """Coerce a model-supplied label into one of ``allowed``.

	        Matching ignores case and surrounding whitespace, and JSON booleans
	        are read as ``"true"`` / ``"false"``. Everything else maps to
	        ``default``.
	        """
	        if isinstance(value, bool):
	            value = "true" if value else "false"
	        if isinstance(value, str):
	            normalized = value.strip().lower()
	            if normalized in allowed:
	                return normalized
	        return default

	    def extract(
	        self,
	        *,
	        character: str,
	        utterance: str,
	        turn: int,
	        truth_context: str,
	    ) -> list[Claim]:
	        """Extract the structured claims made in one utterance.

	        Args:
	            character: Speaker name; passed to the prompt and used (lower-cased,
	                spaces replaced by underscores) as the claim-id prefix.
	            utterance: The text to analyse.
	            turn: Turn number, stored on each claim and embedded in its id.
	            truth_context: Ground-truth text handed to the prompt template so
	                the model can stamp each claim's ``engine_truth_value``.

	        Returns:
	            One ``Claim`` per response row that carries a non-empty
	            proposition. Returns ``[]`` when the LLM call fails or the
	            response is not a list of rows (optionally wrapped as
	            ``{"claims": [...]}``).
	        """
	        prompt = self.prompts.render(
	            "extractor/claims.md.j2",
	            character=character,
	            utterance=utterance,
	            truth_context=truth_context,
	        )
	        try:
	            data, _ = self.client.complete_json(
	                tier="extractor", task="claim_extract", user=prompt,
	            )
	        except Exception:
	            # Deliberate best-effort: degrade to "no claims", but leave a trace.
	            logging.getLogger(__name__).warning(
	                "claim extraction failed (character=%r, turn=%s)",
	                character, turn, exc_info=True,
	            )
	            return []

	        rows = data.get("claims", data) if isinstance(data, dict) else data
	        if not isinstance(rows, list):
	            return []

	        id_prefix = f"{character.lower().replace(' ', '_')}_t{turn}"
	        claims: list[Claim] = []
	        # The index counts every row, including skipped ones, so a claim id
	        # always reflects the row's position in the model response.
	        for i, row in enumerate(rows):
	            if not isinstance(row, dict):
	                continue
	            proposition = self._clean_text(row.get("proposition"))
	            if not proposition:
	                # Missing, null, or blank propositions carry no claim.
	                continue
	            # Prefer the explicit engine key; fall back to the generic one
	            # when it is absent or null.
	            raw_truth = row.get("engine_truth_value")
	            if raw_truth is None:
	                raw_truth = row.get("truth_value")
	            claims.append(
	                Claim(
	                    claim_id=f"{id_prefix}_{i}",
	                    topic=self._clean_text(row.get("topic")).lower() or "general",
	                    proposition=proposition,
	                    turn=turn,
	                    polarity=self._label(
	                        row.get("polarity"), self._POLARITIES, "neutral"
	                    ),
	                    engine_truth_value=self._label(
	                        raw_truth, self._TRUTH_VALUES, "unknown"
	                    ),
	                )
	            )
	        return claims

	    def confirmed_testimony(
	        self, *, question: str, reply: str, candidates: list[dict[str, str]]
	    ) -> list[str]:
	        """Of the candidate facts, which does this reply genuinely substantiate?

	        Uses the cheap extractor tier for robust paraphrase-tolerant matching
	        (names/times reworded). The engine still owns whether a clue is
	        unlocked; this only judges whether the witness spoke to it.

	        Args:
	            question: The question that was asked.
	            reply: The reply being judged.
	            candidates: Candidate facts; each dict must have an ``"id"`` key.

	        Returns:
	            The ids listed under ``"confirmed"`` in the model response that
	            match a candidate id, in response order. Returns ``[]`` when there
	            are no candidates, the LLM call fails, or the response is malformed.
	        """
	        if not candidates:
	            return []
	        prompt = self.prompts.render(
	            "extractor/testimony.md.j2",
	            question=question, reply=reply, candidates=candidates,
	        )
	        try:
	            data, _ = self.client.complete_json(
	                tier="extractor", task="testimony_detect", user=prompt,
	            )
	        except Exception:
	            # Deliberate best-effort: degrade to "nothing confirmed".
	            logging.getLogger(__name__).warning(
	                "testimony detection failed", exc_info=True,
	            )
	            return []

	        ids = data.get("confirmed") if isinstance(data, dict) else None
	        if not isinstance(ids, list):
	            # null, a bare string, a number, ...: never iterate these (a
	            # string would be matched character by character).
	            return []

	        valid = {c["id"] for c in candidates}
	        confirmed: list[str] = []
	        for cid in ids:
	            try:
	                if cid in valid:
	                    confirmed.append(cid)
	            except TypeError:
	                # Unhashable junk (nested list/dict) can never be a valid id.
	                continue
	        return confirmed