"""RosettaStone: canonical term -> client-specific term translation.""" from functools import lru_cache from typing import Any import yaml from config import DISPLAY_NAMES, domain_for, term_catalog_path @lru_cache(maxsize=8) def _load_catalog(domain: str) -> dict[str, dict[str, str]]: """Returns {client_id: {CANONICAL_KEY: "client term"}}.""" data = yaml.safe_load(term_catalog_path(domain).read_text()) return { client_id: client_data["terms"] for client_id, client_data in data["clients"].items() } def translate(canonical_key: str, client: str) -> str | None: """Return client-specific term for a canonical key, or None if not mapped.""" catalog = _load_catalog(domain_for(client)) return catalog.get(client, {}).get(canonical_key) def client_terms(client: str) -> dict[str, str]: """Return full {CANONICAL_KEY: client_term} mapping for a client.""" catalog = _load_catalog(domain_for(client)) return dict(catalog.get(client, {})) def client_terms_doc(client: str) -> dict[str, str]: """Return the term catalog as a pinned KB document for context injection.""" terms = client_terms(client) display = DISPLAY_NAMES.get(client, client.title()) lines = "\n".join(f"- {k.replace('_', ' ').title()}: use '{v}'" for k, v in terms.items()) return { "id": f"terms_{client}", "title": f"{display} Terminology Guide", "content": f"Always use these exact terms when responding to {display} users:\n{lines}", } def check_terminology(response_text: str, client: str) -> dict[str, Any]: """ Deterministic chain_terminology check. Flags cases where a rival client's term appears in the response for a canonical key, without the correct client term also being present. Returns: {"pass": bool, "violations": [...], "checked": int} """ catalog = _load_catalog(domain_for(client)) expected = catalog.get(client, {}) other_clients = {c: terms for c, terms in catalog.items() if c != client} text_lower = response_text.lower() violations = [] for canonical_key, client_term in expected.items(): client_term_present = client_term.lower() in text_lower for other_terms in other_clients.values(): rival_term = other_terms.get(canonical_key, "") if rival_term and rival_term.lower() in text_lower and not client_term_present: violations.append({ "canonical": canonical_key, "expected": client_term, "found": rival_term, }) return { "pass": len(violations) == 0, "violations": violations, "checked": len(expected), }