File size: 2,736 Bytes
ebe934f
 
 
e181667
ebe934f
e181667
 
ebe934f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e181667
76be5a0
 
 
 
 
 
 
 
 
54a5940
 
e181667
ebe934f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
"""RosettaStone: canonical term -> client-specific term translation."""

from functools import lru_cache
from typing import Any

import yaml
from config import DISPLAY_NAMES, domain_for, term_catalog_path


@lru_cache(maxsize=8)
def _load_catalog(domain: str) -> dict[str, dict[str, str]]:
    """Load the term catalog for *domain*.

    Reads the YAML document at ``term_catalog_path(domain)`` and reduces it
    to ``{client_id: {CANONICAL_KEY: "client term"}}``.

    Results are memoised per domain by ``lru_cache``, so repeated calls hand
    back the same dict objects; callers should treat them as read-only.

    Raises:
        KeyError: if the document has no ``clients`` key, or a client entry
            has no ``terms`` key.
    """
    # Decode explicitly as UTF-8 rather than the platform's locale encoding,
    # so non-ASCII terms load identically on every host.
    raw = term_catalog_path(domain).read_text(encoding="utf-8")
    data = yaml.safe_load(raw)
    return {
        # A bare ``terms:`` key parses as None; normalise it to an empty
        # mapping so lookups on the cached value cannot hit a None.
        client_id: client_data["terms"] or {}
        for client_id, client_data in data["clients"].items()
    }


def translate(canonical_key: str, client: str) -> str | None:
    """Look up the term *client* uses for *canonical_key*.

    Returns None when the client is missing from its domain catalog or has
    no entry for the key.
    """
    domain_catalog = _load_catalog(domain_for(client))
    if client not in domain_catalog:
        return None
    return domain_catalog[client].get(canonical_key)


def client_terms(client: str) -> dict[str, str]:
    """Return a copy of the {CANONICAL_KEY: client_term} mapping for *client*.

    A client that is missing from its domain catalog yields an empty dict.
    The returned dict is a fresh object, so mutating it does not touch the
    cached catalog.
    """
    domain_catalog = _load_catalog(domain_for(client))
    if client not in domain_catalog:
        return {}
    return dict(domain_catalog[client])


def client_terms_doc(client: str) -> dict[str, str]:
    """Build the client's terminology guide as a document dict.

    The result has ``id``, ``title`` and ``content`` keys; ``content`` holds
    one bullet per canonical key telling the reader which client term to use.
    The display name comes from ``DISPLAY_NAMES``, falling back to the
    title-cased client id.
    """
    mapping = client_terms(client)
    label = DISPLAY_NAMES.get(client, client.title())

    bullets = []
    for key, term in mapping.items():
        # "ORDER_ID" -> "Order Id" for a human-readable bullet heading.
        readable_key = key.replace('_', ' ').title()
        bullets.append(f"- {readable_key}: use '{term}'")
    body = "\n".join(bullets)

    doc = {}
    doc["id"] = f"terms_{client}"
    doc["title"] = f"{label} Terminology Guide"
    doc["content"] = f"Always use these exact terms when responding to {label} users:\n{body}"
    return doc


def check_terminology(response_text: str, client: str) -> dict[str, Any]:
    """
    Deterministic chain_terminology check.

    Flags cases where a rival client's term appears in the response for a
    canonical key, without the correct client term also being present.
    Matching is a plain case-insensitive substring test against the whole
    response text.

    Each rival term is reported at most once per canonical key, even when
    several other clients in the catalog share that same term.

    Args:
        response_text: The text to inspect.
        client: Client id whose terminology the response should follow.

    Returns:
        {"pass": bool, "violations": [...], "checked": int}
        where each violation is
        {"canonical": key, "expected": client term, "found": rival term}
        and "checked" is the number of canonical keys mapped for the client.
    """
    catalog = _load_catalog(domain_for(client))
    expected = catalog.get(client, {})
    rival_catalogs = [terms for other, terms in catalog.items() if other != client]
    text_lower = response_text.lower()
    violations = []

    for canonical_key, client_term in expected.items():
        # The correct term is present, so rival terms for this key are
        # tolerated and there is nothing to scan for.
        if client_term.lower() in text_lower:
            continue

        # Lower-cased rival terms already reported for this key; prevents
        # duplicate violations when several clients use the same wording.
        reported: set[str] = set()
        for rival_terms in rival_catalogs:
            rival_term = rival_terms.get(canonical_key, "")
            if not rival_term:
                continue
            rival_lower = rival_term.lower()
            if rival_lower in reported or rival_lower not in text_lower:
                continue
            reported.add(rival_lower)
            violations.append({
                "canonical": canonical_key,
                "expected": client_term,
                "found": rival_term,
            })

    return {
        "pass": not violations,
        "violations": violations,
        "checked": len(expected),
    }