File size: 9,284 Bytes
d0e28fa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
064d08b
 
 
 
 
 
 
 
 
 
 
d0e28fa
 
 
 
 
 
 
9e99c2c
 
 
 
 
 
d0e28fa
757e833
 
 
 
 
d0e28fa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9e99c2c
 
 
 
 
 
d0e28fa
 
 
 
 
 
 
064d08b
 
 
 
9e99c2c
 
 
064d08b
 
 
 
 
 
 
d0e28fa
 
 
 
 
 
 
 
 
 
 
 
cc82bd8
d0e28fa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
064d08b
 
 
 
 
 
d0e28fa
 
064d08b
 
 
 
 
d0e28fa
 
 
064d08b
d0e28fa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
757e833
 
 
 
 
 
 
 
 
 
 
 
d0e28fa
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
"""MinimalClient — dialect-anchored plain-text LLM client for the Month 1–3 rebuild.

Why this exists (and not GemmaClient):
  GemmaClient wraps every reply in a JSON object and runs a "teacher / child"
  intent-classification flow. That's fine for the full app, but for the minimal
  baseline it (a) spends model capacity on JSON compliance, (b) lets the model
  drift into neighbouring languages (Wolof, Hausa, Pulaar of Senegal, Fulfulde
  of Nigeria, Jula of Côte d'Ivoire), and (c) produces text that isn't clean
  for TTS.

This client instead:
  - pins the target dialect explicitly (Bambara / Bamako–Mali or Pular / Fuuta
    Jallon–Guinea),
  - injects the curated 30-phrase gold list for the target language as
    few-shot anchoring in the system prompt,
  - names forbidden neighbouring languages the model must not code-switch to,
  - returns a plain string, ready for MMS-TTS.

GemmaClient and app.py are intentionally untouched.
"""
from __future__ import annotations

import json
import logging
import re
from functools import lru_cache
from pathlib import Path
from typing import Optional

logger = logging.getLogger(__name__)

# Curated gold-phrase files live in <repo>/configs/dialect_anchors; the
# location is derived by taking three `.parent` steps up from this file's
# resolved path.
_ANCHOR_DIR = Path(__file__).resolve().parent.parent.parent.joinpath(
    "configs", "dialect_anchors"
)

# Language code -> name of the anchor file inside _ANCHOR_DIR.
_ANCHOR_FILE = dict(
    bam="bambara_mali.json",
    ful="pular_guinea.json",
)

# Language code -> full, dialect-pinned name of the output language.
LANG_FULL_NAME = dict(
    bam="Bambara as spoken in Bamako, Mali",
    ful="Pular of Fuuta Jallon, as spoken in Guinea",
    fr="French",
    en="English",
)

# Language code -> neighbouring languages the model is most likely to drift
# into. French and English map to an empty string because they need no fence.
FORBIDDEN_DRIFT = dict(
    bam=(
        "Jula / Dyula of Côte d'Ivoire, "
        "Wolof, Hausa, Swahili, Lingala, "
        "or any other African language"
    ),
    ful=(
        "Pulaar of Senegal, "
        "Fulfulde of Nigeria or Cameroon, "
        "Wolof, Hausa, Swahili, "
        "or any other African language"
    ),
    fr="",
    en="",
)


@lru_cache(maxsize=4)
def _load_anchors(lang: str) -> list[dict]:
    """Load the curated gold-phrase list for `lang`. Cached per process.

    Returns the list stored under the ``"pairs"`` key of the language's
    anchor file, keeping only entries that are dicts. An empty list is
    returned (with a warning logged where something is actually wrong) when:

      - `lang` has no anchor file registered in ``_ANCHOR_FILE``,
      - the file is missing or cannot be read,
      - the file is not valid UTF-8 JSON,
      - the top-level value is not an object or ``"pairs"`` is not a list.

    Because of the cache, the same list object is handed to every caller for
    a given `lang` (so callers should not mutate it), and a failed load is
    remembered for the rest of the process as well.
    """
    fname = _ANCHOR_FILE.get(lang)
    if not fname:
        return []
    path = _ANCHOR_DIR / fname
    # EAFP: open directly rather than exists()-then-open, so a file that
    # disappears in between is handled the same way as one that never existed.
    try:
        with path.open("r", encoding="utf-8") as f:
            data = json.load(f)
    except FileNotFoundError:
        logger.warning("Dialect anchor file missing: %s", path)
        return []
    except (OSError, ValueError) as exc:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        logger.warning("Dialect anchor file unreadable: %s (%s)", path, exc)
        return []

    pairs = data.get("pairs", []) if isinstance(data, dict) else None
    if not isinstance(pairs, list):
        logger.warning("Dialect anchor file has unexpected structure: %s", path)
        return []
    # Drop entries that are not mappings so consumers can call .get() safely.
    return [entry for entry in pairs if isinstance(entry, dict)]


def _format_pairs(pairs: list[dict]) -> list[str]:
    """Render {source, target} dicts as "- source → target" bullet lines.

    Entries with a missing, null, or blank source or target are skipped.
    """
    bullets: list[str] = []
    for item in pairs:
        src = str(item.get("source") or "").strip()
        tgt = str(item.get("target") or "").strip()
        if src and tgt:
            # An explicit separator keeps the source/target boundary visible;
            # without it the two phrases run together into one string.
            bullets.append(f"- {src} → {tgt}")
    return bullets


def _build_system_prompt(
    target_lang: str,
    extra_examples: Optional[list[dict]] = None,
) -> str:
    """Assemble the per-call system prompt for a target output language.

    The prompt is built from these parts, in order, joined with newlines:

      1. the role line and the "reply, don't translate" task description,
      2. the plain-text output-format rules,
      3. a dialect-fidelity warning naming the forbidden neighbouring
         languages (only when ``FORBIDDEN_DRIFT`` has a non-empty entry),
      4. the curated gold pairs for `target_lang` from ``_load_anchors``
         (only when at least one usable pair exists),
      5. `extra_examples`, rendered the same way (only when at least one
         usable pair exists),
      6. a closing reminder to always reply in the target language.

    Args:
        target_lang: Language code. Codes missing from ``LANG_FULL_NAME``
            fall back to "English" for the language name.
        extra_examples: Optional list of ``{"source": ..., "target": ...}``
            dicts appended after the curated gold list as additional dynamic
            few-shot anchoring — used by app_minimal to inject the top-K
            nearest phrasebook entries when the strict short-circuit misses.

    Returns:
        The complete system prompt as a single string.
    """
    full = LANG_FULL_NAME.get(target_lang, "English")
    forbidden = FORBIDDEN_DRIFT.get(target_lang, "")
    anchor_bullets = _format_pairs(_load_anchors(target_lang))
    extra_bullets = _format_pairs(extra_examples or [])

    lines: list[str] = [
        f"You are a warm, concise conversational assistant that replies ONLY in {full}.",
        "",
        "Your task is to REPLY to the user's message as a person would in "
        "conversation — NOT to translate it. If the user greets you, greet them "
        "back and ask how they are. If they ask a question, answer it. If they "
        "make a statement, respond appropriately. Never simply repeat or "
        "translate what they said back to them.",
        "",
        "Output format: plain natural text only. No JSON, no code fences, no "
        "markdown, no translations, no romanisation, no explanations, and "
        "ABSOLUTELY no parenthetical glosses, literal translations, or "
        "English/French annotations of any kind (do NOT write things like "
        "'(Lit: ...)', '(meaning ...)', or any '(English ...)' aside). The "
        f"output must be 100% {full} characters and punctuation only. Reply in "
        "1–3 short sentences suitable to be read aloud by a text-to-speech voice.",
    ]

    if forbidden:
        lines += [
            "",
            (
                f"CRITICAL — dialect fidelity: do NOT use, mix, or substitute words "
                f"from {forbidden}. If you are not confident a word belongs to "
                f"{full}, rephrase using simpler vocabulary you are certain of, or "
                f"apologise briefly in {full} (for example that you did not "
                f"understand)."
            ),
        ]

    # Each header is emitted only when it has bullets under it, so the prompt
    # never contains a dangling "Reference phrases ...:" line.
    if anchor_bullets:
        lines += [
            "",
            f"Reference phrases in {full} — these pairs are STYLE/ORTHOGRAPHY "
            "examples ONLY (showing how English/French maps to the correct "
            "dialect). Do NOT treat them as a translation task: when the user "
            "writes one of these source phrases, do not just output its target "
            "verbatim — instead REPLY conversationally in the same dialectal "
            "style:",
        ]
        lines += anchor_bullets

    if extra_bullets:
        lines += [
            "",
            "Additional reference phrases relevant to the current user input "
            f"(curated gold {full} translations — STYLE references only, not a "
            "translation task; reply conversationally, do not echo the target "
            "verbatim):",
        ]
        lines += extra_bullets

    lines += [
        "",
        f"Always reply in {full}, even if the user writes to you in English, "
        "French, or another language. Never translate your own reply.",
    ]
    return "\n".join(lines)


class MinimalClient:
    """Dialect-anchored plain-text LLM client over HF Serverless Inference.

    Usage:
        client = MinimalClient(model_id="CohereLabs/aya-expanse-32b", hf_token=TOK)
        reply  = client.chat("Good morning", target_lang="bam")
        # → "I ni sɔgɔma. I ka kɛnɛ wa?"
    """

    # Parenthetical glosses the model sometimes appends despite the prompt,
    # e.g. "Foo bar (Lit: ...)". Only parentheticals whose first word is one
    # of the listed markers are removed; the trailing \b stops short markers
    # such as "en"/"fr"/"lit" from matching ordinary words like "(enfin ...)".
    _GLOSS_RE = re.compile(
        r"\s*\((?:lit|literal(?:ly)?|meaning|translation|english|french|fr|en)\b[^)]*\)",
        re.IGNORECASE,
    )
    # What may follow an opening ``` on the same line and still count as a
    # language tag (possibly empty): short, ASCII, no spaces or full stops.
    _FENCE_TAG_RE = re.compile(r"[A-Za-z0-9_+#-]{0,19}")

    def __init__(
        self,
        model_id: str = "CohereLabs/aya-expanse-32b",
        hf_token: Optional[str] = None,
    ) -> None:
        """Store the model id and token; the HTTP client is created lazily."""
        self.model_id = model_id
        self.hf_token = hf_token
        self._client = None  # lazy init

    def _get_client(self):
        """Return the shared InferenceClient, creating it on first use.

        huggingface_hub is imported here rather than at module level so the
        module can be imported without it until a request is actually made.
        """
        if self._client is None:
            from huggingface_hub import InferenceClient
            self._client = InferenceClient(token=self.hf_token)
        return self._client

    @classmethod
    def _clean_reply(cls, raw: str) -> str:
        """Strip stray code fences and gloss parentheticals from a model reply.

        Args:
            raw: The raw message content (``None`` is treated as empty).

        Returns:
            The cleaned, whitespace-trimmed reply text.
        """
        text = (raw or "").strip()
        if text.startswith("```"):
            body = text.strip("`")
            # Inspect the opening fence line BEFORE trimming whitespace: the
            # language tag, if any, is whatever sits between ``` and the first
            # newline. Trimming first would make a short first content line
            # indistinguishable from a tag.
            info, newline, rest = body.partition("\n")
            if newline and cls._FENCE_TAG_RE.fullmatch(info.strip()):
                body = rest
            text = body.strip()
        return cls._GLOSS_RE.sub("", text).strip()

    def chat(
        self,
        user_text: str,
        target_lang: str = "bam",
        extra_examples: Optional[list[dict]] = None,
    ) -> str:
        """Return a plain-text reply in `target_lang`.

        `extra_examples` (optional) — list of {source, target} dicts that get
        appended to the system prompt as additional dynamic few-shot. Used by
        app_minimal to RAG-inject the top-K nearest phrasebook entries when
        the strict phrasebook short-circuit misses.

        On any error — including a failure while building the system prompt —
        returns a short parenthetical error string so the caller can still
        feed something into TTS / display.
        """
        try:
            # Built inside the try so prompt-assembly failures honour the
            # "never raises" contract as well.
            system_prompt = _build_system_prompt(target_lang, extra_examples)
            completion = self._get_client().chat_completion(
                model=self.model_id,
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user",   "content": user_text},
                ],
                max_tokens=256,
                temperature=0.3,
            )
            return self._clean_reply(completion.choices[0].message.content or "")
        except Exception as exc:  # pragma: no cover — surfaced to UI
            # Deliberate top-level boundary: log with traceback, then degrade.
            logger.exception("MinimalClient error: %s", exc)
            return f"(LLM unavailable: {exc})"