Spaces:

MataStrategy
/

ground-zero

Sleeping

File size: 9,284 Bytes

"""MinimalClient — dialect-anchored plain-text LLM client for the Month 1–3 rebuild.

Why this exists (and not GemmaClient):
  GemmaClient wraps every reply in a JSON object and runs a "teacher / child"
  intent-classification flow. That's fine for the full app, but for the minimal
  baseline it (a) spends model capacity on JSON compliance, (b) lets the model
  drift into neighbouring languages (Wolof, Hausa, Pulaar of Senegal, Fulfulde
  of Nigeria, Jula of Côte d'Ivoire), and (c) produces text that isn't clean
  for TTS.

This client instead:
  - pins the target dialect explicitly (Bambara / Bamako–Mali or Pular / Fuuta
    Jallon–Guinea),
  - injects the curated 30-phrase gold list for the target language as
    few-shot anchoring in the system prompt,
  - names forbidden neighbouring languages the model must not code-switch to,
  - returns a plain string, ready for MMS-TTS.

GemmaClient and app.py are intentionally untouched.
"""
from __future__ import annotations

import json
import logging
from functools import lru_cache
from pathlib import Path
from typing import Optional

# Module-wide logger, keyed by this module's import name.
logger = logging.getLogger(__name__)

# Folder holding the curated gold-phrase JSON files, i.e.
# <repo>/configs/dialect_anchors. The repo root is taken to be the
# grandparent of the directory this file lives in.
_ANCHOR_DIR = Path(__file__).resolve().parent.parent.parent.joinpath(
    "configs", "dialect_anchors"
)

# Language code -> anchor file name inside _ANCHOR_DIR. Only the languages
# listed here have a curated gold list; any other code yields no anchors.
_ANCHOR_FILE = dict(
    bam="bambara_mali.json",
    ful="pular_guinea.json",
)

# Language code -> human-readable dialect description that is interpolated
# into the system prompt.
LANG_FULL_NAME = dict(
    bam="Bambara as spoken in Bamako, Mali",
    ful="Pular of Fuuta Jallon, as spoken in Guinea",
    fr="French",
    en="English",
)

# Language code -> neighbouring languages the model is most likely to drift
# into and must be told to avoid. French and English are left empty because
# they do not need to be fenced.
FORBIDDEN_DRIFT = dict(
    bam=(
        "Jula / Dyula of Côte d'Ivoire, Wolof, Hausa, "
        "Swahili, Lingala, or any other African language"
    ),
    ful=(
        "Pulaar of Senegal, Fulfulde of Nigeria or Cameroon, "
        "Wolof, Hausa, Swahili, or any other African language"
    ),
    fr="",
    en="",
)


@lru_cache(maxsize=4)
def _load_anchors(lang: str) -> list[dict]:
    """Load the curated gold-phrase list for `lang`. Cached per process.

    The file name is looked up in `_ANCHOR_FILE` and read from `_ANCHOR_DIR`;
    the phrase list is the value of the top-level ``"pairs"`` key.

    This function never raises for a bad anchor file: a language without an
    anchor file, a missing/unreadable file, invalid JSON, or an unexpected
    JSON shape all yield an empty list (with a warning logged for everything
    except "no anchor file configured"). Entries of ``"pairs"`` that are not
    JSON objects are dropped so callers can rely on every item being a dict.

    Note: the result — including an empty fallback — is memoised by
    `lru_cache`, and the same list object is handed to every caller, so
    callers must treat it as read-only.
    """
    fname = _ANCHOR_FILE.get(lang)
    if not fname:
        return []
    path = _ANCHOR_DIR / fname
    # Open directly instead of checking `exists()` first: that avoids the
    # check-then-open race and lets one handler cover every I/O failure.
    try:
        with path.open("r", encoding="utf-8") as f:
            data = json.load(f)
    except FileNotFoundError:
        logger.warning("Dialect anchor file missing: %s", path)
        return []
    except (OSError, ValueError) as exc:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        logger.warning("Dialect anchor file unreadable: %s (%s)", path, exc)
        return []

    pairs = data.get("pairs") if isinstance(data, dict) else None
    if not isinstance(pairs, list):
        logger.warning("Dialect anchor file has no usable 'pairs' list: %s", path)
        return []
    return [item for item in pairs if isinstance(item, dict)]


def _format_pairs(items: Optional[list[dict]]) -> list[str]:
    """Render {source, target} pairs as ``- source  →  target`` bullet lines.

    Entries that are not dicts, or whose ``source`` / ``target`` is missing,
    not a string (e.g. JSON ``null``), or blank after stripping, are skipped
    rather than raising.
    """
    bullets: list[str] = []
    for item in items or ():
        if not isinstance(item, dict):
            continue
        src, tgt = item.get("source"), item.get("target")
        if not (isinstance(src, str) and isinstance(tgt, str)):
            continue
        src, tgt = src.strip(), tgt.strip()
        if src and tgt:
            bullets.append(f"- {src}  →  {tgt}")
    return bullets


def _build_system_prompt(
    target_lang: str,
    extra_examples: Optional[list[dict]] = None,
) -> str:
    """Assemble the per-call system prompt for a target output language.

    The prompt is built from, in order: the role/format instructions, the
    dialect-fidelity warning (only when `FORBIDDEN_DRIFT` has a non-empty
    entry for `target_lang`), the curated gold list returned by
    `_load_anchors`, the optional `extra_examples`, and a closing reminder to
    always answer in the target language.

    Args:
        target_lang: Language code. Codes missing from `LANG_FULL_NAME` fall
            back to "English".
        extra_examples: Optional list of ``{"source", "target"}`` dicts that
            are appended after the curated gold list as additional dynamic
            few-shot anchoring — used by app_minimal to inject the top-K
            nearest phrasebook entries when the strict short-circuit misses.

    Returns:
        The prompt as a single newline-joined string.

    Both example sections go through `_format_pairs`, so a malformed entry
    (non-dict, ``null`` or non-string field) is skipped in either list, and a
    section header is only emitted when at least one valid pair follows it.
    """
    full = LANG_FULL_NAME.get(target_lang, "English")
    forbidden = FORBIDDEN_DRIFT.get(target_lang, "")
    anchor_bullets = _format_pairs(_load_anchors(target_lang))
    extra_bullets = _format_pairs(extra_examples)

    lines: list[str] = [
        f"You are a warm, concise conversational assistant that replies ONLY in {full}.",
        "",
        "Your task is to REPLY to the user's message as a person would in "
        "conversation — NOT to translate it. If the user greets you, greet them "
        "back and ask how they are. If they ask a question, answer it. If they "
        "make a statement, respond appropriately. Never simply repeat or "
        "translate what they said back to them.",
        "",
        "Output format: plain natural text only. No JSON, no code fences, no "
        "markdown, no translations, no romanisation, no explanations, and "
        "ABSOLUTELY no parenthetical glosses, literal translations, or "
        "English/French annotations of any kind (do NOT write things like "
        "'(Lit: ...)', '(meaning ...)', or any '(English ...)' aside). The "
        f"output must be 100% {full} characters and punctuation only. Reply in "
        "1–3 short sentences suitable to be read aloud by a text-to-speech voice.",
    ]

    if forbidden:
        lines += [
            "",
            (
                f"CRITICAL — dialect fidelity: do NOT use, mix, or substitute words "
                f"from {forbidden}. If you are not confident a word belongs to "
                f"{full}, rephrase using simpler vocabulary you are certain of, or "
                f"apologise briefly in {full} (for example that you did not "
                f"understand)."
            ),
        ]

    if anchor_bullets:
        lines += [
            "",
            f"Reference phrases in {full} — these pairs are STYLE/ORTHOGRAPHY "
            "examples ONLY (showing how English/French maps to the correct "
            "dialect). Do NOT treat them as a translation task: when the user "
            "writes one of these source phrases, do not just output its target "
            "verbatim — instead REPLY conversationally in the same dialectal "
            "style:",
        ]
        lines += anchor_bullets

    if extra_bullets:
        lines += [
            "",
            "Additional reference phrases relevant to the current user input "
            f"(curated gold {full} translations — STYLE references only, not a "
            "translation task; reply conversationally, do not echo the target "
            "verbatim):",
        ]
        lines += extra_bullets

    # Closing reminder goes last so it is the final instruction in the prompt.
    lines += [
        "",
        f"Always reply in {full}, even if the user writes to you in English, "
        "French, or another language. Never translate your own reply.",
    ]
    return "\n".join(lines)


class MinimalClient:
    """Dialect-anchored plain-text LLM client over HF Serverless Inference.

    The client builds a dialect-pinned system prompt for every call, sends a
    single-turn chat completion through `huggingface_hub.InferenceClient`,
    and returns the reply as a cleaned plain string.

    Usage:
        client = MinimalClient(model_id="CohereLabs/aya-expanse-32b", hf_token=TOK)
        reply  = client.chat("Good morning", target_lang="bam")
        # → "I ni sɔgɔma. I ka kɛnɛ wa?"
    """

    def __init__(
        self,
        model_id: str = "CohereLabs/aya-expanse-32b",
        hf_token: Optional[str] = None,
    ) -> None:
        """Store the model id and token; no network client is created yet."""
        self.model_id = model_id
        self.hf_token = hf_token
        self._client = None  # lazy init — created on first use by _get_client()

    def _get_client(self):
        """Return the shared `InferenceClient`, creating it on first use.

        `huggingface_hub` is imported here rather than at module import time,
        so the module can be imported without that package being loaded.
        """
        if self._client is None:
            from huggingface_hub import InferenceClient
            self._client = InferenceClient(token=self.hf_token)
        return self._client

    @staticmethod
    def _clean_reply(raw: Optional[str]) -> str:
        """Strip stray code fences and gloss parentheticals from a model reply.

        Two defensive clean-ups are applied, in this order:

        1. If the reply starts with a ``` fence, the surrounding backticks are
           removed. A language tag is only looked for on the same line as the
           opening fence (short, no spaces); when the fence is followed
           directly by a newline there is no tag and the first content line
           is kept.
        2. Parentheticals that start with a gloss marker as a whole word
           (Lit / Literal(ly) / Meaning / Translation / English / French /
           fr / en, case-insensitive) are removed together with the
           whitespace before them. The word boundary keeps ordinary
           parentheticals such as "(enfin)" intact.

        `None` or an empty reply yields an empty string.
        """
        import re  # local import: `re` is not among this module's top-level imports

        text = (raw or "").strip()
        if text.startswith("```"):
            inner = text.strip("`")
            # Split BEFORE trimming whitespace so an empty first line (fence
            # followed directly by a newline) is not mistaken for "no line".
            first_line, newline, rest = inner.partition("\n")
            tag = first_line.strip()
            if newline and (not tag or (len(tag) < 20 and " " not in tag)):
                inner = rest
            text = inner.strip()

        return re.sub(
            r"\s*\((?:lit\.?|literal(?:ly)?|meaning|translation|english|french|fr|en)\b[^)]*\)",
            "",
            text,
            flags=re.IGNORECASE,
        ).strip()

    def chat(
        self,
        user_text: str,
        target_lang: str = "bam",
        extra_examples: Optional[list[dict]] = None,
    ) -> str:
        """Return a plain-text reply in `target_lang`.

        `extra_examples` (optional) — list of {source, target} dicts that get
        appended to the system prompt as additional dynamic few-shot. Used by
        app_minimal to RAG-inject the top-K nearest phrasebook entries when
        the strict phrasebook short-circuit misses.

        On any error — including a failure while building the system prompt —
        returns a short parenthetical error string so the caller can still
        feed something into TTS / display. The error is logged with its
        traceback.
        """
        try:
            # Built inside the try so a prompt-building failure is reported
            # through the same fallback string as an inference failure.
            system_prompt = _build_system_prompt(target_lang, extra_examples)
            completion = self._get_client().chat_completion(
                model=self.model_id,
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user",   "content": user_text},
                ],
                max_tokens=256,
                temperature=0.3,
            )
            return self._clean_reply(completion.choices[0].message.content)
        except Exception as exc:  # pragma: no cover — surfaced to UI
            logger.exception("MinimalClient error: %s", exc)
            return f"(LLM unavailable: {exc})"