# app.py — Traductor Español ↔ Neoíbero (BI-ONLY 1:1 estricto, determinista) # UI completa + CSS “íbero” + TTS + Línea ibérica (codificación appOld) # Requiere un ÚNICO CSV con superficies exactas (UTF-8) y columnas: # - source_es (o es/es_surface) # - target_ni (o ni/ni_surface) # - pair_id (opcional) # # El motor NO hace heurísticas ni morfología: 1:1 exacto por superficie. # Puntuación y números pasan tal cual. Desconocidos -> [SIN-LEX:...] / [?:...] # Determinismo NI→ES: entradas NI duplicadas (ambigüas) quedan bloqueadas y se rinden como [AMB-NI:...] import gradio as gr import os, csv, re, base64, unicodedata, gzip import torch from transformers import AutoProcessor, VitsModel import numpy as np from html import escape # ====== cache ====== os.environ['TRANSFORMERS_CACHE'] = os.environ.get('TRANSFORMERS_CACHE', '/tmp/cache') os.environ['HF_HOME'] = os.environ.get('HF_HOME', '/tmp/hf') DEBUG_MODE = False def debug_print(msg): if DEBUG_MODE: print(f"[DEBUG] {msg}") # ====== util ====== def _open_maybe_gzip(path): if str(path).endswith(".gz"): # CSV debe venir en UTF-8 (evita mojibake) return gzip.open(path, "rt", encoding="utf-8", newline="") return open(path, "r", encoding="utf-8", newline="") def norm(x): return (str(x).strip()) if x is not None else "" def lower(x): return norm(x).lower() def fold(s:str)->str: return ''.join(c for c in unicodedata.normalize('NFD', s or "") if unicodedata.category(c)!="Mn") # ====== rutas ====== def _cand(*names): for n in names: if os.path.exists(n): return n p = os.path.join("salida", n) if os.path.exists(p): return p return names[0] # último recurso para mensajes # Prioriza los “master/surface-ready”; luego retrocompatibles CSV_BI = _cand( "LEXICON_UNICO_1a1.csv.gz", "MASTER_SURFACE_READY.csv.gz", "MASTER_REEXTENDED.csv.gz", "BI_SURFACE_READY.csv.gz", "HF_Pairs_BI_REEXTENDED.csv.gz", "HF_Pairs_BI_EXPANDED1_EXTENDED_FILLED.csv.gz", "HF_Pairs_BI_EXPANDED1.csv.gz" ) # ====== estructuras strict BI ====== # Clave = superficie 
# lowercased. Value = (opposite original surface, pair_id)
ES2NI = {}        # es_surface_lower -> (ni_surface, pair_id)
NI2ES = {}        # ni_surface_lower -> (es_surface, pair_id)
# N-grams / phrases:
ESPHRASE2NI = {}  # "el saco" -> (ni_surface, pair_id)
NIPHRASE2ES = {}  # "…-ke ni etxe-ka" -> (es_surface, pair_id)
MAX_NGRAM = 3

# ====== signs / minimal tokenization ======
VISIBLE_PUNCT = set(",.;:!?¡¿…()[]{}\"'«»—–“”‘’")  # set(list(...)) simplified; same members
_num_re = re.compile(r"^\d+([.,]\d+)?$")

def is_number(tok: str) -> bool:
    """True for integers and single-separator decimals like '12,5' / '3.14'."""
    return bool(_num_re.fullmatch(tok or ""))

# --- clause separators + atomic placeholders ---
CLAUSE_BREAKS = {",", ";", "—", "–", ":"}
PLACEHOLDER_RE = re.compile(r"^\[[^\]]+\]$")

def is_placeholder(tok: str) -> bool:
    """True for atomic bracketed tokens such as [SIN-LEX:...]."""
    return bool(PLACEHOLDER_RE.match(tok or ""))

def _restore_brk(tok, protected):
    """Map a __BRKn__ sentinel (optionally suffixed -na/-ba) back to the protected text."""
    m = re.fullmatch(r"__BRK(\d+)__(?:-(na|ba))?", tok or "")
    if not m:
        return tok
    idx = int(m.group(1))
    suf = m.group(2)
    base = protected[idx] if 0 <= idx < len(protected) else tok
    return base + (f"-{suf}" if suf else "")

def simple_tokenize(text: str):
    """Minimal tokenization that never splits [ ... ] or [ ... ]-na/-ba."""
    if not text:
        return []
    protected = []
    def _repl(m):
        # Replace each bracketed span with an indexed sentinel so the
        # punctuation-splitting regex below cannot tear it apart.
        key = f"__BRK{len(protected)}__"
        protected.append(m.group(0))
        return key
    t = re.sub(r"\[[^\]]*\]", _repl, (text or "").strip())
    t = re.sub(r"\s+", " ", t)
    t = re.sub(r"([,.;:!?¡¿…()\[\]{}\"'«»—–“”‘’])", r" \1 ", t)
    toks = [tok for tok in t.split() if tok]
    for i, tok in enumerate(toks):
        if tok.startswith("__BRK") and "__" in tok:
            toks[i] = _restore_brk(tok, protected)
    return toks

def detokenize(tokens):
    """Join tokens and re-attach punctuation with Spanish spacing conventions."""
    s = " ".join(tokens)
    s = re.sub(r"\s+([,.;:!?])", r"\1", s)
    s = re.sub(r"([¿¡])\s+", r"\1", s)
    s = re.sub(r"\(\s+", "(", s)
    s = re.sub(r"\s+\)", ")", s)
    s = re.sub(r"\s{2,}", " ", s).strip()
    return s

# ====== Vascoid modality (-na / -ba) ======
MODAL_SUFFIX_ENABLE = True
MODAL_ONLY_ON_FINITE = True
MODAL_STRIP_QE_IN_NI = True
SENT_END = {".", "!", "?", "…"}
OPEN_FOR = {"?": "¿", "!": "¡"}
WRAP_PREFIX = set(list("«“‘([{\"'"))
PERS_ENDINGS = ("-n","-zu","-gu","-zuk","-zuek","-k")
TAM_FINITE = ("-ke","-bo","-ta","-ni","-tu")

def looks_like_finite_ni(tok: str) -> bool:
    """Heuristic-free suffix check: does the token end in a finite TAM marker
    (optionally followed by a personal ending), ignoring any -na/-ba tail?"""
    t = (tok or "").lower()
    if not t or t.startswith("["):
        return False
    base = re.sub(r"-(na|ba)$", "", t)
    for tam in TAM_FINITE:
        if base.endswith(tam) or any(base.endswith(tam + pe) for pe in PERS_ENDINGS):
            return True
    return False

def last_content_index(tokens, start, end_exclusive):
    """Index of the last non-punctuation token in [start, end_exclusive), or -1."""
    i = end_exclusive - 1
    while i >= start and tokens[i] in VISIBLE_PUNCT:
        i -= 1
    return i if i >= start else -1

def strip_qe_punct(tokens):
    """Drop ¿ ? ¡ ! — modality is carried by -na/-ba instead."""
    return [t for t in tokens if t not in ("¿","?","¡","!")]

# --- numeric helpers: never split decimals or clock times ---
def _is_numeric_comma(tokens, i):
    return (0 < i < len(tokens)-1 and tokens[i] == ","
            and is_number(tokens[i-1]) and is_number(tokens[i+1]))

def _is_time_colon(tokens, i):
    return (0 < i < len(tokens)-1 and tokens[i] == ":"
            and is_number(tokens[i-1]) and is_number(tokens[i+1]))

def _is_true_clause_break(tokens, i):
    """A clause separator that is not part of a decimal (12,5) or a time (18:30)."""
    if tokens[i] not in CLAUSE_BREAKS:
        return False
    if _is_numeric_comma(tokens, i):
        return False
    if _is_time_colon(tokens, i):
        return False
    return True

def add_modal_suffixes_es2ni(tokens):
    """Append -na/-ba to the last finite verb (or last constituent) per sentence."""
    if not MODAL_SUFFIX_ENABLE:
        return tokens
    out = tokens[:]
    n = len(out)
    i = 0
    sent_start = 0
    while i < n:
        if out[i] in ("?", "!"):
            closer = out[i]
            target = -1
            j = i - 1
            while j >= sent_start:
                if out[j] not in VISIBLE_PUNCT and (not MODAL_ONLY_ON_FINITE or looks_like_finite_ni(out[j])):
                    target = j; break
                j -= 1
            if target == -1:
                # no finite verb found: fall back to last content token
                target = last_content_index(out, sent_start, i)
            if target != -1:
                suf = "na" if closer == "?" else "ba"
                if not re.search(rf"-(?:{suf})$", out[target].lower()):
                    out[target] = out[target] + "-" + suf
            sent_start = i + 1
        elif out[i] in SENT_END:
            sent_start = i + 1
        i += 1
    if MODAL_STRIP_QE_IN_NI:
        out = strip_qe_punct(out)
    return out

def strip_modal_suffixes_ni(tokens):
    """
    Interpret -na/-ba as modality; sentences are only closed at sentence end
    (not at commas/":" unless explicit ?/! are already present).
    """
    if not MODAL_SUFFIX_ENABLE:
        return tokens
    out = []
    buf = []
    pending_end = None
    mode = None  # "?" / "!"
    def _emit(end_override=None, also_append=None):
        nonlocal buf, mode, pending_end, out
        local = [t for t in buf if t not in ("¿","?","¡","!")]
        if local:
            end_tok = end_override or ("?" if mode == "?" else "!" if mode == "!" else pending_end or ".")
            out.extend(local)
            out.append(end_tok)
        buf.clear(); mode = None; pending_end = None
        if also_append:
            out.append(also_append)
    toks = tokens + ["."]  # sentinel terminator so the last sentence is flushed
    for i, t in enumerate(toks):
        if t in ("¿", "¡"):
            _emit(); mode = "?" if t == "¿" else "!"
            continue
        if t in ("?", "!"):
            pending_end = t; _emit(); continue
        if t in SENT_END:
            pending_end = t; _emit(); continue
        # ✦ MODALITY: do not close yet at clause separators
        if t in CLAUSE_BREAKS and mode in ("?","!"):
            buf.append(t)
            continue
        m = re.search(r"-(na|ba)$", (t or "").lower())
        if m:
            if mode and buf:
                _emit()
            mode = "?" if m.group(1) == "na" else "!"
            t = t[:-len(m.group(0))]
        if t:
            buf.append(t)
    # the sentinel "." may leave a duplicate terminator
    if len(out) >= 2 and out[-1] == "." and out[-2] == ".":
        out.pop()
    return out

def add_inverted_openers(tokens):
    """Insert ¿/¡ at the start of each stretch ending in ?/!, ignoring numeric commas/colons."""
    out = tokens[:]
    def _is_true_start_break(idx):
        if out[idx] in SENT_END:
            return True
        if out[idx] in CLAUSE_BREAKS:
            return _is_true_clause_break(out, idx)
        return False
    i = 0
    while i < len(out):
        if out[i] in ("?", "!"):
            closer = out[i]; opener = OPEN_FOR[closer]
            j = i - 1
            while j >= 0 and not _is_true_start_break(j):
                j -= 1
            start = j + 1
            k = start
            # skip opening quotes/brackets so the opener lands inside them
            while k < i and out[k] in WRAP_PREFIX:
                k += 1
            if not (k < len(out) and out[k] == opener):
                out.insert(k, opener); i += 1
        i += 1
    return out

# ====== EXPANSIONS (deterministic, ES→NI only) ======
EXPANSION_ENABLE = True
FLAG_COLNAMES = ("flags","FLAGS","expand","EXPAND","tags","TAGS","morph","MORPH")
FLAG_PLURAL = ("S",)
FLAG_3PL = ("3","V3")
VOWELS = "aeiouáéíóúüAEIOUÁÉÍÓÚÜ"

def _has_flag(cell: str, wanted: tuple) -> bool:
    c = (cell or "")
    return any(w in c for w in wanted)

def _pluralize_es_form(s: str) -> str:
    """Regular Spanish plural, preserving the case of the final letter."""
    if not s:
        return s
    sl = s.lower()
    if sl.endswith("z"):
        return s[:-1] + ("ces" if s[-1].islower() else "CES")
    if s[-1] not in VOWELS:
        return s + ("es" if s[-1].islower() else "ES")
    return s + ("s" if s[-1].islower() else "S")

def _present_3pl_from_3sg(s: str) -> str:
    """3rd-person plural present from the 3sg form (append -n)."""
    if not s:
        return s
    return s + ("n" if s[-1].islower() else "N")

# ====== TTS (appOld) ======
print("Cargando modelo de voz (opcional)…")
device = "cuda" if torch.cuda.is_available() else "cpu"
processor = model = None
try:
    processor = AutoProcessor.from_pretrained("facebook/mms-tts-spa")
    model = VitsModel.from_pretrained("facebook/mms-tts-spa").to(device)
    print("Modelo de voz cargado.")
except Exception as e:
    # TTS is optional: keep running without audio if the model cannot load.
    print(f"AVISO TTS: {e}")

def add_reading_pauses(text: str, level: int = 3) -> str:
    """Duplicate , and . so the TTS voice pauses at them (level<=1 disables)."""
    if level <= 1:
        return text
    t = re.sub(r",\s*", ", , ", text)
    # FIX: the second substitution previously ran on `text`, discarding the
    # comma pauses added above; it must chain on `t`.
    t = re.sub(r"\.\s*", ". . ", t)
    return re.sub(r'\s+', ' ', t).strip()

def hispanize_for_tts(ni_text: str) -> str:
    """Approximate NI with Spanish phonemes for the MMS-TTS Spanish voice."""
    text = (ni_text or "").lower()
    # note: .replace('eś','es') is a no-op after 'ś'→'s', kept for fidelity
    text = text.replace('ŕ','rr').replace('ś','s').replace('eś','es').replace('-', ' ')
    text = re.sub(r'\[.*?\]','',text); text = re.sub(r'\s+',' ',text).strip()
    return add_reading_pauses(text, 3)

def synthesize_speech(text):
    """Run VITS on the hispanized text; returns (rate, waveform) or None."""
    if not text or not text.strip() or model is None or processor is None:
        return None
    try:
        inputs = processor(text=hispanize_for_tts(text), return_tensors="pt").to(device)
        with torch.no_grad():
            output = model(**inputs).waveform
        speech_np = output.cpu().numpy().squeeze()
        mx = max(abs(speech_np.min()), abs(speech_np.max()))
        if mx > 0:
            speech_np = speech_np / mx * 0.9  # normalize with headroom
        return (16000, speech_np.astype(np.float32))
    except Exception as e:
        print(f"Error TTS: {e}"); return None

# ====== Iberian line (appOld) ======
V = "aeiou"
SYL_FOR = {
    "b": ["‹BA›","‹BE›","‹BI›","‹BO›","‹BU›"],
    "d": ["‹DA›","‹DE›","‹DI›","‹DO›","‹DU›"],
    "t": ["‹TA›","‹TE›","‹TI›","‹TO›","‹TU›"],
    # FIX: removed dead "... if False else ..." branch (it contained a ‹DO› typo)
    "g": ["‹GA›","‹GE›","‹GI›","‹GO›","‹GU›"],
    "k": ["‹KA›","‹KE›","‹KI›","‹KO›","‹KU›"]
}
ALPHA_FOR = {"a":"‹A›","e":"‹E›","i":"‹I›","o":"‹O›","u":"‹U›","s":"‹S›","ś":"‹Ś›",
             "l":"‹L›","r":"‹R›","ŕ":"‹Ŕ›","n":"‹N›","m":"‹M›"}
CODA_FOR = {"":"","n":"‹N›","s":"‹S›","ś":"‹Ś›","r":"‹R›","ŕ":"‹Ŕ›","l":"‹L›","m":"‹M›","k":"‹K›","t":"‹T›"}

def tokens_from_latin(ni: str) -> str:
    # NOTE(review): the loop body of this function was corrupted in the source
    # dump; reconstructed from SYL_FOR/ALPHA_FOR/CODA_FOR — confirm against the
    # original implementation.
    out = []; i = 0; ni = (ni or "").lower()
    while i < len(ni):
        ch = ni[i]
        nxt = ni[i+1] if i + 1 < len(ni) else ""
        if ch in SYL_FOR and nxt in V:
            out.append(SYL_FOR[ch][V.index(nxt)])  # CV syllabogram
            i += 2
        elif ch in ALPHA_FOR:
            out.append(ALPHA_FOR[ch])              # alphabetic sign
            i += 1
        elif ch in CODA_FOR and CODA_FOR[ch]:
            out.append(CODA_FOR[ch])               # coda consonant
            i += 1
        else:
            i += 1                                  # no mapping (e.g. '-'): skip
    return "".join(out)

# NOTE(review): the original definitions of KEYS_MODE / KEYS_OVERRIDE were lost
# in the dump; defaults reconstructed from how georgeos_keys uses them — verify.
KEYS_MODE = "full"     # "compact" collapses syllabograms to their first letter
KEYS_OVERRIDE = {}     # ni_lower -> pre-computed key string

def georgeos_keys(token_str, ni_plain) -> str:
    """Map ‹…› sign tokens to the keystrokes of the IberiaGeorgeos font."""
    low = (ni_plain or "").lower()
    if low in KEYS_OVERRIDE:
        return KEYS_OVERRIDE[low]
    m = re.findall(r"‹(.*?)›", token_str)
    out = []
    for t in m:
        if KEYS_MODE == "compact":
            if len(t) == 2 and t[0] in "BDTGK":
                out.append(t[0])
            elif t in ("A","E","I","O","U"):
                out.append(t)
            elif t == "Ś":
                out.append("X")
            elif t == "Ŕ":
                out.append("r")
            else:
                out.append(t[0].upper())
        else:
            if len(t) == 2 and t[0] in "BDTGK":
                out.append(t)
            elif t == "Ś":
                out.append("X")
            elif t == "Ŕ":
                out.append("r")
            else:
                out.append(t)
    return "".join(out)

TRIDOT = "/"
def render_ib_with_tridots(ib_toks):
    """Join Iberian-glyph tokens with the tridot word separator; punctuation untouched."""
    res = []
    prev_word = False
    for tk in ib_toks:
        is_punct = tk in VISIBLE_PUNCT
        if is_punct:
            res.append(" " + tk + " ")
            prev_word = False
        else:
            if prev_word:
                res.append(" " + TRIDOT + " ")
            res.append(tk)
            prev_word = True
    return "".join(res).strip()

# ====== BI loader + diagnostics ======
# ### ★ STRICT AND DETERMINISTIC MODE
STRICT_BI_ENFORCE = True   # if True, ambiguous NI surfaces are not admitted
AMBIG_NI = {}              # ni_lower -> set of conflicting ES surfaces
BI_DIAG_HTML = "Sin CSV cargado."

def load_bi_strict_and_diagnose():
    """Load the CSV, fill ES2NI/NI2ES and prepare an HTML diagnostics blob."""
    global BI_DIAG_HTML
    # empty all tables before loading (determinism)
    ES2NI.clear(); NI2ES.clear(); ESPHRASE2NI.clear(); NIPHRASE2ES.clear()
    AMBIG_NI.clear()
    if not os.path.exists(CSV_BI):
        msg = f"[ERROR] No se encontró el CSV bilingüe: {CSV_BI}"
        print(msg); BI_DIAG_HTML = f"Error: {escape(msg)}"
        return False
    rows = 0; dup_es = 0; dup_ni = 0; empty_pid = 0
    mismatch_backmap = 0
    mismatch_samples = []
    pid_seen = set()
    print(f"Detectado CSV bilingüe: {CSV_BI}")
    try:
        with _open_maybe_gzip(CSV_BI) as f:
            rd = csv.DictReader(f)
            flds = set(rd.fieldnames or [])
            # column aliases, newest naming first
            ES_COL = "source_es" if "source_es" in flds else "es_surface" if "es_surface" in flds else "es"
            NI_COL = "target_ni" if "target_ni" in flds else "ni_surface" if "ni_surface" in flds else "ni"
            IDCOL = "pair_id" if "pair_id" in flds else "id" if "id" in flds else None
            FLAGCOL = None
            for cand in FLAG_COLNAMES:
                if cand in flds:
                    FLAGCOL = cand; break
            base_rows = []
            for r in rd:
                es_orig = (r.get(ES_COL) or "").strip()
                ni_orig = (r.get(NI_COL) or "").strip()
                if not (es_orig and ni_orig):
                    continue
                pid = (r.get(IDCOL) or "").strip() if IDCOL else ""
                if not pid:
                    empty_pid += 1
                else:
                    pid_seen.add(pid)
                flags = (r.get(FLAGCOL) or "") if FLAGCOL else ""
                es = lower(es_orig)
                ni = lower(ni_orig)
                # Phrases
                if " " in es:
                    if es not in ESPHRASE2NI:  # deterministic: first row wins
                        ESPHRASE2NI[es] = (ni_orig, pid)
                if " " in ni:
                    if ni not in NIPHRASE2ES:
                        NIPHRASE2ES[ni] = (es_orig, pid)
                # ES→NI (deterministic: first row wins)
                if es in ES2NI:
                    dup_es += 1
                else:
                    ES2NI[es] = (ni_orig, pid)
                # NI→ES (deterministic + ambiguity blocking)
                if ni in NI2ES:
                    dup_ni += 1
                    # record the ambiguity
                    s = AMBIG_NI.get(ni, set())
                    s.add(NI2ES[ni][0]); s.add(es_orig)
                    AMBIG_NI[ni] = s
                    if STRICT_BI_ENFORCE:
                        NI2ES.pop(ni, None)  # invalidate the conflicting NI surface
                else:
                    if STRICT_BI_ENFORCE and ni in AMBIG_NI:
                        # already flagged ambiguous: do not reinsert
                        pass
                    else:
                        NI2ES[ni] = (es_orig, pid)
                base_rows.append((es_orig, ni_orig, pid, flags))
                rows += 1
        # Deterministic expansions (only add to ES2NI; NEVER touch NI2ES)
        if EXPANSION_ENABLE:
            for es_orig, ni_orig, pid, flags in base_rows:
                if not flags:
                    continue
                if _has_flag(flags, FLAG_PLURAL):
                    pl = _pluralize_es_form(es_orig)
                    pl_key = lower(pl)
                    if pl_key not in ES2NI:
                        ES2NI[pl_key] = (ni_orig, pid)
                if _has_flag(flags, FLAG_3PL):
                    p3 = _present_3pl_from_3sg(es_orig)
                    p3_key = lower(p3)
                    if p3_key not in ES2NI:
                        ES2NI[p3_key] = (ni_orig, pid)
        # Asymmetry diagnostics (does not affect determinism)
        for es_low, (ni_surf, _) in ES2NI.items():
            ni_low = lower(ni_surf)
            back = NI2ES.get(ni_low)
            if back and lower(back[0]) != es_low:
                mismatch_backmap += 1
                if len(mismatch_samples) < 10:
                    mismatch_samples.append((es_low, ni_low, lower(back[0])))
    except Exception as e:
        msg = f"[ERROR] Al leer {CSV_BI}: {e}"
        print(msg); BI_DIAG_HTML = f"Error: {escape(msg)}"
        return False
    es_unique = len(ES2NI)
    ni_unique = len(NI2ES)
    pid_unique = len(pid_seen)
    print(f"✓ BI-ONLY ESTRICTO cargado: {rows:,} filas.")
    if dup_es: print(f"[AVISO] {dup_es:,} duplicados ES (se usó la primera).")
    if dup_ni: print(f"[AVISO] {dup_ni:,} duplicados NI (bloqueados en modo estricto).")
    if empty_pid: print(f"[AVISO] {empty_pid:,} filas sin pair_id.")
    if mismatch_backmap: print(f"[ALERTA] {mismatch_backmap:,} asimetrías ES↔NI (misma NI apunta a otro ES).")
    # NOTE(review): the HTML markup of the diagnostics blob was stripped by the
    # source dump (the remaining fragments were syntactically invalid Python);
    # reconstructed minimally — confirm against the original layout.
    sam_html = ""
    if mismatch_samples:
        sam_rows = "".join(
            f"<li>{escape(es)} → {escape(ni)} → {escape(es2)}</li>"
            for es, ni, es2 in mismatch_samples
        )
        sam_html = f"<details><summary>Muestras</summary><ul>{sam_rows}</ul></details>"
    ambN = sum(len(v) > 1 for v in AMBIG_NI.values())
    ambList = ", ".join(f"{k}→{sorted(list(v))[:3]}" for k, v in list(AMBIG_NI.items())[:5])
    BI_DIAG_HTML = f"""
    <b>Diagnóstico del CSV BI</b><br>
    Archivo: {escape(CSV_BI)}<br>
    Filas base (CSV): {rows:,}<br>
    ES únicas (tras expansiones): {es_unique:,}  |  NI únicas: {ni_unique:,}  |  pair_id únicos: {pid_unique:,}<br>
    Duplicados ES: {dup_es:,}  |  Duplicados NI: {dup_ni:,} (bloqueados en estricto)  |  Sin pair_id: {empty_pid:,}<br>
    Asimetrías ES↔NI: {mismatch_backmap:,} {sam_html}<br>
    NI ambiguas bloqueadas: {ambN:,}{(' · ej.: ' + escape(ambList)) if ambN else ''}<br>
    Regla: el motor usa sólo tablas 1:1; NI duplicadas se bloquean y se muestran como [AMB-NI:...].
    """
    return rows > 0

print("Cargando léxico/pares (BI-estricto)…")
load_bi_strict_and_diagnose()

# ====== n-gram utility (longest-match, BI-only) ======
def _longest_match(tokens, i, phrase_map):
    """Return (span, surface) if a mapped phrase starts at i; else (0, None)."""
    if not phrase_map:
        return (0, None)
    max_span = 0; surface = None
    # deterministic: the longest span wins
    for span in range(1, MAX_NGRAM + 1):
        if i + span > len(tokens):
            break
        cand = " ".join(lower(t) for t in tokens[i:i+span])
        if cand in phrase_map:
            max_span = span
            surface = phrase_map[cand][0]
    return (max_span, surface)

# ====== ES post-processing (spacing + sentence capitalization) ======
def sentence_case_spanish(s: str) -> str:
    """Capitalize the first letter of each sentence, skipping [ ... ] spans."""
    out = []
    start = True
    in_br = False  # inside [ ... ]
    WRAPS = "¿¡\"'«(“‘["
    for ch in s:
        if ch == '[':
            in_br = True
        if not in_br and start:
            if ch.isspace():
                out.append(ch)
            elif ch in WRAPS:
                out.append(ch)  # openers do not consume the "start" state
            elif ch.isalpha():
                out.append(ch.upper()); start = False
            else:
                out.append(ch)
                start = ch in "¿¡"
        else:
            out.append(ch)
            if not in_br and ch in ".?!…":
                start = True
            elif not in_br and ch in "¿¡":
                start = True
        if ch == ']':
            in_br = False
    return "".join(out)
# ✦ FIX (upstream): never re-space times/decimals; add a space only after . ! ? ;
# (NOT after comma/":")
def postprocess_spanish(s: str) -> str:
    """Spacing/caps cleanup for ES output; keeps times (18:30) and decimals intact."""
    # 1) compact times and decimals
    s = re.sub(r"(\d)\s*:\s*(\d)", r"\1:\2", s)        # 18:30
    s = re.sub(r"(\d)\s*([.,])\s*(\d)", r"\1\2\3", s)  # 12,65 / 3.1415
    # 2) spacing and signs
    s = re.sub(r"\s+([,.;:!?])", r"\1", s)             # nothing before closing signs
    # add a space ONLY after . ! ? ; (NOT after comma/":")
    s = re.sub(r"([?.!;])(?!\s|$)([^\s])", r"\1 \2", s)
    # 3) inverted openers hug the following word
    s = re.sub(r"([¿¡])\s+", r"\1", s)
    # 4) collapse space runs
    s = re.sub(r"\s{2,}", " ", s).strip()
    # 5) sentence-initial capitalization
    return sentence_case_spanish(s)

# ====== Strict BI translation ======
def translate_es_to_ni_bi(text: str):
    """ES→NI: exact 1:1 surface replacement; returns (ni_text, ib_html)."""
    toks = simple_tokenize(text)
    out = []
    i = 0
    while i < len(toks):
        t = toks[i]
        if t in VISIBLE_PUNCT:
            out.append(t); i += 1; continue
        if is_placeholder(t):
            out.append(t); i += 1; continue
        span, ni_surface = _longest_match(toks, i, ESPHRASE2NI)
        if span > 1:
            out.append(ni_surface)
            i += span; continue
        key = lower(t)
        if key in ES2NI:
            out.append(ES2NI[key][0])
        elif is_number(key):
            out.append(t)
        else:
            out.append(f"[SIN-LEX:{t}]")  # unknown surface: explicit marker
        i += 1
    if MODAL_SUFFIX_ENABLE:
        out = add_modal_suffixes_es2ni(out)
    # FIX: the Iberian line is now built exactly once, AFTER the modal pass.
    # Previously it was also built token-by-token inside the loop above and
    # then unconditionally discarded and rebuilt — pure wasted work.
    ib_toks = []
    for tt in out:
        if tt in VISIBLE_PUNCT or tt.startswith("["):
            ib_toks.append(tt)  # punctuation/placeholders pass through as-is
        else:
            ib_toks.append(georgeos_keys(tokens_from_latin(tt), tt))
    ni_text = detokenize(out)
    # NOTE(review): the HTML wrapper was stripped by the source dump;
    # reconstructed from the .ib-line CSS class — confirm against the original.
    ib_html = "<div class='ib-line'>" + escape(render_ib_with_tridots(ib_toks)) + "</div>"
    return ni_text, ib_html

def translate_ni_to_es_bi(text: str):
    """NI→ES: exact 1:1 surface replacement with ambiguity blocking."""
    toks = simple_tokenize(text)
    if MODAL_SUFFIX_ENABLE:
        toks = strip_modal_suffixes_ni(toks)
    out = []
    i = 0
    while i < len(toks):
        t = toks[i]
        if t in VISIBLE_PUNCT:
            out.append(t); i += 1; continue
        if is_placeholder(t):
            out.append(t); i += 1; continue
        span, es_surface = _longest_match(toks, i, NIPHRASE2ES)
        if span > 1:
            out.append(es_surface); i += span; continue
        key = lower(t)
        if key in NI2ES:
            es = NI2ES[key][0] or ""
            out.append(es if es else f"[?:{t}]")
        elif key in AMBIG_NI and STRICT_BI_ENFORCE:
            # ★ deterministic: never pick arbitrarily among colliding NI surfaces
            out.append(f"[AMB-NI:{t}]")
        elif is_number(key):
            out.append(t)
        else:
            out.append(f"[?:{t}]")
        i += 1
    if MODAL_SUFFIX_ENABLE:
        out = add_inverted_openers(out)
    es_text = detokenize(out)
    es_text = postprocess_spanish(es_text)
    return es_text
# ====== Diagnostics ======
def diagnose_text(text, dir_label):
    """Coverage / ambiguity / asymmetry report for *text* in the given direction."""
    if not text or not text.strip():
        return "Introduce texto para diagnosticar."
    toks = simple_tokenize(text)
    unknown = set(); asym = set(); amb = set()
    total_tokens = 0; covered = 0
    if dir_label.startswith("ES"):
        head = "ES→NI"
        i = 0
        while i < len(toks):
            t = toks[i]
            if t in VISIBLE_PUNCT or is_number(t):
                i += 1; continue
            total_tokens += 1
            span, _ = _longest_match(toks, i, ESPHRASE2NI)
            if span > 1:
                covered += 1; i += span; continue
            k = lower(t)
            if k not in ES2NI:
                unknown.add(t); i += 1; continue
            covered += 1
            ni = ES2NI[k][0]
            back = NI2ES.get(lower(ni))
            if back and lower(back[0]) != k:
                # round-trip lands on a different ES surface
                asym.add(f"{t} → {ni} → {back[0]}")
            i += 1
    else:
        head = "NI→ES"
        i = 0
        while i < len(toks):
            t = toks[i]
            if t in VISIBLE_PUNCT or is_number(t):
                i += 1; continue
            total_tokens += 1
            span, _ = _longest_match(toks, i, NIPHRASE2ES)
            if span > 1:
                covered += 1; i += span; continue
            k = lower(t)
            if k in AMBIG_NI:
                amb.add(t); i += 1; continue
            if k not in NI2ES:
                unknown.add(t); i += 1; continue
            covered += 1
            es = NI2ES[k][0]
            back = ES2NI.get(lower(es))
            if back and lower(back[0]) != k:
                asym.add(f"{t} → {es} → {back[0]}")
            i += 1
    cov_pct = (covered / total_tokens * 100) if total_tokens else 100.0
    # NOTE(review): the HTML markup of this report was stripped by the source
    # dump (only bullet glyphs survived); reconstructed minimally — confirm
    # against the original layout.
    cov_html = (f"<p>Tokens (sin puntuación/numéricos): {total_tokens}  |  "
                f"Cubiertos: {covered} ({cov_pct:.1f}%)</p>")
    unk_html = "".join(f"<li>{escape(u)}</li>" for u in sorted(unknown, key=lambda x: lower(x))) or "<li>—</li>"
    amb_html = "".join(f"<li>{escape(a)}</li>" for a in sorted(amb, key=lambda x: lower(x))) or "<li>—</li>"
    asy_html = "".join(f"<li>{escape(a)}</li>" for a in sorted(asym)) or "<li>—</li>"
    return (f"<b>Diagnóstico {head}</b>{cov_html}"
            f"Ambiguas (NI duplicada):<ul>{amb_html}</ul>"
            f"Faltantes:<ul>{unk_html}</ul>"
            f"Asimetrías:<ul>{asy_html}</ul>")

# ====== UI (CSS / accordions / fonts) ======
# UI strings, per language. Keys are shared between "ES" and "EN" so
# switch_lang() can swap every label/placeholder/accordion title in one pass.
LABELS = {
    "ES": {
        "title": "Traductor Español ↔ Neoíbero",
        "subtitle": "CSV estricto (BI-only 1:1; sin heurísticas; .gz) — determinista",
        "in_label_es": "✏️ Entrada (Español)",
        "in_label_ni": "✏️ Entrada (Neoíbero)",
        "in_ph_es": "Escribe aquí. Ej.: Veo a Ana y doy pan a Marta.",
        "in_ph_ni": "Idatzi hemen. Adib.: nuker-ke ni etxe-ka.",
        "out_lat_esni": "📜 Salida: Neoíbero (latín)",
        "out_lat_nies": "📜 Salida: Español",
        "out_ib": "🗿 Línea ibérica",
        "out_audio": "🔊 Locución (Audio)",
        "btn": "🔄 Traducir",
        "combo": "🌍 Idioma (UI + explicación)",
        "dir": "🔁 Dirección",
        "dir_opts": ["ES → NI", "NI → ES"],
        "doc_header": "📚 Documentación y Referencia",
        # One title per accordion; order must match DOC["ES"].
        "acc_titles": [
            "🎓 Marco académico y decisiones del neoíbero",
            "🏛️ Herencia posible del íbero histórico",
            "🎨 Diseño de la conlang (neoíbero)",
            "⚙️ Pipeline del traductor (BI-estricto 1:1)",
            "🔤 Ortografía, línea ibérica y claves",
            "❓/❗ Modalidad vascoide (-na / -ba)",
            "🧩 Expansiones por CSV: plurales (S) y 3pl (3/V3)",
            "📖 Gramática de referencia (v1.2)",
            "📚 Bibliografía de base",
            "🧾 Siglas y glosario",
            "🔗 Simetría por pair_id (modo bilingüe)"
        ]
    },
    "EN": {
        "title": "Spanish ↔ Neo-Iberian Translator",
        "subtitle": "Strict BI-only (1:1 surfaces; no heuristics; .gz) — deterministic",
        "in_label_es": "✏️ Input (Spanish)",
        "in_label_ni": "✏️ Input (Neo-Iberian)",
        "in_ph_es": "Type here. E.g., Veo a Ana y doy pan a Marta.",
        "in_ph_ni": "Type here. E.g., nuker-ke ni etxe-ka.",
        "out_lat_esni": "📜 Output: Neo-Iberian (Latin)",
        "out_lat_nies": "📜 Output: Spanish",
        "out_ib": "🗿 Iberian line",
        "out_audio": "🔊 Speech (Audio)",
        "btn": "🔄 Translate",
        "combo": "🌍 Language (UI + docs)",
        "dir": "🔁 Direction",
        "dir_opts": ["ES → NI", "NI → ES"],
        "doc_header": "📚 Documentation & Reference",
        "acc_titles": [
            "🎓 Background & design choices",
            "🏛️ Possible inheritance from ancient Iberian",
            "🎨 Conlang design (Neo-Iberian)",
            "⚙️ Translator pipeline (strict 1:1)",
            "🔤 Orthography, Iberian line & keys",
            "❓/❗ Vascoid modality (-na / -ba)",
            "🧩 CSV-driven expansions: plurals (S) & 3pl (3/V3)",
            "📖 Reference grammar (v1.2)",
            "📚 Core references",
            "🧾 Acronyms & glossary",
            "🔗 Pair-id symmetry (bilingual mode)"
        ]
    }
}

# Accordion bodies (markdown), one entry per accordion title above.
DOC = {
    "ES": [
        "**Escritura y datos.** Un **único CSV con `pair_id`** y superficies exactas. La traducción ES↔NI es **1:1** por superficie.",
        "**Herencia plausible del íbero.** Fonotaxis CV(C); p→b; r/ŕ; casos -k/-te/-ka/-ar/-en/-i.",
        "**Diseño del neoíbero.** TAM: PRS -ke, PST -bo, FUT -ta, IPFV -ri, IMP -tu, COND/SBJV -ni, FUT_SBJV -ra.",
        "**Pipeline (BI-estricto 1:1).** Tokeniza; sustitución exacta; NI ambigua **se bloquea** y sale como `[AMB-NI:…]`.",
        "**Ortografía y línea ibérica.** Tokens BA/BE/…; tridots '/'; p→b; codas N/S/Ś/R/Ŕ/L/M/K/T.",
        "**Modalidad (-na/-ba).** ES→NI puede omitir ¿?¡! (si está activo). NI→ES inserta `¿…?`/`¡…!` al final de la oración marcada, **no en comas**.",
        "**Expansiones por CSV (deterministas).** `flags=S` plural regular; `flags=3|V3` 3ª plural del presente. Solo si lo marcas.",
        "**Gramática mínima.** Visualización; la gramática no se “calcula”.",
        "**Bibliografía.** Untermann; de Hoz; Ferrer i Jané; Correa…",
        "**Glosario & datasets.** Faltas → `[SIN-LEX:…]` / `[?:…]`. Ambiguas → `[AMB-NI:…]` (limpia tu CSV).",
        "**Simetría por pair_id.** El diagnóstico avisa si una NI apunta a dos ES distintos."
    ],
    "EN": [
        "One bilingual CSV with `pair_id` and exact surfaces. ES↔NI is strictly 1:1.",
        "Possible inheritance (non-palaeographic).",
        "Neo-Iberian design (phonology & morphology).",
        "Pipeline: tokenise → exact replacement. Ambiguous NI are **blocked** and rendered as `[AMB-NI:…]`.",
        "Orthography, Iberian line & keys.",
        "Modality (-na/-ba): ES→NI can drop ¿?¡!. NI→ES places them at sentence end, not at commas.",
        "CSV-driven expansions (deterministic): `S` plural; `3|V3` present 3pl.",
        "Minimal grammar (v1.2).",
        "Selected references.",
        "Glossary & datasets.",
        "Pair-id symmetry diagnostics."
    ]
}

# ====== CSS + font ======
def build_css():
    """Build the themed CSS; embeds Iberia-Georgeos.ttf as base64 if present."""
    b64 = None
    if os.path.exists("Iberia-Georgeos.ttf"):
        with open("Iberia-Georgeos.ttf", "rb") as f:
            b64 = base64.b64encode(f.read()).decode("ascii")
    # Fall back to a system font when the TTF is not shipped alongside the app.
    font_src = f"url(data:font/ttf;base64,{b64}) format('truetype')" if b64 else "local('sans-serif')"
    return f"""
@font-face {{ font-family: 'IberiaGeorgeos'; src: {font_src}; font-weight: normal; font-style: normal; }}
:root {{ --iberian-clay:#8B4513; --iberian-ochre:#CC7722; --iberian-stone:#5C5C5C; --iberian-sand:#D2B48C; --iberian-rust:#A0522D; --iberian-bronze:#CD7F32; }}
.gradio-container {{ background:linear-gradient(135deg,#f4e8d8 0%,#e8d5c4 50%,#d4c4b0 100%)!important; font-family:'Georgia','Times New Roman',serif!important; }}
.gradio-container h1, .gradio-container h2, .gradio-container h3 {{ color:var(--iberian-clay)!important; text-shadow:2px 2px 4px rgba(139,69,19,.15)!important; border-bottom:3px solid var(--iberian-bronze)!important; padding-bottom:.5rem!important; letter-spacing:.5px!important; }}
.gradio-container .gr-group {{ background:linear-gradient(to bottom,#f9f6f0,#ede6dc)!important; border:2px solid var(--iberian-sand)!important; border-radius:8px!important; box-shadow:0 4px 12px rgba(139,69,19,.2), inset 0 1px 0 rgba(255,255,255,.5)!important; padding:1.5rem!important; margin-bottom:1.5rem!important; }}
.gradio-container .gr-accordion {{ background:linear-gradient(145deg,#ebe3d5,#d9cec0)!important; border:2px solid var(--iberian-rust)!important; border-radius:6px!important; margin-bottom:.8rem!important; box-shadow:2px 2px 6px rgba(0,0,0,.15)!important; }}
.gradio-container .gr-accordion .label-wrap {{ background:linear-gradient(to right,var(--iberian-ochre),var(--iberian-rust))!important; color:#fff!important; font-weight:600!important; padding:.8rem 1rem!important; border-radius:4px!important; text-shadow:1px 1px 2px rgba(0,0,0,.3)!important; }}
.gradio-container .gr-textbox textarea, .gradio-container .gr-textbox input {{ background:linear-gradient(to bottom,#faf8f3,#f5f0e8)!important; border:2px solid var(--iberian-sand)!important; border-radius:6px!important; color:var(--iberian-stone)!important; font-family:'Georgia',serif!important; box-shadow:inset 2px 2px 4px rgba(139,69,19,.1)!important; }}
.gradio-container .gr-textbox textarea:focus, .gradio-container .gr-textbox input:focus {{ border-color:var(--iberian-bronze)!important; box-shadow:inset 2px 2px 4px rgba(139,69,19,.1), 0 0 8px rgba(205,127,50,.3)!important; }}
.gradio-container .gr-button.gr-button-primary {{ background:linear-gradient(145deg,var(--iberian-bronze),var(--iberian-rust))!important; border:2px solid var(--iberian-clay)!important; color:#fff!important; font-weight:bold!important; text-shadow:1px 2px 2px rgba(0,0,0,.4)!important; box-shadow:0 4px 8px rgba(139,69,19,.3), inset 0 1px 0 rgba(255,255,255,.2)!important; border-radius:8px!important; padding:.8rem 1.5rem!important; transition:all .3s ease!important; }}
.gradio-container .gr-button.gr-button-primary:hover {{ background:linear-gradient(145deg,var(--iberian-rust),var(--iberian-bronze))!important; transform:translateY(-2px)!important; box-shadow:0 6px 12px rgba(139,69,19,.4)!important; }}
.ib-line {{ font-family:'IberiaGeorgeos',monospace,sans-serif!important; font-size:1.9rem!important; line-height:2.4rem!important; white-space:pre-wrap!important; background:linear-gradient(135deg,#e8dcc8 0%,#d4c4a8 50%,#c4b098 100%)!important; padding:24px!important; border-radius:10px!important; border:3px solid var(--iberian-rust)!important; border-left:6px solid var(--iberian-bronze)!important; box-shadow:0 4px 15px rgba(139,69,19,.25), inset 0 2px 4px rgba(0,0,0,.1)!important; color:var(--iberian-clay)!important; position:relative!important; }}
.ib-line::before {{ content:''!important; position:absolute!important; inset:0!important; background-image:repeating-linear-gradient(0deg,transparent,transparent 2px, rgba(139,69,19,.03) 2px, rgba(139,69,19,.03) 4px)!important; pointer-events:none!important; border-radius:10px!important; }}
@media (max-width:768px) {{ .ib-line {{ font-size:1.5rem!important; line-height:2rem!important; padding:16px!important; }} .gradio-container .gr-group {{ padding:1rem!important; }} .gradio-container h1 {{ font-size:1.8rem!important; }} }}
@media (max-width:480px) {{ .ib-line {{ font-size:1.3rem!important; line-height:1.8rem!important; padding:12px!important; }} .gradio-container h1 {{ font-size:1.5rem!important; }} }}
"""

CSS = build_css()
with gr.Blocks(css=CSS, theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="purple")) as demo:
    # --- header: title + language/direction selectors ---
    with gr.Group():
        title = gr.Markdown(f"# {LABELS['ES']['title']}")
        subtitle = gr.Markdown(f"*{LABELS['ES']['subtitle']}*")
        with gr.Row():
            combo = gr.Dropdown(choices=["ES","EN"], value="ES", label=LABELS["ES"]["combo"])
            direction = gr.Radio(choices=LABELS["ES"]["dir_opts"], value="ES → NI", label=LABELS["ES"]["dir"])
    # --- documentation accordions (one per LABELS[...]["acc_titles"] entry) ---
    with gr.Group():
        doc_header = gr.Markdown(f"## {LABELS['ES']['doc_header']}")
        acc_titles = LABELS["ES"]["acc_titles"]
        with gr.Accordion(acc_titles[0], open=False) as acc1:
            md1 = gr.Markdown(DOC["ES"][0])
        with gr.Accordion(acc_titles[1], open=False) as acc2:
            md2 = gr.Markdown(DOC["ES"][1])
        with gr.Accordion(acc_titles[2], open=False) as acc3:
            md3 = gr.Markdown(DOC["ES"][2])
        with gr.Accordion(acc_titles[3], open=False) as acc4:
            md4 = gr.Markdown(DOC["ES"][3])
        with gr.Accordion(acc_titles[4], open=False) as acc5:
            md5 = gr.Markdown(DOC["ES"][4])
        with gr.Accordion(acc_titles[5], open=False) as acc6:
            md6 = gr.Markdown(DOC["ES"][5])
        with gr.Accordion(acc_titles[6], open=False) as acc7:
            md7 = gr.Markdown(DOC["ES"][6])
        with gr.Accordion(acc_titles[7], open=False) as acc8:
            md8 = gr.Markdown(DOC["ES"][7])
        with gr.Accordion(acc_titles[8], open=False) as acc9:
            md9 = gr.Markdown(DOC["ES"][8])
        with gr.Accordion(acc_titles[9], open=False) as acc10:
            md10 = gr.Markdown(DOC["ES"][9])
        with gr.Accordion(acc_titles[10], open=False) as acc11:
            md11 = gr.Markdown(DOC["ES"][10])
        with gr.Accordion("🧪 Diagnóstico del CSV BI (al cargar)", open=False):
            bi_diag_box = gr.HTML(value=BI_DIAG_HTML)
    # --- translation panel ---
    with gr.Group():
        es_in = gr.Textbox(label=LABELS["ES"]["in_label_es"], placeholder=LABELS["ES"]["in_ph_es"], lines=5)
        with gr.Row():
            btn_tr = gr.Button(LABELS["ES"]["btn"], variant="primary")
            btn_diag = gr.Button("🔎 Diagnosticar BI con este texto", variant="secondary")
        with gr.Row():
            with gr.Column(scale=2):
                ni_out = gr.Textbox(label=LABELS["ES"]["out_lat_esni"], lines=5, interactive=False)
                loc_btn = gr.Button("🔊 Locutar", variant="secondary", visible=True)
                audio_out = gr.Audio(label=LABELS["ES"]["out_audio"], type="numpy")
            with gr.Column(scale=1):
                ib_out = gr.HTML(label=LABELS["ES"]["out_ib"])
        diag_out = gr.HTML(value="")

    def do_translate(text, dir_label):
        """Route the input through the right engine; clears audio/diagnostics.
        NOTE(review): the empty ib-line placeholder strings were stripped by the
        source dump; reconstructed from the .ib-line CSS class — confirm."""
        if not text or not text.strip():
            return (gr.update(value=""),
                    gr.update(value="<div class='ib-line'></div>"),
                    gr.update(visible=False), gr.update(value=None), gr.update(value=""))
        if dir_label.startswith("ES"):
            latin, ib = translate_es_to_ni_bi(text)
            return (gr.update(label=LABELS["ES"]["out_lat_esni"], value=latin),
                    gr.update(value=ib),
                    gr.update(visible=True), gr.update(value=None), gr.update(value=""))
        else:
            es_text = translate_ni_to_es_bi(text)
            return (gr.update(label=LABELS["ES"]["out_lat_nies"], value=es_text),
                    gr.update(value="<div class='ib-line'></div>"),
                    gr.update(visible=False), gr.update(value=None), gr.update(value=""))
    btn_tr.click(do_translate, [es_in, direction], [ni_out, ib_out, loc_btn, audio_out, diag_out])

    def run_locution(latin_text, dir_label):
        # TTS only makes sense for NI output (ES→NI direction)
        if dir_label.startswith("ES"):
            return synthesize_speech(latin_text)
        return None
    loc_btn.click(run_locution, [ni_out, direction], audio_out)

    def do_diagnose(text, dir_label):
        return gr.update(value=diagnose_text(text, dir_label))
    btn_diag.click(do_diagnose, [es_in, direction], [diag_out])

    def switch_lang(sel_lang, dir_label):
        """Relabel every UI element + accordion body for the selected language."""
        L = LABELS[sel_lang]; T = L["acc_titles"]; D = DOC[sel_lang]
        in_label = L["in_label_es"] if dir_label.startswith("ES") else L["in_label_ni"]
        in_ph = L["in_ph_es"] if dir_label.startswith("ES") else L["in_ph_ni"]
        out_lab = L["out_lat_esni"] if dir_label.startswith("ES") else L["out_lat_nies"]
        return (
            gr.update(value=f"# {L['title']}"),
            gr.update(value=f"*{L['subtitle']}*"),
            gr.update(label=L["combo"], value=sel_lang),
            gr.update(label=L["dir"], choices=L["dir_opts"], value=dir_label),
            gr.update(value=f"## {L['doc_header']}"),
            gr.update(label=T[0]), gr.update(value=D[0]),
            gr.update(label=T[1]), gr.update(value=D[1]),
            gr.update(label=T[2]), gr.update(value=D[2]),
            gr.update(label=T[3]), gr.update(value=D[3]),
            gr.update(label=T[4]), gr.update(value=D[4]),
            gr.update(label=T[5]), gr.update(value=D[5]),
            gr.update(label=T[6]), gr.update(value=D[6]),
            gr.update(label=T[7]), gr.update(value=D[7]),
            gr.update(label=T[8]), gr.update(value=D[8]),
            gr.update(label=T[9]), gr.update(value=D[9]),
            gr.update(label=T[10]), gr.update(value=D[10]),
            gr.update(label=in_label, placeholder=in_ph),
            gr.update(label=out_lab),
            gr.update(label=L["out_ib"]),
            gr.update(label=L["out_audio"]),
            gr.update(value=L["btn"])
        )
    combo.change(
        switch_lang, [combo, direction],
        [title, subtitle, combo, direction, doc_header,
         acc1, md1, acc2, md2, acc3, md3, acc4, md4, acc5, md5, acc6, md6,
         acc7, md7, acc8, md8, acc9, md9, acc10, md10, acc11, md11,
         es_in, ni_out, ib_out, audio_out, btn_tr]
    )

    def switch_direction(dir_label, sel_lang):
        """Swap input/output labels and reset outputs when the direction flips."""
        L = LABELS[sel_lang]
        in_label = L["in_label_es"] if dir_label.startswith("ES") else L["in_label_ni"]
        in_ph = L["in_ph_es"] if dir_label.startswith("ES") else L["in_ph_ni"]
        out_lab = L["out_lat_esni"] if dir_label.startswith("ES") else L["out_lat_nies"]
        # FIX: idiomatic boolean instead of `True if … else False`
        loc_vis = dir_label.startswith("ES")
        return (gr.update(label=in_label, placeholder=in_ph),
                gr.update(label=out_lab, value=""),
                gr.update(value="<div class='ib-line'></div>"),
                gr.update(visible=loc_vis),
                gr.update(value=None),
                gr.update(value=""))
    direction.change(
        switch_direction, [direction, combo],
        [es_in, ni_out, ib_out, loc_btn, audio_out, diag_out]
    )

# ====== optional smoke test ======
def _symmetry_smoketest():
    """Round-trip a few NI probes through NI→ES→NI and print the results."""
    print("\n[SMOKE] Prueba ES↔NI (BI-estricto, determinista)…")
    probes = [
        "nuker-ke ni etxe-ka ?",
        "¿Pagaste 12,75 en la cafetería?",
        "Marta llega a las 18:30.",
        "[SIN-LEX:Tomás]-na euŕak-ke !"
    ]
    for p in probes:
        es_from_ni = translate_ni_to_es_bi(p)
        ni_round, _ = translate_es_to_ni_bi(es_from_ni)
        print(" IN:", p)
        print(" ES:", es_from_ni)
        print(" NI:", ni_round)
        print("---")

if DEBUG_MODE:
    _symmetry_smoketest()

if __name__ == "__main__":
    demo.queue().launch()