Spaces:

LoloSemper
/

Spanish_NeoIberian_TranslatorInversionUltimate

Sleeping

App Files Files Community

Spanish_NeoIberian_TranslatorInversionUltimate / app.py

LoloSemper

Update app.py

6d901fa verified 4 months ago

raw

history blame contribute delete

44.5 kB

	# app.py — Traductor Español ↔ Neoíbero (BI-ONLY 1:1 estricto, determinista)
	# UI completa + CSS “íbero” + TTS + Línea ibérica (codificación appOld)
	# Requiere un ÚNICO CSV con superficies exactas (UTF-8) y columnas:
	# - source_es (o es/es_surface)
	# - target_ni (o ni/ni_surface)
	# - pair_id (opcional)
	#
	# El motor NO hace heurísticas ni morfología: 1:1 exacto por superficie.
	# Puntuación y números pasan tal cual. Desconocidos -> [SIN-LEX:...] / [?:...]
	# Determinismo NI→ES: entradas NI duplicadas (ambiguas) quedan bloqueadas y se muestran como [AMB-NI:...]

	import gradio as gr
	import os, csv, re, base64, unicodedata, gzip
	import torch
	from transformers import AutoProcessor, VitsModel
	import numpy as np
	from html import escape

	# ====== cache ======
	# Default the Hugging Face cache locations to /tmp unless the environment
	# already provides them.
	# NOTE(review): this runs after `transformers` was imported above — confirm
	# the library still honours values set this late.
	os.environ.setdefault('TRANSFORMERS_CACHE', '/tmp/cache')
	os.environ.setdefault('HF_HOME', '/tmp/hf')

	DEBUG_MODE = False


	def debug_print(msg):
	    """Print ``msg`` with a ``[DEBUG]`` prefix, but only while ``DEBUG_MODE`` is on."""
	    if not DEBUG_MODE:
	        return
	    print(f"[DEBUG] {msg}")

	# ====== util ======
	def _open_maybe_gzip(path):
	if str(path).endswith(".gz"):
	# CSV debe venir en UTF-8 (evita mojibake)
	return gzip.open(path, "rt", encoding="utf-8", newline="")
	return open(path, "r", encoding="utf-8", newline="")

	def norm(x):
	    """Return ``str(x)`` without surrounding whitespace, or ``""`` for ``None``."""
	    if x is None:
	        return ""
	    return str(x).strip()


	def lower(x):
	    """Return ``norm(x)`` lower-cased."""
	    return norm(x).lower()


	def fold(s: str) -> str:
	    """Strip combining marks (category ``Mn``) from ``s`` after NFD decomposition."""
	    decomposed = unicodedata.normalize('NFD', s or "")
	    kept = [ch for ch in decomposed if unicodedata.category(ch) != "Mn"]
	    return ''.join(kept)

	# ====== rutas ======
	def _cand(*names):
	for n in names:
	if os.path.exists(n): return n
	p = os.path.join("salida", n)
	if os.path.exists(p): return p
	return names[0] # último recurso para mensajes

	# Candidate lexicon files in priority order: the "master/surface-ready"
	# exports first, then backward-compatible names. _cand() returns the first
	# one that exists, or the first name when none does.
	CSV_BI = _cand(
	"LEXICON_UNICO_1a1.csv.gz",
	"MASTER_SURFACE_READY.csv.gz",
	"MASTER_REEXTENDED.csv.gz",
	"BI_SURFACE_READY.csv.gz",
	"HF_Pairs_BI_REEXTENDED.csv.gz",
	"HF_Pairs_BI_EXPANDED1_EXTENDED_FILLED.csv.gz",
	"HF_Pairs_BI_EXPANDED1.csv.gz"
	)

	# ====== estructuras strict BI ======
	# Key = exact surface form, lower-cased. Value = (original surface on the other side, pair_id)
	ES2NI = {} # es_surface_lower -> (ni_surface, pair_id)
	NI2ES = {} # ni_surface_lower -> (es_surface, pair_id)

	# N-grams / phrases (keys contain at least one space):
	ESPHRASE2NI = {} # "el saco" -> (ni_surface, pair_id)
	NIPHRASE2ES = {} # "…-ke ni etxe-ka" -> (es_surface, pair_id)
	# Longest phrase, in tokens, that _longest_match() will try.
	MAX_NGRAM = 3

	# ====== signos / tokenización mínima ======
	VISIBLE_PUNCT = set(",.;:!?¡¿…()[]{}\"'«»—–“”‘’")
	_num_re = re.compile(r"^\d+([.,]\d+)?$")


	def is_number(tok: str) -> bool:
	    """True when ``tok`` is digits with an optional ``.``/``,`` fractional part."""
	    return _num_re.fullmatch(tok or "") is not None

	# --- separadores de cláusula + placeholders atómicos ---
	CLAUSE_BREAKS = {",", ";", "—", "–", ":"}
	PLACEHOLDER_RE = re.compile(r"^\[[^\]]+\]$")


	def is_placeholder(tok: str) -> bool:
	    """True for an atomic bracketed placeholder such as ``[SIN-LEX:x]``."""
	    candidate = tok or ""
	    return PLACEHOLDER_RE.match(candidate) is not None

	def _restore_brk(tok, protected):
	m = re.fullmatch(r"__BRK(\d+)__(?:-(na\|ba))?", tok or "")
	if not m: return tok
	idx = int(m.group(1))
	suf = m.group(2)
	base = protected[idx] if 0 <= idx < len(protected) else tok
	return base + (f"-{suf}" if suf else "")

	def simple_tokenize(text: str):
	    """Split text into word and punctuation tokens, keeping ``[...]`` spans whole.

	    Bracketed spans (optionally followed by ``-na`` / ``-ba``) are swapped for
	    ``__BRK<n>__`` keys before punctuation is spaced out, then restored.
	    """
	    if not text:
	        return []

	    protected = []

	    def _stash(match):
	        protected.append(match.group(0))
	        return f"__BRK{len(protected) - 1}__"

	    work = re.sub(r"\[[^\]]*\]", _stash, (text or "").strip())
	    work = re.sub(r"\s+", " ", work)
	    work = re.sub(r"([,.;:!?¡¿…()\[\]{}\"'«»—–“”‘’])", r" \1 ", work)

	    tokens = []
	    for piece in work.split():
	        if piece.startswith("__BRK") and "__" in piece:
	            piece = _restore_brk(piece, protected)
	        tokens.append(piece)
	    return tokens

	def detokenize(tokens):
	    """Join tokens with single spaces and tidy the spacing around punctuation."""
	    joined = " ".join(tokens)
	    cleanups = (
	        (r"\s+([,.;:!?])", r"\1"),  # nothing before closing marks
	        (r"([¿¡])\s+", r"\1"),      # nothing after inverted openers
	        (r"\(\s+", "("),
	        (r"\s+\)", ")"),
	        (r"\s{2,}", " "),
	    )
	    for pattern, replacement in cleanups:
	        joined = re.sub(pattern, replacement, joined)
	    return joined.strip()

	# ====== Modalidad vascoide (-na / -ba) ======
	# Master switch for the -na / -ba handling in both directions.
	MODAL_SUFFIX_ENABLE = True
	# ES→NI: only a token accepted by looks_like_finite_ni() may host the suffix
	# (add_modal_suffixes_es2ni falls back to the last non-punctuation token).
	MODAL_ONLY_ON_FINITE = True
	# ES→NI: drop ¿ ? ¡ ! from the output once the suffixes have been added.
	MODAL_STRIP_QE_IN_NI = True

	# Sentence-final marks.
	SENT_END = {".", "!", "?", "…"}
	# Closing mark -> matching inverted opener.
	OPEN_FOR = {"?": "¿", "!": "¡"}
	# Opening quotes/brackets that add_inverted_openers() skips before inserting an opener.
	WRAP_PREFIX = set(list("«“‘([{\"'"))
	# Endings tried after a TAM_FINITE suffix by looks_like_finite_ni().
	PERS_ENDINGS = ("-n","-zu","-gu","-zuk","-zuek","-k")
	# Suffixes that looks_like_finite_ni() tests for.
	TAM_FINITE = ("-ke","-bo","-ta","-ni","-tu")

	def looks_like_finite_ni(tok: str) -> bool:
	    """Heuristically tell whether a Neo-Iberian token looks like a finite verb.

	    A trailing ``-na`` / ``-ba`` is ignored; what remains must end in one of
	    ``TAM_FINITE``, optionally followed by one of ``PERS_ENDINGS``. Empty
	    tokens and tokens starting with ``[`` never qualify.
	    """
	    t = (tok or "").lower()
	    if not t or t.startswith("["):
	        return False
	    # Real alternation: written as "\|" the pattern only matched the literal
	    # text "-na|ba", so an already-suffixed verb was never recognised.
	    base = re.sub(r"-(?:na|ba)$", "", t)
	    endings = tuple(tam + pe for tam in TAM_FINITE for pe in ("", *PERS_ENDINGS))
	    return base.endswith(endings)

	def last_content_index(tokens, start, end_exclusive):
	    """Index of the last non-punctuation token in ``tokens[start:end_exclusive]``, or -1."""
	    for idx in range(end_exclusive - 1, start - 1, -1):
	        if tokens[idx] not in VISIBLE_PUNCT:
	            return idx
	    return -1

	def strip_qe_punct(tokens):
	    """Return a new list without the question/exclamation marks ``¿ ? ¡ !``."""
	    marks = ("¿", "?", "¡", "!")
	    kept = []
	    for tok in tokens:
	        if tok in marks:
	            continue
	        kept.append(tok)
	    return kept

	# --- helpers numéricos para no cortar decimales/horas ---
	def _is_numeric_comma(tokens, i):
	return (0 < i < len(tokens)-1 and tokens[i] == "," and
	is_number(tokens[i-1]) and is_number(tokens[i+1]))

	def _is_time_colon(tokens, i):
	return (0 < i < len(tokens)-1 and tokens[i] == ":" and
	is_number(tokens[i-1]) and is_number(tokens[i+1]))

	def _is_true_clause_break(tokens, i):
	    """True when ``tokens[i]`` is a clause separator that is not part of a number or a time."""
	    return (
	        tokens[i] in CLAUSE_BREAKS
	        and not _is_numeric_comma(tokens, i)
	        and not _is_time_colon(tokens, i)
	    )

	def add_modal_suffixes_es2ni(tokens):
	    """Mark questions and exclamations with ``-na`` / ``-ba`` on the NI side.

	    For every ``?`` or ``!`` the suffix goes on the nearest preceding token of
	    the same sentence that qualifies: a finite-looking token when
	    ``MODAL_ONLY_ON_FINITE`` is set, otherwise any non-punctuation token. If
	    none qualifies, the last non-punctuation token of the sentence is used. A
	    token already ending in that suffix is left alone. With
	    ``MODAL_STRIP_QE_IN_NI`` the ``¿ ? ¡ !`` marks are removed from the result.
	    """
	    if not MODAL_SUFFIX_ENABLE:
	        return tokens

	    result = tokens[:]
	    sentence_start = 0
	    for pos in range(len(result)):
	        mark = result[pos]
	        if mark in ("?", "!"):
	            host = -1
	            for back in range(pos - 1, sentence_start - 1, -1):
	                candidate = result[back]
	                if candidate in VISIBLE_PUNCT:
	                    continue
	                if MODAL_ONLY_ON_FINITE and not looks_like_finite_ni(candidate):
	                    continue
	                host = back
	                break
	            if host == -1:
	                host = last_content_index(result, sentence_start, pos)
	            if host != -1:
	                suf = "na" if mark == "?" else "ba"
	                if not re.search(rf"-(?:{suf})$", result[host].lower()):
	                    result[host] = result[host] + "-" + suf
	            sentence_start = pos + 1
	        elif mark in SENT_END:
	            sentence_start = pos + 1

	    if MODAL_STRIP_QE_IN_NI:
	        result = strip_qe_punct(result)
	    return result

	def strip_modal_suffixes_ni(tokens):
	    """Turn NI ``-na`` / ``-ba`` suffixes back into ``?`` / ``!`` sentence endings.

	    Tokens are buffered per sentence. ``-na`` marks the sentence as a question
	    and ``-ba`` as an exclamation; the suffix is removed and the mark is emitted
	    only when the sentence ends — clause separators such as commas do not close
	    it. Explicit ``¿ ? ¡ !`` are dropped from the buffer and re-derived. An
	    unmarked sentence keeps its own terminator, or gets ``.`` if it had none.

	    Returns ``tokens`` untouched when ``MODAL_SUFFIX_ENABLE`` is off, otherwise
	    a new token list.
	    """
	    if not MODAL_SUFFIX_ENABLE:
	        return tokens

	    out = []
	    buf = []
	    pending_end = None
	    mode = None  # "?" / "!" once the current sentence is marked

	    def _emit():
	        """Flush the buffered sentence with its closing mark and reset the state."""
	        nonlocal mode, pending_end
	        local = [t for t in buf if t not in ("¿", "?", "¡", "!")]
	        if local:
	            out.extend(local)
	            out.append(mode or pending_end or ".")
	        buf.clear()
	        mode = None
	        pending_end = None

	    # The trailing "." is a sentinel that flushes a final unterminated sentence.
	    for t in tokens + ["."]:
	        if t in ("¿", "¡"):
	            _emit()
	            mode = "?" if t == "¿" else "!"
	            continue
	        if t in ("?", "!") or t in SENT_END:
	            pending_end = t
	            _emit()
	            continue

	        # Inside a marked sentence a clause separator does not close it yet.
	        if t in CLAUSE_BREAKS and mode in ("?", "!"):
	            buf.append(t)
	            continue

	        # Real alternation: written as "\|" the pattern only matched a literal
	        # "-na|ba", so no suffix was ever turned back into ?/!.
	        m = re.search(r"-(na|ba)$", (t or "").lower())
	        if m:
	            if mode and buf:
	                _emit()
	            mode = "?" if m.group(1) == "na" else "!"
	            t = t[:-len(m.group(0))]

	        if t:
	            buf.append(t)

	    if len(out) >= 2 and out[-1] == "." and out[-2] == ".":
	        out.pop()
	    return out

	def add_inverted_openers(tokens):
	    """Insert ``¿`` / ``¡`` at the start of every span that ends in ``?`` / ``!``.

	    A span starts after the previous sentence-final mark or true clause
	    separator (commas/colons inside numbers and times do not count). Opening
	    quotes/brackets at the start of the span are skipped so the opener lands
	    inside them, and nothing is inserted when the opener is already there.

	    Returns a new list; ``tokens`` is not modified.
	    """
	    # The unused ``START_BREAKS = SENT_END \| CLAUSE_BREAKS`` line was dropped:
	    # the escaped "|" made it a syntax error and nothing read the value.
	    out = tokens[:]

	    def _is_true_start_break(idx):
	        if out[idx] in SENT_END:
	            return True
	        if out[idx] in CLAUSE_BREAKS:
	            return _is_true_clause_break(out, idx)
	        return False

	    i = 0
	    while i < len(out):
	        if out[i] in ("?", "!"):
	            opener = OPEN_FOR[out[i]]
	            j = i - 1
	            while j >= 0 and not _is_true_start_break(j):
	                j -= 1
	            k = j + 1
	            while k < i and out[k] in WRAP_PREFIX:
	                k += 1
	            if not (k < len(out) and out[k] == opener):
	                out.insert(k, opener)
	                i += 1  # the closing mark moved one slot to the right
	        i += 1
	    return out

	# ====== EXPANSIONES (deterministas, sólo ES→NI) ======
	# Gate for the flag-driven ES2NI expansions performed by the CSV loader.
	EXPANSION_ENABLE = True
	# Column names accepted as the flags column; the loader takes the first one present.
	FLAG_COLNAMES = ("flags","FLAGS","expand","EXPAND","tags","TAGS","morph","MORPH")
	# Substrings of the flags cell that request a plural form (see _has_flag).
	FLAG_PLURAL = ("S",)
	# Substrings of the flags cell that request a 3rd-person-plural form.
	FLAG_3PL = ("3","V3")

	# Characters _pluralize_es_form() treats as a vowel ending.
	VOWELS = "aeiouáéíóúüAEIOUÁÉÍÓÚÜ"

	def _has_flag(cell:str, wanted:tuple)->bool:
	c = (cell or "")
	return any(w in c for w in wanted)

	def _pluralize_es_form(s: str) -> str:
	if not s: return s
	sl = s.lower()
	if sl.endswith("z"):
	return s[:-1] + ("ces" if s[-1].islower() else "CES")
	if s[-1] not in VOWELS:
	return s + ("es" if s[-1].islower() else "ES")
	return s + ("s" if s[-1].islower() else "S")

	def _present_3pl_from_3sg(s: str) -> str:
	if not s: return s
	return s + ("n" if s[-1].islower() else "N")

	# ====== TTS (appOld) ======
	# Optional Spanish TTS voice. A failed load is only reported: `model` (and
	# possibly `processor`) stay None, which synthesize_speech() checks for.
	print("Cargando modelo de voz (opcional)…")
	if torch.cuda.is_available():
	    device = "cuda"
	else:
	    device = "cpu"
	processor = None
	model = None
	try:
	    processor = AutoProcessor.from_pretrained("facebook/mms-tts-spa")
	    model = VitsModel.from_pretrained("facebook/mms-tts-spa").to(device)
	except Exception as e:
	    print(f"AVISO TTS: {e}")
	else:
	    print("Modelo de voz cargado.")

	def add_reading_pauses(text: str, level: int = 3) -> str:
	    """Double commas and full stops so the TTS voice pauses at them.

	    ``level`` <= 1 returns ``text`` untouched; any higher level applies the
	    same doubling. Runs of whitespace are collapsed in the result.
	    """
	    if level <= 1:
	        return text
	    paused = re.sub(r",\s*", ", , ", text)
	    # Chain on ``paused``: the second substitution used to restart from
	    # ``text`` and so threw the comma pauses away.
	    paused = re.sub(r"\.\s*", ". . ", paused)
	    return re.sub(r"\s+", " ", paused).strip()

	def hispanize_for_tts(ni_text: str) -> str:
	    """Rewrite Neo-Iberian text so the Spanish TTS voice can read it.

	    Lower-cases, respells ``ŕ`` / ``ś``, turns hyphens into spaces, removes
	    bracketed placeholders, collapses whitespace and adds reading pauses.
	    """
	    spoken = (ni_text or "").lower()
	    for old, new in (('ŕ', 'rr'), ('ś', 's'), ('eś', 'es'), ('-', ' ')):
	        spoken = spoken.replace(old, new)
	    spoken = re.sub(r'\[.*?\]', '', spoken)
	    spoken = re.sub(r'\s+', ' ', spoken).strip()
	    return add_reading_pauses(spoken, 3)

	def synthesize_speech(text):
	    """Synthesize ``text`` with the loaded TTS model.

	    Returns ``(16000, waveform)`` with the waveform as float32, scaled to a
	    peak of 0.9 when it is not silent. Returns ``None`` when the text is blank,
	    the model or processor is missing, or synthesis raises (the error is
	    printed).
	    """
	    if not text or not text.strip():
	        return None
	    if model is None or processor is None:
	        return None
	    try:
	        inputs = processor(text=hispanize_for_tts(text), return_tensors="pt").to(device)
	        with torch.no_grad():
	            waveform = model(**inputs).waveform
	        samples = waveform.cpu().numpy().squeeze()
	        peak = max(abs(samples.min()), abs(samples.max()))
	        if peak > 0:
	            samples = samples / peak * 0.9
	        return (16000, samples.astype(np.float32))
	    except Exception as e:
	        print(f"Error TTS: {e}")
	        return None

	# ====== Línea ibérica (appOld) ======
	# Vowel order; a vowel's position here is the index into each SYL_FOR row.
	V = "aeiou"
	# Stop consonant -> syllabic signs for that consonant + a, e, i, o, u.
	SYL_FOR = {
	    "b": ["‹BA›", "‹BE›", "‹BI›", "‹BO›", "‹BU›"],
	    "d": ["‹DA›", "‹DE›", "‹DI›", "‹DO›", "‹DU›"],
	    "t": ["‹TA›", "‹TE›", "‹TI›", "‹TO›", "‹TU›"],
	    "g": ["‹GA›", "‹GE›", "‹GI›", "‹GO›", "‹GU›"],
	    "k": ["‹KA›", "‹KE›", "‹KI›", "‹KO›", "‹KU›"],
	}
	# Single-letter signs.
	ALPHA_FOR = {
	    "a": "‹A›", "e": "‹E›", "i": "‹I›", "o": "‹O›", "u": "‹U›",
	    "s": "‹S›", "ś": "‹Ś›",
	    "l": "‹L›", "r": "‹R›", "ŕ": "‹Ŕ›", "n": "‹N›", "m": "‹M›",
	}
	# Signs appended to a syllabic sign for a following coda consonant.
	CODA_FOR = {
	    "": "",
	    "n": "‹N›", "s": "‹S›", "ś": "‹Ś›", "r": "‹R›", "ŕ": "‹Ŕ›",
	    "l": "‹L›", "m": "‹M›", "k": "‹K›", "t": "‹T›",
	}

	def tokens_from_latin(ni: str) -> str:
	    """Transliterate Latin-script Neo-Iberian into a string of ``‹…›`` sign tokens.

	    ``p`` is read as ``b`` and ``-`` becomes ``—``. A ``SYL_FOR`` consonant
	    followed by a vowel becomes one syllabic sign, which also absorbs the next
	    character when that is a ``CODA_FOR`` consonant. Everything else maps
	    through ``ALPHA_FOR`` or falls back to its upper-case form.
	    """
	    text = (ni or "").lower()
	    size = len(text)
	    pieces = []
	    pos = 0
	    while pos < size:
	        ch = "b" if text[pos] == "p" else text[pos]
	        if ch == "-":
	            pieces.append("—")
	            pos += 1
	        elif ch not in V and ch in SYL_FOR and pos + 1 < size and text[pos + 1] in V:
	            sign = SYL_FOR[ch][V.index(text[pos + 1])]
	            coda = text[pos + 2] if pos + 2 < size else ""
	            if coda != "" and coda in CODA_FOR:
	                sign += CODA_FOR[coda]
	                pos += 3
	            else:
	                pos += 2
	            pieces.append(sign)
	        else:
	            pieces.append(ALPHA_FOR.get(ch, ch.upper()))
	            pos += 1
	    return "".join(pieces)

	# "compact" makes georgeos_keys() shorten signs (a two-letter B/D/T/G/K sign
	# keeps only its first letter); any other value keeps the sign names.
	KEYS_MODE = "compact"
	# Lower-cased NI word -> ready-made key string; checked first by georgeos_keys().
	KEYS_OVERRIDE = {}

	def georgeos_keys(token_str: str, ni_plain: str) -> str:
	    """Convert a string of ``‹…›`` sign tokens into the key string for the Iberian line.

	    ``KEYS_OVERRIDE`` is checked first, keyed by the lower-cased ``ni_plain``.
	    Otherwise each sign becomes: ``X`` for ``Ś``, ``r`` for ``Ŕ``, and, in
	    ``"compact"`` mode, the first letter of a two-letter B/D/T/G/K sign, the
	    vowel itself, or the upper-cased first letter of anything else. Outside
	    compact mode the remaining signs are kept as they are. Text outside
	    ``‹…›`` is ignored.
	    """
	    word = (ni_plain or "").lower()
	    if word in KEYS_OVERRIDE:
	        return KEYS_OVERRIDE[word]

	    compact = KEYS_MODE == "compact"
	    special = {"Ś": "X", "Ŕ": "r"}
	    keys = []
	    for sign in re.findall(r"‹(.*?)›", token_str):
	        if sign in special:
	            keys.append(special[sign])
	        elif not compact:
	            keys.append(sign)
	        elif len(sign) == 2 and sign[0] in "BDTGK":
	            keys.append(sign[0])
	        elif sign in ("A", "E", "I", "O", "U"):
	            keys.append(sign)
	        else:
	            keys.append(sign[0].upper())
	    return "".join(keys)

	TRIDOT = "/"


	def render_ib_with_tridots(ib_toks):
	    """Join Iberian-line tokens, separating consecutive words with ``TRIDOT``.

	    Punctuation tokens are padded with a space on each side and reset the
	    word-to-word separator; the result is stripped.
	    """
	    parts = []
	    after_word = False
	    for tok in ib_toks:
	        if tok in VISIBLE_PUNCT:
	            parts.append(f" {tok} ")
	            after_word = False
	            continue
	        if after_word:
	            parts.append(f" {TRIDOT} ")
	        parts.append(tok)
	        after_word = True
	    return "".join(parts).strip()

	# ====== BI loader + diagnóstico ======

	# ### ★ STRICT, DETERMINISTIC MODE
	STRICT_BI_ENFORCE = True # when True, an ambiguous NI surface is not accepted
	AMBIG_NI = {} # ni_lower -> set of conflicting ES surfaces
	# Diagnostics HTML; replaced by load_bi_strict_and_diagnose().
	BI_DIAG_HTML = "<em>Sin CSV cargado.</em>"

	def load_bi_strict_and_diagnose():
	    """Load the bilingual CSV into the lookup tables and build the diagnostics HTML.

	    Clears and refills ``ES2NI``, ``NI2ES``, ``ESPHRASE2NI``, ``NIPHRASE2ES`` and
	    ``AMBIG_NI`` from ``CSV_BI``, then stores an HTML report in ``BI_DIAG_HTML``.
	    For a repeated ES surface (or phrase) the first row wins. A repeated NI
	    surface is recorded in ``AMBIG_NI`` and, under ``STRICT_BI_ENFORCE``,
	    removed from ``NI2ES``. Flag-driven expansions only ever add ``ES2NI``
	    entries.

	    Returns:
	        True when at least one usable row was read; False when the file is
	        missing, cannot be read, or has no usable rows.
	    """
	    global BI_DIAG_HTML
	    # Empty every table first so a reload is deterministic.
	    ES2NI.clear(); NI2ES.clear(); ESPHRASE2NI.clear(); NIPHRASE2ES.clear()
	    AMBIG_NI.clear()

	    if not os.path.exists(CSV_BI):
	        msg = f"[ERROR] No se encontró el CSV bilingüe: {CSV_BI}"
	        print(msg)
	        BI_DIAG_HTML = f"<b>Error:</b> {escape(msg)}"
	        return False

	    rows = 0; dup_es = 0; dup_ni = 0; empty_pid = 0
	    mismatch_backmap = 0
	    mismatch_samples = []
	    pid_seen = set()

	    print(f"Detectado CSV bilingüe: {CSV_BI}")
	    try:
	        base_rows = []
	        with _open_maybe_gzip(CSV_BI) as f:
	            rd = csv.DictReader(f)
	            flds = set(rd.fieldnames or [])
	            ES_COL = "source_es" if "source_es" in flds else "es_surface" if "es_surface" in flds else "es"
	            NI_COL = "target_ni" if "target_ni" in flds else "ni_surface" if "ni_surface" in flds else "ni"
	            IDCOL = "pair_id" if "pair_id" in flds else "id" if "id" in flds else None
	            FLAGCOL = next((cand for cand in FLAG_COLNAMES if cand in flds), None)

	            for r in rd:
	                es_orig = (r.get(ES_COL) or "").strip()
	                ni_orig = (r.get(NI_COL) or "").strip()
	                if not (es_orig and ni_orig):
	                    continue
	                pid = (r.get(IDCOL) or "").strip() if IDCOL else ""
	                if not pid:
	                    empty_pid += 1
	                else:
	                    pid_seen.add(pid)
	                flags = (r.get(FLAGCOL) or "") if FLAGCOL else ""

	                es = lower(es_orig)
	                ni = lower(ni_orig)

	                # Phrases (deterministic: the first row wins).
	                if " " in es and es not in ESPHRASE2NI:
	                    ESPHRASE2NI[es] = (ni_orig, pid)
	                if " " in ni and ni not in NIPHRASE2ES:
	                    NIPHRASE2ES[ni] = (es_orig, pid)

	                # ES→NI (deterministic: the first row wins).
	                if es in ES2NI:
	                    dup_es += 1
	                else:
	                    ES2NI[es] = (ni_orig, pid)

	                # NI→ES (deterministic, with ambiguity blocking). In strict
	                # mode the surface is popped on its first clash, so later rows
	                # are recognised through AMBIG_NI; they used to be skipped
	                # without being counted or recorded.
	                # NOTE(review): an exact repeat of the same ES/NI pair also
	                # lands here and blocks the surface — confirm that is intended.
	                if ni in NI2ES or (STRICT_BI_ENFORCE and ni in AMBIG_NI):
	                    dup_ni += 1
	                    conflicts = AMBIG_NI.setdefault(ni, set())
	                    if ni in NI2ES:
	                        conflicts.add(NI2ES[ni][0])
	                    conflicts.add(es_orig)
	                    if STRICT_BI_ENFORCE:
	                        NI2ES.pop(ni, None)  # invalidate the conflicting NI surface
	                else:
	                    NI2ES[ni] = (es_orig, pid)

	                base_rows.append((es_orig, ni_orig, pid, flags))
	                rows += 1

	        # Deterministic expansions: they only add ES2NI keys, never touch NI2ES.
	        if EXPANSION_ENABLE:
	            for es_orig, ni_orig, pid, flags in base_rows:
	                if not flags:
	                    continue
	                if _has_flag(flags, FLAG_PLURAL):
	                    pl_key = lower(_pluralize_es_form(es_orig))
	                    if pl_key not in ES2NI:
	                        ES2NI[pl_key] = (ni_orig, pid)
	                if _has_flag(flags, FLAG_3PL):
	                    p3_key = lower(_present_3pl_from_3sg(es_orig))
	                    if p3_key not in ES2NI:
	                        ES2NI[p3_key] = (ni_orig, pid)

	        # Asymmetry report (does not affect the tables): ES → NI → a different ES.
	        for es_low, (ni_surf, _) in ES2NI.items():
	            ni_low = lower(ni_surf)
	            back = NI2ES.get(ni_low)
	            if back and lower(back[0]) != es_low:
	                mismatch_backmap += 1
	                if len(mismatch_samples) < 10:
	                    mismatch_samples.append((es_low, ni_low, lower(back[0])))

	    except Exception as e:
	        msg = f"[ERROR] Al leer {CSV_BI}: {e}"
	        print(msg)
	        BI_DIAG_HTML = f"<b>Error:</b> {escape(msg)}"
	        return False

	    es_unique = len(ES2NI)
	    ni_unique = len(NI2ES)
	    pid_unique = len(pid_seen)

	    print(f"✓ BI-ONLY ESTRICTO cargado: {rows:,} filas.")
	    if dup_es: print(f"[AVISO] {dup_es:,} duplicados ES (se usó la primera).")
	    if dup_ni: print(f"[AVISO] {dup_ni:,} duplicados NI (bloqueados en modo estricto).")
	    if empty_pid: print(f"[AVISO] {empty_pid:,} filas sin pair_id.")
	    if mismatch_backmap:
	        print(f"[ALERTA] {mismatch_backmap:,} asimetrías ES↔NI (misma NI apunta a otro ES).")

	    sam_html = ""
	    if mismatch_samples:
	        sam_rows = "".join(
	            f"<li><code>{escape(es)}</code> → <code>{escape(ni)}</code> → <code>{escape(es2)}</code></li>"
	            for es, ni, es2 in mismatch_samples
	        )
	        sam_html = f"<details><summary>Muestras</summary><ul>{sam_rows}</ul></details>"

	    ambN = sum(len(v) > 1 for v in AMBIG_NI.values())
	    ambList = ", ".join(f"{k}→{sorted(v)[:3]}" for k, v in list(AMBIG_NI.items())[:5])

	    # The separators below are plain "|"; written as "\|" the backslash
	    # showed up in the rendered HTML.
	    BI_DIAG_HTML = f"""
	<div style="font-family:Georgia,serif">
	<b>Diagnóstico del CSV BI</b><br>
	Archivo: <b>{escape(CSV_BI)}</b><br>
	Filas base (CSV): <b>{rows:,}</b><br>
	ES únicas (tras expansiones): <b>{es_unique:,}</b>  |  NI únicas: <b>{ni_unique:,}</b>  |  pair_id únicos: <b>{pid_unique:,}</b><br>
	Duplicados ES: <b>{dup_es:,}</b>  |  Duplicados NI: <b>{dup_ni:,}</b> (bloqueados en estricto)  |  Sin pair_id: <b>{empty_pid:,}</b><br>
	Asimetrías ES↔NI: <b>{mismatch_backmap:,}</b>
	{sam_html}
	<hr style="border:0;border-top:1px solid #caa">
	<small>NI ambiguas bloqueadas: <b>{ambN:,}</b>{(' · ej.: ' + escape(ambList)) if ambN else ''}</small><br>
	<small>Regla: el motor usa <b>sólo</b> tablas 1:1; NI duplicadas se bloquean y se muestran como <code>[AMB-NI:...]</code>.</small>
	</div>
	"""
	    return rows > 0

	# Load the lexicon once at import time; the boolean result is not checked here.
	print("Cargando léxico/pares (BI-estricto)…")
	load_bi_strict_and_diagnose()

	# ====== Utilidad n-grama (longest-match, BI-only) ======
	def _longest_match(tokens, i, phrase_map):
	"""Devuelve (span, surface) si hay frase que comience en i."""
	if not phrase_map: return (0, None)
	max_span = 0; surface = None
	# determinista: prioriza el span más largo
	for span in range(1, MAX_NGRAM+1):
	if i+span > len(tokens): break
	cand = " ".join(lower(t) for t in tokens[i:i+span])
	if cand in phrase_map:
	max_span = span
	surface = phrase_map[cand][0]
	return (max_span, surface)

	# ====== Post-proceso ES (espacios + mayúsculas de oración) ======
	def sentence_case_spanish(s: str) -> str:
	    """Upper-case the first letter of each sentence, leaving ``[...]`` spans untouched.

	    A sentence starts at the beginning of the text and after ``. ? ! …`` or an
	    inverted ``¿ ¡``. Whitespace and opening quotes/brackets before the first
	    letter are passed over; any other non-letter there cancels the pending
	    capitalisation.
	    """
	    wraps = "¿¡\"'«(“‘["
	    pieces = []
	    pending = True      # the next letter should be capitalised
	    bracketed = False   # currently inside [ ... ]

	    for ch in s:
	        if ch == '[':
	            bracketed = True

	        if bracketed:
	            # Placeholder text is copied verbatim and never changes `pending`.
	            pieces.append(ch)
	            if ch == ']':
	                bracketed = False
	            continue

	        if pending:
	            if ch.isalpha():
	                pieces.append(ch.upper())
	                pending = False
	            else:
	                pieces.append(ch)
	                if not (ch.isspace() or ch in wraps):
	                    pending = False
	        else:
	            pieces.append(ch)
	            if ch in ".?!…¿¡":
	                pending = True

	    return "".join(pieces)

	# ✦ FIX: no re-espaciar horas/decimales y no añadir espacios tras “:”/“,”
	def postprocess_spanish(s: str) -> str:
	    """Tidy spacing around punctuation in Spanish output, then apply sentence case.

	    Times (``18:30``) and decimals (``12,65`` / ``3.1415``) are kept compact.
	    """
	    # 1) re-join times and decimals that were split into "d : d" / "d , d"
	    s = re.sub(r"(\d)\s:\s(\d)", r"\1:\2", s)          # 18:30
	    s = re.sub(r"(\d)\s([.,])\s(\d)", r"\1\2\3", s)    # 12,65 / 3.1415
	    # 2) nothing before closing marks
	    s = re.sub(r"\s+([,.;:!?])", r"\1", s)
	    # One space after . ! ? ; when text follows directly (never after "," or ":").
	    # A "." between two digits is a decimal point and is left alone: the old
	    # pattern split "3.14" into "3. 14" right after step 1 had joined it, and
	    # its look-ahead carried an escaped "\|" that made it a no-op.
	    s = re.sub(r"([?!;]|(?<!\d)\.|\.(?!\d))(\S)", r"\1 \2", s)
	    # 3) inverted openers stick to the following word
	    s = re.sub(r"([¿¡])\s+", r"\1", s)
	    # 4) collapse runs of whitespace
	    s = re.sub(r"\s{2,}", " ", s).strip()
	    # 5) capitalise the first letter of each sentence
	    return sentence_case_spanish(s)

	# ====== Traducción BI estricta ======
	def translate_es_to_ni_bi(text: str):
	    """Translate Spanish into Neo-Iberian by exact surface lookup.

	    Punctuation, bracketed placeholders and numbers pass through. Phrases from
	    ``ESPHRASE2NI`` (longest match) take precedence over single words from
	    ``ES2NI``; anything else becomes ``[SIN-LEX:token]``.

	    Returns:
	        ``(ni_text, ib_html)``: the detokenized NI text and the HTML-escaped
	        Iberian line wrapped in ``<div class='ib-line'>``.
	    """
	    def _keys(surface):
	        return georgeos_keys(tokens_from_latin(surface), surface)

	    toks = simple_tokenize(text)
	    ni_tokens = []
	    ib_line = []
	    pos = 0
	    while pos < len(toks):
	        tok = toks[pos]
	        if tok in VISIBLE_PUNCT or is_placeholder(tok):
	            ni_tokens.append(tok)
	            ib_line.append(tok)
	            pos += 1
	            continue

	        span, phrase_ni = _longest_match(toks, pos, ESPHRASE2NI)
	        if span > 1:
	            ni_tokens.append(phrase_ni)
	            ib_line.append(_keys(phrase_ni))
	            pos += span
	            continue

	        key = lower(tok)
	        if key in ES2NI:
	            word_ni = ES2NI[key][0]
	            ni_tokens.append(word_ni)
	            ib_line.append(_keys(word_ni))
	        elif is_number(key):
	            ni_tokens.append(tok)
	            ib_line.append(tok)
	        else:
	            missing = f"[SIN-LEX:{tok}]"
	            ni_tokens.append(missing)
	            ib_line.append(missing)
	        pos += 1

	    if MODAL_SUFFIX_ENABLE:
	        ni_tokens = add_modal_suffixes_es2ni(ni_tokens)
	        # The suffixes changed the tokens, so the Iberian line is rebuilt from them.
	        ib_line = [
	            tt if (tt in VISIBLE_PUNCT or tt.startswith("[")) else _keys(tt)
	            for tt in ni_tokens
	        ]

	    ni_text = detokenize(ni_tokens)
	    ib_html = "<div class='ib-line'>" + escape(render_ib_with_tridots(ib_line)) + "</div>"
	    return ni_text, ib_html

	def translate_ni_to_es_bi(text: str):
	    """Translate Neo-Iberian into Spanish by exact surface lookup.

	    Modal suffixes are first turned back into ``?`` / ``!``. Punctuation,
	    placeholders and numbers pass through; phrases from ``NIPHRASE2ES`` take
	    precedence over single words from ``NI2ES``. A surface blocked as ambiguous
	    becomes ``[AMB-NI:token]`` and an unknown one ``[?:token]``. The result gets
	    inverted openers and Spanish post-processing.
	    """
	    toks = simple_tokenize(text)
	    if MODAL_SUFFIX_ENABLE:
	        toks = strip_modal_suffixes_ni(toks)

	    es_tokens = []
	    pos = 0
	    total = len(toks)
	    while pos < total:
	        tok = toks[pos]
	        if tok in VISIBLE_PUNCT or is_placeholder(tok):
	            es_tokens.append(tok)
	            pos += 1
	            continue

	        span, phrase_es = _longest_match(toks, pos, NIPHRASE2ES)
	        if span > 1:
	            es_tokens.append(phrase_es)
	            pos += span
	            continue

	        key = lower(tok)
	        if key in NI2ES:
	            es_tokens.append(NI2ES[key][0] or f"[?:{tok}]")
	        elif STRICT_BI_ENFORCE and key in AMBIG_NI:
	            # Deterministic: never pick among colliding NI surfaces.
	            es_tokens.append(f"[AMB-NI:{tok}]")
	        elif is_number(key):
	            es_tokens.append(tok)
	        else:
	            es_tokens.append(f"[?:{tok}]")
	        pos += 1

	    if MODAL_SUFFIX_ENABLE:
	        es_tokens = add_inverted_openers(es_tokens)

	    return postprocess_spanish(detokenize(es_tokens))

	# ====== Diagnóstico ======
	def diagnose_text(text, dir_label):
	    """Build an HTML coverage report for ``text`` against the loaded tables.

	    Args:
	        text: the text to check.
	        dir_label: direction label; one starting with ``"ES"`` checks ES→NI,
	            anything else checks NI→ES.

	    Returns:
	        HTML listing the covered/total token counts (punctuation and numbers
	        excluded; a matched phrase counts once), ambiguous NI surfaces (NI→ES
	        only), missing tokens and asymmetric round trips.
	    """
	    if not text or not text.strip():
	        return "<em>Introduce texto para diagnosticar.</em>"

	    # Both directions run the same scan; only the tables differ.
	    if dir_label.startswith("ES"):
	        head = "ES→NI"
	        phrases, forward, backward, check_amb = ESPHRASE2NI, ES2NI, NI2ES, False
	    else:
	        head = "NI→ES"
	        phrases, forward, backward, check_amb = NIPHRASE2ES, NI2ES, ES2NI, True

	    toks = simple_tokenize(text)
	    unknown = set(); asym = set(); amb = set()
	    total_tokens = 0; covered = 0

	    i = 0
	    while i < len(toks):
	        t = toks[i]
	        if t in VISIBLE_PUNCT or is_number(t):
	            i += 1
	            continue
	        total_tokens += 1
	        span, _ = _longest_match(toks, i, phrases)
	        if span > 1:
	            covered += 1
	            i += span
	            continue
	        k = lower(t)
	        if check_amb and k in AMBIG_NI:
	            amb.add(t)
	            i += 1
	            continue
	        if k not in forward:
	            unknown.add(t)
	            i += 1
	            continue
	        covered += 1
	        other = forward[k][0]
	        back = backward.get(lower(other))
	        if back and lower(back[0]) != k:
	            asym.add(f"{t} → {other} → {back[0]}")
	        i += 1

	    cov_pct = (covered / total_tokens * 100) if total_tokens else 100.0
	    # Plain "|" separator: written as "\|" the backslash was rendered too.
	    cov_html = f"<div><b>Tokens (sin puntuación/numéricos):</b> {total_tokens}  |  <b>Cubiertos:</b> {covered} ({cov_pct:.1f}%)</div>"

	    unk_html = "".join(f"<li><code>{escape(u)}</code></li>" for u in sorted(unknown, key=lambda x: lower(x))) or "<li><i>—</i></li>"
	    amb_html = "".join(f"<li><code>{escape(a)}</code></li>" for a in sorted(amb, key=lambda x: lower(x))) or "<li><i>—</i></li>"
	    asy_html = "".join(f"<li><code>{escape(a)}</code></li>" for a in sorted(asym)) or "<li><i>—</i></li>"

	    return f"<b>Diagnóstico {head}</b>{cov_html}<b>Ambiguas (NI duplicada):</b><ul>{amb_html}</ul><b>Faltantes:</b><ul>{unk_html}</ul><b>Asimetrías:</b><ul>{asy_html}</ul>"

	# ====== UI (CSS / acordeones / fuentes) ======
	# UI strings per interface language ("ES" / "EN"). Both entries carry the
	# same keys; "acc_titles" holds the 11 accordion titles, in the same order as
	# the texts in DOC.
	LABELS={
	"ES":{
	"title":"Traductor Español ↔ Neoíbero",
	"subtitle":"CSV estricto (BI-only 1:1; sin heurísticas; .gz) — determinista",
	"in_label_es":"✏️ Entrada (Español)",
	"in_label_ni":"✏️ Entrada (Neoíbero)",
	"in_ph_es":"Escribe aquí. Ej.: Veo a Ana y doy pan a Marta.",
	"in_ph_ni":"Idatzi hemen. Adib.: nuker-ke ni etxe-ka.",
	"out_lat_esni":"📜 Salida: Neoíbero (latín)",
	"out_lat_nies":"📜 Salida: Español",
	"out_ib":"🗿 Línea ibérica",
	"out_audio":"🔊 Locución (Audio)",
	"btn":"🔄 Traducir",
	"combo":"🌍 Idioma (UI + explicación)",
	"dir":"🔁 Dirección",
	"dir_opts":["ES → NI","NI → ES"],
	"doc_header":"📚 Documentación y Referencia",
	"acc_titles":[
	"🎓 Marco académico y decisiones del neoíbero",
	"🏛️ Herencia posible del íbero histórico",
	"🎨 Diseño de la conlang (neoíbero)",
	"⚙️ Pipeline del traductor (BI-estricto 1:1)",
	"🔤 Ortografía, línea ibérica y claves",
	"❓/❗ Modalidad vascoide (-na / -ba)",
	"🧩 Expansiones por CSV: plurales (S) y 3pl (3/V3)",
	"📖 Gramática de referencia (v1.2)",
	"📚 Bibliografía de base",
	"🧾 Siglas y glosario",
	"🔗 Simetría por pair_id (modo bilingüe)"
	]
	},
	"EN":{
	"title":"Spanish ↔ Neo-Iberian Translator",
	"subtitle":"Strict BI-only (1:1 surfaces; no heuristics; .gz) — deterministic",
	"in_label_es":"✏️ Input (Spanish)",
	"in_label_ni":"✏️ Input (Neo-Iberian)",
	"in_ph_es":"Type here. E.g., Veo a Ana y doy pan a Marta.",
	"in_ph_ni":"Type here. E.g., nuker-ke ni etxe-ka.",
	"out_lat_esni":"📜 Output: Neo-Iberian (Latin)",
	"out_lat_nies":"📜 Output: Spanish",
	"out_ib":"🗿 Iberian line",
	"out_audio":"🔊 Speech (Audio)",
	"btn":"🔄 Translate",
	"combo":"🌍 Language (UI + docs)",
	"dir":"🔁 Direction",
	"dir_opts":["ES → NI","NI → ES"],
	"doc_header":"📚 Documentation & Reference",
	"acc_titles":[
	"🎓 Background & design choices",
	"🏛️ Possible inheritance from ancient Iberian",
	"🎨 Conlang design (Neo-Iberian)",
	"⚙️ Translator pipeline (strict 1:1)",
	"🔤 Orthography, Iberian line & keys",
	"❓/❗ Vascoid modality (-na / -ba)",
	"🧩 CSV-driven expansions: plurals (S) & 3pl (3/V3)",
	"📖 Reference grammar (v1.2)",
	"📚 Core references",
	"🧾 Acronyms & glossary",
	"🔗 Pair-id symmetry (bilingual mode)"
	]
	}
	}
	# Markdown body of each documentation accordion, per UI language; entry i is
	# shown under LABELS[lang]["acc_titles"][i].
	# The flag examples use a plain "|": inside a Markdown code span the escaped
	# form "\|" would be rendered with its backslash.
	DOC = {
	    "ES": [
	        "Escritura y datos. Un único CSV con `pair_id` y superficies exactas. La traducción ES↔NI es 1:1 por superficie.",
	        "Herencia plausible del íbero. Fonotaxis CV(C); p→b; r/ŕ; casos -k/-te/-ka/-ar/-en/-i.",
	        "Diseño del neoíbero. TAM: PRS -ke, PST -bo, FUT -ta, IPFV -ri, IMP -tu, COND/SBJV -ni, FUT_SBJV -ra.",
	        "Pipeline (BI-estricto 1:1). Tokeniza; sustitución exacta; NI ambigua se bloquea y sale como `[AMB-NI:…]`.",
	        "Ortografía y línea ibérica. Tokens BA/BE/…; tridots '/'; p→b; codas N/S/Ś/R/Ŕ/L/M/K/T.",
	        "Modalidad (-na/-ba). ES→NI puede omitir ¿?¡! (si está activo). NI→ES inserta `¿…?`/`¡…!` al final de la oración marcada, no en comas.",
	        "Expansiones por CSV (deterministas). `flags=S` plural regular; `flags=3|V3` 3ª plural del presente. Solo si lo marcas.",
	        "Gramática mínima. Visualización; la gramática no se “calcula”.",
	        "Bibliografía. Untermann; de Hoz; Ferrer i Jané; Correa…",
	        "Glosario & datasets. Faltas → `[SIN-LEX:…]` / `[?:…]`. Ambiguas → `[AMB-NI:…]` (limpia tu CSV).",
	        "Simetría por pair_id. El diagnóstico avisa si una NI apunta a dos ES distintos.",
	    ],
	    "EN": [
	        "One bilingual CSV with `pair_id` and exact surfaces. ES↔NI is strictly 1:1.",
	        "Possible inheritance (non-palaeographic).",
	        "Neo-Iberian design (phonology & morphology).",
	        "Pipeline: tokenise → exact replacement. Ambiguous NI are blocked and rendered as `[AMB-NI:…]`.",
	        "Orthography, Iberian line & keys.",
	        "Modality (-na/-ba): ES→NI can drop ¿?¡!. NI→ES places them at sentence end, not at commas.",
	        "CSV-driven expansions (deterministic): `S` plural; `3|V3` present 3pl.",
	        "Minimal grammar (v1.2).",
	        "Selected references.",
	        "Glossary & datasets.",
	        "Pair-id symmetry diagnostics.",
	    ],
	}

	# ====== CSS + fuente ======
	def build_css():
	    """Return the app's CSS, embedding ``Iberia-Georgeos.ttf`` as base64 when present.

	    Without the font file (or with an empty one) the ``@font-face`` source
	    falls back to ``local('sans-serif')``.
	    """
	    font_path = "Iberia-Georgeos.ttf"
	    font_src = "local('sans-serif')"
	    if os.path.exists(font_path):
	        with open(font_path, "rb") as fh:
	            encoded = base64.b64encode(fh.read()).decode("ascii")
	        if encoded:
	            font_src = f"url(data:font/ttf;base64,{encoded}) format('truetype')"
	    return f"""
	@font-face {{
	font-family: 'IberiaGeorgeos';
	src: {font_src};
	font-weight: normal; font-style: normal;
	}}
	:root {{
	--iberian-clay:#8B4513; --iberian-ochre:#CC7722; --iberian-stone:#5C5C5C;
	--iberian-sand:#D2B48C; --iberian-rust:#A0522D; --iberian-bronze:#CD7F32;
	}}
	.gradio-container {{ background:linear-gradient(135deg,#f4e8d8 0%,#e8d5c4 50%,#d4c4b0 100%)!important;
	font-family:'Georgia','Times New Roman',serif!important; }}
	.gradio-container h1, .gradio-container h2, .gradio-container h3 {{
	color:var(--iberian-clay)!important; text-shadow:2px 2px 4px rgba(139,69,19,.15)!important;
	border-bottom:3px solid var(--iberian-bronze)!important; padding-bottom:.5rem!important; letter-spacing:.5px!important;
	}}
	.gradio-container .gr-group {{ background:linear-gradient(to bottom,#f9f6f0,#ede6dc)!important;
	border:2px solid var(--iberian-sand)!important; border-radius:8px!important; box-shadow:0 4px 12px rgba(139,69,19,.2), inset 0 1px 0 rgba(255,255,255,.5)!important;
	padding:1.5rem!important; margin-bottom:1.5rem!important; }}
	.gradio-container .gr-accordion {{ background:linear-gradient(145deg,#ebe3d5,#d9cec0)!important;
	border:2px solid var(--iberian-rust)!important; border-radius:6px!important; margin-bottom:.8rem!important; box-shadow:2px 2px 6px rgba(0,0,0,.15)!important; }}
	.gradio-container .gr-accordion .label-wrap {{ background:linear-gradient(to right,var(--iberian-ochre),var(--iberian-rust))!important;
	color:#fff!important; font-weight:600!important; padding:.8rem 1rem!important; border-radius:4px!important; text-shadow:1px 1px 2px rgba(0,0,0,.3)!important; }}
	.gradio-container .gr-textbox textarea, .gradio-container .gr-textbox input {{ background:linear-gradient(to bottom,#faf8f3,#f5f0e8)!important;
	border:2px solid var(--iberian-sand)!important; border-radius:6px!important; color:var(--iberian-stone)!important;
	font-family:'Georgia',serif!important; box-shadow:inset 2px 2px 4px rgba(139,69,19,.1)!important; }}
	.gradio-container .gr-textbox textarea:focus, .gradio-container .gr-textbox input:focus {{
	border-color:var(--iberian-bronze)!important; box-shadow:inset 2px 2px 4px rgba(139,69,19,.1), 0 0 8px rgba(205,127,50,.3)!important; }}
	.gradio-container .gr-button.gr-button-primary {{ background:linear-gradient(145deg,var(--iberian-bronze),var(--iberian-rust))!important;
	border:2px solid var(--iberian-clay)!important; color:#fff!important; font-weight:bold!important; text-shadow:1px 2px 2px rgba(0,0,0,.4)!important;
	box-shadow:0 4px 8px rgba(139,69,19,.3), inset 0 1px 0 rgba(255,255,255,.2)!important; border-radius:8px!important; padding:.8rem 1.5rem!important; transition:all .3s ease!important; }}
	.gradio-container .gr-button.gr-button-primary:hover {{ background:linear-gradient(145deg,var(--iberian-rust),var(--iberian-bronze))!important;
	transform:translateY(-2px)!important; box-shadow:0 6px 12px rgba(139,69,19,.4)!important; }}
	.ib-line {{ font-family:'IberiaGeorgeos',monospace,sans-serif!important; font-size:1.9rem!important; line-height:2.4rem!important; white-space:pre-wrap!important;
	background:linear-gradient(135deg,#e8dcc8 0%,#d4c4a8 50%,#c4b098 100%)!important; padding:24px!important; border-radius:10px!important;
	border:3px solid var(--iberian-rust)!important; border-left:6px solid var(--iberian-bronze)!important;
	box-shadow:0 4px 15px rgba(139,69,19,.25), inset 0 2px 4px rgba(0,0,0,.1)!important; color:var(--iberian-clay)!important; position:relative!important; }}
	.ib-line::before {{ content:''!important; position:absolute!important; inset:0!important;
	background-image:repeating-linear-gradient(0deg,transparent,transparent 2px, rgba(139,69,19,.03) 2px, rgba(139,69,19,.03) 4px)!important;
	pointer-events:none!important; border-radius:10px!important; }}
	@media (max-width:768px) {{
	.ib-line {{ font-size:1.5rem!important; line-height:2rem!important; padding:16px!important; }}
	.gradio-container .gr-group {{ padding:1rem!important; }}
	.gradio-container h1 {{ font-size:1.8rem!important; }}
	}}
	@media (max-width:480px) {{
	.ib-line {{ font-size:1.3rem!important; line-height:1.8rem!important; padding:12px!important; }}
	.gradio-container h1 {{ font-size:1.5rem!important; }}
	}}
	"""


	# Built once at import time and handed to gr.Blocks(css=...).
	CSS = build_css()

	with gr.Blocks(css=CSS, theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="purple")) as demo:
	with gr.Group():
	title = gr.Markdown(f"# {LABELS['ES']['title']}")
	subtitle = gr.Markdown(f"{LABELS['ES']['subtitle']}")
	with gr.Row():
	combo = gr.Dropdown(choices=["ES","EN"], value="ES", label=LABELS["ES"]["combo"])
	direction = gr.Radio(choices=LABELS["ES"]["dir_opts"], value="ES → NI", label=LABELS["ES"]["dir"])
	with gr.Group():
	doc_header = gr.Markdown(f"## {LABELS['ES']['doc_header']}")
	acc_titles = LABELS["ES"]["acc_titles"]
	with gr.Accordion(acc_titles[0], open=False) as acc1: md1 = gr.Markdown(DOC["ES"][0])
	with gr.Accordion(acc_titles[1], open=False) as acc2: md2 = gr.Markdown(DOC["ES"][1])
	with gr.Accordion(acc_titles[2], open=False) as acc3: md3 = gr.Markdown(DOC["ES"][2])
	with gr.Accordion(acc_titles[3], open=False) as acc4: md4 = gr.Markdown(DOC["ES"][3])
	with gr.Accordion(acc_titles[4], open=False) as acc5: md5 = gr.Markdown(DOC["ES"][4])
	with gr.Accordion(acc_titles[5], open=False) as acc6: md6 = gr.Markdown(DOC["ES"][5])
	with gr.Accordion(acc_titles[6], open=False) as acc7: md7 = gr.Markdown(DOC["ES"][6])
	with gr.Accordion(acc_titles[7], open=False) as acc8: md8 = gr.Markdown(DOC["ES"][7])
	with gr.Accordion(acc_titles[8], open=False) as acc9: md9 = gr.Markdown(DOC["ES"][8])
	with gr.Accordion(acc_titles[9], open=False) as acc10: md10 = gr.Markdown(DOC["ES"][9])
	with gr.Accordion(acc_titles[10], open=False) as acc11: md11 = gr.Markdown(DOC["ES"][10])
	with gr.Accordion("🧪 Diagnóstico del CSV BI (al cargar)", open=False):
	bi_diag_box = gr.HTML(value=BI_DIAG_HTML)

	with gr.Group():
	es_in = gr.Textbox(label=LABELS["ES"]["in_label_es"], placeholder=LABELS["ES"]["in_ph_es"], lines=5)
	with gr.Row():
	btn_tr = gr.Button(LABELS["ES"]["btn"], variant="primary")
	btn_diag = gr.Button("🔎 Diagnosticar BI con este texto", variant="secondary")
	with gr.Row():
	with gr.Column(scale=2):
	ni_out = gr.Textbox(label=LABELS["ES"]["out_lat_esni"], lines=5, interactive=False)
	loc_btn = gr.Button("🔊 Locutar", variant="secondary", visible=True)
	audio_out = gr.Audio(label=LABELS["ES"]["out_audio"], type="numpy")
	with gr.Column(scale=1):
	ib_out = gr.HTML(label=LABELS["ES"]["out_ib"])
	diag_out = gr.HTML(value="")

	def do_translate(text, dir_label, sel_lang="ES"):
	    """Translate ``text`` in the selected direction and build the UI updates.

	    Args:
	        text: Raw contents of the input textbox (may be empty or None).
	        dir_label: Selected direction label. A label starting with "ES" means
	            ES → NI; anything else is treated as NI → ES.
	        sel_lang: Key into LABELS for the current UI language. Defaults to "ES"
	            and falls back to "ES" when the key is unknown.

	    Returns:
	        A 5-tuple of ``gr.update(...)`` objects, in order, for
	        (ni_out, ib_out, loc_btn, audio_out, diag_out).
	    """
	    empty_ib = "<div class='ib-line'></div>"

	    # Nothing to translate: blank every output and hide the speech button.
	    if not text or not text.strip():
	        return (gr.update(value=""),
	                gr.update(value=empty_ib),
	                gr.update(visible=False),
	                gr.update(value=None),
	                gr.update(value=""))

	    # Use the labels of the language currently shown in the UI, so that
	    # translating does not reset the output label to Spanish while the
	    # interface is in English (switch_lang/switch_direction do the same).
	    labels = LABELS[sel_lang] if sel_lang in LABELS else LABELS["ES"]

	    if dir_label.startswith("ES"):
	        latin, ib = translate_es_to_ni_bi(text)
	        return (gr.update(label=labels["out_lat_esni"], value=latin),
	                gr.update(value=ib),
	                gr.update(visible=True),   # speech is offered for ES → NI output
	                gr.update(value=None),     # drop any previously generated audio
	                gr.update(value=""))

	    # NI → ES: plain text result, no Iberian line and no speech button.
	    es_text = translate_ni_to_es_bi(text)
	    return (gr.update(label=labels["out_lat_nies"], value=es_text),
	            gr.update(value=empty_ib),
	            gr.update(visible=False),
	            gr.update(value=None),
	            gr.update(value=""))

	# The UI-language selector is passed as a third input so labels stay localized.
	btn_tr.click(do_translate, [es_in, direction, combo], [ni_out, ib_out, loc_btn, audio_out, diag_out])

	def run_locution(latin_text, dir_label):
	    """Return synthesized speech for ``latin_text`` when translating ES → NI.

	    For any direction label that does not start with "ES" no synthesis is
	    attempted and ``None`` is returned.
	    """
	    wants_speech = dir_label.startswith("ES")
	    return synthesize_speech(latin_text) if wants_speech else None

	loc_btn.click(fn=run_locution, inputs=[ni_out, direction], outputs=audio_out)

	def do_diagnose(text, dir_label):
	    """Run ``diagnose_text`` on the input and wrap its result for the diagnostics panel."""
	    report = diagnose_text(text, dir_label)
	    return gr.update(value=report)

	btn_diag.click(fn=do_diagnose, inputs=[es_in, direction], outputs=[diag_out])

	def switch_lang(sel_lang, dir_label):
	    """Relabel the interface for the UI language ``sel_lang``.

	    Args:
	        sel_lang: Key into LABELS and DOC (the dropdown offers "ES" and "EN").
	        dir_label: Current direction label; a label starting with "ES" selects
	            the ES → NI variants of the input/output labels.

	    Returns:
	        A tuple of 32 ``gr.update(...)`` objects matching, in order, the output
	        list passed to ``combo.change`` below.
	    """
	    labels = LABELS[sel_lang]
	    section_titles = labels["acc_titles"]
	    section_docs = DOC[sel_lang]

	    # Pick the direction-dependent label keys once.
	    if dir_label.startswith("ES"):
	        in_key, ph_key, out_key = "in_label_es", "in_ph_es", "out_lat_esni"
	    else:
	        in_key, ph_key, out_key = "in_label_ni", "in_ph_ni", "out_lat_nies"
	    input_label = labels[in_key]
	    input_placeholder = labels[ph_key]
	    output_label = labels[out_key]

	    # Header widgets: title, subtitle, language selector, direction, doc header.
	    updates = [
	        gr.update(value=f"# {labels['title']}"),
	        gr.update(value=f"{labels['subtitle']}"),
	        gr.update(label=labels["combo"], value=sel_lang),
	        gr.update(label=labels["dir"], choices=labels["dir_opts"], value=dir_label),
	        gr.update(value=f"## {labels['doc_header']}"),
	    ]

	    # Eleven documentation sections: accordion label first, then its Markdown body.
	    for idx in range(11):
	        updates.append(gr.update(label=section_titles[idx]))
	        updates.append(gr.update(value=section_docs[idx]))

	    # Workspace widgets: input box, text output, Iberian line, audio, button.
	    updates.append(gr.update(label=input_label, placeholder=input_placeholder))
	    updates.append(gr.update(label=output_label))
	    updates.append(gr.update(label=labels["out_ib"]))
	    updates.append(gr.update(label=labels["out_audio"]))
	    updates.append(gr.update(value=labels["btn"]))
	    return tuple(updates)

	combo.change(
	    fn=switch_lang,
	    inputs=[combo, direction],
	    outputs=[
	        title, subtitle, combo, direction, doc_header,
	        acc1, md1, acc2, md2, acc3, md3, acc4, md4,
	        acc5, md5, acc6, md6, acc7, md7, acc8, md8,
	        acc9, md9, acc10, md10, acc11, md11,
	        es_in, ni_out, ib_out, audio_out, btn_tr,
	    ],
	)

	def switch_direction(dir_label, sel_lang):
	    """Reset the workspace when the translation direction changes.

	    Args:
	        dir_label: Newly selected direction label; a label starting with "ES"
	            means ES → NI.
	        sel_lang: Key into LABELS for the current UI language.

	    Returns:
	        A 6-tuple of ``gr.update(...)`` objects for
	        (es_in, ni_out, ib_out, loc_btn, audio_out, diag_out).
	    """
	    labels = LABELS[sel_lang]
	    es_to_ni = dir_label.startswith("ES")

	    if es_to_ni:
	        input_label = labels["in_label_es"]
	        input_placeholder = labels["in_ph_es"]
	        output_label = labels["out_lat_esni"]
	    else:
	        input_label = labels["in_label_ni"]
	        input_placeholder = labels["in_ph_ni"]
	        output_label = labels["out_lat_nies"]

	    relabel_input = gr.update(label=input_label, placeholder=input_placeholder)
	    clear_output = gr.update(label=output_label, value="")
	    clear_iberian = gr.update(value="<div class='ib-line'></div>")
	    # The speech button is shown only for the ES → NI direction.
	    toggle_speech = gr.update(visible=es_to_ni)
	    clear_audio = gr.update(value=None)
	    clear_diag = gr.update(value="")
	    return (relabel_input, clear_output, clear_iberian,
	            toggle_speech, clear_audio, clear_diag)

	direction.change(
	    fn=switch_direction,
	    inputs=[direction, combo],
	    outputs=[es_in, ni_out, ib_out, loc_btn, audio_out, diag_out],
	)

	# ====== smoke opcional ======
	def _symmetry_smoketest():
	    """Print a NI → ES → NI round trip for a few fixed probe sentences.

	    Each probe is passed to ``translate_ni_to_es_bi``; the result is passed
	    back through ``translate_es_to_ni_bi`` and the three forms are printed.
	    Nothing is asserted: the output is meant for manual inspection.
	    """
	    print("\n[SMOKE] Prueba ES↔NI (BI-estricto, determinista)…")
	    samples = (
	        "nuker-ke ni etxe-ka ?",
	        "¿Pagaste 12,75 en la cafetería?",
	        "Marta llega a las 18:30.",
	        "[SIN-LEX:Tomás]-na euŕak-ke !",
	    )
	    for sample in samples:
	        spanish = translate_ni_to_es_bi(sample)
	        # Only the Latin-script half of the ES → NI result is reported.
	        roundtrip, _unused_ib = translate_es_to_ni_bi(spanish)
	        for tag, shown in ((" IN:", sample), (" ES:", spanish), (" NI:", roundtrip)):
	            print(tag, shown)
	        print("---")

	# Run the optional round-trip smoke test only when debugging is enabled.
	if DEBUG_MODE:
	    _symmetry_smoketest()

	# Start the Gradio app, with request queueing, when executed as a script.
	if __name__ == "__main__":
	    queued_demo = demo.queue()
	    queued_demo.launch()