| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | import gradio as gr |
| | import os, csv, re, base64, unicodedata, gzip |
| | import torch |
| | from transformers import AutoProcessor, VitsModel |
| | import numpy as np |
| | from html import escape |
| |
|
| | |
| | os.environ['TRANSFORMERS_CACHE'] = os.environ.get('TRANSFORMERS_CACHE', '/tmp/cache') |
| | os.environ['HF_HOME'] = os.environ.get('HF_HOME', '/tmp/hf') |
| |
|
# Global switch for verbose tracing; keep off in production.
DEBUG_MODE = False


def debug_print(msg):
    """Print *msg* with a [DEBUG] prefix when DEBUG_MODE is on; otherwise do nothing."""
    if not DEBUG_MODE:
        return
    print(f"[DEBUG] {msg}")
| |
|
| | |
| | def _open_maybe_gzip(path): |
| | if str(path).endswith(".gz"): |
| | |
| | return gzip.open(path, "rt", encoding="utf-8", newline="") |
| | return open(path, "r", encoding="utf-8", newline="") |
| |
|
| | def norm(x): return (str(x).strip()) if x is not None else "" |
| | def lower(x): return norm(x).lower() |
| | def fold(s:str)->str: |
| | return ''.join(c for c in unicodedata.normalize('NFD', s or "") if unicodedata.category(c)!="Mn") |
| |
|
| | |
| | def _cand(*names): |
| | for n in names: |
| | if os.path.exists(n): return n |
| | p = os.path.join("salida", n) |
| | if os.path.exists(p): return p |
| | return names[0] |
| |
|
| | |
# Bilingual lexicon CSV: the first existing candidate wins (searched in CWD and ./salida).
# Order encodes preference: the 1:1 unified lexicon first, then progressively older builds.
CSV_BI = _cand(
    "LEXICON_UNICO_1a1.csv.gz",
    "MASTER_SURFACE_READY.csv.gz",
    "MASTER_REEXTENDED.csv.gz",
    "BI_SURFACE_READY.csv.gz",
    "HF_Pairs_BI_REEXTENDED.csv.gz",
    "HF_Pairs_BI_EXPANDED1_EXTENDED_FILLED.csv.gz",
    "HF_Pairs_BI_EXPANDED1.csv.gz"
)
| |
|
| | |
| | |
# --- In-memory lexicon tables, filled by load_bi_strict_and_diagnose() ---
ES2NI = {}  # es_lowercased -> (ni_surface, pair_id)
NI2ES = {}  # ni_lowercased -> (es_surface, pair_id); ambiguous entries are removed in strict mode

# Multi-word phrase tables (keys contain spaces); matched greedily up to MAX_NGRAM tokens.
ESPHRASE2NI = {}
NIPHRASE2ES = {}
MAX_NGRAM = 3

# Punctuation tokens the translators pass through verbatim.
VISIBLE_PUNCT = set(list(",.;:!?¡¿…()[]{}\"'«»—–“”‘’"))
_num_re = re.compile(r"^\d+([.,]\d+)?$")  # integer or decimal with . or , separator
def is_number(tok:str)->bool: return bool(_num_re.fullmatch(tok or ""))

# Marks that may end a clause (used when segmenting for modality handling).
CLAUSE_BREAKS = {",", ";", "—", "–", ":"}
# Bracketed placeholders such as [SIN-LEX:x] / [AMB-NI:x] are passed through untranslated.
PLACEHOLDER_RE = re.compile(r"^\[[^\]]+\]$")
def is_placeholder(tok: str) -> bool:
    return bool(PLACEHOLDER_RE.match(tok or ""))
| |
|
| | def _restore_brk(tok, protected): |
| | m = re.fullmatch(r"__BRK(\d+)__(?:-(na|ba))?", tok or "") |
| | if not m: return tok |
| | idx = int(m.group(1)) |
| | suf = m.group(2) |
| | base = protected[idx] if 0 <= idx < len(protected) else tok |
| | return base + (f"-{suf}" if suf else "") |
| |
|
| | def simple_tokenize(text:str): |
| | """Tokenización mínima, sin romper [ ... ] ni [ ... ]-na/-ba.""" |
| | if not text: |
| | return [] |
| | protected = [] |
| | def _repl(m): |
| | key = f"__BRK{len(protected)}__" |
| | protected.append(m.group(0)) |
| | return key |
| | t = re.sub(r"\[[^\]]*\]", _repl, (text or "").strip()) |
| | t = re.sub(r"\s+"," ", t) |
| | t = re.sub(r"([,.;:!?¡¿…()\[\]{}\"'«»—–“”‘’])", r" \1 ", t) |
| | toks = [tok for tok in t.split() if tok] |
| | for i, tok in enumerate(toks): |
| | if tok.startswith("__BRK") and "__" in tok: |
| | toks[i] = _restore_brk(tok, protected) |
| | return toks |
| |
|
def detokenize(tokens):
    """Join tokens into a sentence, restoring Spanish punctuation spacing."""
    text = " ".join(tokens)
    text = re.sub(r"\s+([,.;:!?])", r"\1", text)   # no space before closing punctuation
    text = re.sub(r"([¿¡])\s+", r"\1", text)       # no space after inverted openers
    text = re.sub(r"\(\s+", "(", text)             # tighten parentheses
    text = re.sub(r"\s+\)", ")", text)
    return re.sub(r"\s{2,}", " ", text).strip()
| |
|
| | |
# Vascoid modality configuration: -na marks questions, -ba marks exclamations.
MODAL_SUFFIX_ENABLE = True      # master switch for the whole -na/-ba machinery
MODAL_ONLY_ON_FINITE = True     # only suffix tokens that look like finite verbs
MODAL_STRIP_QE_IN_NI = True     # drop ¿?¡! from NI output once suffixes are in place

SENT_END = {".", "!", "?", "…"}
OPEN_FOR = {"?": "¿", "!": "¡"}
WRAP_PREFIX = set(list("«“‘([{\"'"))
PERS_ENDINGS = ("-n","-zu","-gu","-zuk","-zuek","-k")
TAM_FINITE = ("-ke","-bo","-ta","-ni","-tu")


def looks_like_finite_ni(tok: str) -> bool:
    """Heuristic: does *tok* look like a finite NI verb?

    A finite verb ends in a TAM marker (optionally followed by a person
    ending); any already-attached -na/-ba modal suffix is ignored first.
    Bracketed placeholders never count.
    """
    word = (tok or "").lower()
    if not word or word.startswith("["):
        return False
    stem = re.sub(r"-(na|ba)$", "", word)
    return any(
        stem.endswith(tam) or any(stem.endswith(tam + pers) for pers in PERS_ENDINGS)
        for tam in TAM_FINITE
    )
| |
|
def last_content_index(tokens, start, end_exclusive):
    """Index of the last non-punctuation token in [start, end_exclusive), or -1 if none."""
    for idx in range(end_exclusive - 1, start - 1, -1):
        if tokens[idx] not in VISIBLE_PUNCT:
            return idx
    return -1
| |
|
def strip_qe_punct(tokens):
    """Drop question/exclamation marks, both opening and closing forms."""
    marks = ("¿", "?", "¡", "!")
    return [tok for tok in tokens if tok not in marks]
| |
|
| | |
def _is_numeric_comma(tokens, i):
    """Comma sandwiched between two numbers (a decimal separator), e.g. 3 , 14."""
    if not (0 < i < len(tokens) - 1) or tokens[i] != ",":
        return False
    return is_number(tokens[i - 1]) and is_number(tokens[i + 1])


def _is_time_colon(tokens, i):
    """Colon sandwiched between two numbers (a clock time), e.g. 12 : 30."""
    if not (0 < i < len(tokens) - 1) or tokens[i] != ":":
        return False
    return is_number(tokens[i - 1]) and is_number(tokens[i + 1])


def _is_true_clause_break(tokens, i):
    """A clause-break mark that is neither a numeric comma nor a time colon."""
    return (tokens[i] in CLAUSE_BREAKS
            and not _is_numeric_comma(tokens, i)
            and not _is_time_colon(tokens, i))
| |
|
def add_modal_suffixes_es2ni(tokens):
    """Append -na/-ba to the last finite verb (or last constituent) of each ?/! sentence.

    ES→NI direction: a sentence closed by "?" gets -na on its target token,
    one closed by "!" gets -ba. With MODAL_STRIP_QE_IN_NI the ¿?¡! marks are
    then removed from the NI output, since the suffix now carries modality.
    """
    if not MODAL_SUFFIX_ENABLE:
        return tokens
    out = tokens[:]
    n = len(out)
    i = 0
    sent_start = 0  # index where the current sentence begins
    while i < n:
        if out[i] in ("?", "!"):
            closer = out[i]
            target = -1
            j = i - 1
            # Scan backwards for the token to suffix: first non-punctuation token
            # that (when MODAL_ONLY_ON_FINITE) looks like a finite verb.
            while j >= sent_start:
                if out[j] not in VISIBLE_PUNCT and (not MODAL_ONLY_ON_FINITE or looks_like_finite_ni(out[j])):
                    target = j; break
                j -= 1
            if target == -1:
                # No finite verb found: fall back to the last content token.
                target = last_content_index(out, sent_start, i)
            if target != -1:
                suf = "na" if closer == "?" else "ba"
                # Don't double-suffix a token that already carries it.
                if not re.search(rf"-(?:{suf})$", out[target].lower()):
                    out[target] = out[target] + "-" + suf
            sent_start = i + 1
        elif out[i] in SENT_END:
            sent_start = i + 1
        i += 1
    if MODAL_STRIP_QE_IN_NI:
        out = strip_qe_punct(out)
    return out
| |
|
def strip_modal_suffixes_ni(tokens):
    """
    Interpret -na/-ba as modality; sentences are closed ONLY at sentence end.
    (No closing at commas/":", unless explicit ?/! are already present.)

    NI→ES direction: removes -na/-ba from tokens and re-emits each sentence
    with the matching closer ("?" for -na, "!" for -ba, otherwise the pending
    terminator or "."). Works as a small buffer/emit state machine.
    """
    if not MODAL_SUFFIX_ENABLE:
        return tokens

    out = []            # finished output tokens
    buf = []            # tokens of the sentence currently being assembled
    pending_end = None  # explicit terminator seen (., ?, !, …), if any
    mode = None         # "?"/"!" when a modal suffix (or ¿/¡) set the sentence type

    def _emit(end_override=None, also_append=None):
        """Flush *buf* to *out* with the appropriate sentence closer, then reset state."""
        nonlocal buf, mode, pending_end, out
        # Defensive: modal punctuation never reaches the output via the buffer.
        local = [t for t in buf if t not in ("¿","?","¡","!")]
        if local:
            # Closer priority: explicit override > mode-derived > pending terminator > ".".
            end_tok = end_override or ("?" if mode == "?" else "!" if mode == "!" else pending_end or ".")
            out.extend(local)
            out.append(end_tok)
        buf.clear(); mode = None; pending_end = None
        if also_append:
            out.append(also_append)

    # Sentinel "." guarantees the final sentence is flushed.
    toks = tokens + ["."]
    for i, t in enumerate(toks):
        if t in ("¿", "¡"):
            # An explicit opener both closes the previous sentence and sets the mode.
            _emit(); mode = "?" if t == "¿" else "!"
            continue
        if t in ("?", "!"):
            pending_end = t; _emit(); continue
        if t in SENT_END:
            pending_end = t; _emit(); continue

        # Inside an explicitly-marked ?/! sentence, clause breaks stay inline.
        if t in CLAUSE_BREAKS and mode in ("?","!"):
            buf.append(t)
            continue

        m = re.search(r"-(na|ba)$", (t or "").lower())
        if m:
            # A new modal suffix while one is already active starts a new sentence.
            if mode and buf: _emit()
            mode = "?" if m.group(1) == "na" else "!"
            t = t[:-len(m.group(0))]  # strip the suffix from the surface form

        if t:
            buf.append(t)

    # Drop the duplicate terminator introduced by the sentinel, if any.
    if len(out) >= 2 and out[-1] == "." and out[-2] == ".": out.pop()
    return out
| |
|
def add_inverted_openers(tokens):
    """Insert ¿/¡ at the start of each stretch that ends in ?/!, ignoring numeric commas/":".

    For every closing "?"/"!" found, walks back to the previous true sentence or
    clause break, skips any wrapping quotes/brackets, and inserts the matching
    inverted opener there unless it is already present.
    """
    out = tokens[:]
    START_BREAKS = SENT_END | CLAUSE_BREAKS  # NOTE(review): computed but unused below

    def _is_true_start_break(idx):
        # Sentence enders always break; clause marks break only when they are
        # not decimal commas or clock-time colons.
        if out[idx] in SENT_END: return True
        if out[idx] in CLAUSE_BREAKS: return _is_true_clause_break(out, idx)
        return False

    i = 0
    while i < len(out):
        if out[i] in ("?", "!"):
            closer = out[i]; opener = OPEN_FOR[closer]
            j = i - 1
            while j >= 0 and not _is_true_start_break(j):
                j -= 1
            start = j + 1
            # Skip leading quotes/brackets so the opener lands inside them.
            k = start
            while k < i and out[k] in WRAP_PREFIX:
                k += 1
            if not (k < len(out) and out[k] == opener):
                out.insert(k, opener); i += 1  # inserted before i: shift index
        i += 1
    return out
| |
|
| | |
| | EXPANSION_ENABLE = True |
| | FLAG_COLNAMES = ("flags","FLAGS","expand","EXPAND","tags","TAGS","morph","MORPH") |
| | FLAG_PLURAL = ("S",) |
| | FLAG_3PL = ("3","V3") |
| |
|
| | VOWELS = "aeiouáéíóúüAEIOUÁÉÍÓÚÜ" |
| |
|
| | def _has_flag(cell:str, wanted:tuple)->bool: |
| | c = (cell or "") |
| | return any(w in c for w in wanted) |
| |
|
| | def _pluralize_es_form(s: str) -> str: |
| | if not s: return s |
| | sl = s.lower() |
| | if sl.endswith("z"): |
| | return s[:-1] + ("ces" if s[-1].islower() else "CES") |
| | if s[-1] not in VOWELS: |
| | return s + ("es" if s[-1].islower() else "ES") |
| | return s + ("s" if s[-1].islower() else "S") |
| |
|
| | def _present_3pl_from_3sg(s: str) -> str: |
| | if not s: return s |
| | return s + ("n" if s[-1].islower() else "N") |
| |
|
| | |
| | print("Cargando modelo de voz (opcional)…") |
| | device = "cuda" if torch.cuda.is_available() else "cpu" |
| | processor = model = None |
| | try: |
| | processor = AutoProcessor.from_pretrained("facebook/mms-tts-spa") |
| | model = VitsModel.from_pretrained("facebook/mms-tts-spa").to(device) |
| | print("Modelo de voz cargado.") |
| | except Exception as e: |
| | print(f"AVISO TTS: {e}") |
| |
|
def add_reading_pauses(text: str, level: int = 3) -> str:
    """Duplicate commas and periods so the TTS voice pauses longer.

    level <= 1 disables the effect and returns *text* unchanged.

    Bug fix: the period substitution previously ran on the original *text*
    instead of the intermediate *t*, silently discarding the comma pass.
    """
    if level <= 1:
        return text
    t = re.sub(r",\s*", ", , ", text)
    t = re.sub(r"\.\s*", ". . ", t)  # was `text`: dropped the comma substitution
    return re.sub(r'\s+', ' ', t).strip()
| |
|
def hispanize_for_tts(ni_text: str) -> str:
    """Rewrite NI text into a Spanish-pronounceable form for the TTS voice."""
    text = (ni_text or "").lower()
    # Map special letters to Spanish spellings and open up hyphenated morphemes.
    # NOTE(review): the 'eś'→'es' replace can never match after 'ś'→'s' ran — kept for parity.
    text = text.replace('ŕ', 'rr').replace('ś', 's').replace('eś', 'es').replace('-', ' ')
    text = re.sub(r'\[.*?\]', '', text)         # drop bracketed placeholders
    text = re.sub(r'\s+', ' ', text).strip()
    return add_reading_pauses(text, 3)
| |
|
def synthesize_speech(text):
    """Synthesise *text* with the MMS VITS model.

    Returns a (sample_rate, float32 waveform) tuple suitable for a gradio
    Audio component, or None when the model failed to load, input is empty,
    or inference raises.
    """
    if not text or not text.strip() or model is None or processor is None: return None
    try:
        inputs = processor(text=hispanize_for_tts(text), return_tensors="pt").to(device)
        with torch.no_grad(): output = model(**inputs).waveform
        speech_np = output.cpu().numpy().squeeze()
        # Peak-normalise to 0.9 full scale to avoid clipping.
        mx = max(abs(speech_np.min()), abs(speech_np.max()))
        if mx>0: speech_np = speech_np/mx*0.9
        # NOTE(review): 16000 Hz assumed as the MMS output rate — confirm against model config.
        return (16000, speech_np.astype(np.float32))
    except Exception as e:
        print(f"Error TTS: {e}"); return None
| |
|
| | |
# Vowel order used to index the syllabogram rows below.
V = "aeiou"
# Iberian syllabogram glyph names per onset consonant, indexed by vowel (a,e,i,o,u).
SYL_FOR = {
    "b": ["‹BA›","‹BE›","‹BI›","‹BO›","‹BU›"],
    "d": ["‹DA›","‹DE›","‹DI›","‹DO›","‹DU›"],
    "t": ["‹TA›","‹TE›","‹TI›","‹TO›","‹TU›"],
    # Cleanup: the original had a dead `[...] if False else [...]` conditional here
    # (the discarded arm even contained a stray ‹DO›); only the live list is kept.
    "g": ["‹GA›","‹GE›","‹GI›","‹GO›","‹GU›"],
    "k": ["‹KA›","‹KE›","‹KI›","‹KO›","‹KU›"]
}
# Alphabetic (non-syllabic) glyphs for vowels and continuants.
ALPHA_FOR={"a":"‹A›","e":"‹E›","i":"‹I›","o":"‹O›","u":"‹U›","s":"‹S›","ś":"‹Ś›",
           "l":"‹L›","r":"‹R›","ŕ":"‹Ŕ›","n":"‹N›","m":"‹M›"}
# Glyphs appended for a syllable-final (coda) consonant; "" means no coda.
CODA_FOR={"":"","n":"‹N›","s":"‹S›","ś":"‹Ś›","r":"‹R›","ŕ":"‹Ŕ›","l":"‹L›","m":"‹M›","k":"‹K›","t":"‹T›"}
| |
|
def tokens_from_latin(ni: str) -> str:
    """Transliterate a Latin-script NI word into a string of ‹…› Iberian glyph tokens.

    Greedy left-to-right scan: "p" is voiced to "b" (no /p/ series in the
    script); a consonant+vowel pair becomes a syllabogram, optionally absorbing
    the next consonant as a coda glyph; everything else maps via ALPHA_FOR,
    falling back to the upper-cased character itself. "-" renders as "—".
    """
    out=[]; i=0; ni=(ni or "").lower()
    while i<len(ni):
        c=ni[i]
        if c=="p": c="b"  # merge /p/ into the b-series
        if c=="-": out.append("—"); i+=1; continue
        if c in V:
            out.append(ALPHA_FOR.get(c, c.upper())); i+=1; continue
        if c in SYL_FOR and i+1<len(ni) and ni[i+1] in V:
            idx=V.index(ni[i+1]); tok=SYL_FOR[c][idx]
            # NOTE(review): the following consonant is consumed as a coda even
            # when it could start the next syllable — assumed intentional.
            coda=ni[i+2] if i+2<len(ni) else ""
            if coda in CODA_FOR and coda!="": tok+=CODA_FOR[coda]; i+=3
            else: i+=2
            out.append(tok); continue
        out.append(ALPHA_FOR.get(c, c.upper())); i+=1
    return "".join(out)
| |
|
# "compact" collapses each glyph to a single keystroke; any other value keeps
# the full glyph name. KEYS_OVERRIDE maps whole lowercase words to fixed keys.
KEYS_MODE = "compact"
KEYS_OVERRIDE = {}


def georgeos_keys(token_str: str, ni_plain: str) -> str:
    """Convert ‹…› glyph tokens to the keystrokes of the IberiaGeorgeos font."""
    plain = (ni_plain or "").lower()
    if plain in KEYS_OVERRIDE:
        return KEYS_OVERRIDE[plain]
    keys = []
    for glyph in re.findall(r"‹(.*?)›", token_str):
        if KEYS_MODE == "compact":
            if len(glyph) == 2 and glyph[0] in "BDTGK":
                keys.append(glyph[0])          # syllabogram → its consonant key
            elif glyph in ("A", "E", "I", "O", "U"):
                keys.append(glyph)
            elif glyph == "Ś":
                keys.append("X")
            elif glyph == "Ŕ":
                keys.append("r")
            else:
                keys.append(glyph[0].upper())
        else:
            if len(glyph) == 2 and glyph[0] in "BDTGK":
                keys.append(glyph)
            elif glyph == "Ś":
                keys.append("X")
            elif glyph == "Ŕ":
                keys.append("r")
            else:
                keys.append(glyph)
    return "".join(keys)
| |
|
# Word separator used in the Iberian line rendering.
TRIDOT = "/"


def render_ib_with_tridots(ib_toks):
    """Join Iberian tokens, placing the tridot mark between adjacent words."""
    pieces = []
    previous_was_word = False
    for token in ib_toks:
        if token in VISIBLE_PUNCT:
            pieces.append(" " + token + " ")
            previous_was_word = False
        else:
            if previous_was_word:
                pieces.append(" " + TRIDOT + " ")
            pieces.append(token)
            previous_was_word = True
    return "".join(pieces).strip()
| |
|
| | |
| |
|
| | |
# Strict bilingual mode: an NI surface mapped to by several ES forms is
# considered ambiguous and blocked from NI→ES translation.
STRICT_BI_ENFORCE = True
AMBIG_NI = {}  # ni_lowercased -> set of competing ES surfaces
BI_DIAG_HTML = "<em>Sin CSV cargado.</em>"  # diagnostics panel, rebuilt on load
| |
|
def load_bi_strict_and_diagnose():
    """Load the bilingual CSV, fill ES2NI/NI2ES (plus phrase tables) and build a
    diagnostics HTML panel in BI_DIAG_HTML. Returns True when rows were loaded."""
    global BI_DIAG_HTML
    # Start from a clean slate so reloads don't accumulate stale entries.
    ES2NI.clear(); NI2ES.clear(); ESPHRASE2NI.clear(); NIPHRASE2ES.clear()
    AMBIG_NI.clear()

    if not os.path.exists(CSV_BI):
        msg=f"[ERROR] No se encontró el CSV bilingüe: {CSV_BI}"
        print(msg); BI_DIAG_HTML=f"<b>Error:</b> {escape(msg)}"
        return False

    rows=0; dup_es=0; dup_ni=0; empty_pid=0
    mismatch_backmap = 0
    mismatch_samples = []
    pid_seen=set()

    print(f"Detectado CSV bilingüe: {CSV_BI}")
    try:
        with _open_maybe_gzip(CSV_BI) as f:
            rd = csv.DictReader(f)
            flds=set(rd.fieldnames or [])
            # Column auto-detection: prefer explicit names, fall back to short ones.
            ES_COL = "source_es" if "source_es" in flds else "es_surface" if "es_surface" in flds else "es"
            NI_COL = "target_ni" if "target_ni" in flds else "ni_surface" if "ni_surface" in flds else "ni"
            IDCOL = "pair_id" if "pair_id" in flds else "id" if "id" in flds else None
            FLAGCOL = None
            for cand in FLAG_COLNAMES:
                if cand in flds:
                    FLAGCOL = cand; break

            base_rows = []
            for r in rd:
                es_orig = (r.get(ES_COL) or "").strip()
                ni_orig = (r.get(NI_COL) or "").strip()
                if not (es_orig and ni_orig): continue  # skip incomplete rows
                pid = (r.get(IDCOL) or "").strip() if IDCOL else ""
                if not pid: empty_pid += 1
                else: pid_seen.add(pid)
                flags = (r.get(FLAGCOL) or "") if FLAGCOL else ""

                es = lower(es_orig)
                ni = lower(ni_orig)

                # Multi-word surfaces also feed the phrase tables (first wins).
                if " " in es:
                    if es not in ESPHRASE2NI:
                        ESPHRASE2NI[es] = (ni_orig, pid)
                if " " in ni:
                    if ni not in NIPHRASE2ES:
                        NIPHRASE2ES[ni] = (es_orig, pid)

                # ES→NI: first occurrence wins, later duplicates are counted only.
                if es in ES2NI:
                    dup_es += 1
                else:
                    ES2NI[es] = (ni_orig, pid)

                # NI→ES: duplicates make the NI surface ambiguous; in strict mode
                # the entry is removed entirely and recorded in AMBIG_NI.
                if ni in NI2ES:
                    dup_ni += 1
                    s = AMBIG_NI.get(ni, set())
                    s.add(NI2ES[ni][0]); s.add(es_orig)
                    AMBIG_NI[ni] = s
                    if STRICT_BI_ENFORCE:
                        NI2ES.pop(ni, None)
                else:
                    if STRICT_BI_ENFORCE and ni in AMBIG_NI:
                        # Already blocked earlier; never resurrect it.
                        pass
                    else:
                        NI2ES[ni] = (es_orig, pid)

                base_rows.append((es_orig, ni_orig, pid, flags))
                rows += 1

            # Deterministic morphological expansions driven by the flags column.
            if EXPANSION_ENABLE:
                for es_orig, ni_orig, pid, flags in base_rows:
                    if not flags: continue
                    if _has_flag(flags, FLAG_PLURAL):
                        pl = _pluralize_es_form(es_orig)
                        pl_key = lower(pl)
                        if pl_key not in ES2NI:
                            ES2NI[pl_key] = (ni_orig, pid)
                    if _has_flag(flags, FLAG_3PL):
                        p3 = _present_3pl_from_3sg(es_orig)
                        p3_key = lower(p3)
                        if p3_key not in ES2NI:
                            ES2NI[p3_key] = (ni_orig, pid)

            # Symmetry audit: an ES entry whose NI maps back to a different ES.
            for es_low, (ni_surf, _) in ES2NI.items():
                ni_low = lower(ni_surf)
                back = NI2ES.get(ni_low)
                if back and lower(back[0]) != es_low:
                    mismatch_backmap += 1
                    if len(mismatch_samples) < 10:
                        mismatch_samples.append((es_low, ni_low, lower(back[0])))

    except Exception as e:
        msg=f"[ERROR] Al leer {CSV_BI}: {e}"
        print(msg); BI_DIAG_HTML=f"<b>Error:</b> {escape(msg)}"
        return False

    es_unique = len(ES2NI)
    ni_unique = len(NI2ES)
    pid_unique = len(pid_seen)

    # Console summary (Spanish messages are user-facing; kept verbatim).
    print(f"✓ BI-ONLY ESTRICTO cargado: {rows:,} filas.")
    if dup_es: print(f"[AVISO] {dup_es:,} duplicados ES (se usó la primera).")
    if dup_ni: print(f"[AVISO] {dup_ni:,} duplicados NI (bloqueados en modo estricto).")
    if empty_pid: print(f"[AVISO] {empty_pid:,} filas sin pair_id.")
    if mismatch_backmap:
        print(f"[ALERTA] {mismatch_backmap:,} asimetrías ES↔NI (misma NI apunta a otro ES).")

    sam_html = ""
    if mismatch_samples:
        sam_rows = "".join(
            f"<li><code>{escape(es)}</code> → <code>{escape(ni)}</code> → <code>{escape(es2)}</code></li>"
            for es,ni,es2 in mismatch_samples
        )
        sam_html = f"<details><summary>Muestras</summary><ul>{sam_rows}</ul></details>"

    ambN = sum(len(v) > 1 for v in AMBIG_NI.values())
    ambList = ", ".join(f"{k}→{sorted(list(v))[:3]}" for k,v in list(AMBIG_NI.items())[:5])

    BI_DIAG_HTML = f"""
<div style="font-family:Georgia,serif">
<b>Diagnóstico del CSV BI</b><br>
Archivo: <b>{escape(CSV_BI)}</b><br>
Filas base (CSV): <b>{rows:,}</b><br>
ES únicas (tras expansiones): <b>{es_unique:,}</b> | NI únicas: <b>{ni_unique:,}</b> | pair_id únicos: <b>{pid_unique:,}</b><br>
Duplicados ES: <b>{dup_es:,}</b> | Duplicados NI: <b>{dup_ni:,}</b> (bloqueados en estricto) | Sin pair_id: <b>{empty_pid:,}</b><br>
Asimetrías ES↔NI: <b>{mismatch_backmap:,}</b>
{sam_html}
<hr style="border:0;border-top:1px solid #caa">
<small>NI ambiguas bloqueadas: <b>{ambN:,}</b>{(' · ej.: ' + escape(ambList)) if ambN else ''}</small><br>
<small>Regla: el motor usa <b>sólo</b> tablas 1:1; NI duplicadas se bloquean y se muestran como <code>[AMB-NI:...]</code>.</small>
</div>
"""
    return rows > 0
| |
|
| | print("Cargando léxico/pares (BI-estricto)…") |
| | load_bi_strict_and_diagnose() |
| |
|
| | |
def _longest_match(tokens, i, phrase_map):
    """Return (span, surface) for the longest phrase in *phrase_map* starting at
    token *i*; (0, None) when nothing matches."""
    if not phrase_map:
        return (0, None)
    best_span, best_surface = 0, None
    # Try every span up to MAX_NGRAM; the last hit is the longest one.
    for span in range(1, MAX_NGRAM + 1):
        if i + span > len(tokens):
            break
        candidate = " ".join(lower(tok) for tok in tokens[i:i + span])
        if candidate in phrase_map:
            best_span, best_surface = span, phrase_map[candidate][0]
    return (best_span, best_surface)
| |
|
| | |
def sentence_case_spanish(s: str) -> str:
    """Capitalise the first letter of each sentence; text inside [ ... ] is untouched."""
    WRAPS = "¿¡\"'«(“‘["
    result = []
    at_sentence_start = True
    inside_brackets = False

    for ch in s:
        if ch == '[':
            inside_brackets = True

        if inside_brackets or not at_sentence_start:
            # Copy through; sentence enders / openers re-arm capitalisation.
            result.append(ch)
            if not inside_brackets and ch in ".?!…":
                at_sentence_start = True
            elif not inside_brackets and ch in "¿¡":
                at_sentence_start = True
        elif ch.isspace() or ch in WRAPS:
            # Whitespace and wrappers don't consume the pending capital.
            result.append(ch)
        elif ch.isalpha():
            result.append(ch.upper())
            at_sentence_start = False
        else:
            result.append(ch)
            at_sentence_start = ch in "¿¡"

        if ch == ']':
            inside_brackets = False

    return "".join(result)
| |
|
| | |
def postprocess_spanish(s: str) -> str:
    """Tidy spacing/punctuation of detokenised Spanish, then apply sentence case."""
    fixes = (
        (r"(\d)\s*:\s*(\d)", r"\1:\2"),          # re-join clock times
        (r"(\d)\s*([.,])\s*(\d)", r"\1\2\3"),    # re-join decimal numbers
        (r"\s+([,.;:!?])", r"\1"),               # no space before closers
        (r"([?.!;])(?!\s|$)([^\s])", r"\1 \2"),  # space after sentence punctuation
        (r"([¿¡])\s+", r"\1"),                   # no space after inverted openers
    )
    for pattern, repl in fixes:
        s = re.sub(pattern, repl, s)
    s = re.sub(r"\s{2,}", " ", s).strip()
    return sentence_case_spanish(s)
| |
|
| | |
def translate_es_to_ni_bi(text:str):
    """Translate Spanish → NI with the strict 1:1 tables.

    Returns (ni_text, ib_html): the NI surface string and an HTML div holding
    the Iberian-line rendering. Unknown words become [SIN-LEX:word] placeholders.
    """
    toks = simple_tokenize(text)

    out=[]; ib_toks=[]
    i=0
    while i < len(toks):
        t = toks[i]
        if t in VISIBLE_PUNCT:
            out.append(t); ib_toks.append(t); i+=1; continue
        if is_placeholder(t):
            out.append(t); ib_toks.append(t); i+=1; continue
        # Longest phrase match takes priority over single-word lookup.
        span, ni_surface = _longest_match(toks, i, ESPHRASE2NI)
        if span > 1:
            out.append(ni_surface)
            ib_toks.append(georgeos_keys(tokens_from_latin(ni_surface), ni_surface))
            i += span; continue
        key = lower(t)
        if key in ES2NI:
            ni = ES2NI[key][0]
            out.append(ni)
            ib_toks.append(georgeos_keys(tokens_from_latin(ni), ni))
        elif is_number(key):
            out.append(t); ib_toks.append(t)  # numbers pass through untranslated
        else:
            ph = f"[SIN-LEX:{t}]"
            out.append(ph); ib_toks.append(ph)
        i += 1

    if MODAL_SUFFIX_ENABLE:
        # Modal suffixing mutates the NI tokens, so the Iberian line is rebuilt.
        out = add_modal_suffixes_es2ni(out)
        ib_toks = []
        for tt in out:
            if tt in VISIBLE_PUNCT or tt.startswith("["):
                ib_toks.append(tt)
            else:
                ib_toks.append(georgeos_keys(tokens_from_latin(tt), tt))

    ni_text = detokenize(out)
    ib_html = "<div class='ib-line'>" + escape(render_ib_with_tridots(ib_toks)) + "</div>"
    return ni_text, ib_html
| |
|
def translate_ni_to_es_bi(text:str):
    """Translate NI → Spanish with the strict 1:1 tables.

    Unknown tokens become [?:word]; NI surfaces blocked as ambiguous in strict
    mode become [AMB-NI:word]. Modal suffixes (-na/-ba) are converted back to
    ¿…? / ¡…! punctuation before the Spanish post-processing.
    """
    toks = simple_tokenize(text)

    if MODAL_SUFFIX_ENABLE:
        toks = strip_modal_suffixes_ni(toks)

    out=[]
    i=0
    while i < len(toks):
        t = toks[i]
        if t in VISIBLE_PUNCT:
            out.append(t); i+=1; continue
        if is_placeholder(t):
            out.append(t); i+=1; continue
        # Longest phrase match takes priority over single-word lookup.
        span, es_surface = _longest_match(toks, i, NIPHRASE2ES)
        if span > 1:
            out.append(es_surface); i += span; continue

        key = lower(t)
        if key in NI2ES:
            es = NI2ES[key][0] or ""
            out.append(es if es else f"[?:{t}]")
        elif key in AMBIG_NI and STRICT_BI_ENFORCE:
            # Ambiguous NI surfaces are surfaced explicitly, never guessed.
            out.append(f"[AMB-NI:{t}]")
        elif is_number(key):
            out.append(t)
        else:
            out.append(f"[?:{t}]")
        i += 1

    if MODAL_SUFFIX_ENABLE:
        out = add_inverted_openers(out)

    es_text = detokenize(out)
    es_text = postprocess_spanish(es_text)
    return es_text
| |
|
| | |
def diagnose_text(text, dir_label):
    """Build an HTML coverage report for *text* in the given direction.

    *dir_label* starting with "ES" means ES→NI, anything else NI→ES.
    Reports coverage %, unknown tokens, ambiguous NI tokens and ES↔NI
    asymmetries (a surface whose back-mapping points elsewhere).
    """
    if not text or not text.strip():
        return "<em>Introduce texto para diagnosticar.</em>"

    toks = simple_tokenize(text)
    unknown=set(); asym=set(); amb=set()
    total_tokens=0; covered=0

    if dir_label.startswith("ES"):
        head = "ES→NI"
        i=0
        while i < len(toks):
            t = toks[i]
            # Punctuation and numbers are excluded from coverage statistics.
            if t in VISIBLE_PUNCT or is_number(t):
                i+=1; continue
            total_tokens += 1
            span, _ = _longest_match(toks, i, ESPHRASE2NI)
            if span > 1:
                covered += 1; i += span; continue
            k=lower(t)
            if k not in ES2NI:
                unknown.add(t); i+=1; continue
            covered += 1
            # Round-trip check: does the NI surface map back to this ES form?
            ni = ES2NI[k][0]
            back = NI2ES.get(lower(ni))
            if back and lower(back[0]) != k:
                asym.add(f"{t} → {ni} → {back[0]}")
            i+=1
    else:
        head = "NI→ES"
        i=0
        while i < len(toks):
            t = toks[i]
            if t in VISIBLE_PUNCT or is_number(t):
                i+=1; continue
            total_tokens += 1
            span, _ = _longest_match(toks, i, NIPHRASE2ES)
            if span > 1:
                covered += 1; i += span; continue
            k=lower(t)
            if k in AMBIG_NI:
                amb.add(t); i+=1; continue
            if k not in NI2ES:
                unknown.add(t); i+=1; continue
            covered += 1
            es = NI2ES[k][0]
            back = ES2NI.get(lower(es))
            if back and lower(back[0]) != k:
                asym.add(f"{t} → {es} → {back[0]}")
            i+=1

    cov_pct = (covered/total_tokens*100) if total_tokens else 100.0
    cov_html = f"<div><b>Tokens (sin puntuación/numéricos):</b> {total_tokens} | <b>Cubiertos:</b> {covered} ({cov_pct:.1f}%)</div>"

    unk_html = "".join(f"<li><code>{escape(u)}</code></li>" for u in sorted(unknown, key=lambda x: lower(x))) or "<li><i>—</i></li>"
    amb_html = "".join(f"<li><code>{escape(a)}</code></li>" for a in sorted(amb, key=lambda x: lower(x))) or "<li><i>—</i></li>"
    asy_html = "".join(f"<li><code>{escape(a)}</code></li>" for a in sorted(asym)) or "<li><i>—</i></li>"

    return f"<b>Diagnóstico {head}</b>{cov_html}<b>Ambiguas (NI duplicada):</b><ul>{amb_html}</ul><b>Faltantes:</b><ul>{unk_html}</ul><b>Asimetrías:</b><ul>{asy_html}</ul>"
| |
|
| | |
# UI label sets per interface language; both "ES" and "EN" must define the
# same keys (the language switcher swaps these texts into the widgets).
LABELS={
    "ES":{
        "title":"Traductor Español ↔ Neoíbero",
        "subtitle":"CSV estricto (BI-only 1:1; sin heurísticas; .gz) — determinista",
        "in_label_es":"✏️ Entrada (Español)",
        "in_label_ni":"✏️ Entrada (Neoíbero)",
        "in_ph_es":"Escribe aquí. Ej.: Veo a Ana y doy pan a Marta.",
        "in_ph_ni":"Idatzi hemen. Adib.: nuker-ke ni etxe-ka.",
        "out_lat_esni":"📜 Salida: Neoíbero (latín)",
        "out_lat_nies":"📜 Salida: Español",
        "out_ib":"🗿 Línea ibérica",
        "out_audio":"🔊 Locución (Audio)",
        "btn":"🔄 Traducir",
        "combo":"🌍 Idioma (UI + explicación)",
        "dir":"🔁 Dirección",
        "dir_opts":["ES → NI","NI → ES"],
        "doc_header":"📚 Documentación y Referencia",
        "acc_titles":[
            "🎓 Marco académico y decisiones del neoíbero",
            "🏛️ Herencia posible del íbero histórico",
            "🎨 Diseño de la conlang (neoíbero)",
            "⚙️ Pipeline del traductor (BI-estricto 1:1)",
            "🔤 Ortografía, línea ibérica y claves",
            "❓/❗ Modalidad vascoide (-na / -ba)",
            "🧩 Expansiones por CSV: plurales (S) y 3pl (3/V3)",
            "📖 Gramática de referencia (v1.2)",
            "📚 Bibliografía de base",
            "🧾 Siglas y glosario",
            "🔗 Simetría por pair_id (modo bilingüe)"
        ]
    },
    "EN":{
        "title":"Spanish ↔ Neo-Iberian Translator",
        "subtitle":"Strict BI-only (1:1 surfaces; no heuristics; .gz) — deterministic",
        "in_label_es":"✏️ Input (Spanish)",
        "in_label_ni":"✏️ Input (Neo-Iberian)",
        "in_ph_es":"Type here. E.g., Veo a Ana y doy pan a Marta.",
        "in_ph_ni":"Type here. E.g., nuker-ke ni etxe-ka.",
        "out_lat_esni":"📜 Output: Neo-Iberian (Latin)",
        "out_lat_nies":"📜 Output: Spanish",
        "out_ib":"🗿 Iberian line",
        "out_audio":"🔊 Speech (Audio)",
        "btn":"🔄 Translate",
        "combo":"🌍 Language (UI + docs)",
        "dir":"🔁 Direction",
        "dir_opts":["ES → NI","NI → ES"],
        "doc_header":"📚 Documentation & Reference",
        "acc_titles":[
            "🎓 Background & design choices",
            "🏛️ Possible inheritance from ancient Iberian",
            "🎨 Conlang design (Neo-Iberian)",
            "⚙️ Translator pipeline (strict 1:1)",
            "🔤 Orthography, Iberian line & keys",
            "❓/❗ Vascoid modality (-na / -ba)",
            "🧩 CSV-driven expansions: plurals (S) & 3pl (3/V3)",
            "📖 Reference grammar (v1.2)",
            "📚 Core references",
            "🧾 Acronyms & glossary",
            "🔗 Pair-id symmetry (bilingual mode)"
        ]
    }
}
# Accordion body texts (markdown), positionally parallel to LABELS[lang]["acc_titles"].
DOC = {
    "ES":[
        "**Escritura y datos.** Un **único CSV con `pair_id`** y superficies exactas. La traducción ES↔NI es **1:1** por superficie.",
        "**Herencia plausible del íbero.** Fonotaxis CV(C); p→b; r/ŕ; casos -k/-te/-ka/-ar/-en/-i.",
        "**Diseño del neoíbero.** TAM: PRS -ke, PST -bo, FUT -ta, IPFV -ri, IMP -tu, COND/SBJV -ni, FUT_SBJV -ra.",
        "**Pipeline (BI-estricto 1:1).** Tokeniza; sustitución exacta; NI ambigua **se bloquea** y sale como `[AMB-NI:…]`.",
        "**Ortografía y línea ibérica.** Tokens BA/BE/…; tridots '/'; p→b; codas N/S/Ś/R/Ŕ/L/M/K/T.",
        "**Modalidad (-na/-ba).** ES→NI puede omitir ¿?¡! (si está activo). NI→ES inserta `¿…?`/`¡…!` al final de la oración marcada, **no en comas**.",
        "**Expansiones por CSV (deterministas).** `flags=S` plural regular; `flags=3|V3` 3ª plural del presente. Solo si lo marcas.",
        "**Gramática mínima.** Visualización; la gramática no se “calcula”.",
        "**Bibliografía.** Untermann; de Hoz; Ferrer i Jané; Correa…",
        "**Glosario & datasets.** Faltas → `[SIN-LEX:…]` / `[?:…]`. Ambiguas → `[AMB-NI:…]` (limpia tu CSV).",
        "**Simetría por pair_id.** El diagnóstico avisa si una NI apunta a dos ES distintos."
    ],
    "EN":[
        "One bilingual CSV with `pair_id` and exact surfaces. ES↔NI is strictly 1:1.",
        "Possible inheritance (non-palaeographic).",
        "Neo-Iberian design (phonology & morphology).",
        "Pipeline: tokenise → exact replacement. Ambiguous NI are **blocked** and rendered as `[AMB-NI:…]`.",
        "Orthography, Iberian line & keys.",
        "Modality (-na/-ba): ES→NI can drop ¿?¡!. NI→ES places them at sentence end, not at commas.",
        "CSV-driven expansions (deterministic): `S` plural; `3|V3` present 3pl.",
        "Minimal grammar (v1.2).",
        "Selected references.",
        "Glossary & datasets.",
        "Pair-id symmetry diagnostics."
    ]
}
| |
|
| | |
| | def build_css(): |
| | b64=None |
| | if os.path.exists("Iberia-Georgeos.ttf"): |
| | with open("Iberia-Georgeos.ttf","rb") as f: |
| | b64=base64.b64encode(f.read()).decode("ascii") |
| | font_src = f"url(data:font/ttf;base64,{b64}) format('truetype')" if b64 else "local('sans-serif')" |
| | return f""" |
| | @font-face {{ |
| | font-family: 'IberiaGeorgeos'; |
| | src: {font_src}; |
| | font-weight: normal; font-style: normal; |
| | }} |
| | :root {{ |
| | --iberian-clay:#8B4513; --iberian-ochre:#CC7722; --iberian-stone:#5C5C5C; |
| | --iberian-sand:#D2B48C; --iberian-rust:#A0522D; --iberian-bronze:#CD7F32; |
| | }} |
| | .gradio-container {{ background:linear-gradient(135deg,#f4e8d8 0%,#e8d5c4 50%,#d4c4b0 100%)!important; |
| | font-family:'Georgia','Times New Roman',serif!important; }} |
| | .gradio-container h1, .gradio-container h2, .gradio-container h3 {{ |
| | color:var(--iberian-clay)!important; text-shadow:2px 2px 4px rgba(139,69,19,.15)!important; |
| | border-bottom:3px solid var(--iberian-bronze)!important; padding-bottom:.5rem!important; letter-spacing:.5px!important; |
| | }} |
| | .gradio-container .gr-group {{ background:linear-gradient(to bottom,#f9f6f0,#ede6dc)!important; |
| | border:2px solid var(--iberian-sand)!important; border-radius:8px!important; box-shadow:0 4px 12px rgba(139,69,19,.2), inset 0 1px 0 rgba(255,255,255,.5)!important; |
| | padding:1.5rem!important; margin-bottom:1.5rem!important; }} |
| | .gradio-container .gr-accordion {{ background:linear-gradient(145deg,#ebe3d5,#d9cec0)!important; |
| | border:2px solid var(--iberian-rust)!important; border-radius:6px!important; margin-bottom:.8rem!important; box-shadow:2px 2px 6px rgba(0,0,0,.15)!important; }} |
| | .gradio-container .gr-accordion .label-wrap {{ background:linear-gradient(to right,var(--iberian-ochre),var(--iberian-rust))!important; |
| | color:#fff!important; font-weight:600!important; padding:.8rem 1rem!important; border-radius:4px!important; text-shadow:1px 1px 2px rgba(0,0,0,.3)!important; }} |
| | .gradio-container .gr-textbox textarea, .gradio-container .gr-textbox input {{ background:linear-gradient(to bottom,#faf8f3,#f5f0e8)!important; |
| | border:2px solid var(--iberian-sand)!important; border-radius:6px!important; color:var(--iberian-stone)!important; |
| | font-family:'Georgia',serif!important; box-shadow:inset 2px 2px 4px rgba(139,69,19,.1)!important; }} |
| | .gradio-container .gr-textbox textarea:focus, .gradio-container .gr-textbox input:focus {{ |
| | border-color:var(--iberian-bronze)!important; box-shadow:inset 2px 2px 4px rgba(139,69,19,.1), 0 0 8px rgba(205,127,50,.3)!important; }} |
| | .gradio-container .gr-button.gr-button-primary {{ background:linear-gradient(145deg,var(--iberian-bronze),var(--iberian-rust))!important; |
| | border:2px solid var(--iberian-clay)!important; color:#fff!important; font-weight:bold!important; text-shadow:1px 2px 2px rgba(0,0,0,.4)!important; |
| | box-shadow:0 4px 8px rgba(139,69,19,.3), inset 0 1px 0 rgba(255,255,255,.2)!important; border-radius:8px!important; padding:.8rem 1.5rem!important; transition:all .3s ease!important; }} |
| | .gradio-container .gr-button.gr-button-primary:hover {{ background:linear-gradient(145deg,var(--iberian-rust),var(--iberian-bronze))!important; |
| | transform:translateY(-2px)!important; box-shadow:0 6px 12px rgba(139,69,19,.4)!important; }} |
| | .ib-line {{ font-family:'IberiaGeorgeos',monospace,sans-serif!important; font-size:1.9rem!important; line-height:2.4rem!important; white-space:pre-wrap!important; |
| | background:linear-gradient(135deg,#e8dcc8 0%,#d4c4a8 50%,#c4b098 100%)!important; padding:24px!important; border-radius:10px!important; |
| | border:3px solid var(--iberian-rust)!important; border-left:6px solid var(--iberian-bronze)!important; |
| | box-shadow:0 4px 15px rgba(139,69,19,.25), inset 0 2px 4px rgba(0,0,0,.1)!important; color:var(--iberian-clay)!important; position:relative!important; }} |
| | .ib-line::before {{ content:''!important; position:absolute!important; inset:0!important; |
| | background-image:repeating-linear-gradient(0deg,transparent,transparent 2px, rgba(139,69,19,.03) 2px, rgba(139,69,19,.03) 4px)!important; |
| | pointer-events:none!important; border-radius:10px!important; }} |
| | @media (max-width:768px) {{ |
| | .ib-line {{ font-size:1.5rem!important; line-height:2rem!important; padding:16px!important; }} |
| | .gradio-container .gr-group {{ padding:1rem!important; }} |
| | .gradio-container h1 {{ font-size:1.8rem!important; }} |
| | }} |
| | @media (max-width:480px) {{ |
| | .ib-line {{ font-size:1.3rem!important; line-height:1.8rem!important; padding:12px!important; }} |
| | .gradio-container h1 {{ font-size:1.5rem!important; }} |
| | }} |
| | """ |
# Build the full stylesheet once at import time; injected into gr.Blocks below.
CSS = build_css()
|
# ---------------------------------------------------------------------------
# Gradio UI: component layout, event handlers, and wiring.
# Visible labels come from LABELS[lang]; documentation bodies from DOC[lang].
# ---------------------------------------------------------------------------
with gr.Blocks(css=CSS, theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="purple")) as demo:
    # --- Header: title, UI-language selector, and translation direction ---
    with gr.Group():
        title = gr.Markdown(f"# {LABELS['ES']['title']}")
        subtitle = gr.Markdown(f"*{LABELS['ES']['subtitle']}*")
        with gr.Row():
            combo = gr.Dropdown(choices=["ES","EN"], value="ES", label=LABELS["ES"]["combo"])
            direction = gr.Radio(choices=LABELS["ES"]["dir_opts"], value="ES → NI", label=LABELS["ES"]["dir"])

    # --- Documentation: 11 collapsible sections plus a CSV-load diagnostic ---
    with gr.Group():
        doc_header = gr.Markdown(f"## {LABELS['ES']['doc_header']}")
        acc_titles = LABELS["ES"]["acc_titles"]
        # NOTE(review): acc1..acc11 / md1..md11 are deliberately individual names
        # (not a list) because switch_lang() below returns one gr.update per
        # component and combo.change() lists each of them positionally.
        with gr.Accordion(acc_titles[0], open=False) as acc1: md1 = gr.Markdown(DOC["ES"][0])
        with gr.Accordion(acc_titles[1], open=False) as acc2: md2 = gr.Markdown(DOC["ES"][1])
        with gr.Accordion(acc_titles[2], open=False) as acc3: md3 = gr.Markdown(DOC["ES"][2])
        with gr.Accordion(acc_titles[3], open=False) as acc4: md4 = gr.Markdown(DOC["ES"][3])
        with gr.Accordion(acc_titles[4], open=False) as acc5: md5 = gr.Markdown(DOC["ES"][4])
        with gr.Accordion(acc_titles[5], open=False) as acc6: md6 = gr.Markdown(DOC["ES"][5])
        with gr.Accordion(acc_titles[6], open=False) as acc7: md7 = gr.Markdown(DOC["ES"][6])
        with gr.Accordion(acc_titles[7], open=False) as acc8: md8 = gr.Markdown(DOC["ES"][7])
        with gr.Accordion(acc_titles[8], open=False) as acc9: md9 = gr.Markdown(DOC["ES"][8])
        with gr.Accordion(acc_titles[9], open=False) as acc10: md10 = gr.Markdown(DOC["ES"][9])
        with gr.Accordion(acc_titles[10], open=False) as acc11: md11 = gr.Markdown(DOC["ES"][10])
        # Static HTML report produced while loading the BI lexicon CSV.
        with gr.Accordion("🧪 Diagnóstico del CSV BI (al cargar)", open=False):
            bi_diag_box = gr.HTML(value=BI_DIAG_HTML)

    # --- Translator panel: input text, action buttons, and outputs ---
    with gr.Group():
        es_in = gr.Textbox(label=LABELS["ES"]["in_label_es"], placeholder=LABELS["ES"]["in_ph_es"], lines=5)
        with gr.Row():
            btn_tr = gr.Button(LABELS["ES"]["btn"], variant="primary")
            btn_diag = gr.Button("🔎 Diagnosticar BI con este texto", variant="secondary")
        with gr.Row():
            with gr.Column(scale=2):
                ni_out = gr.Textbox(label=LABELS["ES"]["out_lat_esni"], lines=5, interactive=False)
                loc_btn = gr.Button("🔊 Locutar", variant="secondary", visible=True)
                audio_out = gr.Audio(label=LABELS["ES"]["out_audio"], type="numpy")
            with gr.Column(scale=1):
                ib_out = gr.HTML(label=LABELS["ES"]["out_ib"])
                diag_out = gr.HTML(value="")

    def do_translate(text, dir_label):
        """Translate `text` in the direction given by `dir_label`.

        Returns a 5-tuple of gr.update objects, positionally matching
        (ni_out, ib_out, loc_btn, audio_out, diag_out) in btn_tr.click below.
        Empty/whitespace input clears every output and hides the speech button.
        """
        if not text or not text.strip():
            return (gr.update(value=""),
                    gr.update(value="<div class='ib-line'></div>"),
                    gr.update(visible=False),
                    gr.update(value=None),
                    gr.update(value=""))
        if dir_label.startswith("ES"):
            # ES → NI: produce both the Latin transcription and the Iberian
            # rendering; the speech button only applies in this direction.
            latin, ib = translate_es_to_ni_bi(text)
            return (gr.update(label=LABELS["ES"]["out_lat_esni"], value=latin),
                    gr.update(value=ib),
                    gr.update(visible=True),
                    gr.update(value=None),
                    gr.update(value=""))
        else:
            # NI → ES: text output only; the Iberian panel is blanked.
            es_text = translate_ni_to_es_bi(text)
            return (gr.update(label=LABELS["ES"]["out_lat_nies"], value=es_text),
                    gr.update(value="<div class='ib-line'></div>"),
                    gr.update(visible=False),
                    gr.update(value=None),
                    gr.update(value=""))

    btn_tr.click(do_translate, [es_in, direction], [ni_out, ib_out, loc_btn, audio_out, diag_out])

    def run_locution(latin_text, dir_label):
        """Synthesize speech for the NI output; no-op for the NI→ES direction."""
        if dir_label.startswith("ES"):
            return synthesize_speech(latin_text)
        return None
    loc_btn.click(run_locution, [ni_out, direction], audio_out)

    def do_diagnose(text, dir_label):
        """Render the BI-coverage diagnostic HTML for the current input text."""
        return gr.update(value=diagnose_text(text, dir_label))
    btn_diag.click(do_diagnose, [es_in, direction], [diag_out])

    def switch_lang(sel_lang, dir_label):
        """Relabel every component when the UI language dropdown changes.

        Returns 32 gr.update objects matching the output list of combo.change:
        title, subtitle, combo, direction, doc_header, then the 11
        (accordion, markdown) pairs, then es_in, ni_out, ib_out, audio_out,
        btn_tr. Only labels/text change; current values are preserved.
        """
        L=LABELS[sel_lang]; T=L["acc_titles"]; D=DOC[sel_lang]
        in_label = L["in_label_es"] if dir_label.startswith("ES") else L["in_label_ni"]
        in_ph = L["in_ph_es"] if dir_label.startswith("ES") else L["in_ph_ni"]
        out_lab = L["out_lat_esni"] if dir_label.startswith("ES") else L["out_lat_nies"]
        return (
            gr.update(value=f"# {L['title']}"),
            gr.update(value=f"*{L['subtitle']}*"),
            gr.update(label=L["combo"], value=sel_lang),
            gr.update(label=L["dir"], choices=L["dir_opts"], value=dir_label),
            gr.update(value=f"## {L['doc_header']}"),
            gr.update(label=T[0]), gr.update(value=D[0]),
            gr.update(label=T[1]), gr.update(value=D[1]),
            gr.update(label=T[2]), gr.update(value=D[2]),
            gr.update(label=T[3]), gr.update(value=D[3]),
            gr.update(label=T[4]), gr.update(value=D[4]),
            gr.update(label=T[5]), gr.update(value=D[5]),
            gr.update(label=T[6]), gr.update(value=D[6]),
            gr.update(label=T[7]), gr.update(value=D[7]),
            gr.update(label=T[8]), gr.update(value=D[8]),
            gr.update(label=T[9]), gr.update(value=D[9]),
            gr.update(label=T[10]), gr.update(value=D[10]),
            gr.update(label=in_label, placeholder=in_ph),
            gr.update(label=out_lab),
            gr.update(label=L["out_ib"]),
            gr.update(label=L["out_audio"]),
            gr.update(value=L["btn"])
        )
    combo.change(
        switch_lang,
        [combo, direction],
        [title, subtitle, combo, direction, doc_header,
         acc1, md1, acc2, md2, acc3, md3, acc4, md4, acc5, md5, acc6, md6, acc7, md7, acc8, md8, acc9, md9, acc10, md10, acc11, md11,
         es_in, ni_out, ib_out, audio_out, btn_tr]
    )

    def switch_direction(dir_label, sel_lang):
        """Swap labels/placeholders and clear outputs on direction change.

        Returns 6 gr.update objects matching direction.change's outputs:
        (es_in, ni_out, ib_out, loc_btn, audio_out, diag_out).
        """
        L=LABELS[sel_lang]
        in_label = L["in_label_es"] if dir_label.startswith("ES") else L["in_label_ni"]
        in_ph = L["in_ph_es"] if dir_label.startswith("ES") else L["in_ph_ni"]
        out_lab = L["out_lat_esni"] if dir_label.startswith("ES") else L["out_lat_nies"]
        # NOTE(review): equivalent to `dir_label.startswith("ES")` — the
        # ternary is redundant but kept byte-identical here.
        loc_vis = True if dir_label.startswith("ES") else False
        return (gr.update(label=in_label, placeholder=in_ph),
                gr.update(label=out_lab, value=""),
                gr.update(value="<div class='ib-line'></div>"),
                gr.update(visible=loc_vis),
                gr.update(value=None),
                gr.update(value=""))
    direction.change(
        switch_direction,
        [direction, combo],
        [es_in, ni_out, ib_out, loc_btn, audio_out, diag_out]
    )
| |
|
| | |
def _symmetry_smoketest():
    """Round-trip a handful of NI probe strings (NI → ES → NI) and print each stage.

    Purely diagnostic: output goes to stdout so a developer can eyeball
    whether the deterministic BI translation is symmetric.
    """
    print("\n[SMOKE] Prueba ES↔NI (BI-estricto, determinista)…")
    probes = (
        "nuker-ke ni etxe-ka ?",
        "¿Pagaste 12,75 en la cafetería?",
        "Marta llega a las 18:30.",
        "[SIN-LEX:Tomás]-na euŕak-ke !",
    )
    for probe in probes:
        spanish = translate_ni_to_es_bi(probe)
        roundtrip, _ib = translate_es_to_ni_bi(spanish)
        for tag, text in ((" IN:", probe), (" ES:", spanish), (" NI:", roundtrip)):
            print(tag, text)
        print("---")
| |
|
# Run the round-trip smoke test at import time only when debugging is enabled.
if DEBUG_MODE:
    _symmetry_smoketest()
| |
|
# Script entry point: enable Gradio's request queue and start the web server.
if __name__ == "__main__":
    demo.queue().launch()
| |
|
| |
|
| |
|