LoloSemper's picture
Update app.py
d10e29d verified
# app.py — Universal Conlang Translator (Max Compresión Exacta)
# Archivos requeridos en la raíz:
# - lexicon_minimax.json
# - lexicon_komin.json
# - lexicon_master.json
#
# requirements.txt (para HF Spaces):
# gradio>=4.36.0
# spacy>=3.7.4
# es_core_news_sm @ https://github.com/explosion/spacy-models/releases/download/es_core_news_sm-3.7.0/es_core_news_sm-3.7.0-py3-none-any.whl
# en_core_web_sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl
import os, re, json, base64, zlib
from typing import Dict, Optional, List, Any
import gradio as gr
# ------------ Expected files ------------
# Lexicon JSON file names; they are opened as relative paths by load_json.
LEX_MINI = "lexicon_minimax.json"
LEX_KOMI = "lexicon_komin.json"
LEX_MASTER = "lexicon_master.json"
# ------------ Normalization ------------
# Matches one run of ASCII or Spanish-accented letters.
WORD_RE = re.compile(r"[A-Za-zÁÉÍÓÚÜÑáéíóúüñ]+", re.UNICODE)
# Translation table folding Spanish accented letters to plain ASCII.
STRIP = str.maketrans("ÁÉÍÓÚÜÑáéíóúüñ", "AEIOUUNaeiouun")


def norm_es(w: str) -> str:
    """Return the Spanish lookup key for *w*.

    The word is lowercased, every character that is not a plain or
    Spanish-accented lowercase letter is dropped, and the remaining accents
    are folded to ASCII. ``None`` or an empty string yields ``""``.
    """
    lowered = (w or "").lower()
    letters_only = re.sub(r"[^a-záéíóúüñ]", "", lowered)
    return letters_only.translate(STRIP)


def norm_en(w: str) -> str:
    """Return the English lookup key for *w*: lowercase ASCII letters only."""
    lowered = (w or "").lower()
    return re.sub(r"[^a-z]", "", lowered)
# ------------ Lexicon loading ------------
def load_json(path: str):
    """Parse the UTF-8 JSON file at *path*, or return ``None`` if it does not exist."""
    if os.path.exists(path):
        with open(path, "r", encoding="utf-8") as handle:
            return json.load(handle)
    return None
def load_lexicons():
    """Load the three lexicon files and derive every lookup table from them.

    Missing files are treated as empty. Returns an 11-tuple:
    ``(es2mini, es2komi, mini2es, komi2es, en2mini, en2komi, mini2en,
    komi2en, es2en_lemma, en2es_lemma, master)``. The ES tables come from the
    ``"mapping"`` key of the Minimax/Kōmín files; the EN tables and the lemma
    bridges come from the ``"entries"`` list of the master file. Reverse
    tables are plain inversions, so a code shared by several words keeps the
    last word seen.
    """
    minimax_lex = load_json(LEX_MINI) or {}
    komin_lex = load_json(LEX_KOMI) or {}
    master = load_json(LEX_MASTER) or {}

    es2mini = minimax_lex.get("mapping", {})
    es2komi = komin_lex.get("mapping", {})

    def invert(table):
        return {code: word for word, code in table.items()}

    es2en_lemma: Dict[str, str] = {}
    en2es_lemma: Dict[str, str] = {}
    en2mini, en2komi = {}, {}

    has_entries = isinstance(master, dict) and "entries" in master
    for entry in (master["entries"] if has_entries else []):
        lemma_es = norm_es(str(entry.get("lemma_es", "")))
        lemma_en = norm_en(str(entry.get("lemma_en", "")))
        code_mini = str(entry.get("minimax", ""))
        code_komi = str(entry.get("komin", ""))
        # setdefault: the first entry for a lemma wins.
        if lemma_es and lemma_en:
            es2en_lemma.setdefault(lemma_es, lemma_en)
            en2es_lemma.setdefault(lemma_en, lemma_es)
        if lemma_en and code_mini:
            en2mini.setdefault(lemma_en, code_mini)
        if lemma_en and code_komi:
            en2komi.setdefault(lemma_en, code_komi)

    return (
        es2mini, es2komi, invert(es2mini), invert(es2komi),
        en2mini, en2komi, invert(en2mini), invert(en2komi),
        es2en_lemma, en2es_lemma, master,
    )


# Tables are built once, at import time.
(ES2MINI, ES2KOMI, MINI2ES, KOMI2ES, EN2MINI, EN2KOMI, MINI2EN, KOMI2EN,
 ES2EN_LEMMA, EN2ES_LEMMA, MASTER_OBJ) = load_lexicons()
# ------------ Pronouns ------------
# Lowercase surface forms; the realizers skip tokens whose lowercased text is
# in the set for the source language when remove_pronouns is enabled.
PRON_ES = {"yo","tú","vos","usted","él","ella","nosotros","vosotros","ustedes","ellos","ellas","me","te","se","nos","os"}
PRON_EN = {"i","you","he","she","it","we","they","me","him","her","us","them"}
# ------------ Reversible OOV encoding (semi-lossless) ------------
def _unique_alphabet(pool: str, size: int = 64) -> str:
    """Return the first *size* characters of *pool* with repeats replaced.

    A custom base64 alphabet needs *size* distinct symbols; if two 6-bit
    values share a symbol the encoding cannot be decoded unambiguously.
    First occurrences keep their position (so existing encodings that do not
    touch a repeated slot stay valid); each repeated character is swapped for
    the next unused character found after position *size* in the pool.

    Raises:
        ValueError: if the pool cannot supply *size* distinct characters.
    """
    head = pool[:size]
    spares = (ch for ch in dict.fromkeys(pool[size:]) if ch not in head)
    seen = set()
    symbols = []
    for ch in head:
        if ch in seen:
            ch = next(spares, None)
            if ch is None:
                raise ValueError(f"alphabet pool has fewer than {size} distinct characters")
        seen.add(ch)
        symbols.append(ch)
    if len(symbols) != size:
        raise ValueError(f"alphabet pool has fewer than {size} characters")
    return "".join(symbols)


ALPHA_MINI64 = _unique_alphabet("@ptkmnslraeiouy0123456789><=:/!?.+-_*#bcdfghjvqwxzACEGHIJKLMNOPRS")
CJK_BASE = (
    "天地人日月山川雨風星火水木土金石光影花草鳥犬猫魚"
    "東西南北中外上下午夜明暗手口目耳心言書家道路門"
    "大小長短早晚高低新古青紅白黒金銀銅玉米茶酒米"
    "文学楽音画体気電海空森林雪雲砂島橋城村国自由静"
)
# CJK_BASE repeats 金 inside its first 64 characters; the plain slice mapped
# two base64 digits to the same glyph, which made Kōmín OOV tokens lossy.
ALPHA_CJK64 = _unique_alphabet(CJK_BASE)
def to_custom_b64(b: bytes, alphabet: str) -> str:
    """Encode *b* as unpadded base64 written with a custom 64-symbol alphabet.

    Args:
        b: raw bytes to encode.
        alphabet: 64 characters; position ``i`` replaces the ``i``-th symbol
            of the standard base64 alphabet (``A-Z a-z 0-9 + /``).

    Returns:
        The encoded text without ``=`` padding.
    """
    # Padding must be removed BEFORE translating: the custom alphabet may use
    # "=" as a data symbol, and stripping afterwards would delete real data.
    std = base64.b64encode(b).decode("ascii").rstrip("=")
    trans = str.maketrans(
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/", alphabet
    )
    return std.translate(trans)
def from_custom_b64(s: str, alphabet: str) -> bytes:
    """Decode unpadded custom-alphabet base64 text back to bytes.

    *alphabet* is the same 64-character string used for encoding. Symbols are
    mapped back to the standard base64 alphabet and the ``=`` padding is
    restored before decoding.
    """
    standard_alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
    standard = s.translate(str.maketrans(alphabet, standard_alphabet))
    missing = -len(standard) % 4  # characters needed to reach a multiple of 4
    return base64.b64decode(standard + "=" * missing)
def enc_oov_minimax(token: str) -> str:
    """Encode an out-of-vocabulary word as a ``~``-prefixed Minimax token."""
    payload = to_custom_b64(token.encode("utf-8"), ALPHA_MINI64)
    return "~" + payload


def dec_oov_minimax(code: str) -> str:
    """Decode a ``~``-prefixed Minimax OOV token; return *code* unchanged on failure."""
    try:
        raw = from_custom_b64(code[1:], ALPHA_MINI64)
        return raw.decode("utf-8")
    except Exception:
        # Best effort: anything undecodable is passed through as-is.
        return code


def enc_oov_komin(token: str) -> str:
    """Encode an out-of-vocabulary word as a ``「…」``-bracketed Kōmín token."""
    payload = to_custom_b64(token.encode("utf-8"), ALPHA_CJK64)
    return "「" + payload + "」"


def dec_oov_komin(code: str) -> str:
    """Decode a ``「…」`` Kōmín OOV token; return *code* unchanged on failure."""
    try:
        raw = from_custom_b64(code[1:-1], ALPHA_CJK64)
        return raw.decode("utf-8")
    except Exception:
        return code


def is_oov_minimax(code: str) -> bool:
    """True when *code* is ``~`` followed by at least one character."""
    return len(code) > 1 and code[0] == "~"


def is_oov_komin(code: str) -> bool:
    """True when *code* is wrapped in ``「`` and ``」``."""
    return len(code) >= 2 and code[0] == "「" and code[-1] == "」"
# ------------ Optional spaCy ------------
# USE_SPACY is True only when spaCy imports AND both models load; otherwise
# both pipelines are None.
USE_SPACY = False
nlp_es = nlp_en = None
try:
    import spacy

    nlp_es = spacy.load("es_core_news_sm")
    nlp_en = spacy.load("en_core_web_sm")
    USE_SPACY = True
except Exception:
    # Missing package or missing model: reset both so they stay consistent.
    nlp_es = nlp_en = None
def lemma_of(tok, src_lang: str) -> str:
    """Return the normalized lemma of *tok*, falling back to its text.

    Spanish normalization is used when *src_lang* is ``"Español"``; any other
    value uses the English normalizer.
    """
    raw = tok.lemma_ if getattr(tok, "lemma_", "") else tok.text
    normalize = norm_es if src_lang == "Español" else norm_en
    return normalize(raw)
# ------------ Simple detection and helpers ------------
def detect_polarity(doc) -> bool:
    """Return True when the document text contains a question mark."""
    text = getattr(doc, "text", "")
    return "?" in text


def detect_neg(doc) -> bool:
    """Return True when any token is a ``neg`` dependent or is no/not/n't."""
    negators = ("no", "not", "n't")
    return any(
        getattr(tok, "dep_", "") == "neg"
        or getattr(tok, "lower_", "").lower() in negators
        for tok in doc
    )
def detect_tense(root):
    """Return ``"Past"``, ``"Fut"`` or ``"Pres"`` for the root token.

    The root's own morphology is checked first (Past, then Fut, then Pres).
    Otherwise its AUX children are scanned: a past-tense auxiliary gives
    ``"Past"`` and the word "will" gives ``"Fut"``. The default is ``"Pres"``.
    """
    own_morph = str(getattr(root, "morph", ""))
    for tense in ("Past", "Fut", "Pres"):
        if f"Tense={tense}" in own_morph:
            return tense

    for child in getattr(root, "children", []):
        if getattr(child, "pos_", "") != "AUX":
            continue
        if "Tense=Past" in str(getattr(child, "morph", "")):
            return "Past"
        if getattr(child, "lower_", "").lower() == "will":
            return "Fut"
    return "Pres"
def extract_core(doc):
    """Split the sentence into its root and the root's core dependents.

    Returns ``(root, subjects, objects, obliques, adverbs)``. The root is the
    first ROOT token tagged VERB or AUX; failing that, the first token, or
    *doc* itself when it is empty. Only direct children of the root are
    classified, and each list is ordered by token index ``i``.
    """
    tokens = list(doc)
    root = tokens[0] if tokens else doc
    for tok in tokens:
        if getattr(tok, "dep_", "") == "ROOT" and getattr(tok, "pos_", "") in ("VERB", "AUX"):
            root = tok
            break

    role_of_dep = {
        "nsubj": "subj", "nsubj:pass": "subj", "csubj": "subj",
        "obj": "obj", "dobj": "obj", "iobj": "obj",
        "obl": "obl", "pobj": "obl",
        "advmod": "adv", "advcl": "adv",
    }
    found = {"subj": [], "obj": [], "obl": [], "adv": []}
    for child in getattr(root, "children", []):
        role = role_of_dep.get(getattr(child, "dep_", ""))
        if role is None:
            continue
        # Adverbial dependents only count when they are tagged ADV.
        if role == "adv" and getattr(child, "pos_", "") != "ADV":
            continue
        found[role].append(child)

    def by_index(items):
        return sorted(items, key=lambda tok: getattr(tok, "i", 0))

    return (root, by_index(found["subj"]), by_index(found["obj"]),
            by_index(found["obl"]), by_index(found["adv"]))
def _person_of_doc(doc, src_lang: str) -> Optional[str]:
    """Guess the grammatical person/number of the sentence from its subject.

    Finds the ROOT token (or the first token), takes its first ``nsubj*``
    child and returns a code such as ``"1s"`` or ``"3p"``. Returns ``None``
    when there is no subject or when anything goes wrong (empty doc,
    unexpected token objects).

    Pronouns whose number is fixed by the word itself (we/they,
    nosotros/vosotros/ustedes/ellos and their feminine forms) are resolved
    directly instead of relying on tagger output, which previously made
    English "we" and "they" come out as ``"3s"``.
    """
    fixed_es = {
        "nosotros": "1p", "nosotras": "1p",
        "vosotros": "2p", "vosotras": "2p",
        "ustedes": "3p", "ellos": "3p", "ellas": "3p",
    }
    fixed_en = {"we": "1p", "they": "3p"}
    try:
        tokens = list(doc)
        root = next((t for t in tokens if getattr(t, "dep_", "") == "ROOT"), tokens[0])
        subj = next(
            (t for t in getattr(root, "children", []) if getattr(t, "dep_", "").startswith("nsubj")),
            None,
        )
        if subj is None:
            return None
        low = getattr(subj, "lower_", "").lower()

        if src_lang == "Español":
            if low in fixed_es:
                return fixed_es[low]
            number = "p" if "Number=Plur" in str(getattr(subj, "morph", "")) else "s"
            if low == "yo":
                return "1" + number
            if low in ("tú", "vos"):
                return "2" + number
            if low in ("usted", "él", "ella"):
                return "3" + number
            lem = lemma_of(subj, "Español")
            if lem in ("yo", "nosotros"):
                return "1" + number
            # lemma_of strips accents, so the lemma of "tú" arrives as "tu".
            if lem in ("tu", "tú", "vosotros"):
                return "2" + number
            return "3" + number

        if low in fixed_en:
            return fixed_en[low]
        number = "p" if getattr(subj, "tag_", "") in ("NNS", "NNPS") else "s"
        if low == "i":
            return "1" + number
        if low == "you":
            return "2" + number
        return "3" + number
    except Exception:
        # Best effort: callers fall back to a person hint when this is None.
        return None
def detect_person(root, src_lang: str) -> Optional[str]:
    """Return the person/number code (e.g. ``"2s"``) for the root verb.

    When the root's morphology carries a ``Person=`` feature, the code is
    built from it (person defaults to ``"3"``, number to singular). Otherwise
    the decision is delegated to ``_person_of_doc`` on the root's document.
    """
    morph = str(getattr(root, "morph", ""))
    if "Person=" not in morph:
        return _person_of_doc(root.doc, src_lang)

    person, number = "3", "s"
    for feature in morph.split("|"):
        if feature.startswith("Person="):
            person = feature.split("=")[1]
        elif feature.startswith("Number="):
            number = "p" if feature.split("=")[1] == "Plur" else "s"
    return person + number
# ------------ Mapping and phrase builders ------------
def code_es(lemma: str, target: str) -> str:
    """Return the conlang code for a Spanish lemma, or a reversible OOV token.

    *target* ``"Minimax-ASCII"`` selects Minimax; any other value selects Kōmín.
    """
    key = norm_es(lemma)
    if target == "Minimax-ASCII":
        table, encode_oov = ES2MINI, enc_oov_minimax
    else:
        table, encode_oov = ES2KOMI, enc_oov_komin
    return table.get(key) or encode_oov(key)


def code_en(lemma: str, target: str) -> str:
    """Return the conlang code for an English lemma, or a reversible OOV token.

    *target* ``"Minimax-ASCII"`` selects Minimax; any other value selects Kōmín.
    """
    key = norm_en(lemma)
    if target == "Minimax-ASCII":
        table, encode_oov = EN2MINI, enc_oov_minimax
    else:
        table, encode_oov = EN2KOMI, enc_oov_komin
    known = table.get(key) if table else None
    return known or encode_oov(key)


# Tense markers appended to the verb code; unknown tense falls back to present.
TAM_MINI = {"Pres":"P","Past":"T","Fut":"F","UNK":"P"}
TAM_KOMI = {"Pres":"Ⓟ","Past":"Ⓣ","Fut":"Ⓕ","UNK":"Ⓟ"}
def realize_minimax(doc, src_lang: str, drop_articles=True, zero_copula=True, semi_lossless=False, person_hint="2s", remove_pronouns=False):
    """Render a parsed sentence as a Minimax-ASCII string.

    Output order is verb, subjects, objects + obliques, adverbs, joined by
    spaces. The verb code carries a ``·`` tail: tense letter, then (when
    *semi_lossless*) the person code or *person_hint*, then ``N`` for negation
    and ``Q`` for a question. With *zero_copula* (and not *semi_lossless*) a
    present, affirmative, non-question ser/estar/be is omitted.
    *drop_articles* is accepted but not read by this function.
    """
    target = "Minimax-ASCII"
    from_spanish = src_lang == "Español"
    root, subs, objs, obls, advs = extract_core(doc)
    tense = detect_tense(root)
    is_q = detect_polarity(doc)
    is_neg = detect_neg(doc)

    def encode(lemma):
        return code_es(lemma, target) if from_spanish else code_en(lemma, target)

    def lemma_or_text(tok):
        return lemma_of(tok, src_lang) if USE_SPACY else getattr(tok, "text", "")

    def is_pronoun(tok):
        surface = (getattr(tok, "text", "") or "").lower()
        if from_spanish:
            return surface in PRON_ES
        return src_lang == "English" and surface in PRON_EN

    def realize_np(tokens):
        kept = (tok for tok in tokens if not (remove_pronouns and is_pronoun(tok)))
        return [encode(lemma_or_text(tok)) for tok in kept]

    if USE_SPACY:
        vlem = lemma_of(root, src_lang)
    else:
        vlem = "ser" if "?" in getattr(doc, "text", "") else "estar"

    tail = TAM_MINI.get(tense, "P")
    if semi_lossless:
        tail += (detect_person(root, src_lang) or person_hint)
    if is_neg:
        tail += "N"
    if is_q:
        tail += "Q"
    vcode = encode(vlem)
    if tail:
        vcode = f"{vcode}·{tail}"

    subjects = realize_np(subs)
    complements = realize_np(objs) + realize_np(obls)
    adverbs = [encode(lemma_or_text(adv)) for adv in advs]

    omit_copula = (
        zero_copula and not semi_lossless and vlem in ("ser", "estar", "be")
        and tense == "Pres" and not is_neg and not is_q
    )
    parts = subjects + complements + adverbs
    if not omit_copula:
        parts = [vcode] + parts
    return " ".join(p for p in parts if p)
def realize_komin(doc, src_lang: str, drop_articles=True, zero_copula=True, semi_lossless=False, person_hint="2s", remove_pronouns=False):
    """Render a parsed sentence as a Kōmín-CJK string.

    Output order is subjects (each suffixed ``ᵖ``), objects and obliques
    (each suffixed ``ᵒ``), adverbs, then the verb code followed by its tense
    marker and, when *semi_lossless*, ``[person]``. A question adds a final
    `` ?``. *drop_articles* and *zero_copula* are accepted but not read by
    this function; negation is detected but not emitted.
    """
    target = "Kōmín-CJK"
    from_spanish = src_lang == "Español"
    root, subs, objs, obls, advs = extract_core(doc)
    tense = detect_tense(root)
    is_q = detect_polarity(doc)
    is_neg = detect_neg(doc)

    def encode(lemma):
        return code_es(lemma, target) if from_spanish else code_en(lemma, target)

    def lemma_or_text(tok):
        return lemma_of(tok, src_lang) if USE_SPACY else getattr(tok, "text", "")

    def is_pronoun(tok):
        surface = (getattr(tok, "text", "") or "").lower()
        if from_spanish:
            return surface in PRON_ES
        return src_lang == "English" and surface in PRON_EN

    def realize_np(tokens, particle):
        kept = (tok for tok in tokens if not (remove_pronouns and is_pronoun(tok)))
        return [encode(lemma_or_text(tok)) + particle for tok in kept]

    if USE_SPACY:
        vlem = lemma_of(root, src_lang)
    else:
        vlem = "ser" if "?" in getattr(doc, "text", "") else "estar"
    vcode = encode(vlem)

    P_SUBJ, P_OBJ = "ᵖ", "ᵒ"
    Q_FIN = "?"
    marker = TAM_KOMI.get(tense, "Ⓟ")
    if semi_lossless:
        marker = marker + f"[{detect_person(root, src_lang) or person_hint}]"

    subjects = realize_np(subs, P_SUBJ)
    complements = realize_np(objs + obls, P_OBJ)
    adverbs = [encode(lemma_or_text(adv)) for adv in advs]

    out = " ".join(subjects + complements + adverbs + [vcode + marker])
    if is_q:
        out += " " + Q_FIN
    return out
# ------------ Sidecars (exact compression) ------------
# The payload is produced by base64.a85encode (Ascii85), whose output uses the
# characters "!" through "u" plus "z". The previous character class listed the
# Base85 alphabet instead, so payloads containing e.g. '"', ',', '.', '/', ':',
# '[' or ']' were never recognized.
SIDECAR_B85_RE = re.compile(r"\s?§\((?P<b85>[!-uz]+)\)$")


def b85_enc_raw(s: str) -> str:
    """Compress *s* with zlib (level 9) and return it as Ascii85 text."""
    return base64.a85encode(zlib.compress(s.encode("utf-8"), 9), adobe=False).decode("ascii")


def b85_dec_raw(b85s: str) -> str:
    """Inverse of ``b85_enc_raw``; raises if the payload is not valid."""
    return zlib.decompress(base64.a85decode(b85s.encode("ascii"), adobe=False)).decode("utf-8")


def attach_sidecar_b85(conlang_text: str, original_text: str) -> str:
    """Append ``§(<payload>)`` carrying the compressed original text."""
    return f"{conlang_text} §({b85_enc_raw(original_text)})"


def extract_sidecar_b85(text: str) -> Optional[str]:
    """Return the original text stored in a trailing ``§(...)`` sidecar, or None."""
    m = SIDECAR_B85_RE.search(text)
    if not m:
        return None
    try:
        return b85_dec_raw(m.group("b85"))
    except Exception:
        # Looks like a sidecar but does not decode: treat as absent.
        return None


def strip_sidecar_b85(text: str) -> str:
    """Remove a trailing ``§(...)`` sidecar, if any."""
    return SIDECAR_B85_RE.sub("", text).rstrip()


def custom_sidecar_enc(conlang_text: str, original_text: str) -> str:
    """Append `` ~<payload>`` carrying the zlib-compressed original text."""
    blob = to_custom_b64(zlib.compress(original_text.encode("utf-8"), 9), ALPHA_MINI64)
    return f"{conlang_text} ~{blob}"


def extract_custom_sidecar(text: str) -> Optional[str]:
    """Return the original text stored after the last ``~``, or None.

    The part after the last ``~`` only counts as a sidecar when it decodes and
    decompresses cleanly; a Minimax OOV token (also ``~``-prefixed) fails that
    check and yields None.
    """
    if '~' in text:
        _, blob = text.rsplit('~', 1)
        try:
            return zlib.decompress(from_custom_b64(blob, ALPHA_MINI64)).decode("utf-8")
        except Exception:
            return None
    return None


def strip_custom_sidecar(text: str) -> str:
    """Remove a trailing ``~<payload>`` sidecar, leaving everything else intact.

    The text is only cut when the part after the LAST ``~`` is a valid
    sidecar. Cutting at the first ``~`` unconditionally (the previous
    behavior) also deleted ``~``-prefixed OOV tokens, so unknown words could
    never be decoded.
    """
    if extract_custom_sidecar(text) is None:
        return text
    return text.rsplit('~', 1)[0].rstrip()
# ------------ Simple encoding / decoding ------------
def encode_simple(text: str, src_lang: str, target: str) -> str:
    """Encode *text* word by word, without any syntactic analysis.

    Every run of letters is replaced by its lexicon code (looked up by its
    normalized form) or, when unknown, by a reversible OOV token built from
    the word as written. Everything between words is left as-is. Blank input
    returns ``""``.
    """
    if not text.strip():
        return ""

    to_minimax = target == "Minimax-ASCII"

    def oov(word):
        return enc_oov_minimax(word) if to_minimax else enc_oov_komin(word)

    def replace_spanish(match):
        word = match.group(0)
        table = ES2MINI if to_minimax else ES2KOMI
        return table.get(norm_es(word)) or oov(word)

    def replace_english(match):
        word = match.group(0)
        key = norm_en(word)
        table = EN2MINI if to_minimax else EN2KOMI
        if table and key in table:
            return table[key]
        return oov(word)

    replacer = replace_spanish if src_lang == "Español" else replace_english
    return WORD_RE.sub(replacer, text)
def pluralize_es(word: str) -> str:
    """Return a simple Spanish plural of *word*.

    Rules, in order: a small exception table; final ``z`` becomes ``ces``;
    a final unstressed vowel (now including ``u``) or stressed ``á/é/ó`` takes
    ``s``; everything else takes ``es``.
    """
    exceptions = {"uno": "unos", "buen": "buenos", "hombre": "hombres"}
    if word in exceptions:
        return exceptions[word]
    if word.endswith("z"):
        return word[:-1] + "ces"
    # "u" was missing, which produced forms such as "tribues".
    if word.endswith(("a", "e", "i", "o", "u", "á", "é", "ó")):
        return word + "s"
    return word + "es"


def pluralize_en(word: str) -> str:
    """Return a simple English plural of *word*.

    Rules, in order: a small exception table; consonant + ``y`` becomes
    ``ies``; sibilant endings take ``es``; everything else takes ``s``.
    """
    exceptions = {"man": "men", "woman": "women", "child": "children"}
    if word in exceptions:
        return exceptions[word]
    if word.endswith("y") and len(word) > 1 and word[-2] not in "aeiou":
        return word[:-1] + "ies"
    if word.endswith(("s", "sh", "ch", "x", "z")):
        return word + "es"
    return word + "s"


def pluralize(word: str, tgt_lang: str) -> str:
    """Pluralize *word* with the Spanish rules for ``"Español"``, English otherwise."""
    return pluralize_es(word) if tgt_lang == "Español" else pluralize_en(word)
# Minimax verb token: "<code>·<tail>". "3" was missing from the tail class, so
# third-person tails such as "·P3s" were never recognized as verbs.
mini_tail_re = re.compile(r"^(?P<stem>.+?)·(?P<tail>[PTFNQ123sp]+)$")
# Kōmín verb token: "<code><tense marker>" optionally followed by "[person]".
_komi_verb_re = re.compile(r"^(?P<stem>.+?)(?P<tam>[ⓅⓉⒻ])(?:\[(?P<person>[^\]]*)\])?$")
_MINI_TENSES = {"P": "Pres", "T": "Past", "F": "Fut"}
_KOMI_TENSES = {"Ⓟ": "Pres", "Ⓣ": "Past", "Ⓕ": "Fut"}


def _parse_mini_tail(tail: str):
    """Split a Minimax verb tail into ``(tense, person, is_neg, has_q)``.

    Missing parts fall back to present tense, ``"3s"``, no negation and no
    question, so a tail without a leading tense letter is still handled.
    """
    tense, person = "Pres", "3s"
    pos = 0
    if tail and tail[0] in _MINI_TENSES:
        tense = _MINI_TENSES[tail[0]]
        pos = 1
    if len(tail) > pos and tail[pos] in "123":
        digit = tail[pos]
        pos += 1
        number = "s"
        if len(tail) > pos and tail[pos] in "sp":
            number = tail[pos]
            pos += 1
        person = digit + number
    flags = tail[pos:]
    return tense, person, "N" in flags, "Q" in flags


def _format_question(sentence: str, tgt_lang: str) -> str:
    """Capitalize *sentence* and wrap it in question marks for *tgt_lang*."""
    start_q = "¿" if tgt_lang == "Español" else ""
    return f"{start_q}{sentence.capitalize()}?"


def decode_simple(text: str, source: str, tgt_lang: str) -> str:
    """Decode a conlang string back to Spanish or English, word by word.

    Args:
        text: conlang text without any sidecar.
        source: ``"Kōmín-CJK"`` for Kōmín; any other value is read as Minimax.
        tgt_lang: ``"Español"`` for Spanish; any other value means English.

    Returns:
        The decoded words joined by spaces, in the conlang's word order. The
        verb token is conjugated from its tense/person markers, ``[PL]`` marks
        trigger pluralization (Minimax), unknown codes are passed through, and
        OOV tokens are decoded. Questions are capitalized and punctuated.
        Blank input returns ``""``.
    """
    if not text.strip():
        return ""

    from_komin = source == "Kōmín-CJK"
    code2es = MINI2ES if source == "Minimax-ASCII" else KOMI2ES
    code2en = MINI2EN if source == "Minimax-ASCII" else KOMI2EN
    is_oov = is_oov_komin if from_komin else is_oov_minimax
    dec_oov = dec_oov_komin if from_komin else dec_oov_minimax
    to_spanish = tgt_lang == "Español"
    conjugate = _es_conj if to_spanish else _en_conj

    def lookup(code):
        word = code2es.get(code)
        if not to_spanish:
            # Prefer the English lemma, fall back to the Spanish one.
            word = (code2en.get(code) if code2en else None) or word
        if not word:
            # The raw code used to short-circuit this for English targets, so
            # OOV tokens were never decoded there.
            word = dec_oov(code) if is_oov(code) else code
        return word

    if from_komin:
        # Accept full-width question marks and ideographic spaces as well.
        text = text.replace("\uff1f", "?").replace("\u3000", " ")
        words, has_q = [], False
        for tok in text.split():
            if tok == "?":
                has_q = True
                continue
            verb = _komi_verb_re.match(tok)
            if verb:
                lemma = lookup(verb.group("stem"))
                tense = _KOMI_TENSES[verb.group("tam")]
                words.append(conjugate(lemma, tense, verb.group("person") or "3s"))
            else:
                # Drop the subject/object particle before the lexicon lookup.
                words.append(lookup(tok.rstrip("ᵖᵒ") or tok))
        sentence = " ".join(words)
        return _format_question(sentence, tgt_lang) if has_q else sentence

    tokens = text.split()
    if not tokens:
        return ""

    # lemmas[i] and plural[i] always describe the same token.
    lemmas, plural = [], []
    verb_idx = -1
    verb_lemma = None
    verb_tense, verb_person = "Pres", "3s"
    has_q = is_neg = False
    for part in tokens:
        had_pl = "[PL]" in part
        look = part.replace("[PL]", "")
        m = mini_tail_re.match(look)
        if m:
            # When several verb tokens appear, the last one is the one conjugated.
            verb_idx = len(lemmas)
            verb_lemma = lookup(m.group("stem"))
            verb_tense, verb_person, is_neg, has_q = _parse_mini_tail(m.group("tail"))
            lemmas.append(verb_lemma)
            plural.append(False)
            continue
        lemmas.append(lookup(look))
        plural.append(had_pl)

    out_parts = []
    for idx, lem in enumerate(lemmas):
        if idx == verb_idx:
            v = conjugate(verb_lemma, verb_tense, verb_person)
            if is_neg:
                v = ("no " if to_spanish else "not ") + v
            out_parts.append(v)
        else:
            out_parts.append(pluralize(lem, tgt_lang) if plural[idx] else lem)

    out_text = " ".join(out_parts)
    if has_q:
        out_text = _format_question(out_text, tgt_lang)
    return out_text
# ------------ Minimal conjugators ------------
_ES_PRESENT = {
    "ar": {"1s": "o", "2s": "as", "3s": "a", "1p": "amos", "2p": "áis", "3p": "an"},
    "er": {"1s": "o", "2s": "es", "3s": "e", "1p": "emos", "2p": "éis", "3p": "en"},
    "ir": {"1s": "o", "2s": "es", "3s": "e", "1p": "imos", "2p": "ís", "3p": "en"},
}
# The -er/-ir third-person plural ending is "ieron" (comieron, vivieron); the
# previous "rieron" produced forms such as "comrieron".
_ES_PRETERITE = {
    "ar": {"1s": "é", "2s": "aste", "3s": "ó", "1p": "amos", "2p": "asteis", "3p": "aron"},
    "er": {"1s": "í", "2s": "iste", "3s": "ió", "1p": "imos", "2p": "isteis", "3p": "ieron"},
    "ir": {"1s": "í", "2s": "iste", "3s": "ió", "1p": "imos", "2p": "isteis", "3p": "ieron"},
}
_ES_FUTURE = {"1s": "é", "2s": "ás", "3s": "á", "1p": "emos", "2p": "éis", "3p": "án"}


def _es_conj_regular(lemma, tense, person):
    """Conjugate a regular Spanish infinitive.

    Args:
        lemma: infinitive; anything not ending in -ar/-er/-ir is returned as-is.
        tense: ``"Pres"`` or ``"Past"`` (preterite); any other value is
            conjugated as future.
        person: ``"1s"``…``"3p"``; unknown codes fall back to ``"3s"``.
    """
    if not lemma.endswith(("ar", "er", "ir")):
        return lemma
    stem, vtype = lemma[:-2], lemma[-2:]
    if tense == "Pres":
        endings = _ES_PRESENT[vtype]
        return stem + endings.get(person, endings["3s"])
    if tense == "Past":
        endings = _ES_PRETERITE[vtype]
        return stem + endings.get(person, endings["3s"])
    # Future endings attach to the full infinitive.
    return lemma + _ES_FUTURE.get(person, _ES_FUTURE["3s"])
_ES_IRREGULAR = {
    "ser": {
        "Pres": {"1s": "soy", "2s": "eres", "3s": "es", "1p": "somos", "2p": "sois", "3p": "son"},
        "Past": {"1s": "fui", "2s": "fuiste", "3s": "fue", "1p": "fuimos", "2p": "fuisteis", "3p": "fueron"},
        "Fut": {"1s": "seré", "2s": "serás", "3s": "será", "1p": "seremos", "2p": "seréis", "3p": "serán"},
    },
    "estar": {
        "Pres": {"1s": "estoy", "2s": "estás", "3s": "está", "1p": "estamos", "2p": "estáis", "3p": "están"},
        # 1s is "estuve"; it used to repeat the 2s form "estuviste".
        "Past": {"1s": "estuve", "2s": "estuviste", "3s": "estuvo", "1p": "estuvimos", "2p": "estuvisteis", "3p": "estuvieron"},
        "Fut": {"1s": "estaré", "2s": "estarás", "3s": "estará", "1p": "estaremos", "2p": "estaréis", "3p": "estarán"},
    },
    "ir": {
        "Pres": {"1s": "voy", "2s": "vas", "3s": "va", "1p": "vamos", "2p": "vais", "3p": "van"},
        "Past": {"1s": "fui", "2s": "fuiste", "3s": "fue", "1p": "fuimos", "2p": "fuisteis", "3p": "fueron"},
        "Fut": {"1s": "iré", "2s": "irás", "3s": "irá", "1p": "iremos", "2p": "iréis", "3p": "irán"},
    },
}


def _es_conj(lemma, tense, person):
    """Conjugate a Spanish verb: ser/estar/ir from tables, the rest as regular.

    Args:
        lemma: infinitive.
        tense: ``"Pres"``, ``"Past"`` or ``"Fut"``; any other value is treated
            as future, matching ``_es_conj_regular`` (it used to raise
            ``KeyError`` for the irregular verbs).
        person: ``"1s"``…``"3p"``; unknown codes fall back to ``"3s"``.
    """
    table = _ES_IRREGULAR.get(lemma)
    if table is None:
        return _es_conj_regular(lemma, tense, person)
    forms = table.get(tense, table["Fut"])
    return forms.get(person, forms["3s"])
def _en_conj(lemma, tense, person):
    """Conjugate an English verb for simple present/past; other tenses return the base form.

    "be", "have", "go" and "do" use fixed forms. Regular verbs take the usual
    third-person singular ending in the present and ``-d/-ied/-ed`` in the
    past. Unknown person codes for "be" fall back to "is"/"was".
    """
    third_singular = tense == "Pres" and person == "3s"

    if lemma == "be":
        be_forms = {
            "Pres": ({"1s": "am", "2s": "are", "3s": "is", "1p": "are", "2p": "are", "3p": "are"}, "is"),
            "Past": ({"1s": "was", "2s": "were", "3s": "was", "1p": "were", "2p": "were", "3p": "were"}, "was"),
        }
        if tense not in be_forms:
            return "be"
        forms, fallback = be_forms[tense]
        return forms.get(person, fallback)

    # lemma -> (third-person singular present, past)
    irregular = {"have": ("has", "had"), "go": ("goes", "went"), "do": ("does", "did")}
    if lemma in irregular:
        third_form, past_form = irregular[lemma]
        if tense == "Past":
            return past_form
        return third_form if third_singular else lemma

    consonant_y = lemma.endswith("y") and (len(lemma) < 2 or lemma[-2] not in "aeiou")

    if tense == "Pres":
        if person != "3s":
            return lemma
        if consonant_y:
            return lemma[:-1] + "ies"
        suffix = "es" if lemma.endswith(("s", "sh", "ch", "x", "z", "o")) else "s"
        return lemma + suffix

    if tense == "Past":
        if lemma.endswith("e"):
            return lemma + "d"
        if consonant_y:
            return lemma[:-1] + "ied"
        return lemma + "ed"

    return lemma
# ================= Build / translate helpers =================
def _build_with_spacy(text: str, src_lang: str, target: str, drop_articles: bool, zero_copula: bool, semi_lossless: bool, remove_pronouns: bool) -> str:
    """Parse *text* with the spaCy pipeline for *src_lang* and realize it in *target*.

    ``"Español"`` selects the Spanish pipeline, anything else the English one;
    ``"Minimax-ASCII"`` selects the Minimax realizer, anything else Kōmín.
    """
    pipeline = nlp_es if src_lang == "Español" else nlp_en
    realize = realize_minimax if target == "Minimax-ASCII" else realize_komin
    return realize(
        pipeline(text), src_lang, drop_articles, zero_copula, semi_lossless,
        remove_pronouns=remove_pronouns,
    )
def build_sentence(text: str, src_lang: str, target: str, drop_articles: bool, zero_copula: bool, mode: str, max_comp_exact: bool = False, remove_pronouns: bool = False) -> str:
    """Convert natural-language *text* into the *target* conlang.

    Uses the spaCy-based realizers when spaCy is available, otherwise the
    word-by-word encoder. Construction is always semi-lossless, which also
    means the zero-copula flag passed down is always False. With
    *max_comp_exact* the compressed original is appended as a ``~`` sidecar.
    *mode* is accepted but not read. Blank input returns ``""``.
    """
    if not text.strip():
        return ""

    semi = True  # construction is always semi-lossless
    if USE_SPACY:
        core = _build_with_spacy(
            text, src_lang, target, drop_articles, zero_copula and not semi, semi, remove_pronouns
        )
    else:
        core = encode_simple(text, src_lang, target)

    if not max_comp_exact:
        return core
    return custom_sidecar_enc(core, text)
def universal_translate(text: str, src: str, tgt: str, drop_articles: bool, zero_copula: bool, mode: str, max_comp_exact: bool = False, remove_pronouns: bool = False) -> str:
    """Translate *text* between any two of Español, English, Minimax-ASCII and Kōmín-CJK.

    - Blank input returns ``""``; identical source and target return *text*.
    - Natural → conlang delegates to ``build_sentence``.
    - Conlang → natural returns the sidecar's original text when one is
      present, otherwise the dictionary-based decoding.
    - Natural → natural returns *text* unchanged.
    - Conlang → conlang decodes to Spanish lemmas and re-encodes them,
      carrying an existing ``~`` sidecar over to the result.
    - Any other combination returns ``"[No soportado]"``.
    """
    natural = ("Español", "English")
    conlang = ("Minimax-ASCII", "Kōmín-CJK")

    if not text.strip():
        return ""
    if src == tgt:
        return text

    src_natural, tgt_natural = src in natural, tgt in natural
    src_conlang, tgt_conlang = src in conlang, tgt in conlang

    # Natural → conlang
    if src_natural and tgt_conlang:
        return build_sentence(text, src, tgt, drop_articles, zero_copula, mode, max_comp_exact, remove_pronouns)

    # Conlang → natural (sidecars take precedence)
    if src_conlang and tgt_natural:
        for extract in (extract_custom_sidecar, extract_sidecar_b85):
            original = extract(text)
            if original is not None:
                return original
        bare = strip_custom_sidecar(strip_sidecar_b85(text))
        return decode_simple(bare, src, tgt)

    # Natural ↔ natural: passed through unchanged
    if src_natural and tgt_natural:
        return text

    # Conlang ↔ conlang: pivot through Spanish lemmas
    if src_conlang and tgt_conlang:
        to_minimax = tgt == "Minimax-ASCII"
        core = strip_custom_sidecar(text)
        es_lemmas = decode_simple(core, src, "Español")
        recoded = []
        for piece in re.findall(r"\w+|[^\w\s]+", es_lemmas):
            if re.fullmatch(r"\w+", piece) is None:
                recoded.append(piece)  # punctuation is kept verbatim
                continue
            code = ES2MINI.get(norm_es(piece)) if to_minimax else ES2KOMI.get(norm_es(piece))
            if not code:
                code = enc_oov_minimax(piece) if to_minimax else enc_oov_komin(piece)
            recoded.append(code)
        out_text = " ".join(recoded)
        original = extract_custom_sidecar(text)
        if original is not None:
            return custom_sidecar_enc(out_text, original or "")
        return out_text

    return "[No soportado]"
# =====================================================================================
# ========================= UI bilingüe y explicaciones claras ========================
# =====================================================================================
ALL_LANGS = ["Español","English","Minimax-ASCII","Kōmín-CJK"]
# ---------- Explicaciones ----------
EXPLAIN_TAB_TRANSLATE_ES = """
**¿Qué hace “Traducir”?**
Convierte lo que escribes en **Texto** al **Destino** que elijas (ES/EN/Minimax/Kōmín).
- Con **Máx. Compresión Exacta**, añade un final ~... con el **original comprimido** para recuperarlo tal cual al decodificar.
- Las casillas de **compactación** (artículos, cópula, pronombres) **sólo se aplican si el Destino es conlang**.
"""
EXPLAIN_TAB_BUILD_ES = """
**¿Qué hace “Construir (ES/EN → Conlang)”?**
Obliga a que la salida sea **Minimax** o **Kōmín** (desde ES/EN). Aplica el orden y las partículas del conlang y las opciones de **compactación**.
"""
EXPLAIN_TAB_DECODE_ES = """
**¿Qué hace “Decodificar (Conlang → ES/EN)”?**
Convierte de **Minimax/Kōmín** a **Español/Inglés**.
- Si el texto trae ~..., devolvemos el **original exacto**.
- Si no, reconstruimos lo más fiel posible con el **diccionario**.
"""
EXPLAIN_TAB_ROUNDTRIP_ES = """
**¿Qué hace “Prueba ida→vuelta”?**
Hace el camino **(ES/EN → Conlang) → (Conlang → ES/EN)** para comprobar la **reversibilidad**.
Con **exacta**, la vuelta coincide **bit a bit**.
"""
EXPLAIN_CHECKBOX_ES = """
**Opciones de compactación (para conlang):**
- **Omitir artículos** (*el/la/los/las*; *a/an/the*): ahorro típico **~10–15%**.
- **Cópula cero** (presente afirmativo): oculta *ser/estar/be* → **~5–10%** extra.
- **Quitar pronombres**: suprime pronombres obvios → ahorro **variable**.
- **Máx. Compresión Exacta**: añade ~... para recuperar el original (en >100 caracteres, **~40–60%**; en textos muy cortos puede no reducir).
**Guía rápida:** sin casillas **0%**; artículos+cópula **~15–20%**.
"""
# ¿Qué son los lenguajes?
EXPLAIN_CONLANGS_ES = """
### ¿Qué son Minimax-ASCII y Kōmín-CJK?
Piensa en **dos “idiomas comprimidos”** que sirven para escribir frases de ES/EN con menos caracteres y, además,
**poder volver al original**. Son como “zip para texto”, pero legibles.
---
#### 1) Minimax-ASCII (compacto y tecleable)
- Usa **sólo ASCII**, así que funciona en cualquier sitio (correo, móvil, código).
- Cada **palabra** se cambia por un **código corto** (por frecuencia, lo común es más corto).
- Los **verbos** llevan una colita con marcas:
- **·P / ·T / ·F** → Presente / Pasado / Futuro
- **1s, 2p, 3s…** → Persona y número (1=yo/nosotros, 2=tú/vosotros, 3=él/ellos; s=singular, p=plural)
- **N** → negación; **Q** → pregunta
- **Ejemplo**: “**¿Estás bien?**” → `k·P2sQ` (estar, Presente, 2ª persona, pregunta)
**Cuándo usarlo**: si quieres **máxima compatibilidad** y **tamaño pequeño** sin símbolos raros.
---
#### 2) Kōmín-CJK (visual y ultracorto)
- Usa ideogramas CJK para **aún más compresión** y un aspecto muy limpio.
- Añade **partículas**:
- `ᵖ` marca el **sujeto**, `ᵒ` marca el **objeto**.
- El verbo lleva un **círculo de tiempo**:
- **Ⓟ / Ⓣ / Ⓕ** → Presente / Pasado / Futuro
- Las **preguntas** suelen acabar en **?**.
- **Ejemplo**: “**Los estudiantes leen libros.**” → `学生ᵖ 书ᵒ 读Ⓟ`
**Cuándo usarlo**: si buscas **máxima compresión** y no te importa usar caracteres CJK.
---
#### ¿Y si falta una palabra?
- Si una palabra no está en el diccionario, se guarda **de forma reversible**:
- En **Minimax**: `~A9f...` (base64 propio).
- En **Kōmín**: `「...」`.
Así **no se pierde nada**.
#### “Compresión exacta” (el `~...`)
- Opcionalmente se añade un **sidecar** `~...` con el **original comprimido**.
- Si existe, al decodificar se recupera el **original al 100%** (puntuación, mayúsculas, etc.).
- En textos largos ahorra mucho, con **ida/vuelta perfecta**.
---
#### Mini-glosario
- **Código**: forma corta de una palabra (p. ej., `g` para “que”).
- **Partícula**: marca de función (sujeto `ᵖ`, objeto `ᵒ`).
- **Cola verbal** (Minimax): `·P/·T/·F`, persona (`1s`, `3p`), `N`, `Q`.
- **Sidecar**: `~...` con el original comprimido para **reconstruir exacto**.
> Resumen: Minimax-ASCII = **universal y tecleable**. Kōmín-CJK = **más corto y visual**. Ambos son **reversibles** y aceptan **sidecar exacto**.
"""
# EN
EXPLAIN_TAB_TRANSLATE_EN = "Converts **Text → Target** (ES/EN/Minimax/Kōmín). With **Max Exact**, adds ~... to recover the **exact original**. Compaction checkboxes apply only when **Target is conlang**."
EXPLAIN_TAB_BUILD_EN = "Forces **conlang output** (Minimax/Kōmín) from ES/EN, applying phrasing rules and compaction options."
EXPLAIN_TAB_DECODE_EN = "Converts **Minimax/Kōmín → ES/EN**. If ~... exists, returns the bit-perfect original; else semi-lossless."
EXPLAIN_TAB_ROUNDTRIP_EN = "Runs **(ES/EN→Conlang)→(Conlang→ES/EN)** to verify reversibility; with exact, it’s bit-for-bit."
EXPLAIN_CHECKBOX_EN = "Drop articles ~10–15%, Zero copula ~5–10% extra, Remove pronouns variable, Max Exact 40–60% for >100 chars."
EXPLAIN_CONLANGS_EN = """
### What are Minimax-ASCII and Kōmín-CJK?
Think of **two “compressed languages”** that let you write ES/EN sentences with fewer characters while you can still
**recover the original**. Like a human-readable “zip” for text.
---
#### 1) Minimax-ASCII (compact & typeable)
- Uses **ASCII only**, so it works everywhere (email, phones, code editors).
- Each **word** becomes a **short code** (high-frequency words get the shortest codes).
- **Verbs** get a small **tail**:
- **·P / ·T / ·F** → Present / Past / Future
- **1s, 2p, 3s…** → Person & number (1=I/we, 2=you, 3=he/they; s=singular, p=plural)
- **N** → negation; **Q** → question
- **Example**: “**Are you okay?**” → `k·P2sQ` (be, Present, 2nd person, question)
**When to use**: you want **maximum compatibility** and **small size** without special symbols.
---
#### 2) Kōmín-CJK (visual & ultra-short)
- Uses CJK ideograms for **even tighter compression** and a clean visual look.
- Adds **particles**:
- `ᵖ` marks the **subject**, `ᵒ` marks the **object**.
- Verb shows a **time bubble**:
- **Ⓟ / Ⓣ / Ⓕ** → Present / Past / Future
- **Questions** usually end with **?**.
- **Example**: “**Students read books.**” → `学生ᵖ 书ᵒ 读Ⓟ`
**When to use**: you want **maximum compression** and you’re fine with CJK.
---
#### Unknown words?
- If a word isn’t in the lexicon, it’s kept **reversibly**:
- In **Minimax**: `~A9f...` (custom base64).
- In **Kōmín**: `「...」`.
Nothing is lost.
#### “Exact compression” (the `~...` sidecar)
- Optionally appends `~...` with the **compressed original**.
- If present, decoding reproduces the **exact original** (punctuation, casing, etc.).
- Great for longer texts: big savings with **perfect round-trip**.
---
#### Tiny glossary
- **Code**: short form for a word (e.g., `g` for “that/que”).
- **Particle**: role marker (subject `ᵖ`, object `ᵒ`).
- **Verb tail** (Minimax): `·P/·T/·F`, person (`1s`, `3p`), `N`, `Q`.
- **Sidecar**: `~...` holding the compressed original for **bit-perfect recovery**.
> TL;DR: Minimax-ASCII = **universal & typeable**. Kōmín-CJK = **shortest & visual**. Both are **reversible** and support the **exact sidecar**.
"""
# Léxico (amigable)
LEXICON_FRIENDLY_ES = """
**¿De dónde sale el “diccionario” (léxico) y para qué sirve?**
- Usamos **WordNet (OMW)** para listar palabras españolas y sus equivalentes en inglés.
- Limpiamos y ordenamos por **frecuencia de uso**.
- Asignamos un **código corto** a cada lema para **Minimax** y para **Kōmín**.
- Guardamos tres archivos que la app usa al traducir:
- lexicon_minimax.json (ES → Minimax)
- lexicon_komin.json (ES → Kōmín)
- lexicon_master.json (ES + EN + ambos códigos)
**Así** podemos convertir tus frases en **códigos compactos** y volver a texto entendible.
"""
LEXICON_FRIENDLY_EN = "We use **WordNet (OMW)**, pair ES words with EN, sort by frequency, assign short codes (Minimax/Kōmín), and save three JSONs so the app can encode/decode compactly."
# ---------- Compaction and preview utilities ----------
def _pct_comp(original: str, result: str) -> float:
    """Return how much shorter *result* is than *original*, as a percentage.

    Lengths are measured in characters. The value is clamped at 0 when the
    result is longer, and is 0.0 for an empty original.
    """
    if not original:
        return 0.0
    saved_fraction = 1.0 - (len(result) / len(original))
    return max(0.0, 100.0 * saved_fraction)
def compaction_line_es(text, src, tgt, drop, zero, rm, maxc) -> str:
    """Build the Spanish Markdown line reporting compaction percentages.

    Compares the input length with the conlang output built without options,
    with the chosen options and, when *maxc* is set, with the exact sidecar.
    Returns "—" for blank input and a notice when *tgt* is not a conlang.
    """
    if not text.strip():
        return "—"
    if tgt not in ("Minimax-ASCII","Kōmín-CJK"):
        return "La compactación aplica cuando el **Destino** es Minimax/Kōmín."

    def saved(drop_flag, zero_flag, exact_flag, rm_flag):
        built = build_sentence(text, src, tgt, drop_flag, zero_flag, "Semi-lossless", exact_flag, rm_flag)
        return _pct_comp(text, built)

    base_pct = saved(False, False, False, False)
    curr_pct = saved(drop, zero, False, rm)
    msg = f"**Base (sin casillas):** {base_pct:.1f}% · **Con tus opciones:** {curr_pct:.1f}%"
    if maxc:
        exact_pct = saved(drop, zero, True, rm)
        msg += f" · **Con sidecar ~...:** {exact_pct:.1f}%"
    return msg
def compaction_line_en(text, src, tgt, drop, zero, rm, maxc) -> str:
    """Build the English Markdown line reporting compaction percentages.

    Compares the input length with the conlang output built without options,
    with the chosen options and, when *maxc* is set, with the exact sidecar.
    Returns "—" for blank input and a notice when *tgt* is not a conlang.
    """
    if not text.strip():
        return "—"
    if tgt not in ("Minimax-ASCII","Kōmín-CJK"):
        return "Compaction applies when **Target** is Minimax/Kōmín."

    def saved(drop_flag, zero_flag, exact_flag, rm_flag):
        built = build_sentence(text, src, tgt, drop_flag, zero_flag, "Semi-lossless", exact_flag, rm_flag)
        return _pct_comp(text, built)

    base_pct = saved(False, False, False, False)
    curr_pct = saved(drop, zero, False, rm)
    msg = f"**Base (no options):** {base_pct:.1f}% · **With your options:** {curr_pct:.1f}%"
    if maxc:
        exact_pct = saved(drop, zero, True, rm)
        msg += f" · **With ~... sidecar:** {exact_pct:.1f}%"
    return msg
def master_preview(n: int = 20) -> List[List[Any]]:
    """Return a header row plus the first *n* master-lexicon entries as table rows.

    Each row is ``[lemma_es, lemma_en, minimax, komin]`` with ``""`` for
    missing fields. On any error a single "(no data)" row follows the header.
    """
    columns = ["lemma_es","lemma_en","minimax","komin"]
    try:
        entries = (MASTER_OBJ or {}).get("entries", [])
        limit = max(0, int(n))
        body = [[entry.get(col, "") for col in columns] for entry in entries[:limit]]
        return [columns] + body
    except Exception:
        return [["lemma_es","lemma_en","minimax","komin"], ["(no data)","","",""]]
# ---------- Panels (one visible depending on "mode") ----------
def make_panel_translate(lang="ES"):
    """Build the "Translate" panel and return its (initially visible) group.

    *lang* ``"ES"`` selects Spanish labels; any other value selects English.
    Every input change re-runs the translation and refreshes both the output
    box and the compaction report.
    """
    spanish_ui = lang == "ES"

    def pick(es_text, en_text):
        return es_text if spanish_ui else en_text

    with gr.Group(visible=True) as panel:
        with gr.Accordion(pick("🔁 Traducir — ayuda", "🔁 Translate — help"), open=False):
            gr.Markdown(pick(EXPLAIN_TAB_TRANSLATE_ES, EXPLAIN_TAB_TRANSLATE_EN))
        with gr.Row():
            src_box = gr.Dropdown(ALL_LANGS, value=pick("Español", "English"), label=pick("Fuente", "Source"))
            tgt_box = gr.Dropdown(ALL_LANGS, value="Minimax-ASCII", label=pick("Destino", "Target"))
        text_box = gr.Textbox(
            lines=3,
            label=pick("Texto", "Text"),
            placeholder=pick("Ej.: Hola, ¿cómo estás?", "e.g., Hello, how are you?"),
            show_copy_button=True,
        )
        with gr.Row():
            drop_box = gr.Checkbox(True, label=pick("Omitir artículos (ES/EN → conlang)", "Drop articles (ES/EN → conlang)"))
            zero_box = gr.Checkbox(False, label=pick("Cópula cero (presente afirm.)", "Zero copula (present affirmative)"))
            pron_box = gr.Checkbox(False, label=pick("Quitar pronombres", "Remove pronouns"))
            exact_box = gr.Checkbox(False, label=pick("Máx. Compresión Exacta (sidecar ~...)", "Max Exact Compression (sidecar ~...)"))
        # Hidden control that only supplies the fixed mode value to the callback.
        mode_hidden = gr.Dropdown(["Semi-lossless"], value="Semi-lossless", visible=False)
        out_box = gr.Textbox(lines=6, label=pick("Traducción", "Translation"), show_copy_button=True)
        report = gr.Markdown("")

        def run(text, source, target, drop, zero, mode, exact, no_pron):
            if not text.strip():
                return "", ""
            translated = universal_translate(text, source, target, drop, zero, mode, exact, no_pron)
            describe = compaction_line_es if spanish_ui else compaction_line_en
            return translated, describe(text, source, target, drop, zero, no_pron, exact)

        callback_inputs = [text_box, src_box, tgt_box, drop_box, zero_box, mode_hidden, exact_box, pron_box]
        for control in [text_box, src_box, tgt_box, drop_box, zero_box, pron_box, exact_box]:
            control.change(run, callback_inputs, [out_box, report])
    return panel
def make_panel_build(lang="ES"):
    """Build the "Build" panel (ES/EN source, conlang target) and return its group.

    The group is created hidden. ``lang`` selects the caption language:
    "ES" gives Spanish captions, any other value gives English ones.
    Changing any input calls ``build_sentence`` again and refreshes the
    compaction report.
    """
    spanish = lang == "ES"

    def caption(es_text, en_text):
        # Every caption exists as an ES/EN pair; pick the one for this panel.
        return es_text if spanish else en_text

    with gr.Group(visible=False) as panel:
        with gr.Accordion(caption("🛠️ Construir — ayuda", "🛠️ Build — help"), open=False):
            gr.Markdown(EXPLAIN_TAB_BUILD_ES if spanish else EXPLAIN_TAB_BUILD_EN)

        with gr.Row():
            src = gr.Dropdown(
                ["Español", "English"],
                value=caption("Español", "English"),
                label=caption("Fuente", "Source"),
            )
            tgt = gr.Dropdown(
                ["Minimax-ASCII", "Kōmín-CJK"],
                value="Minimax-ASCII",
                label="Conlang",
            )

        text = gr.Textbox(lines=3, label=caption("Frase", "Sentence"), show_copy_button=True)

        with gr.Row():
            drop = gr.Checkbox(True, label=caption("Omitir artículos", "Drop articles"))
            zero = gr.Checkbox(
                False,
                label=caption("Cópula cero (presente afirm.)", "Zero copula (present affirmative)"),
            )
            rmpr = gr.Checkbox(False, label=caption("Quitar pronombres", "Remove pronouns"))
            exact = gr.Checkbox(
                False,
                label=caption("Máx. Compresión Exacta", "Max Exact Compression"),
            )

        # Invisible control that only feeds the fixed mode string to the callback.
        mode_hidden = gr.Dropdown(["Semi-lossless"], value="Semi-lossless", visible=False)
        out = gr.Textbox(lines=6, label=caption("Salida", "Output"), show_copy_button=True)
        comp = gr.Markdown("")

        def run(text, s, t, d, z, m, e, r):
            # Blank input clears both the output and the report.
            if text.strip():
                built = build_sentence(text, s, t, d, z, m, e, r)
                report_fn = compaction_line_es if spanish else compaction_line_en
                return built, report_fn(text, s, t, d, z, r, e)
            return "", ""

        callback_inputs = [text, src, tgt, drop, zero, mode_hidden, exact, rmpr]
        callback_outputs = [out, comp]
        for trigger in (text, src, tgt, drop, zero, rmpr, exact):
            trigger.change(run, callback_inputs, callback_outputs)

    return panel
def make_panel_decode(lang="ES"):
    """Build the "Decode" panel (conlang source, ES/EN target) and return its group.

    The group is created hidden. ``lang`` selects the caption language:
    "ES" gives Spanish captions, any other value gives English ones.
    """
    spanish = lang == "ES"

    def caption(es_text, en_text):
        # Every caption exists as an ES/EN pair; pick the one for this panel.
        return es_text if spanish else en_text

    with gr.Group(visible=False) as panel:
        with gr.Accordion(caption("🗝️ Decodificar — ayuda", "🗝️ Decode — help"), open=False):
            gr.Markdown(EXPLAIN_TAB_DECODE_ES if spanish else EXPLAIN_TAB_DECODE_EN)

        with gr.Row():
            src = gr.Dropdown(
                ["Minimax-ASCII", "Kōmín-CJK"],
                value="Minimax-ASCII",
                label=caption("Fuente", "Source"),
            )
            tgt = gr.Dropdown(
                ["Español", "English"],
                value=caption("Español", "English"),
                label=caption("Destino", "Target"),
            )

        text = gr.Textbox(
            lines=3,
            label=caption("Texto en conlang (puede incluir ~...)", "Conlang text (may include ~...)"),
            show_copy_button=True,
        )
        out = gr.Textbox(lines=6, label=caption("Salida", "Output"), show_copy_button=True)

        def run(t, s, d):
            if not t.strip():
                return ""
            # A sidecar result wins over decoding; the custom extractor is tried first.
            for extractor in (extract_custom_sidecar, extract_sidecar_b85):
                recovered = extractor(t)
                if recovered is not None:
                    return recovered
            # Neither extractor returned anything: strip both sidecar forms, then decode.
            bare = strip_custom_sidecar(strip_sidecar_b85(t))
            return decode_simple(bare, s, d)

        callback_inputs = [text, src, tgt]
        for trigger in (text, src, tgt):
            trigger.change(run, callback_inputs, [out])

    return panel
def make_panel_roundtrip(lang="ES"):
    """Build the "Round-trip" panel and return its container group.

    The group is created hidden. ``lang`` selects the caption language:
    "ES" gives Spanish captions, any other value gives English ones.
    The callback translates source -> conlang and then conlang -> source,
    showing both results.
    """
    spanish = lang == "ES"

    def caption(es_text, en_text):
        # Every caption exists as an ES/EN pair; pick the one for this panel.
        return es_text if spanish else en_text

    with gr.Group(visible=False) as panel:
        with gr.Accordion(caption("🔄 Prueba ida→vuelta — ayuda", "🔄 Round-trip — help"), open=False):
            gr.Markdown(EXPLAIN_TAB_ROUNDTRIP_ES if spanish else EXPLAIN_TAB_ROUNDTRIP_EN)

        with gr.Row():
            src = gr.Dropdown(
                ["Español", "English"],
                value=caption("Español", "English"),
                label=caption("Fuente", "Source"),
            )
            tgt = gr.Dropdown(
                ["Minimax-ASCII", "Kōmín-CJK"],
                value="Minimax-ASCII",
                label="Conlang",
            )

        text = gr.Textbox(lines=3, label=caption("Frase", "Sentence"), show_copy_button=True)
        exact = gr.Checkbox(False, label=caption("Máx. Compresión Exacta", "Max Exact Compression"))
        # Invisible control that only feeds the fixed mode string to the callback.
        mode_hidden = gr.Dropdown(["Semi-lossless"], value="Semi-lossless", visible=False)
        out1 = gr.Textbox(lines=3, label=caption("Conlang (ida)", "Outward (conlang)"), show_copy_button=True)
        out2 = gr.Textbox(lines=3, label=caption("Vuelta", "Back"), show_copy_button=True)

        def run(t, s, c, m, e):
            if not t.strip():
                return "", ""
            # Both legs use fixed options: drop articles on, zero copula off,
            # pronoun removal off.
            outward = universal_translate(t, s, c, True, False, m, e, False)
            returned = universal_translate(outward, c, s, True, False, m, e, False)
            return outward, returned

        callback_inputs = [text, src, tgt, mode_hidden, exact]
        callback_outputs = [out1, out2]
        for trigger in (text, src, tgt, exact):
            trigger.change(run, callback_inputs, callback_outputs)

    return panel
# ---------- Página (ES/EN), con “modos” como BOTONES (Radio) ----------
with gr.Blocks(title="Universal Conlang Translator", theme=gr.themes.Soft()) as demo:
    # --- Dictionary downloads (PDF) ---
    gr.Markdown("### 📥 Diccionarios (PDF)")
    # Small CSS override that visually shrinks the two download buttons.
    gr.HTML("""
<style>
#btn_pdf_es button, #btn_pdf_en button {
font-size: 0.85rem;
padding: 0.4rem 0.8rem;
}
</style>
""")
    with gr.Row():
        gr.DownloadButton(
            label="⬇️ Español >> Minimax-ASCII y Kōmín-CJK (PDF)",
            value="dictionary_ES_to_Minimax_Komin.pdf",
            elem_id="btn_pdf_es",
            variant="secondary",
        )
        gr.DownloadButton(
            label="⬇️ English >> Minimax-ASCII y Kōmín-CJK (PDF)",
            value="dictionary_EN_to_Minimax_Komin.pdf",
            elem_id="btn_pdf_en",
            variant="secondary",
        )

    gr.Markdown("## 🌍 Idioma / Language")
    lang = gr.Radio(["ES", "EN"], value="ES", label="Selecciona / Select")

    # Explanatory accordions, all at the same level. Each topic has an ES and
    # an EN variant; only the ES ones start visible.
    with gr.Accordion(
        "🧩 ¿Qué son Minimax-ASCII y Kōmín-CJK? (ES)", open=False, visible=True
    ) as acc_conlangs_es:
        gr.Markdown(EXPLAIN_CONLANGS_ES)
    with gr.Accordion(
        "🧩 What are Minimax-ASCII and Kōmín-CJK? (EN)", open=False, visible=False
    ) as acc_conlangs_en:
        gr.Markdown(EXPLAIN_CONLANGS_EN)

    with gr.Accordion(
        "📖 ¿Qué hace cada botón / modo? (ES)", open=False, visible=True
    ) as acc_modes_es:
        gr.Markdown(
            "- **🔁 Traducir**: Texto → Destino (ES/EN/Minimax/Kōmín), con opciones de compactación y % mostrado.\n"
            "- **🛠️ Construir**: Obliga salida en conlang (Minimax/Kōmín) desde ES/EN.\n"
            "- **🗝️ Decodificar**: Conlang → ES/EN (si hay ~..., devuelve el original exacto).\n"
            "- **🔄 Prueba ida→vuelta**: Comprueba reversibilidad."
        )
    with gr.Accordion(
        "📖 What does each button / mode do? (EN)", open=False, visible=False
    ) as acc_modes_en:
        gr.Markdown(
            "- **🔁 Translate**: Text → Target (ES/EN/Minimax/Kōmín) with compaction and %.\n"
            "- **🛠️ Build**: Force conlang output from ES/EN.\n"
            "- **🗝️ Decode**: Conlang → ES/EN (if ~..., exact original).\n"
            "- **🔄 Round-trip**: Check reversibility."
        )

    with gr.Accordion(
        "☑️ Opciones y compactación — guía rápida (ES)", open=False, visible=True
    ) as acc_intro_es:
        gr.Markdown(EXPLAIN_CHECKBOX_ES)
    with gr.Accordion(
        "☑️ Options & compaction — quick guide (EN)", open=False, visible=False
    ) as acc_intro_en:
        gr.Markdown(EXPLAIN_CHECKBOX_EN)

    with gr.Accordion(
        "ℹ️ Léxico — explicación y vista previa (ES)", open=False, visible=True
    ) as acc_lex_es:
        gr.Markdown(LEXICON_FRIENDLY_ES)
        n_rows_es = gr.Slider(5, 100, value=20, step=5, label="Filas a mostrar")
        table_es = gr.Dataframe(
            headers=["lemma_es", "lemma_en", "minimax", "komin"],
            row_count=1,
            interactive=False,
        )
        gr.Button("Actualizar vista").click(lambda n: master_preview(int(n)), [n_rows_es], [table_es])
    with gr.Accordion(
        "ℹ️ Lexicon — explainer & preview (EN)", open=False, visible=False
    ) as acc_lex_en:
        gr.Markdown(LEXICON_FRIENDLY_EN)
        n_rows_en = gr.Slider(5, 100, value=20, step=5, label="Rows to show")
        table_en = gr.Dataframe(
            headers=["lemma_es", "lemma_en", "minimax", "komin"],
            row_count=1,
            interactive=False,
        )
        gr.Button("Refresh").click(lambda n: master_preview(int(n)), [n_rows_en], [table_en])

    # Usage mode, presented as Radio buttons. The same ordered labels drive
    # both the Radio choices and the mode -> panel mapping below.
    mode_labels = (
        "🔁 Traducir / Translate",
        "🛠️ Construir (ES/EN → Conlang) / Build",
        "🗝️ Decodificar (Conlang → ES/EN) / Decode",
        "🔄 Prueba ida→vuelta / Round-trip",
    )
    gr.Markdown("### 🧭 Modo de uso (elige uno)")
    mode = gr.Radio(
        choices=list(mode_labels),
        value=mode_labels[0],
        label=None,
    )

    # One panel per (mode, language) pair; creation order fixes page order.
    gr.Markdown("### 🧪 Área de trabajo")
    panel_builders = (
        make_panel_translate,
        make_panel_build,
        make_panel_decode,
        make_panel_roundtrip,
    )
    panel_tr_es, panel_bu_es, panel_de_es, panel_rt_es = (build("ES") for build in panel_builders)
    panel_tr_en, panel_bu_en, panel_de_en, panel_rt_en = (build("EN") for build in panel_builders)

    def _vis(yes):
        """Return a Gradio update that only sets visibility."""
        return gr.update(visible=bool(yes))

    def _mode_to_flags(mode_str):
        """Map a mode label to four booleans (one per mode) plus the label used.

        An unrecognised label falls back to the first mode.
        """
        chosen = mode_str if mode_str in mode_labels else mode_labels[0]
        return [chosen == label for label in mode_labels], chosen

    # Visibility logic
    def switch_everything(lang_code, tr, bu, de, rt):
        """Return visibility updates for the 8 accordions followed by the 8 panels.

        Exactly one mode is treated as active: Translate when its flag is set
        or when no flag is set, otherwise the first set flag in the order
        Build, Decode, Round-trip.
        """
        if tr or not (bu or de or rt):
            active = 0
        elif bu:
            active = 1
        elif de:
            active = 2
        else:
            active = 3
        mode_on = [slot == active for slot in range(4)]

        vis_en = lang_code == "EN"
        vis_es = not vis_en

        # Accordions: (ES, EN) pairs for conlangs, modes, intro and lexicon.
        updates = [_vis(shown) for _ in range(4) for shown in (vis_es, vis_en)]
        # Panels: the four ES panels, then the four EN panels.
        updates += [_vis(vis_es and on) for on in mode_on]
        updates += [_vis(vis_en and on) for on in mode_on]
        return updates

    def _on_lang_or_mode(lang_code, mode_str):
        """Recompute every visibility flag from the current language and mode."""
        flags, _chosen = _mode_to_flags(mode_str)
        return switch_everything(lang_code, *flags)

    # Components whose visibility is toggled; the order must match the
    # update list produced by switch_everything.
    visibility_inputs = [lang, mode]
    visibility_outputs = [
        acc_conlangs_es, acc_conlangs_en,
        acc_modes_es, acc_modes_en,
        acc_intro_es, acc_intro_en,
        acc_lex_es, acc_lex_en,
        panel_tr_es, panel_bu_es, panel_de_es, panel_rt_es,
        panel_tr_en, panel_bu_en, panel_de_en, panel_rt_en,
    ]

    # Reactions
    lang.change(_on_lang_or_mode, visibility_inputs, visibility_outputs)
    mode.change(_on_lang_or_mode, visibility_inputs, visibility_outputs)
    # --- Force the correct initial state when the Space loads ---
    demo.load(_on_lang_or_mode, visibility_inputs, visibility_outputs)
# Launch the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()