Spaces:

LoloSemper
/

new_language_maximum_efficiency2

Sleeping

App Files Files Community

LoloSemper committed on Oct 6, 2025

Commit

eff6688

verified ·

1 Parent(s): 1054168

Update app.py

Browse files

Files changed (1) hide show

app.py +267 -360

app.py CHANGED Viewed

@@ -1,6 +1,5 @@
 # app.py — Universal Conlang Translator (Max Compresión Exacta)
-# UI bilingüe ES/EN + Botón de explicación de léxico + Acordeones plegables
-# Archivos requeridos:
 #  - lexicon_minimax.json
 #  - lexicon_komin.json
 #  - lexicon_master.json
@@ -23,18 +22,13 @@ LEX_MASTER = "lexicon_master.json"
 # ------------ Normalización ------------
 WORD_RE = re.compile(r"[A-Za-zÁÉÍÓÚÜÑáéíóúüñ]+", re.UNICODE)
 STRIP = str.maketrans("ÁÉÍÓÚÜÑáéíóúüñ", "AEIOUUNaeiouun")
-def norm_es(w: str) -> str:
-    return re.sub(r"[^a-záéíóúüñ]", "", (w or "").lower()).translate(STRIP)
-def norm_en(w: str) -> str:
-    return re.sub(r"[^a-z]", "", (w or "").lower())
 # ------------ Carga de léxicos ------------
 def load_json(path: str):
     if not os.path.exists(path): return None
-    with open(path, "r", encoding="utf-8") as f:
-        return json.load(f)
 def load_lexicons():
     mm = load_json(LEX_MINI) or {}
@@ -53,19 +47,15 @@ def load_lexicons():
     if isinstance(master, dict) and "entries" in master:
         for e in master["entries"]:
-            es = norm_es(str(e.get("lemma_es","")))
-            en = norm_en(str(e.get("lemma_en","")))
-            mi = str(e.get("minimax",""))
-            ko = str(e.get("komin",""))
             if es and en:
-                es2en_lemma.setdefault(es, en)
-                en2es_lemma.setdefault(en, es)
             if en and mi: en2mini.setdefault(en, mi)
             if en and ko: en2komi.setdefault(en, ko)
     mini2en = {v:k for k,v in en2mini.items()}
     komi2en = {v:k for k,v in en2komi.items()}
     return (es2mini, es2komi, mini2es, komi2es,
             en2mini, en2komi, mini2en, komi2en,
             es2en_lemma, en2es_lemma, master)
@@ -92,11 +82,9 @@ def to_custom_b64(b: bytes, alphabet: str) -> str:
     std = base64.b64encode(b).decode("ascii")
     trans = str.maketrans("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/", alphabet)
     return std.translate(trans).rstrip("=")
 def from_custom_b64(s: str, alphabet: str) -> bytes:
     trans = str.maketrans(alphabet, "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
-    std = s.translate(trans)
-    pad = "=" * ((4 - len(std) % 4) % 4)
     return base64.b64decode(std + pad)
 def enc_oov_minimax(token: str) -> str: return "~" + to_custom_b64(token.encode("utf-8"), ALPHA_MINI64)
@@ -115,9 +103,7 @@ USE_SPACY = False
 try:
     import spacy
     try:
-        nlp_es = spacy.load("es_core_news_sm")
-        nlp_en = spacy.load("en_core_web_sm")
-        USE_SPACY = True
     except Exception:
         nlp_es = nlp_en = None
 except Exception:
@@ -125,54 +111,48 @@ except Exception:
 def lemma_of(tok, src_lang: str) -> str:
     if src_lang == "Español":
-        return norm_es(tok.lemma_ if getattr(tok, "lemma_", "") else tok.text)
     else:
-        return norm_en(tok.lemma_ if getattr(tok, "lemma_", "") else tok.text)
-# ------------ Herramientas análisis simple ------------
-def detect_polarity(doc) -> bool: return "?" in doc.text
 def detect_neg(doc) -> bool:
     for t in doc:
-        if getattr(t, "dep_", "") == "neg" or getattr(t, "lower_", "").lower() in ("no","not","n't"):
             return True
     return False
 def detect_tense(root):
-    m = str(getattr(root, "morph", ""))
     if "Tense=Past" in m: return "Past"
     if "Tense=Fut"  in m: return "Fut"
     if "Tense=Pres" in m: return "Pres"
-    for c in getattr(root, "children", []):
-        if getattr(c, "pos_", "") == "AUX":
-            cm = str(getattr(c, "morph", ""))
             if "Tense=Past" in cm: return "Past"
-            if getattr(c, "lower_", "").lower() == "will": return "Fut"
     return "Pres"
 def extract_core(doc):
     tokens = list(doc)
-    root = next((t for t in tokens if getattr(t, "dep_", "")=="ROOT" and getattr(t, "pos_", "") in ("VERB","AUX")), tokens[0] if tokens else doc)
     subs, objs, obls, advs = [], [], [], []
-    for t in getattr(root, "children", []):
-        dep = getattr(t, "dep_", "")
-        pos = getattr(t, "pos_", "")
         if dep in ("nsubj","nsubj:pass","csubj"): subs.append(t)
         elif dep in ("obj","dobj","iobj"):        objs.append(t)
         elif dep in ("obl","pobj"):               obls.append(t)
         elif dep in ("advmod","advcl") and pos=="ADV": advs.append(t)
-    sortkey = lambda x: getattr(x, "i", 0)
-    for arr in (subs,objs,obls,advs): arr.sort(key=sortkey)
     return root, subs, objs, obls, advs
 def _person_of_doc(doc, src_lang: str) -> Optional[str]:
     try:
         tokens = list(doc)
-        root = next((t for t in tokens if getattr(t, "dep_", "")=="ROOT"), tokens[0])
-        subj = next((t for t in getattr(root, "children", []) if getattr(t, "dep_", "").startswith("nsubj")), None)
         if subj is None: return None
-        plur = ("Number=Plur" in str(getattr(subj, "morph",""))) if src_lang=="Español" else (getattr(subj, "tag_", "") in ("NNS","NNPS"))
-        low = getattr(subj, "lower_", "").lower()
         if src_lang=="Español":
             if low in ("yo",): return "1p" if plur else "1s"
             if low in ("tú","vos"): return "2p" if plur else "2s"
@@ -188,44 +168,39 @@ def _person_of_doc(doc, src_lang: str) -> Optional[str]:
             return "3p" if plur else "3s"
     except Exception:
         return None
 def detect_person(root, src_lang: str) -> Optional[str]:
-    m = str(getattr(root, "morph", ""))
-    person_str = "3"; number_str = "s"
     if "Person=" in m:
         for feat in m.split("|"):
             if feat.startswith("Person="): person_str = feat.split("=")[1]
-            elif feat.startswith("Number="): number_str = "p" if feat.split("=")[1] == "Plur" else "s"
         return person_str + number_str
     return _person_of_doc(root.doc, src_lang)
-# ------------ Mapeo lema→código y fraseadores ------------
 def code_es(lemma: str, target: str) -> str:
     lemma = norm_es(lemma)
-    return ES2MINI.get(lemma) if target=="Minimax-ASCII" else ES2KOMI.get(lemma) or (enc_oov_komin(lemma) if target!="Minimax-ASCII" else enc_oov_minimax(lemma))
 def code_en(lemma: str, target: str) -> str:
     lemma = norm_en(lemma)
-    if target == "Minimax-ASCII":
         return (EN2MINI.get(lemma) if EN2MINI else None) or enc_oov_minimax(lemma)
-    else:
-        return (EN2KOMI.get(lemma) if EN2KOMI else None) or enc_oov_komin(lemma)
-TAM_MINI = {"Pres":"P", "Past":"T", "Fut":"F", "UNK":"P"}
-TAM_KOMI = {"Pres":"Ⓟ", "Past":"Ⓣ", "Fut":"Ⓕ", "UNK":"Ⓟ"}
 def realize_minimax(doc, src_lang: str, drop_articles=True, zero_copula=True,
                     semi_lossless=False, person_hint="2s", remove_pronouns=False):
     root, subs, objs, obls, advs = extract_core(doc)
-    tense = detect_tense(root)
-    is_q, is_neg = detect_polarity(doc), detect_neg(doc)
     vlem  = lemma_of(root, src_lang) if USE_SPACY else ("ser" if "?" in getattr(doc,"text","") else "estar")
     vcode = code_es(vlem, "Minimax-ASCII") if src_lang=="Español" else code_en(vlem, "Minimax-ASCII")
     tail = TAM_MINI.get(tense, "P")
-    if semi_lossless:
-        pi = detect_person(root, src_lang) or person_hint
-        tail += pi
-    if is_neg: tail += "N"
     if is_q:   tail += "Q"
     if tail: vcode = f"{vcode}·{tail}"
@@ -234,24 +209,18 @@ def realize_minimax(doc, src_lang: str, drop_articles=True, zero_copula=True,
         for t in tokens:
             if remove_pronouns:
                 txt = (getattr(t,"text","") or "").lower()
-                if (src_lang=="Español" and txt in PRON_ES) or (src_lang=="English" and txt in PRON_EN):
-                    continue
-            lem  = lemma_of(t, src_lang) if USE_SPACY else (getattr(t,"text",""))
-            code = code_es(lem, "Minimax-ASCII") if src_lang=="Español" else code_en(lem, "Minimax-ASCII")
-            outs.append(code)
         return outs
-    S = realize_np(subs)
-    O = realize_np(objs) + realize_np(obls)
     ADV=[]
     for a in advs:
-        lem  = lemma_of(a, src_lang) if USE_SPACY else getattr(a,"text","")
-        ADV.append(code_es(lem, "Minimax-ASCII") if src_lang=="Español" else code_en(lem, "Minimax-ASCII"))
-    if zero_copula and not semi_lossless and vlem in ("ser","estar","be") and tense=="Pres" and not is_neg and not is_q:
-        parts = S + O + ADV
-    else:
-        parts = [vcode] + S + O + ADV
     return " ".join(p for p in parts if p)
 def realize_komin(doc, src_lang: str, drop_articles=True, zero_copula=True,
@@ -260,38 +229,27 @@ def realize_komin(doc, src_lang: str, drop_articles=True, zero_copula=True,
     tense, is_q, is_neg = detect_tense(root), detect_polarity(doc), detect_neg(doc)
     vlem  = lemma_of(root, src_lang) if USE_SPACY else ("ser" if "?" in getattr(doc,"text","") else "estar")
     vcode = code_es(vlem, "Kōmín-CJK") if src_lang=="Español" else code_en(vlem, "Kōmín-CJK")
-    P_SUBJ, P_OBJ = "ᵖ", "ᵒ"
-    NEG_M, Q_FIN = "̆", "？"
-    TAM = TAM_KOMI.get(tense, "Ⓟ")
-    if semi_lossless:
-        pi = detect_person(root, src_lang) or person_hint
-        TAM = TAM + f"[{pi}]"
     def realize_np(tokens, particle):
         outs=[]
         for t in tokens:
             if remove_pronouns:
                 txt = (getattr(t,"text","") or "").lower()
-                if (src_lang=="Español" and txt in PRON_ES) or (src_lang=="English" and txt in PRON_EN):
-                    continue
-            lem  = lemma_of(t, src_lang) if USE_SPACY else getattr(t,"text","")
-            code = code_es(lem, "Kōmín-CJK") if src_lang=="Español" else code_en(lem, "Kōmín-CJK")
-            outs.append(code + particle)
         return outs
-    S = realize_np(subs, P_SUBJ)
-    O = realize_np(objs + obls, P_OBJ)
     ADV=[]
     for a in advs:
-        lem  = lemma_of(a, src_lang) if USE_SPACY else getattr(a,"text","")
-        ADV.append(code_es(lem, "Kōmín-CJK") if src_lang=="Español" else code_en(lem, "Kōmín-CJK"))
-    v_form = vcode + TAM + (NEG_M if is_neg else "")
-    if zero_copula and not semi_lossless and vlem in ("ser","estar","be") and tense=="Pres" and not is_neg and not is_q:
-        parts = S + O + ADV
-    else:
-        parts = S + O + ADV + [v_form]
     out = " ".join(parts)
     if is_q: out += " " + Q_FIN
     return out
@@ -326,29 +284,25 @@ def encode_simple(text: str, src_lang: str, target: str) -> str:
         code = ES2MINI.get(key) if target=="Minimax-ASCII" else ES2KOMI.get(key)
         return code or (enc_oov_minimax(m.group(0)) if target=="Minimax-ASCII" else enc_oov_komin(m.group(0)))
     def repl_en(m):
-        key = norm_en(m.group(0))
-        table = EN2MINI if target=="Minimax-ASCII" else EN2KOMI
         if table and key in table: return table[key]
         return enc_oov_minimax(m.group(0)) if target=="Minimax-ASCII" else enc_oov_komin(m.group(0))
     repl = repl_es if src_lang=="Español" else repl_en
     return WORD_RE.sub(repl, text)
 def pluralize_es(word: str) -> str:
-    exceptions = {"uno": "unos", "buen": "buenos", "hombre": "hombres"}
     if word in exceptions: return exceptions[word]
-    if word.endswith("z"): return word[:-1] + "ces"
-    if word.endswith(("a","e","i","o")): return word + "s"
-    return word + "es"
 def pluralize_en(word: str) -> str:
     exceptions = {"man":"men","woman":"women","child":"children"}
     if word in exceptions: return exceptions[word]
-    if word.endswith("y") and len(word)>1 and word[-2] not in "aeiou": return word[:-1] + "ies"
-    if word.endswith(("s","sh","ch","x","z")): return word + "es"
-    return word + "s"
-def pluralize(word: str, tgt_lang: str) -> str:
-    return pluralize_es(word) if tgt_lang=="Español" else pluralize_en(word)
 mini_tail_re = re.compile(r"^(?P<stem>.+?)·(?P<tail>[PTFNQ12sp]+)$")
@@ -356,59 +310,41 @@ def decode_simple(text: str, source: str, tgt_lang: str) -> str:
     if not text.strip(): return ""
     code2es = MINI2ES if source=="Minimax-ASCII" else KOMI2ES
     code2en = MINI2EN if source=="Minimax-ASCII" else KOMI2EN
-    if source == "Kōmín-CJK":
-        text = text.replace("？","?").replace(" ", " ")
-        return " ".join([code2es.get(w, w) for w in text.split() if w != "?"])
-    tokens = text.split()
     if not tokens: return ""
-    lemma_tokens, pl_flags = [], []
-    verb_idx, verb_lemma, verb_tense, verb_person = -1, None, "Pres", "3s"
-    has_q, is_neg = False, False
     for part in tokens:
-        look = part.replace("[PL]","")
-        had_pl = "[PL]" in part
-        pl_flags.append(had_pl)
         m = mini_tail_re.match(look)
         if m:
-            verb_idx = len(lemma_tokens)
-            stem, tail = m.group("stem"), m.group("tail")
             vlem_es = code2es.get(stem); vlem_en = code2en.get(stem) if code2en else None
             vlem = vlem_es if tgt_lang=="Español" else (vlem_en or vlem_es or stem)
-            if not vlem:
-                vlem = dec_oov_minimax(stem) if is_oov_minimax(stem) else stem
             lemma_tokens.append(vlem); pl_flags.append(False)
             if tail:
                 if tail[0] in "PTF":
-                    verb_tense = {"P":"Pres","T":"Past","F":"Fut"}[tail[0]]
-                    pos=1
                     if len(tail)>pos and tail[pos] in "123":
-                        pos+=1
-                        verb_person = tail[pos-1] + (tail[pos] if len(tail)>pos and tail[pos] in "sp" else "s")
                         if len(tail)>pos and tail[pos] in "sp": pos+=1
-                    is_neg = "N" in tail[pos:]
-                    has_q  = "Q" in tail[pos:]
-            verb_lemma = vlem
-            continue
         w_es = code2es.get(look); w_en = code2en.get(look) if code2en else None
         w = w_es if tgt_lang=="Español" else (w_en or w_es or look)
         if not w: w = dec_oov_minimax(look) if is_oov_minimax(look) else look
         lemma_tokens.append(w); pl_flags.append(had_pl)
     out_parts=[]
     for idx, lem in enumerate(lemma_tokens):
-        if idx == verb_idx:
             v_conj = _es_conj(verb_lemma, verb_tense, verb_person) if tgt_lang=="Español" else _en_conj(verb_lemma, verb_tense, verb_person)
             if is_neg: v_conj = ("no " if tgt_lang=="Español" else "not ") + v_conj
             out_parts.append(v_conj)
         else:
             out_parts.append(pluralize(lem, tgt_lang) if pl_flags[idx] else lem)
     out_text = " ".join(out_parts)
     if has_q:
         start_q = "¿" if tgt_lang=="Español" else ""
@@ -419,17 +355,16 @@ def decode_simple(text: str, source: str, tgt_lang: str) -> str:
 def _es_conj_regular(lemma, tense, person):
     if not lemma.endswith(("ar","er","ir")): return lemma
     stem, vtype = lemma[:-2], lemma[-2:]
-    pres = {"ar":{"1s":"o","2s":"as","3s":"a","1p":"amos","2p":"áis","3p":"an"},
-            "er":{"1s":"o","2s":"es","3s":"e","1p":"emos","2p":"éis","3p":"en"},
-            "ir":{"1s":"o","2s":"es","3s":"e","1p":"imos","2p":"ís","3p":"en"}}
-    pret = {"ar":{"1s":"é","2s":"aste","3s":"ó","1p":"amos","2p":"asteis","3p":"aron"},
-            "er":{"1s":"í","2s":"iste","3s":"ió","1p":"imos","2p":"isteis","3p":"ieron"},
-            "ir":{"1s":"í","2s":"iste","3s":"ió","1p":"imos","2p":"isteis","3p":"ieron"}}
-    fut  = {"1s":"é","2s":"ás","3s":"á","1p":"emos","2p":"éis","3p":"án"}
     if tense=="Pres": return stem + pres[vtype].get(person, pres[vtype]["3s"])
     if tense=="Past": return stem + pret[vtype].get(person, pret[vtype]["3s"])
     return lemma + fut.get(person, fut["3s"])
 def _es_conj(lemma, tense, person):
     if lemma=="ser":
         tab={"Pres":{"1s":"soy","2s":"eres","3s":"es","1p":"somos","2p":"sois","3p":"son"},
@@ -447,7 +382,6 @@ def _es_conj(lemma, tense, person):
              "Fut":{"1s":"iré","2s":"irás","3s":"irá","1p":"iremos","2p":"iréis","3p":"irán"}}
         return tab[tense].get(person, tab[tense]["3s"])
     return _es_conj_regular(lemma, tense, person)
 def _en_conj(lemma, tense, person):
     if lemma=="be":
         if tense=="Pres": return {"1s":"am","2s":"are","3s":"is","1p":"are","2p":"are","3p":"are"}.get(person,"is")
@@ -481,23 +415,18 @@ def _build_with_spacy(text: str, src_lang: str, target: str,
                       remove_pronouns: bool) -> str:
     nlp = nlp_es if src_lang=="Español" else nlp_en
     doc = nlp(text)
-    if target == "Minimax-ASCII":
-        return realize_minimax(doc, src_lang, drop_articles, zero_copula, semi_lossless=semi_lossless,
-                               remove_pronouns=remove_pronouns)
     else:
-        return realize_komin(doc, src_lang, drop_articles, zero_copula, semi_lossless=semi_lossless,
-                             remove_pronouns=remove_pronouns)
 def build_sentence(text: str, src_lang: str, target: str,
                    drop_articles: bool, zero_copula: bool, mode: str,
                    max_comp_exact: bool = False, remove_pronouns: bool = False) -> str:
     if not text.strip(): return ""
-    semi = True  # siempre semi-lossless
     if USE_SPACY:
-        core = _build_with_spacy(text, src_lang, target, drop_articles, zero_copula and not semi,
-                                 semi_lossless=semi, remove_pronouns=remove_pronouns)
     else:
-        # Modo léxico simple: quitar pronombres por forma si procede
         if remove_pronouns:
             pron = PRON_ES if src_lang=="Español" else PRON_EN
             tokens = re.findall(r"\w+|[^\w\s]+", text)
@@ -526,24 +455,20 @@ def universal_translate(text: str, src: str, tgt: str,
         if orig is not None:
             core = strip_custom_sidecar(text)
             es_lemmas = decode_simple(core, src, "Español")
-            words = re.findall(r"\w+|[^\w\s]+", es_lemmas)
-            out=[]
             for w in words:
                 if re.fullmatch(r"\w+", w):
                     code = ES2MINI.get(norm_es(w)) if tgt=="Minimax-ASCII" else ES2KOMI.get(norm_es(w))
                     out.append(code or (enc_oov_minimax(w) if tgt=="Minimax-ASCII" else enc_oov_komin(w)))
-                else:
-                    out.append(w)
             return custom_sidecar_enc(" ".join(out), orig)
         es_lemmas = decode_simple(text, src, "Español")
-        words = re.findall(r"\w+|[^\w\s]+", es_lemmas)
-        out=[]
         for w in words:
             if re.fullmatch(r"\w+", w):
                 code = ES2MINI.get(norm_es(w)) if tgt=="Minimax-ASCII" else ES2KOMI.get(norm_es(w))
                 out.append(code or (enc_oov_minimax(w) if tgt=="Minimax-ASCII" else enc_oov_komin(w)))
-            else:
-                out.append(w)
         return " ".join(out)
     return "[No soportado]"
@@ -551,11 +476,9 @@ def translate_natural(text: str, src_lang: str, tgt_lang: str) -> str:
     if not text.strip(): return ""
     if not USE_SPACY: return text
     nlp = nlp_es if src_lang=="Español" else nlp_en
-    doc = nlp(text)
-    out=[]
     for t in doc:
-        if not getattr(t, "is_alpha", False):
-            out.append(getattr(t,"text","")); continue
         lem = lemma_of(t, src_lang)
         if src_lang=="Español":
             tr = ES2EN_LEMMA.get(lem); out.append(tr if tr else lem)
@@ -569,49 +492,94 @@ def round_trip(text, src, tgt, mode, max_comp_exact):
     return conlang, back
 # =====================================================================================
-# ========================== UI bilingüe con selector global ==========================
 # =====================================================================================
 ALL_LANGS = ["Español","English","Minimax-ASCII","Kōmín-CJK"]
-# Secciones de ayuda (plegables)
 COMPACT_ES = """
-**📏 Compactación orientativa**
-- Sin casillas: 0%
-- Omitir artículos: **~10–15%**
-- Cópula cero: **~5–10%**
-- Ambas: **~15–20%**
-- Máx. Compresión Exacta: **~40–60%** en >100 caracteres (zlib). En textos muy cortos puede no reducir.
 """
 COMPACT_EN = """
-**📏 Typical compaction**
-- No options: 0%
-- Drop articles: **~10–15%**
-- Zero copula: **~5–10%**
-- Both: **~15–20%**
-- Max Exact Compression: **~40–60%** for >100 chars (zlib). Very short texts may not shrink.
 """
 LEXICON_BUILD_ES = """
-### 🧱 Cómo se construyó el léxico (OMW → Minimax/Kōmín)
-1) OMW/WordNet → extrae **lemas ES** y sus **equivalentes EN** por sinset.
-2) Normaliza y ordena por **frecuencia** (wordfreq).
 3) Opcional: **spaCy** refina lemas; **Argos** puede rellenar EN faltantes.
-4) Asigna códigos compactos con alfabetos **barajados por SEED** hasta `MAXLEN_MINI`/`MAXLEN_CJK`.
-5) Exporta: `lexicon_minimax.json`, `lexicon_komin.json`, `lexicon_master.json` (+TSV).
 """
 LEXICON_BUILD_EN = """
-### 🧱 How the lexicon was built (OMW → Minimax/Kōmín)
-1) From OMW/WordNet → extract **ES lemmas** and **EN counterparts** by synset.
-2) Normalize and sort by **frequency** (wordfreq).
-3) Optional: **spaCy** refines lemmas; **Argos** may fill missing EN.
-4) Assign compact codes using alphabets **shuffled by SEED** up to `MAXLEN_MINI`/`MAXLEN_CJK`.
-5) Exports: `lexicon_minimax.json`, `lexicon_komin.json`, `lexicon_master.json` (+TSV).
 """
-EXPLAIN_TOP_ES = "Traduce entre **Español / Inglés** y dos conlangs: **Minimax-ASCII** y **Kōmín-CJK**. Con **Máx. Compresión Exacta** puedes recuperar el original exacto (trailer `~...`)."
-EXPLAIN_TOP_EN = "Translate between **Spanish / English** and **Minimax-ASCII / Kōmín-CJK**. With **Max Exact Compression**, you can recover the exact original (trailer `~...`)."
 def master_preview(n: int = 20) -> List[List[Any]]:
     try:
         entries = (MASTER_OBJ or {}).get("entries", [])
@@ -623,68 +591,49 @@ def master_preview(n: int = 20) -> List[List[Any]]:
     except Exception:
         return [["lemma_es","lemma_en","minimax","komin"], ["(no data)","","",""]]
-# === ES Group ===
 def make_group_es():
-    with gr.Group(visible=True) as group:
         gr.Markdown("# 🌐 Universal Conlang Translator · Compresión Exacta (ES)")
-        # Botón grande para mostrar/ocultar explicación del léxico
-        show_lex_state = gr.State(False)
-        with gr.Row():
-            btn_lex = gr.Button("ℹ️ **Ver explicación del léxico (OMW → Minimax/Kōmín)**", variant="primary", size="lg")
-        lex_group = gr.Group(visible=False)
-        with lex_group:
-            with gr.Accordion("🧱 Léxico: ¿cómo se construyó? (ES)", open=True):
-                gr.Markdown(LEXICON_BUILD_ES)
-                gr.Markdown("**Vista previa de `lexicon_master.json` (primeras filas):**")
-                n_rows = gr.Slider(5, 100, value=20, step=5, label="Filas a mostrar")
-                df_prev = gr.Dataframe(headers=["lemma_es","lemma_en","minimax","komin"], row_count=1, interactive=False)
-                gr.Button("Actualizar vista").click(lambda n: master_preview(int(n)), [n_rows], [df_prev])
-        def toggle_lex(show):
-            show = not bool(show)
-            return show, (gr.update(visible=show), gr.update(value="ℹ️ **Ocultar explicación del léxico**" if show else "ℹ️ **Ver explicación del léxico (OMW → Minimax/Kōmín)**"))
-        btn_lex.click(toggle_lex, [show_lex_state], [show_lex_state, lex_group, btn_lex])
-        # Ayuda plegable por apartados
         with gr.Row():
             with gr.Column():
-                with gr.Accordion("Resumen (¿qué hace?)", open=True):
-                    gr.Markdown(EXPLAIN_TOP_ES)
-                with gr.Accordion("Opciones y compactación", open=False):
-                    gr.Markdown(COMPACT_ES)
-                with gr.Accordion("FAQ", open=False):
-                    gr.Markdown("- **¿Se pierde info?** No con Máx. Compresión Exacta (`~...`).\n- **¿Sin spaCy?** Funciona en modo léxico.\n- **Privacidad**: todo corre dentro del Space.")
             with gr.Column():
-                with gr.Accordion("Tutorial rápido", open=True):
-                    gr.Markdown("1) Elige **Fuente/Destino**.\n2) Escribe.\n3) Pulsa **Traducir**.\n\n> Activa **Máx. Compresión Exacta** para poder recuperar el original exacto luego.")
-        # Tabs
         with gr.Tab("🔁 Traducir"):
             with gr.Row():
                 uni_src = gr.Dropdown(ALL_LANGS, value="Español", label="Fuente")
                 uni_tgt = gr.Dropdown(ALL_LANGS, value="Minimax-ASCII", label="Destino")
             uni_text = gr.Textbox(lines=3, label="Texto", placeholder="Ej.: Hola, ¿cómo estás?", show_copy_button=True)
             with gr.Row():
-                uni_drop = gr.Checkbox(value=True,  label="Omitir artículos (ES/EN → conlang)")
-                uni_zero = gr.Checkbox(value=False, label="Cópula cero (presente afirm.)")
-                uni_rmpr = gr.Checkbox(value=False, label="Quitar pronombres (sujeto/objeto evidentes)")
-                uni_maxc = gr.Checkbox(value=False, label="Máx. Compresión Exacta (sidecar `~...`)")
             uni_mode = gr.Dropdown(["Semi-lossless"], value="Semi-lossless", visible=False)
             with gr.Row():
-                btn_translate = gr.Button("🚀 Traducir", variant="primary")
-                btn_reset = gr.Button("🧹 Limpiar")
             uni_out = gr.Textbox(lines=6, label="Traducción", show_copy_button=True)
-            btn_translate.click(
-                universal_translate,
-                [uni_text, uni_src, uni_tgt, uni_drop, uni_zero, uni_mode, uni_maxc, uni_rmpr],
-                [uni_out]
-            )
-            btn_reset.click(lambda: ("",""), None, [uni_text, uni_out])
-            with gr.Accordion("¿Qué hace esta pestaña?", open=False):
-                gr.Markdown("Traduce **entre cualquier par** (ES/EN/Minimax/Kōmín). Si marcas **Máx. Compresión Exacta**, añade `~...` con el original comprimido para recuperar luego *bit a bit*.")
         with gr.Tab("🛠️ Construir (ES/EN → Conlang)"):
             with gr.Row():
@@ -692,25 +641,23 @@ def make_group_es():
                 target   = gr.Dropdown(["Minimax-ASCII","Kōmín-CJK"], value="Minimax-ASCII", label="Conlang")
             text_in = gr.Textbox(lines=3, label="Frase", show_copy_button=True)
             with gr.Row():
-                drop_articles  = gr.Checkbox(value=True,  label="Omitir artículos")
-                zero_copula    = gr.Checkbox(value=False, label="Cópula cero (presente afirm.)")
-                rm_pron_build  = gr.Checkbox(value=False, label="Quitar pronombres")
-                max_comp_build = gr.Checkbox(value=False, label="Máx. Compresión Exacta")
             mode_build = gr.Dropdown(["Semi-lossless"], value="Semi-lossless", visible=False)
             with gr.Row():
-                btn_build = gr.Button("🏗️ Construir", variant="primary")
-                btn_build_clear = gr.Button("🧹 Limpiar")
             out = gr.Textbox(lines=6, label="Salida", show_copy_button=True)
-            btn_build.click(
-                build_sentence,
-                [text_in, src_lang, target, drop_articles, zero_copula, mode_build, max_comp_build, rm_pron_build],
-                [out]
-            )
-            btn_build_clear.click(lambda: ("",""), None, [text_in, out])
-            with gr.Accordion("¿Qué hace esta pestaña?", open=False):
-                gr.Markdown("Fuerza salida **en conlang** desde ES/EN, aplicando reglas de fraseo (orden, partículas, TAM) y tus opciones de compactación.")
         with gr.Tab("🗝️ Decodificar (Conlang → ES/EN)"):
             with gr.Row():
@@ -727,69 +674,50 @@ def make_group_es():
                 return decode_simple(strip_custom_sidecar(strip_sidecar_b85(text)), src, tgt)
             with gr.Row():
-                btn_decode = gr.Button("🔓 Decodificar", variant="primary")
-                btn_decode_clear = gr.Button("🧹 Limpiar")
-            btn_decode.click(decode_lossless_aware, [code_in, src_code, tgt_lang], [out3])
-            btn_decode_clear.click(lambda: ("",""), None, [code_in, out3])
-            with gr.Accordion("¿Qué hace esta pestaña?", open=False):
-                gr.Markdown("Convierte **Minimax/Kōmín → ES/EN**. Si hay `~...`, la recuperación es **exacta**.")
         with gr.Tab("🔄 Prueba ida→vuelta"):
             with gr.Row():
                 rt_src = gr.Dropdown(["Español","English"], value="Español", label="Fuente")
                 rt_tgt = gr.Dropdown(["Minimax-ASCII","Kōmín-CJK"], value="Minimax-ASCII", label="Conlang")
             rt_text = gr.Textbox(lines=3, label="Frase", show_copy_button=True)
-            rt_max_comp = gr.Checkbox(value=False, label="Máx. Compresión Exacta")
             rt_mode = gr.Dropdown(["Semi-lossless"], value="Semi-lossless", visible=False)
             rt_out_conlang = gr.Textbox(lines=3, label="Conlang (ida)", show_copy_button=True)
             rt_out_back    = gr.Textbox(lines=3, label="Vuelta", show_copy_button=True)
             with gr.Row():
                 btn_rt = gr.Button("▶️ Probar", variant="primary")
-                btn_rt_clear = gr.Button("🧹 Limpiar")
             btn_rt.click(round_trip, [rt_text, rt_src, rt_tgt, rt_mode, rt_max_comp], [rt_out_conlang, rt_out_back])
-            btn_rt_clear.click(lambda: ("","",""), None, [rt_text, rt_out_conlang, rt_out_back])
-            with gr.Accordion("¿Qué hace esta pestaña?", open=False):
-                gr.Markdown("Hace **ES/EN → Conlang → ES/EN** para comprobar la **reversibilidad**. Con **Máx. Compresión Exacta** la vuelta coincide bit a bit.")
-        gr.Markdown("---")
-        gr.Markdown("Hecho con ❤️ · **spaCy** (opcional) · Todo se ejecuta en este Space.")
-    return group
-# === EN Group ===
 def make_group_en():
-    with gr.Group(visible=False) as group:
         gr.Markdown("# 🌐 Universal Conlang Translator · Max Exact Compression (EN)")
-        show_lex_state = gr.State(False)
-        with gr.Row():
-            btn_lex = gr.Button("ℹ️ **Show lexicon build (OMW → Minimax/Kōmín)**", variant="primary", size="lg")
-        lex_group = gr.Group(visible=False)
-        with lex_group:
-            with gr.Accordion("🧱 Lexicon: how it was built (EN)", open=True):
-                gr.Markdown(LEXICON_BUILD_EN)
-                gr.Markdown("**Preview of `lexicon_master.json` (first rows):**")
-                n_rows = gr.Slider(5, 100, value=20, step=5, label="Rows to show")
-                df_prev = gr.Dataframe(headers=["lemma_es","lemma_en","minimax","komin"], row_count=1, interactive=False)
-                gr.Button("Refresh").click(lambda n: master_preview(int(n)), [n_rows], [df_prev])
-        def toggle_lex(show):
-            show = not bool(show)
-            return show, (gr.update(visible=show), gr.update(value="ℹ️ **Hide lexicon build**" if show else "ℹ️ **Show lexicon build (OMW → Minimax/Kōmín)**"))
-        btn_lex.click(toggle_lex, [show_lex_state], [show_lex_state, lex_group, btn_lex])
         with gr.Row():
             with gr.Column():
-                with gr.Accordion("Summary (what it does)", open=True):
-                    gr.Markdown(EXPLAIN_TOP_EN)
-                with gr.Accordion("Options & compaction", open=False):
-                    gr.Markdown(COMPACT_EN)
-                with gr.Accordion("FAQ", open=False):
-                    gr.Markdown("- **Any loss?** Not with Max Exact Compression (`~...`).\n- **No spaCy?** Works in lexical mode.\n- **Privacy**: runs inside this Space.")
             with gr.Column():
-                with gr.Accordion("Quick start", open=True):
-                    gr.Markdown("1) Pick **Source/Target**.\n2) Type.\n3) Click **Translate**.\n\n> Enable **Max Exact Compression** to recover the exact original later.")
         with gr.Tab("🔁 Translate"):
             with gr.Row():
@@ -797,26 +725,24 @@ def make_group_en():
                 uni_tgt = gr.Dropdown(ALL_LANGS, value="Minimax-ASCII", label="Target")
             uni_text = gr.Textbox(lines=3, label="Text", placeholder="e.g., Hello, how are you?", show_copy_button=True)
             with gr.Row():
-                uni_drop = gr.Checkbox(value=True,  label="Drop articles (ES/EN → conlang)")
-                uni_zero = gr.Checkbox(value=False, label="Zero copula (present affirmative)")
-                uni_rmpr = gr.Checkbox(value=False, label="Remove pronouns (obvious subject/object)")
-                uni_maxc = gr.Checkbox(value=False, label="Max Exact Compression (sidecar `~...`)")
             uni_mode = gr.Dropdown(["Semi-lossless"], value="Semi-lossless", visible=False)
             with gr.Row():
-                btn_translate = gr.Button("🚀 Translate", variant="primary")
-                btn_reset = gr.Button("🧹 Clear")
             uni_out = gr.Textbox(lines=6, label="Translation", show_copy_button=True)
-            btn_translate.click(
-                universal_translate,
-                [uni_text, uni_src, uni_tgt, uni_drop, uni_zero, uni_mode, uni_maxc, uni_rmpr],
-                [uni_out]
-            )
-            btn_reset.click(lambda: ("",""), None, [uni_text, uni_out])
-            with gr.Accordion("What does this tab do?", open=False):
-                gr.Markdown("Translate **between any pair** (ES/EN/Minimax/Kōmín). With **Max Exact Compression**, a `~...` trailer stores the original for bit-perfect recovery.")
         with gr.Tab("🛠️ Build (ES/EN → Conlang)"):
             with gr.Row():
@@ -824,25 +750,23 @@ def make_group_en():
                 target   = gr.Dropdown(["Minimax-ASCII","Kōmín-CJK"], value="Minimax-ASCII", label="Conlang")
             text_in = gr.Textbox(lines=3, label="Sentence", show_copy_button=True)
             with gr.Row():
-                drop_articles  = gr.Checkbox(value=True,  label="Drop articles")
-                zero_copula    = gr.Checkbox(value=False, label="Zero copula (present affirmative)")
-                rm_pron_build  = gr.Checkbox(value=False, label="Remove pronouns")
-                max_comp_build = gr.Checkbox(value=False, label="Max Exact Compression")
             mode_build = gr.Dropdown(["Semi-lossless"], value="Semi-lossless", visible=False)
             with gr.Row():
-                btn_build = gr.Button("🏗️ Build", variant="primary")
-                btn_build_clear = gr.Button("🧹 Clear")
             out = gr.Textbox(lines=6, label="Output", show_copy_button=True)
-            btn_build.click(
-                build_sentence,
-                [text_in, src_lang, target, drop_articles, zero_copula, mode_build, max_comp_build, rm_pron_build],
-                [out]
-            )
-            btn_build_clear.click(lambda: ("",""), None, [text_in, out])
-            with gr.Accordion("What does this tab do?", open=False):
-                gr.Markdown("Forces **conlang output** from ES/EN, applying phrasing rules (order, particles, TAM) and your compacting options.")
         with gr.Tab("🗝️ Decode (Conlang → ES/EN)"):
             with gr.Row():
@@ -859,59 +783,41 @@ def make_group_en():
                 return decode_simple(strip_custom_sidecar(strip_sidecar_b85(text)), src, tgt)
             with gr.Row():
-                btn_decode = gr.Button("🔓 Decode", variant="primary")
-                btn_decode_clear = gr.Button("🧹 Clear")
-            btn_decode.click(decode_lossless_aware, [code_in, src_code, tgt_lang], [out3])
-            btn_decode_clear.click(lambda: ("",""), None, [code_in, out3])
-            with gr.Accordion("What does this tab do?", open=False):
-                gr.Markdown("Converts **Minimax/Kōmín → ES/EN**. If a `~...` trailer is present, recovery is **bit-perfect**.")
         with gr.Tab("🔄 Round-trip"):
             with gr.Row():
                 rt_src = gr.Dropdown(["Español","English"], value="English", label="Source")
                 rt_tgt = gr.Dropdown(["Minimax-ASCII","Kōmín-CJK"], value="Minimax-ASCII", label="Conlang")
             rt_text = gr.Textbox(lines=3, label="Sentence", show_copy_button=True)
-            rt_max_comp = gr.Checkbox(value=False, label="Max Exact Compression")
             rt_mode = gr.Dropdown(["Semi-lossless"], value="Semi-lossless", visible=False)
             rt_out_conlang = gr.Textbox(lines=3, label="Outward (conlang)", show_copy_button=True)
             rt_out_back    = gr.Textbox(lines=3, label="Back", show_copy_button=True)
             with gr.Row():
                 btn_rt = gr.Button("▶️ Test", variant="primary")
-                btn_rt_clear = gr.Button("🧹 Clear")
             btn_rt.click(round_trip, [rt_text, rt_src, rt_tgt, rt_mode, rt_max_comp], [rt_out_conlang, rt_out_back])
-            btn_rt_clear.click(lambda: ("","",""), None, [rt_text, rt_out_conlang, rt_out_back])
-            with gr.Accordion("What does this tab do?", open=False):
-                gr.Markdown("Performs **ES/EN → Conlang → ES/EN** to verify **reversibility**. With **Max Exact Compression**, the return matches the input bit-for-bit.")
-        gr.Markdown("---")
-        gr.Markdown("Made with ❤️ · **spaCy** (optional) · Everything runs inside this Space.")
-    return group
-# ============================== Pestaña global de Léxico ==============================
-def make_lexicon_tab():
-    with gr.TabItem("ℹ️ Léxico / Lexicon (Global)"):
-        gr.Markdown("## 🧱 Construcción del léxico / Lexicon build")
-        with gr.Row():
-            with gr.Column():
-                with gr.Accordion("Resumen (ES)", open=True): gr.Markdown(LEXICON_BUILD_ES)
-            with gr.Column():
-                with gr.Accordion("Summary (EN)", open=False): gr.Markdown(LEXICON_BUILD_EN)
-        gr.Markdown("### 👀 Vista de ejemplo (primeras filas de `lexicon_master.json`)")
-        n_rows = gr.Slider(5, 100, value=20, step=5, label="Filas/Rows")
-        table = gr.Dataframe(headers=["lemma_es","lemma_en","minimax","komin"], row_count=1, interactive=False)
-        gr.Button("Actualizar / Refresh").click(lambda n: master_preview(int(n)), [n_rows], [table])
-# ================================ Lanzador de la app =================================
 with gr.Blocks(title="Universal Conlang Translator", theme=gr.themes.Soft()) as demo:
     gr.Markdown("## 🌍 Idioma / Language")
-    lang_select = gr.Radio(choices=["ES","EN"], value="ES", label="Selecciona / Select")
     group_es = make_group_es()
     group_en = make_group_en()
-    make_lexicon_tab()
     def switch_lang(code):
         if code == "EN":
@@ -927,3 +833,4 @@ if __name__ == "__main__":

 # app.py — Universal Conlang Translator (Max Compresión Exacta)
+# Archivos requeridos en la raíz:
 #  - lexicon_minimax.json
 #  - lexicon_komin.json
 #  - lexicon_master.json
 # ------------ Normalización ------------
 WORD_RE = re.compile(r"[A-Za-zÁÉÍÓÚÜÑáéíóúüñ]+", re.UNICODE)
 STRIP = str.maketrans("ÁÉÍÓÚÜÑáéíóúüñ", "AEIOUUNaeiouun")
 # Normalize a Spanish word into a lexicon lookup key: a falsy input is treated
 # as "", the text is lower-cased, every character that is not a-z or one of
 # á é í ó ú ü ñ is deleted, and the STRIP table then replaces the remaining
 # accented letters with their plain ASCII counterparts.
+def norm_es(w: str) -> str: return re.sub(r"[^a-záéíóúüñ]", "", (w or "").lower()).translate(STRIP)
 # Normalize an English word into a lexicon lookup key: a falsy input is treated
 # as "", the text is lower-cased and every character outside a-z is deleted.
+def norm_en(w: str) -> str: return re.sub(r"[^a-z]", "", (w or "").lower())
 # ------------ Carga de léxicos ------------
 def load_json(path: str):
     """Load and parse a UTF-8 encoded JSON file.

     Args:
         path: Filesystem path of the JSON document.

     Returns:
         The parsed JSON value, or ``None`` when nothing exists at ``path``.

     Raises:
         json.JSONDecodeError: If the file exists but does not contain valid JSON.
     """
     # EAFP: open directly instead of probing with os.path.exists() first, so a
     # file that disappears between the check and the open cannot crash the
     # loader. NotADirectoryError is included because exists() also reported
     # False for paths whose parent component is a regular file.
     try:
         with open(path, "r", encoding="utf-8") as f:
             return json.load(f)
     except (FileNotFoundError, NotADirectoryError):
         return None
 def load_lexicons():
     mm = load_json(LEX_MINI) or {}
     if isinstance(master, dict) and "entries" in master:
         for e in master["entries"]:
+            es = norm_es(str(e.get("lemma_es",""))); en = norm_en(str(e.get("lemma_en","")))
+            mi = str(e.get("minimax","")); ko = str(e.get("komin",""))
             if es and en:
+                es2en_lemma.setdefault(es, en); en2es_lemma.setdefault(en, es)
             if en and mi: en2mini.setdefault(en, mi)
             if en and ko: en2komi.setdefault(en, ko)
     mini2en = {v:k for k,v in en2mini.items()}
     komi2en = {v:k for k,v in en2komi.items()}
     return (es2mini, es2komi, mini2es, komi2es,
             en2mini, en2komi, mini2en, komi2en,
             es2en_lemma, en2es_lemma, master)
     std = base64.b64encode(b).decode("ascii")
     trans = str.maketrans("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/", alphabet)
     return std.translate(trans).rstrip("=")
 # Decode text written in a custom 64-symbol alphabet back into raw bytes.
 # `alphabet` is mapped position-by-position onto the standard Base64 alphabet,
 # so it is expected to hold exactly 64 characters (str.maketrans requires both
 # arguments to have equal length).
 def from_custom_b64(s: str, alphabet: str) -> bytes:
     trans = str.maketrans(alphabet, "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
     # Re-create the "=" padding up to a multiple of 4 characters before decoding
     # (presumably because the matching encoder strips it — confirm against
     # to_custom_b64).
+    std = s.translate(trans); pad = "=" * ((4 - len(std) % 4) % 4)
     return base64.b64decode(std + pad)
 # Encode an out-of-vocabulary token for Minimax output: "~" followed by the
 # token's UTF-8 bytes written in the ALPHA_MINI64 custom Base64 alphabet.
 def enc_oov_minimax(token: str) -> str: return "~" + to_custom_b64(token.encode("utf-8"), ALPHA_MINI64)
 try:
     import spacy
     try:
+        nlp_es = spacy.load("es_core_news_sm"); nlp_en = spacy.load("en_core_web_sm"); USE_SPACY = True
     except Exception:
         nlp_es = nlp_en = None
 except Exception:
 # Return the normalized lemma of a spaCy-style token.
 # Uses tok.lemma_ when that attribute exists and is non-empty, otherwise falls
 # back to tok.text. The result is normalized with norm_es when src_lang is
 # "Español" and with norm_en for any other value.
 def lemma_of(tok, src_lang: str) -> str:
     if src_lang == "Español":
+        return norm_es(tok.lemma_ if getattr(tok,"lemma_","") else tok.text)
     else:
+        return norm_en(tok.lemma_ if getattr(tok,"lemma_","") else tok.text)
+# ------------ Detección simple ------------
 # Return True when the document text contains a "?" anywhere.
 # NOTE(review): despite the name, this flags questions rather than
 # affirmative/negative polarity; callers store the result as `is_q`.
 # An object without a `text` attribute yields False.
+def detect_polarity(doc) -> bool: return "?" in getattr(doc,"text","")
 # Return True if any token marks negation: either its dependency label is
 # "neg" or its lower-cased form is one of "no", "not", "n't".
 # Attributes are read with getattr defaults, so tokens lacking `dep_` or
 # `lower_` simply do not match.
 def detect_neg(doc) -> bool:
     for t in doc:
+        if getattr(t,"dep_","")=="neg" or getattr(t,"lower_","").lower() in ("no","not","n't"):
             return True
     return False
 # Classify the tense of the clause root as "Past", "Fut" or "Pres".
 # The root's own morphology string is checked first (Past, then Fut, then
 # Pres). If it carries no Tense feature, the root's AUX children are scanned.
 # The result defaults to "Pres" when nothing matches.
 def detect_tense(root):
+    m = str(getattr(root,"morph",""))
     if "Tense=Past" in m: return "Past"
     if "Tense=Fut"  in m: return "Fut"
     if "Tense=Pres" in m: return "Pres"
     # No tense on the root itself: look for a past-tense auxiliary or the
     # auxiliary "will" among its children.
+    for c in getattr(root,"children",[]):
+        if getattr(c,"pos_","")=="AUX":
+            cm = str(getattr(c,"morph",""))
             if "Tense=Past" in cm: return "Past"
+            if getattr(c,"lower_","").lower()=="will": return "Fut"
     return "Pres"
 # Split a parsed sentence into its root and the root's direct dependents.
 # Returns (root, subs, objs, obls, advs), where each list holds tokens in
 # ascending order of their `i` attribute (0 when the attribute is missing).
 def extract_core(doc):
     tokens = list(doc)
     # Root = first token labelled ROOT with POS VERB or AUX. Fallbacks: the
     # first token, or the doc object itself when there are no tokens at all.
+    root = next((t for t in tokens if getattr(t,"dep_","")=="ROOT" and getattr(t,"pos_","") in ("VERB","AUX")), tokens[0] if tokens else doc)
     subs, objs, obls, advs = [], [], [], []
     # Only direct children of the root are bucketed, by dependency label;
     # adverbial dependents are kept only when their POS is ADV.
+    for t in getattr(root,"children",[]):
+        dep = getattr(t,"dep_",""); pos = getattr(t,"pos_","")
         if dep in ("nsubj","nsubj:pass","csubj"): subs.append(t)
         elif dep in ("obj","dobj","iobj"):        objs.append(t)
         elif dep in ("obl","pobj"):               obls.append(t)
         elif dep in ("advmod","advcl") and pos=="ADV": advs.append(t)
+    for arr in (subs,objs,obls,advs): arr.sort(key=lambda x: getattr(x,"i",0))
     return root, subs, objs, obls, advs
 def _person_of_doc(doc, src_lang: str) -> Optional[str]:
     try:
         tokens = list(doc)
+        root = next((t for t in tokens if getattr(t,"dep_","")=="ROOT"), tokens[0])
+        subj = next((t for t in getattr(root,"children",[]) if getattr(t,"dep_","").startswith("nsubj")), None)
         if subj is None: return None
+        plur = ("Number=Plur" in str(getattr(subj,"morph",""))) if src_lang=="Español" else (getattr(subj,"tag_","") in ("NNS","NNPS"))
+        low = getattr(subj,"lower_","").lower()
         if src_lang=="Español":
             if low in ("yo",): return "1p" if plur else "1s"
             if low in ("tú","vos"): return "2p" if plur else "2s"
             return "3p" if plur else "3s"
     except Exception:
         return None
 # Derive a person/number tag such as "1s" or "3p" for the clause root.
 # When the root's morphology string contains a Person feature, the tag is
 # built from its Person= and Number= features. Otherwise the decision is
 # delegated to _person_of_doc on the root's document.
 def detect_person(root, src_lang: str) -> Optional[str]:
     # Defaults used when a feature is absent: third person, singular.
+    m = str(getattr(root,"morph","")); person_str, number_str = "3","s"
     if "Person=" in m:
         for feat in m.split("|"):
             if feat.startswith("Person="): person_str = feat.split("=")[1]
             # Only "Plur" maps to "p"; every other Number value maps to "s".
+            elif feat.startswith("Number="): number_str = "p" if feat.split("=")[1]=="Plur" else "s"
         return person_str + number_str
     # NOTE(review): `root.doc` is accessed directly here, unlike the getattr
     # defaults used elsewhere — confirm root always exposes `doc`.
     return _person_of_doc(root.doc, src_lang)
+# ------------ Mapeo y fraseadores ------------
 # Map a Spanish lemma to its conlang code.
 # The lemma is normalized with norm_es and looked up in ES2MINI when target is
 # "Minimax-ASCII", or in ES2KOMI for any other target. A missing (or empty)
 # entry falls back to the out-of-vocabulary encoding of the normalized lemma.
 def code_es(lemma: str, target: str) -> str:
     lemma = norm_es(lemma)
+    if target=="Minimax-ASCII":
+        return ES2MINI.get(lemma) or enc_oov_minimax(lemma)
+    return ES2KOMI.get(lemma) or enc_oov_komin(lemma)
 # Map an English lemma to its conlang code.
 # The lemma is normalized with norm_en and looked up in EN2MINI when target is
 # "Minimax-ASCII", or in EN2KOMI for any other target. The `if TABLE else None`
 # guard skips the lookup when the English table is empty or None; in that case,
 # or when the lemma is missing, the out-of-vocabulary encoding is returned.
 def code_en(lemma: str, target: str) -> str:
     lemma = norm_en(lemma)
+    if target=="Minimax-ASCII":
         return (EN2MINI.get(lemma) if EN2MINI else None) or enc_oov_minimax(lemma)
+    return (EN2KOMI.get(lemma) if EN2KOMI else None) or enc_oov_komin(lemma)
+TAM_MINI = {"Pres":"P","Past":"T","Fut":"F","UNK":"P"}
+TAM_KOMI = {"Pres":"Ⓟ","Past":"Ⓣ","Fut":"Ⓕ","UNK":"Ⓟ"}
 def realize_minimax(doc, src_lang: str, drop_articles=True, zero_copula=True,
                     semi_lossless=False, person_hint="2s", remove_pronouns=False):
     root, subs, objs, obls, advs = extract_core(doc)
+    tense = detect_tense(root); is_q, is_neg = detect_polarity(doc), detect_neg(doc)
     vlem  = lemma_of(root, src_lang) if USE_SPACY else ("ser" if "?" in getattr(doc,"text","") else "estar")
     vcode = code_es(vlem, "Minimax-ASCII") if src_lang=="Español" else code_en(vlem, "Minimax-ASCII")
     tail = TAM_MINI.get(tense, "P")
+    if semi_lossless: tail += (detect_person(root, src_lang) or person_hint)
+    if is_neg: tail += "N";
     if is_q:   tail += "Q"
     if tail: vcode = f"{vcode}·{tail}"
         for t in tokens:
             if remove_pronouns:
                 txt = (getattr(t,"text","") or "").lower()
+                if (src_lang=="Español" and txt in PRON_ES) or (src_lang=="English" and txt in PRON_EN): continue
+            lem = lemma_of(t, src_lang) if USE_SPACY else getattr(t,"text","")
+            outs.append(code_es(lem,"Minimax-ASCII") if src_lang=="Español" else code_en(lem,"Minimax-ASCII"))
         return outs
+    S = realize_np(subs); O = realize_np(objs)+realize_np(obls)
     ADV=[]
     for a in advs:
+        lem = lemma_of(a, src_lang) if USE_SPACY else getattr(a,"text","")
+        ADV.append(code_es(lem,"Minimax-ASCII") if src_lang=="Español" else code_en(lem,"Minimax-ASCII"))
+    parts = S+O+ADV if (zero_copula and not semi_lossless and vlem in ("ser","estar","be") and tense=="Pres" and not is_neg and not is_q) else [vcode]+S+O+ADV
     return " ".join(p for p in parts if p)
 def realize_komin(doc, src_lang: str, drop_articles=True, zero_copula=True,
     tense, is_q, is_neg = detect_tense(root), detect_polarity(doc), detect_neg(doc)
     vlem  = lemma_of(root, src_lang) if USE_SPACY else ("ser" if "?" in getattr(doc,"text","") else "estar")
     vcode = code_es(vlem, "Kōmín-CJK") if src_lang=="Español" else code_en(vlem, "Kōmín-CJK")
+    P_SUBJ, P_OBJ = "ᵖ", "ᵒ"; NEG_M, Q_FIN = "̆", "？"
+    TAM = TAM_KOMI.get(tense,"Ⓟ")
+    if semi_lossless: TAM = TAM + f"[{detect_person(root, src_lang) or person_hint}]"
     def realize_np(tokens, particle):
         outs=[]
         for t in tokens:
             if remove_pronouns:
                 txt = (getattr(t,"text","") or "").lower()
+                if (src_lang=="Español" and txt in PRON_ES) or (src_lang=="English" and txt in PRON_EN): continue
+            lem = lemma_of(t, src_lang) if USE_SPACY else getattr(t,"text","")
+            outs.append((code_es(lem,"Kōmín-CJK") if src_lang=="Español" else code_en(lem,"Kōmín-CJK")) + particle)
         return outs
+    S = realize_np(subs, P_SUBJ); O = realize_np(objs+obls, P_OBJ)
     ADV=[]
     for a in advs:
+        lem = lemma_of(a, src_lang) if USE_SPACY else getattr(a,"text","")
+        ADV.append(code_es(lem,"Kōmín-CJK") if src_lang=="Español" else code_en(lem,"Kōmín-CJK"))
+    parts = S+O+ADV if (zero_copula and not semi_lossless and vlem in ("ser","estar","be") and tense=="Pres" and not is_neg and not is_q) else S+O+ADV+[vcode+TAM+("̆" if is_neg else "")]
     out = " ".join(parts)
     if is_q: out += " " + Q_FIN
     return out
         code = ES2MINI.get(key) if target=="Minimax-ASCII" else ES2KOMI.get(key)
         return code or (enc_oov_minimax(m.group(0)) if target=="Minimax-ASCII" else enc_oov_komin(m.group(0)))
     def repl_en(m):
+        key = norm_en(m.group(0)); table = EN2MINI if target=="Minimax-ASCII" else EN2KOMI
         if table and key in table: return table[key]
         return enc_oov_minimax(m.group(0)) if target=="Minimax-ASCII" else enc_oov_komin(m.group(0))
     repl = repl_es if src_lang=="Español" else repl_en
     return WORD_RE.sub(repl, text)
 def pluralize_es(word: str) -> str:
     """Return a rule-based Spanish plural of ``word``.

     Rules, applied in order:
       1. An empty string is returned unchanged.
       2. Words listed in the small exceptions table use their fixed plural.
       3. A final ``z`` becomes ``ces`` (luz -> luces).
       4. Words ending in a vowel a/e/i/o/u, or in stressed á/é/ó, take ``-s``
          (casa -> casas, tribu -> tribus, café -> cafés).
       5. Everything else takes ``-es`` (papel -> papeles).

     Args:
         word: Singular form, expected in lower case.

     Returns:
         The pluralized word.
     """
     if not word:
         # Nothing to pluralize; avoids producing a bare "es".
         return word
     exceptions = {"uno": "unos", "buen": "buenos", "hombre": "hombres"}
     if word in exceptions:
         return exceptions[word]
     if word.endswith("z"):
         return word[:-1] + "ces"
     # "u" and the stressed vowels á/é/ó also form their plural with -s; they
     # previously fell through to the consonant rule ("tribu" -> "tribues").
     if word.endswith(("a", "e", "i", "o", "u", "á", "é", "ó")):
         return word + "s"
     return word + "es"
 # Return a rule-based English plural of `word`.
 # Order of rules: fixed irregulars (man/woman/child); consonant + "y" -> "ies";
 # endings s/sh/ch/x/z -> add "es"; otherwise add "s".
 def pluralize_en(word: str) -> str:
     exceptions = {"man":"men","woman":"women","child":"children"}
     if word in exceptions: return exceptions[word]
     # The len(word)>1 guard keeps word[-2] in range for the single letter "y".
+    if word.endswith("y") and len(word)>1 and word[-2] not in "aeiou": return word[:-1]+"ies"
+    if word.endswith(("s","sh","ch","x","z")): return word+"es"
+    return word+"s"
 # Pluralize `word` for the target language: Spanish rules when tgt_lang is
 # "Español", English rules for any other value.
+def pluralize(word: str, tgt_lang: str) -> str: return pluralize_es(word) if tgt_lang=="Español" else pluralize_en(word)
 # Matches a Minimax verb token of the form "<stem>·<tail>". The tail is built
 # from a tense letter (P/T/F), an optional person digit (1/2/3) with number
 # (s/p), and the optional N (negation) and Q (question) flags. The digit "3"
 # must be accepted: third person is the default person tag, and without it
 # tokens such as "stem·P3s" are not recognized as verbs when decoding.
 mini_tail_re = re.compile(r"^(?P<stem>.+?)·(?P<tail>[PTFNQ123sp]+)$")
     if not text.strip(): return ""
     code2es = MINI2ES if source=="Minimax-ASCII" else KOMI2ES
     code2en = MINI2EN if source=="Minimax-ASCII" else KOMI2EN
+    if source=="Kōmín-CJK":
+        text = text.replace("？","?").replace(" "," ")
+        return " ".join([code2es.get(w,w) for w in text.split() if w!="?"])
+    tokens = text.split();
     if not tokens: return ""
+    lemma_tokens, pl_flags = [], []; verb_idx=-1; verb_lemma=None; verb_tense="Pres"; verb_person="3s"; has_q=False; is_neg=False
     for part in tokens:
+        look = part.replace("[PL]",""); had_pl = "[PL]" in part; pl_flags.append(had_pl)
         m = mini_tail_re.match(look)
         if m:
+            verb_idx = len(lemma_tokens); stem=m.group("stem"); tail=m.group("tail")
             vlem_es = code2es.get(stem); vlem_en = code2en.get(stem) if code2en else None
             vlem = vlem_es if tgt_lang=="Español" else (vlem_en or vlem_es or stem)
+            if not vlem: vlem = dec_oov_minimax(stem) if is_oov_minimax(stem) else stem
             lemma_tokens.append(vlem); pl_flags.append(False)
             if tail:
                 if tail[0] in "PTF":
+                    verb_tense = {"P":"Pres","T":"Past","F":"Fut"}[tail[0]]; pos=1
                     if len(tail)>pos and tail[pos] in "123":
+                        pos+=1; verb_person = tail[pos-1] + (tail[pos] if len(tail)>pos and tail[pos] in "sp" else "s")
                         if len(tail)>pos and tail[pos] in "sp": pos+=1
+                    is_neg = "N" in tail[pos:]; has_q = "Q" in tail[pos:]
+            verb_lemma = vlem; continue
         w_es = code2es.get(look); w_en = code2en.get(look) if code2en else None
         w = w_es if tgt_lang=="Español" else (w_en or w_es or look)
         if not w: w = dec_oov_minimax(look) if is_oov_minimax(look) else look
         lemma_tokens.append(w); pl_flags.append(had_pl)
     out_parts=[]
     for idx, lem in enumerate(lemma_tokens):
+        if idx==verb_idx:
             v_conj = _es_conj(verb_lemma, verb_tense, verb_person) if tgt_lang=="Español" else _en_conj(verb_lemma, verb_tense, verb_person)
             if is_neg: v_conj = ("no " if tgt_lang=="Español" else "not ") + v_conj
             out_parts.append(v_conj)
         else:
             out_parts.append(pluralize(lem, tgt_lang) if pl_flags[idx] else lem)
     out_text = " ".join(out_parts)
     if has_q:
         start_q = "¿" if tgt_lang=="Español" else ""
 # Conjugate a regular Spanish verb given as an infinitive.
 #   lemma  - infinitive; returned unchanged unless it ends in -ar, -er or -ir
 #   tense  - "Pres" or "Past" (preterite); any other value uses the future
 #   person - "1s","2s","3s","1p","2p","3p"; unknown values fall back to "3s"
 def _es_conj_regular(lemma, tense, person):
     if not lemma.endswith(("ar","er","ir")): return lemma
     stem, vtype = lemma[:-2], lemma[-2:]
     # Present-tense endings, keyed by infinitive class and then by person.
+    pres={"ar":{"1s":"o","2s":"as","3s":"a","1p":"amos","2p":"áis","3p":"an"},
+          "er":{"1s":"o","2s":"es","3s":"e","1p":"emos","2p":"éis","3p":"en"},
+          "ir":{"1s":"o","2s":"es","3s":"e","1p":"imos","2p":"ís","3p":"en"}}
     # Preterite endings, same layout.
+    pret={"ar":{"1s":"é","2s":"aste","3s":"ó","1p":"amos","2p":"asteis","3p":"aron"},
+          "er":{"1s":"í","2s":"iste","3s":"ió","1p":"imos","2p":"isteis","3p":"ieron"},
+          "ir":{"1s":"í","2s":"iste","3s":"ió","1p":"imos","2p":"isteis","3p":"ieron"}}
     # Future endings are shared by all three classes.
+    fut={"1s":"é","2s":"ás","3s":"á","1p":"emos","2p":"éis","3p":"án"}
     if tense=="Pres": return stem + pres[vtype].get(person, pres[vtype]["3s"])
     if tense=="Past": return stem + pret[vtype].get(person, pret[vtype]["3s"])
     # Future endings attach to the whole infinitive, not to the stem.
     return lemma + fut.get(person, fut["3s"])
 def _es_conj(lemma, tense, person):
     if lemma=="ser":
         tab={"Pres":{"1s":"soy","2s":"eres","3s":"es","1p":"somos","2p":"sois","3p":"son"},
              "Fut":{"1s":"iré","2s":"irás","3s":"irá","1p":"iremos","2p":"iréis","3p":"irán"}}
         return tab[tense].get(person, tab[tense]["3s"])
     return _es_conj_regular(lemma, tense, person)
 def _en_conj(lemma, tense, person):
     if lemma=="be":
         if tense=="Pres": return {"1s":"am","2s":"are","3s":"is","1p":"are","2p":"are","3p":"are"}.get(person,"is")
                       remove_pronouns: bool) -> str:
     nlp = nlp_es if src_lang=="Español" else nlp_en
     doc = nlp(text)
+    if target=="Minimax-ASCII":
+        return realize_minimax(doc, src_lang, drop_articles, zero_copula, semi_lossless=True, remove_pronouns=remove_pronouns)
     else:
+        return realize_komin(doc, src_lang, drop_articles, zero_copula, semi_lossless=True, remove_pronouns=remove_pronouns)
 def build_sentence(text: str, src_lang: str, target: str,
                    drop_articles: bool, zero_copula: bool, mode: str,
                    max_comp_exact: bool = False, remove_pronouns: bool = False) -> str:
     if not text.strip(): return ""
     if USE_SPACY:
+        core = _build_with_spacy(text, src_lang, target, drop_articles, zero_copula, True, remove_pronouns)
     else:
         if remove_pronouns:
             pron = PRON_ES if src_lang=="Español" else PRON_EN
             tokens = re.findall(r"\w+|[^\w\s]+", text)
         if orig is not None:
             core = strip_custom_sidecar(text)
             es_lemmas = decode_simple(core, src, "Español")
+            words = re.findall(r"\w+|[^\w\s]+", es_lemmas); out=[]
             for w in words:
                 if re.fullmatch(r"\w+", w):
                     code = ES2MINI.get(norm_es(w)) if tgt=="Minimax-ASCII" else ES2KOMI.get(norm_es(w))
                     out.append(code or (enc_oov_minimax(w) if tgt=="Minimax-ASCII" else enc_oov_komin(w)))
+                else: out.append(w)
             return custom_sidecar_enc(" ".join(out), orig)
         es_lemmas = decode_simple(text, src, "Español")
+        words = re.findall(r"\w+|[^\w\s]+", es_lemmas); out=[]
         for w in words:
             if re.fullmatch(r"\w+", w):
                 code = ES2MINI.get(norm_es(w)) if tgt=="Minimax-ASCII" else ES2KOMI.get(norm_es(w))
                 out.append(code or (enc_oov_minimax(w) if tgt=="Minimax-ASCII" else enc_oov_komin(w)))
+            else: out.append(w)
         return " ".join(out)
     return "[No soportado]"
     if not text.strip(): return ""
     if not USE_SPACY: return text
     nlp = nlp_es if src_lang=="Español" else nlp_en
+    doc = nlp(text); out=[]
     for t in doc:
+        if not getattr(t,"is_alpha",False): out.append(getattr(t,"text","")); continue
         lem = lemma_of(t, src_lang)
         if src_lang=="Español":
             tr = ES2EN_LEMMA.get(lem); out.append(tr if tr else lem)
     return conlang, back
 # =====================================================================================
+# ========================= UI bilingüe y explicaciones claras ========================
 # =====================================================================================
 ALL_LANGS = ["Español","English","Minimax-ASCII","Kōmín-CJK"]
+# Secciones de ayuda (ES/EN) — todas en el MISMO nivel, como acordeones
 COMPACT_ES = """
+**📏 Compactación orientativa (haz clic para desplegar)**
+- Sin casillas: **0%**
+- Omitir artículos: **~10–15%**
+- Cópula cero (presente afirm.): **~5–10%**
+- Ambas (artículos + cópula): **~15–20%**
+- Máx. Compresión Exacta: **~40–60%** en textos >100 caracteres (con `~...`). En textos muy cortos puede no reducir.
 """
 COMPACT_EN = """
+**📏 Typical compaction (click to expand)**
+- No options: **0%**
+- Drop articles: **~10–15%**
+- Zero copula (present affirmative): **~5–10%**
+- Both (articles + copula): **~15–20%**
+- Max Exact Compression: **~40–60%** for >100 chars (`~...`). Very short texts may not shrink.
+"""
+EXPLAIN_TAB_TRANSLATE_ES = """
+**🔁 Traducir (haz clic para desplegar)**
+Convierte el *Texto* al *Destino*. Funciona para **cualquier combinación**: Español, English, Minimax-ASCII, Kōmín-CJK.
+- **Máx. Compresión Exacta** añade `~...` con el original comprimido para poder **recuperarlo exactamente** al decodificar.
+- **Omitir artículos / Cópula cero / Quitar pronombres** se aplican **solo cuando el destino es conlang** (Minimax/Kōmín).
+"""
+EXPLAIN_TAB_BUILD_ES = """
+**🛠️ Construir (ES/EN → Conlang) (haz clic para desplegar)**
+Fuerza la salida **en conlang** desde Español o Inglés aplicando reglas de fraseo (orden, partículas/TAM) y tus **checkbox**.
+Útil para ver cómo quedaría la frase **directamente en Minimax/Kōmín** sin ambigüedad de direcciones.
+"""
+EXPLAIN_TAB_DECODE_ES = """
+**🗝️ Decodificar (Conlang → ES/EN) (haz clic para desplegar)**
+Convierte **Minimax/Kōmín** a **Español o Inglés**.
+- Si hay `~...`, devuelve el **original exacto**.
+- Sin `~...`, la vuelta es **semi-lossless** usando el léxico y pistas simples.
+"""
+EXPLAIN_TAB_ROUNDTRIP_ES = """
+**🔄 Prueba ida→vuelta (haz clic para desplegar)**
+Ejecuta **(ES/EN → Conlang) → (Conlang → ES/EN)** para comprobar **reversibilidad**.
+Con **Máx. Compresión Exacta**, la vuelta coincide **bit a bit** con la entrada.
+"""
+EXPLAIN_CHECKBOX_ES = """
+**☑️ ¿Qué hace cada checkbox? (haz clic para desplegar)**
+- **Omitir artículos**: quita *el/la/los/las* (ES) y *a/an/the* (EN) → **~10–15%**.
+- **Cópula cero (presente afirm.)**: esconde *ser/estar/be* cuando suena natural → **~5–10%** extra.
+- **Quitar pronombres**: elimina pronombres de sujeto/objeto **evidentes** (ahorro variable).
+- **Máx. Compresión Exacta**: añade `~...` con zlib para recuperación exacta (**~40–60%** en >100 caracteres).
 """
 LEXICON_BUILD_ES = """
+**ℹ️ Léxico (OMW → Minimax/Kōmín) (haz clic para desplegar)**
+1) Desde **OMW/WordNet 1.4** se extraen **lemas ES** y sus **equivalentes EN** por sinset.
+2) Se normalizan y ordenan por **frecuencia** (wordfreq).
 3) Opcional: **spaCy** refina lemas; **Argos** puede rellenar EN faltantes.
+4) Se asignan **códigos compactos** con alfabetos barajados por **SEED** hasta `MAXLEN_MINI`/`MAXLEN_CJK`.
+5) Se exportan: `lexicon_minimax.json`, `lexicon_komin.json`, `lexicon_master.json` (+TSV).
+**Vista previa** de `lexicon_master.json` abajo.
+"""
+# (EN) versiones cortas
+EXPLAIN_TAB_TRANSLATE_EN = """
+**🔁 Translate (click to expand)** — Converts *Text* to *Target* (any pair: Spanish/English/Minimax/Kōmín).
+With **Max Exact Compression**, appends `~...` to recover the **exact original**. Checkboxes apply when **target is conlang**.
+"""
+EXPLAIN_TAB_BUILD_EN = """
+**🛠️ Build (ES/EN → Conlang) (click to expand)** — Forces conlang output (Minimax/Kōmín) with phrasing rules and your checkboxes.
+"""
+EXPLAIN_TAB_DECODE_EN = """
+**🗝️ Decode (Conlang → ES/EN) (click to expand)** — If `~...` is present, returns the **bit-perfect original**; otherwise semi-lossless.
+"""
+EXPLAIN_TAB_ROUNDTRIP_EN = """
+**🔄 Round-trip (click to expand)** — Runs (ES/EN → Conlang) → (Conlang → ES/EN) to verify reversibility.
+"""
+EXPLAIN_CHECKBOX_EN = """
+**☑️ Checkboxes (click to expand)**
+- **Drop articles**: ~10–15%
+- **Zero copula (present affirm.)**: ~5–10% extra
+- **Remove pronouns**: variable
+- **Max Exact Compression**: ~40–60% for >100 chars (`~...`), exact recovery.
 """
 LEXICON_BUILD_EN = """
+**ℹ️ Lexicon (OMW → Minimax/Kōmín) (click to expand)** — OMW/WordNet ES lemmas + EN counterparts, normalized & frequency-sorted; optional spaCy/Argos; codes assigned with SEED-shuffled alphabets up to MAXLEN; exports JSON/TSV. Preview below.
 """
 def master_preview(n: int = 20) -> List[List[Any]]:
     try:
         entries = (MASTER_OBJ or {}).get("entries", [])
     except Exception:
         return [["lemma_es","lemma_en","minimax","komin"], ["(no data)","","",""]]
+# ========================= Grupos ES / EN =========================
 def make_group_es():
+    with gr.Group(visible=True) as g:
         gr.Markdown("# 🌐 Universal Conlang Translator · Compresión Exacta (ES)")
+        # Acordeones de EXPLICACIÓN — todos al MISMO nivel
         with gr.Row():
             with gr.Column():
+                with gr.Accordion(EXPLAIN_TAB_TRANSLATE_ES, open=False): pass
+                with gr.Accordion(EXPLAIN_TAB_BUILD_ES, open=False): pass
+                with gr.Accordion(EXPLAIN_TAB_DECODE_ES, open=False): pass
+                with gr.Accordion(EXPLAIN_TAB_ROUNDTRIP_ES, open=False): pass
             with gr.Column():
+                with gr.Accordion(EXPLAIN_CHECKBOX_ES, open=False): gr.Markdown(COMPACT_ES)
+                with gr.Accordion(LEXICON_BUILD_ES, open=False):
+                    n_rows = gr.Slider(5, 100, value=20, step=5, label="Filas a mostrar")
+                    table = gr.Dataframe(headers=["lemma_es","lemma_en","minimax","komin"], row_count=1, interactive=False)
+                    gr.Button("Actualizar vista").click(lambda n: master_preview(int(n)), [n_rows], [table])
+        # ==== Tabs funcionales ====
         with gr.Tab("🔁 Traducir"):
             with gr.Row():
                 uni_src = gr.Dropdown(ALL_LANGS, value="Español", label="Fuente")
                 uni_tgt = gr.Dropdown(ALL_LANGS, value="Minimax-ASCII", label="Destino")
             uni_text = gr.Textbox(lines=3, label="Texto", placeholder="Ej.: Hola, ¿cómo estás?", show_copy_button=True)
             with gr.Row():
+                uni_drop = gr.Checkbox(True,  label="Omitir artículos (ES/EN → conlang)")
+                uni_zero = gr.Checkbox(False, label="Cópula cero (presente afirm.)")
+                uni_rmpr = gr.Checkbox(False, label="Quitar pronombres")
+                uni_maxc = gr.Checkbox(False, label="Máx. Compresión Exacta (sidecar `~...`)")
             uni_mode = gr.Dropdown(["Semi-lossless"], value="Semi-lossless", visible=False)
             with gr.Row():
+                btn_tr = gr.Button("🚀 Traducir", variant="primary")
+                btn_tr_cl = gr.Button("🧹 Limpiar")
             uni_out = gr.Textbox(lines=6, label="Traducción", show_copy_button=True)
+            btn_tr.click(universal_translate,
+                         [uni_text, uni_src, uni_tgt, uni_drop, uni_zero, uni_mode, uni_maxc, uni_rmpr],
+                         [uni_out])
+            btn_tr_cl.click(lambda: ("",""), None, [uni_text, uni_out])
+            with gr.Accordion("Ayuda rápida (¿qué hace este botón?)", open=False):
+                gr.Markdown(EXPLAIN_TAB_TRANSLATE_ES + "\n\n" + COMPACT_ES)
         with gr.Tab("🛠️ Construir (ES/EN → Conlang)"):
             with gr.Row():
                 target   = gr.Dropdown(["Minimax-ASCII","Kōmín-CJK"], value="Minimax-ASCII", label="Conlang")
             text_in = gr.Textbox(lines=3, label="Frase", show_copy_button=True)
             with gr.Row():
+                drop_articles  = gr.Checkbox(True,  label="Omitir artículos")
+                zero_copula    = gr.Checkbox(False, label="Cópula cero (presente afirm.)")
+                rm_pron_build  = gr.Checkbox(False, label="Quitar pronombres")
+                max_comp_build = gr.Checkbox(False, label="Máx. Compresión Exacta")
             mode_build = gr.Dropdown(["Semi-lossless"], value="Semi-lossless", visible=False)
             with gr.Row():
+                btn_b = gr.Button("🏗️ Construir", variant="primary")
+                btn_b_cl = gr.Button("🧹 Limpiar")
             out = gr.Textbox(lines=6, label="Salida", show_copy_button=True)
+            btn_b.click(build_sentence,
+                        [text_in, src_lang, target, drop_articles, zero_copula, mode_build, max_comp_build, rm_pron_build],
+                        [out])
+            btn_b_cl.click(lambda: ("",""), None, [text_in, out])
+            with gr.Accordion("Ayuda rápida (¿qué hace este botón?)", open=False):
+                gr.Markdown(EXPLAIN_TAB_BUILD_ES + "\n\n" + COMPACT_ES)
         with gr.Tab("🗝️ Decodificar (Conlang → ES/EN)"):
             with gr.Row():
                 return decode_simple(strip_custom_sidecar(strip_sidecar_b85(text)), src, tgt)
             with gr.Row():
+                btn_d = gr.Button("🔓 Decodificar", variant="primary")
+                btn_d_cl = gr.Button("🧹 Limpiar")
+            btn_d.click(decode_lossless_aware, [code_in, src_code, tgt_lang], [out3])
+            btn_d_cl.click(lambda: ("",""), None, [code_in, out3])
+            with gr.Accordion("Ayuda rápida (¿qué hace este botón?)", open=False):
+                gr.Markdown(EXPLAIN_TAB_DECODE_ES)
         with gr.Tab("🔄 Prueba ida→vuelta"):
             with gr.Row():
                 rt_src = gr.Dropdown(["Español","English"], value="Español", label="Fuente")
                 rt_tgt = gr.Dropdown(["Minimax-ASCII","Kōmín-CJK"], value="Minimax-ASCII", label="Conlang")
             rt_text = gr.Textbox(lines=3, label="Frase", show_copy_button=True)
+            rt_max_comp = gr.Checkbox(False, label="Máx. Compresión Exacta")
             rt_mode = gr.Dropdown(["Semi-lossless"], value="Semi-lossless", visible=False)
             rt_out_conlang = gr.Textbox(lines=3, label="Conlang (ida)", show_copy_button=True)
             rt_out_back    = gr.Textbox(lines=3, label="Vuelta", show_copy_button=True)
             with gr.Row():
                 btn_rt = gr.Button("▶️ Probar", variant="primary")
+                btn_rt_cl = gr.Button("🧹 Limpiar")
             btn_rt.click(round_trip, [rt_text, rt_src, rt_tgt, rt_mode, rt_max_comp], [rt_out_conlang, rt_out_back])
+            btn_rt_cl.click(lambda: ("","",""), None, [rt_text, rt_out_conlang, rt_out_back])
+            with gr.Accordion("Ayuda rápida (¿qué hace este botón?)", open=False):
+                gr.Markdown(EXPLAIN_TAB_ROUNDTRIP_ES)
+    return g
 def make_group_en():
     # Builds the English-language UI inside a gr.Group that is created hidden
     # (visible=False) and returns that group to the caller.
     # NOTE(review): this text is a rendered diff; each line starts with a '+' or
     # space marker and some unchanged lines are collapsed out of view. As a
     # result uni_src, src_lang, code_in, src_code, tgt_lang and out3 are used
     # below without a visible definition -- confirm against the full file.
+    with gr.Group(visible=False) as g:
         gr.Markdown("# 🌐 Universal Conlang Translator · Max Exact Compression (EN)")
         with gr.Row():
             with gr.Column():
                 # Four accordions whose *label* is the explanation constant and
                 # whose body is empty (`pass`).
+                with gr.Accordion(EXPLAIN_TAB_TRANSLATE_EN, open=False): pass
+                with gr.Accordion(EXPLAIN_TAB_BUILD_EN, open=False): pass
+                with gr.Accordion(EXPLAIN_TAB_DECODE_EN, open=False): pass
+                with gr.Accordion(EXPLAIN_TAB_ROUNDTRIP_EN, open=False): pass
             with gr.Column():
                 # Checkbox explanation: label from EXPLAIN_CHECKBOX_EN, body is COMPACT_EN.
+                with gr.Accordion(EXPLAIN_CHECKBOX_EN, open=False): gr.Markdown(COMPACT_EN)
                 # Lexicon preview: a slider (5..100, step 5, default 20) picks the row
                 # count; "Refresh" passes it as int to master_preview and writes the
                 # result into the read-only table.
+                with gr.Accordion(LEXICON_BUILD_EN, open=False):
+                    n_rows = gr.Slider(5, 100, value=20, step=5, label="Rows to show")
+                    table = gr.Dataframe(headers=["lemma_es","lemma_en","minimax","komin"], row_count=1, interactive=False)
+                    gr.Button("Refresh").click(lambda n: master_preview(int(n)), [n_rows], [table])
         # ---- Tab: universal translation ----
         with gr.Tab("🔁 Translate"):
             with gr.Row():
                 uni_tgt = gr.Dropdown(ALL_LANGS, value="Minimax-ASCII", label="Target")
             uni_text = gr.Textbox(lines=3, label="Text", placeholder="e.g., Hello, how are you?", show_copy_button=True)
             with gr.Row():
+                uni_drop = gr.Checkbox(True,  label="Drop articles (ES/EN → conlang)")
+                uni_zero = gr.Checkbox(False, label="Zero copula (present affirm.)")
+                uni_rmpr = gr.Checkbox(False, label="Remove pronouns")
+                uni_maxc = gr.Checkbox(False, label="Max Exact Compression (sidecar `~...`)")
             # Hidden dropdown with a single choice; still passed to the handler as the mode input.
             uni_mode = gr.Dropdown(["Semi-lossless"], value="Semi-lossless", visible=False)
             with gr.Row():
+                btn_tr = gr.Button("🚀 Translate", variant="primary")
+                btn_tr_cl = gr.Button("🧹 Clear")
             uni_out = gr.Textbox(lines=6, label="Translation", show_copy_button=True)
             # Input order differs from the on-screen checkbox order: mode comes before
             # max-compression and remove-pronouns. Presumably this mirrors the
             # parameter order of universal_translate -- verify against its signature.
+            btn_tr.click(universal_translate,
+                         [uni_text, uni_src, uni_tgt, uni_drop, uni_zero, uni_mode, uni_maxc, uni_rmpr],
+                         [uni_out])
             # Clear: returns two empty strings, one per listed output component.
+            btn_tr_cl.click(lambda: ("",""), None, [uni_text, uni_out])
+            with gr.Accordion("Quick help (what does this button do?)", open=False):
+                gr.Markdown(EXPLAIN_TAB_TRANSLATE_EN + "\n\n" + COMPACT_EN)
         # ---- Tab: build a conlang sentence from ES/EN ----
         with gr.Tab("🛠️ Build (ES/EN → Conlang)"):
             with gr.Row():
                 target   = gr.Dropdown(["Minimax-ASCII","Kōmín-CJK"], value="Minimax-ASCII", label="Conlang")
             text_in = gr.Textbox(lines=3, label="Sentence", show_copy_button=True)
             with gr.Row():
+                drop_articles  = gr.Checkbox(True,  label="Drop articles")
+                zero_copula    = gr.Checkbox(False, label="Zero copula (present affirm.)")
+                rm_pron_build  = gr.Checkbox(False, label="Remove pronouns")
+                max_comp_build = gr.Checkbox(False, label="Max Exact Compression")
             mode_build = gr.Dropdown(["Semi-lossless"], value="Semi-lossless", visible=False)
             with gr.Row():
+                btn_b = gr.Button("🏗️ Build", variant="primary")
+                btn_b_cl = gr.Button("🧹 Clear")
             out = gr.Textbox(lines=6, label="Output", show_copy_button=True)
             # Same input ordering as the Translate tab (mode before max-comp / remove-pronouns).
+            btn_b.click(build_sentence,
+                        [text_in, src_lang, target, drop_articles, zero_copula, mode_build, max_comp_build, rm_pron_build],
+                        [out])
+            btn_b_cl.click(lambda: ("",""), None, [text_in, out])
+            with gr.Accordion("Quick help (what does this button do?)", open=False):
+                gr.Markdown(EXPLAIN_TAB_BUILD_EN + "\n\n" + COMPACT_EN)
         # ---- Tab: decode conlang back to ES/EN ----
         with gr.Tab("🗝️ Decode (Conlang → ES/EN)"):
             with gr.Row():
                 # NOTE(review): this `return` looks like the tail of a helper whose
                 # surrounding lines are collapsed in the diff view (possibly
                 # decode_lossless_aware, used below) -- confirm in the full file.
                 return decode_simple(strip_custom_sidecar(strip_sidecar_b85(text)), src, tgt)
             with gr.Row():
+                btn_d = gr.Button("🔓 Decode", variant="primary")
+                btn_d_cl = gr.Button("🧹 Clear")
+            btn_d.click(decode_lossless_aware, [code_in, src_code, tgt_lang], [out3])
+            btn_d_cl.click(lambda: ("",""), None, [code_in, out3])
+            with gr.Accordion("Quick help (what does this button do?)", open=False):
+                gr.Markdown(EXPLAIN_TAB_DECODE_EN)
         # ---- Tab: round-trip test (source -> conlang -> back) ----
         with gr.Tab("🔄 Round-trip"):
             with gr.Row():
                 rt_src = gr.Dropdown(["Español","English"], value="English", label="Source")
                 rt_tgt = gr.Dropdown(["Minimax-ASCII","Kōmín-CJK"], value="Minimax-ASCII", label="Conlang")
             rt_text = gr.Textbox(lines=3, label="Sentence", show_copy_button=True)
+            rt_max_comp = gr.Checkbox(False, label="Max Exact Compression")
             rt_mode = gr.Dropdown(["Semi-lossless"], value="Semi-lossless", visible=False)
             rt_out_conlang = gr.Textbox(lines=3, label="Outward (conlang)", show_copy_button=True)
             rt_out_back    = gr.Textbox(lines=3, label="Back", show_copy_button=True)
             with gr.Row():
                 btn_rt = gr.Button("▶️ Test", variant="primary")
+                btn_rt_cl = gr.Button("🧹 Clear")
             # round_trip receives five inputs and fills two outputs (conlang text and back-translation).
             btn_rt.click(round_trip, [rt_text, rt_src, rt_tgt, rt_mode, rt_max_comp], [rt_out_conlang, rt_out_back])
             # Clear: three empty strings for the input box and both output boxes.
+            btn_rt_cl.click(lambda: ("","",""), None, [rt_text, rt_out_conlang, rt_out_back])
+            with gr.Accordion("Quick help (what does this button do?)", open=False):
+                gr.Markdown(EXPLAIN_TAB_ROUNDTRIP_EN)
+    return g
+# ================================ App ================================
 with gr.Blocks(title="Universal Conlang Translator", theme=gr.themes.Soft()) as demo:
     gr.Markdown("## 🌍 Idioma / Language")
+    lang_select = gr.Radio(["ES","EN"], value="ES", label="Selecciona / Select")
     group_es = make_group_es()
     group_en = make_group_en()
     def switch_lang(code):
         if code == "EN":