LoloSemper commited on
Commit
de90cf7
·
verified ·
1 Parent(s): eff6688

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +208 -586
app.py CHANGED
@@ -1,585 +1,171 @@
1
- # app.py — Universal Conlang Translator (Max Compresión Exacta)
2
- # Archivos requeridos en la raíz:
3
- # - lexicon_minimax.json
4
- # - lexicon_komin.json
5
- # - lexicon_master.json
6
- #
7
- # requirements.txt (para HF Spaces):
8
- # gradio>=4.36.0
9
- # spacy>=3.7.4
10
- # es_core_news_sm @ https://github.com/explosion/spacy-models/releases/download/es_core_news_sm-3.7.0/es_core_news_sm-3.7.0-py3-none-any.whl
11
- # en_core_web_sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl
12
-
13
- import os, re, json, base64, zlib
14
- from typing import Dict, Optional, List, Any
15
- import gradio as gr
16
-
17
# ------------ Expected lexicon files (repo root) ------------
# JSON files produced by the lexicon build pipeline; all are optional at
# runtime (missing files yield empty mappings in load_lexicons()).
LEX_MINI = "lexicon_minimax.json"
LEX_KOMI = "lexicon_komin.json"
LEX_MASTER = "lexicon_master.json"
21
-
22
# ------------ Normalization ------------
# Runs of Latin letters, including Spanish accented characters.
WORD_RE = re.compile(r"[A-Za-zÁÉÍÓÚÜÑáéíóúüñ]+", re.UNICODE)
# Translation table that strips diacritics (á -> a, ñ -> n, ...).
STRIP = str.maketrans("ÁÉÍÓÚÜÑáéíóúüñ", "AEIOUUNaeiouun")


def norm_es(w: str) -> str:
    """Lowercase *w*, keep only Spanish letters, then strip diacritics."""
    lowered = (w or "").lower()
    letters_only = re.sub(r"[^a-záéíóúüñ]", "", lowered)
    return letters_only.translate(STRIP)


def norm_en(w: str) -> str:
    """Lowercase *w* and keep only ASCII letters a-z."""
    return re.sub(r"[^a-z]", "", (w or "").lower())
27
-
28
# ------------ Lexicon loading ------------
def load_json(path: str):
    """Read a UTF-8 JSON file; return None when the file does not exist."""
    if not os.path.exists(path):
        return None
    with open(path, "r", encoding="utf-8") as fh:
        return json.load(fh)
32
-
33
def load_lexicons():
    """Load the three lexicon JSON files and build every lookup table.

    Returns an 11-tuple:
      (es2mini, es2komi, mini2es, komi2es,
       en2mini, en2komi, mini2en, komi2en,
       es2en_lemma, en2es_lemma, master)
    Missing files simply yield empty mappings (load_json returns None).
    """
    mm = load_json(LEX_MINI) or {}
    kk = load_json(LEX_KOMI) or {}
    master = load_json(LEX_MASTER) or {}

    # Direct Spanish-lemma -> conlang-code mappings, plus their inverses.
    es2mini = mm.get("mapping", {})
    es2komi = kk.get("mapping", {})
    mini2es = {v:k for k,v in es2mini.items()}
    komi2es = {v:k for k,v in es2komi.items()}

    es2en_lemma: Dict[str,str] = {}
    en2es_lemma: Dict[str,str] = {}
    en2mini, en2komi = {}, {}
    mini2en, komi2en = {}, {}

    # The master lexicon links ES/EN lemma pairs to both conlang codes.
    if isinstance(master, dict) and "entries" in master:
        for e in master["entries"]:
            es = norm_es(str(e.get("lemma_es",""))); en = norm_en(str(e.get("lemma_en","")))
            mi = str(e.get("minimax","")); ko = str(e.get("komin",""))
            if es and en:
                # setdefault keeps the FIRST entry when a lemma repeats.
                es2en_lemma.setdefault(es, en); en2es_lemma.setdefault(en, es)
            if en and mi: en2mini.setdefault(en, mi)
            if en and ko: en2komi.setdefault(en, ko)

    # Inverses built after the loop so duplicates resolve consistently.
    mini2en = {v:k for k,v in en2mini.items()}
    komi2en = {v:k for k,v in en2komi.items()}
    return (es2mini, es2komi, mini2es, komi2es,
            en2mini, en2komi, mini2en, komi2en,
            es2en_lemma, en2es_lemma, master)
62
-
63
# Module-level lookup tables, built once at import time.
(ES2MINI, ES2KOMI, MINI2ES, KOMI2ES,
 EN2MINI, EN2KOMI, MINI2EN, KOMI2EN,
 ES2EN_LEMMA, EN2ES_LEMMA, MASTER_OBJ) = load_lexicons()
66
-
67
# ------------ Pronouns (for the "remove pronouns" option) ------------
# Compared against the raw lowercased token text (accents kept, no
# normalization) -- see realize_*() and build_sentence().
PRON_ES = {"yo","tú","vos","usted","él","ella","nosotros","vosotros","ustedes","ellos","ellas","me","te","se","nos","os"}
PRON_EN = {"i","you","he","she","it","we","they","me","him","her","us","them"}
70
-
71
# ------------ Reversible OOV encoding (semi-lossless) ------------
# 64-symbol alphabet used as a custom Base64 alphabet for Minimax; the
# source string is longer than 64 characters, so it is truncated to 64.
ALPHA_MINI64 = "@ptkmnslraeiouy0123456789><=:/!?.+-_*#bcdfghjvqwxzACEGHIJKLMNOPRS"[:64]
# Pool of CJK characters used to build the Kōmín 64-symbol alphabet.
CJK_BASE = (
    "天地人日月山川雨風星火水木土金石光影花草鳥犬猫魚"
    "東西南北中外上下午夜明暗手口目耳心言書家道路門"
    "大小長短早晚高低新古青紅白黒金銀銅玉米茶酒米"
    "文学楽音画体気電海空森林雪雲砂島橋城村国自由静"
)
# NOTE(review): CJK_BASE contains repeated characters (e.g. 米, 金), so the
# first 64 of (CJK_BASE * 2) may hold duplicates, which would make the
# str.maketrans decoding table ambiguous -- TODO confirm uniqueness.
ALPHA_CJK64 = (CJK_BASE * 2)[:64]
80
-
81
def to_custom_b64(b: bytes, alphabet: str) -> str:
    """Base64-encode *b*, remap onto *alphabet*, and drop '=' padding."""
    standard = base64.b64encode(b).decode("ascii")
    table = str.maketrans(
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/",
        alphabet,
    )
    return standard.translate(table).rstrip("=")


def from_custom_b64(s: str, alphabet: str) -> bytes:
    """Inverse of to_custom_b64: remap back and restore '=' padding."""
    table = str.maketrans(
        alphabet,
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/",
    )
    standard = s.translate(table)
    padding = "=" * (-len(standard) % 4)
    return base64.b64decode(standard + padding)
89
-
90
def enc_oov_minimax(token: str) -> str:
    """Wrap an out-of-vocabulary token as '~<custom-b64>' for Minimax."""
    return "~" + to_custom_b64(token.encode("utf-8"), ALPHA_MINI64)


def dec_oov_minimax(code: str) -> str:
    """Decode a '~...' OOV code; on any failure return the code unchanged."""
    try:
        return from_custom_b64(code[1:], ALPHA_MINI64).decode("utf-8")
    except Exception:
        return code


def enc_oov_komin(token: str) -> str:
    """Wrap an out-of-vocabulary token in 「...」 brackets for Kōmín."""
    return "「" + to_custom_b64(token.encode("utf-8"), ALPHA_CJK64) + "」"


def dec_oov_komin(code: str) -> str:
    """Decode a 「...」 OOV code; on any failure return the code unchanged."""
    try:
        return from_custom_b64(code[1:-1], ALPHA_CJK64).decode("utf-8")
    except Exception:
        return code


def is_oov_minimax(code: str) -> bool:
    """True when *code* looks like a Minimax OOV blob ('~' plus payload)."""
    return code.startswith("~") and len(code) > 1


def is_oov_komin(code: str) -> bool:
    """True when *code* is bracketed with 「 and 」."""
    return len(code) >= 2 and code.startswith("「") and code.endswith("」")
100
-
101
# ------------ Optional spaCy ------------
# spaCy and its ES/EN models are optional; when unavailable the app falls
# back to the purely lexicon-based word-by-word paths.
USE_SPACY = False
try:
    import spacy
    try:
        nlp_es = spacy.load("es_core_news_sm"); nlp_en = spacy.load("en_core_web_sm"); USE_SPACY = True
    except Exception:
        # spaCy is importable but the language models are missing.
        nlp_es = nlp_en = None
except Exception:
    # spaCy itself is not installed.
    nlp_es = nlp_en = None
111
-
112
def lemma_of(tok, src_lang: str) -> str:
    """Return the normalized lemma of a spaCy token, falling back to its text."""
    raw = tok.lemma_ if getattr(tok, "lemma_", "") else tok.text
    normalize = norm_es if src_lang == "Español" else norm_en
    return normalize(raw)
117
-
118
# ------------ Simple detection ------------
def detect_polarity(doc) -> bool:
    """True when the sentence text contains a question mark."""
    return "?" in getattr(doc, "text", "")


def detect_neg(doc) -> bool:
    """True when any token is a 'neg' dependency or a negation word."""
    negation_words = ("no", "not", "n't")
    for token in doc:
        if getattr(token, "dep_", "") == "neg":
            return True
        if getattr(token, "lower_", "").lower() in negation_words:
            return True
    return False
125
def detect_tense(root):
    """Infer Pres/Past/Fut from the root's morphology, else its AUX children."""
    morph = str(getattr(root, "morph", ""))
    for feature, label in (("Tense=Past", "Past"),
                           ("Tense=Fut", "Fut"),
                           ("Tense=Pres", "Pres")):
        if feature in morph:
            return label
    # No tense on the root: look at auxiliary children (e.g. "will", "was").
    for child in getattr(root, "children", []):
        if getattr(child, "pos_", "") != "AUX":
            continue
        if "Tense=Past" in str(getattr(child, "morph", "")):
            return "Past"
        if getattr(child, "lower_", "").lower() == "will":
            return "Fut"
    return "Pres"
136
def extract_core(doc):
    """Split a parsed sentence into (root, subjects, objects, obliques, adverbs).

    The root is the first ROOT token whose POS is VERB/AUX, falling back to
    the first token (or the doc itself when empty).  Each argument list is
    sorted back into surface order by token index.
    """
    tokens = list(doc)
    root = next((t for t in tokens if getattr(t,"dep_","")=="ROOT" and getattr(t,"pos_","") in ("VERB","AUX")), tokens[0] if tokens else doc)
    subs, objs, obls, advs = [], [], [], []
    for t in getattr(root,"children",[]):
        dep = getattr(t,"dep_",""); pos = getattr(t,"pos_","")
        if dep in ("nsubj","nsubj:pass","csubj"): subs.append(t)
        elif dep in ("obj","dobj","iobj"): objs.append(t)
        elif dep in ("obl","pobj"): obls.append(t)
        elif dep in ("advmod","advcl") and pos=="ADV": advs.append(t)
    # Restore surface word order within each argument slot.
    for arr in (subs,objs,obls,advs): arr.sort(key=lambda x: getattr(x,"i",0))
    return root, subs, objs, obls, advs
148
def _person_of_doc(doc, src_lang: str) -> Optional[str]:
    """Guess grammatical person/number ('1s'..'3p') from the root's subject.

    Returns None when no subject is found or anything goes wrong; callers
    fall back to a morphology-based guess or a fixed hint.
    """
    try:
        tokens = list(doc)
        root = next((t for t in tokens if getattr(t,"dep_","")=="ROOT"), tokens[0])
        subj = next((t for t in getattr(root,"children",[]) if getattr(t,"dep_","").startswith("nsubj")), None)
        if subj is None: return None
        # Plurality: Spanish via morphology, English via plural POS tags.
        plur = ("Number=Plur" in str(getattr(subj,"morph",""))) if src_lang=="Español" else (getattr(subj,"tag_","") in ("NNS","NNPS"))
        low = getattr(subj,"lower_","").lower()
        if src_lang=="Español":
            if low in ("yo",): return "1p" if plur else "1s"
            if low in ("tú","vos"): return "2p" if plur else "2s"
            if low in ("usted","él","ella"): return "3p" if plur else "3s"
            # Surface form not a known pronoun: fall back to the lemma.
            lem = lemma_of(subj, "Español")
            if lem in ("yo","nosotros"): return "1p" if plur else "1s"
            if lem in ("tú","vosotros"): return "2p" if plur else "2s"
            return "3p" if plur else "3s"
        else:
            if low in ("i",): return "1p" if plur else "1s"
            if low in ("you",): return "2p" if plur else "2s"
            if low in ("he","she","it"): return "3p" if plur else "3s"
            return "3p" if plur else "3s"
    except Exception:
        # Best-effort helper: any parsing surprise just means "unknown".
        return None
171
def detect_person(root, src_lang: str) -> Optional[str]:
    """Person/number code ('1s'..'3p') from the verb's morphology, else its subject."""
    m = str(getattr(root,"morph","")); person_str, number_str = "3","s"
    if "Person=" in m:
        # Morphological features look like "Person=2|Number=Plur|...".
        for feat in m.split("|"):
            if feat.startswith("Person="): person_str = feat.split("=")[1]
            elif feat.startswith("Number="): number_str = "p" if feat.split("=")[1]=="Plur" else "s"
        return person_str + number_str
    # No explicit person on the verb: inspect the subject instead.
    return _person_of_doc(root.doc, src_lang)
179
-
180
# ------------ Mapping and phrasing helpers ------------
def code_es(lemma: str, target: str) -> str:
    """Spanish lemma -> conlang code, with a reversible OOV fallback."""
    key = norm_es(lemma)
    if target == "Minimax-ASCII":
        hit = ES2MINI.get(key)
        return hit if hit else enc_oov_minimax(key)
    hit = ES2KOMI.get(key)
    return hit if hit else enc_oov_komin(key)


def code_en(lemma: str, target: str) -> str:
    """English lemma -> conlang code, with a reversible OOV fallback."""
    key = norm_en(lemma)
    if target == "Minimax-ASCII":
        hit = EN2MINI.get(key) if EN2MINI else None
        return hit if hit else enc_oov_minimax(key)
    hit = EN2KOMI.get(key) if EN2KOMI else None
    return hit if hit else enc_oov_komin(key)
191
-
192
# Tense markers appended to the verb code, per conlang; unknown tense falls
# back to present.
TAM_MINI = {"Pres":"P","Past":"T","Fut":"F","UNK":"P"}
TAM_KOMI = {"Pres":"Ⓟ","Past":"Ⓣ","Fut":"Ⓕ","UNK":"Ⓟ"}
194
-
195
def realize_minimax(doc, src_lang: str, drop_articles=True, zero_copula=True,
                    semi_lossless=False, person_hint="2s", remove_pronouns=False):
    """Render a parsed ES/EN sentence as a Minimax-ASCII clause.

    Order is V S O ADV; the verb carries a '·' tail encoding tense (P/T/F),
    optionally person (semi-lossless), negation (N) and question (Q).
    NOTE(review): drop_articles is accepted but never read here -- articles
    are dropped implicitly because only core arguments are realized.
    """
    root, subs, objs, obls, advs = extract_core(doc)
    tense = detect_tense(root); is_q, is_neg = detect_polarity(doc), detect_neg(doc)
    # Without spaCy there is no parse; fall back to a copula guess.
    vlem = lemma_of(root, src_lang) if USE_SPACY else ("ser" if "?" in getattr(doc,"text","") else "estar")
    vcode = code_es(vlem, "Minimax-ASCII") if src_lang=="Español" else code_en(vlem, "Minimax-ASCII")
    tail = TAM_MINI.get(tense, "P")
    if semi_lossless: tail += (detect_person(root, src_lang) or person_hint)
    if is_neg: tail += "N";
    if is_q: tail += "Q"
    if tail: vcode = f"{vcode}·{tail}"

    def realize_np(tokens):
        # Map each argument token to its conlang code, optionally skipping pronouns.
        outs=[]
        for t in tokens:
            if remove_pronouns:
                txt = (getattr(t,"text","") or "").lower()
                if (src_lang=="Español" and txt in PRON_ES) or (src_lang=="English" and txt in PRON_EN): continue
            lem = lemma_of(t, src_lang) if USE_SPACY else getattr(t,"text","")
            outs.append(code_es(lem,"Minimax-ASCII") if src_lang=="Español" else code_en(lem,"Minimax-ASCII"))
        return outs

    S = realize_np(subs); O = realize_np(objs)+realize_np(obls)
    ADV=[]
    for a in advs:
        lem = lemma_of(a, src_lang) if USE_SPACY else getattr(a,"text","")
        ADV.append(code_es(lem,"Minimax-ASCII") if src_lang=="Español" else code_en(lem,"Minimax-ASCII"))

    # Zero copula: omit ser/estar/be entirely in a plain affirmative present.
    parts = S+O+ADV if (zero_copula and not semi_lossless and vlem in ("ser","estar","be") and tense=="Pres" and not is_neg and not is_q) else [vcode]+S+O+ADV
    return " ".join(p for p in parts if p)
225
-
226
def realize_komin(doc, src_lang: str, drop_articles=True, zero_copula=True,
                  semi_lossless=False, person_hint="2s", remove_pronouns=False):
    """Render a parsed ES/EN sentence as a Kōmín-CJK clause.

    Order is S O ADV V; arguments carry superscript role particles (ᵖ subject,
    ᵒ object), the verb carries a circled TAM mark plus an optional [person]
    block and a combining-breve negation mark; questions end with '?'.
    NOTE(review): drop_articles is accepted but never read here; NEG_M is
    bound but the breve literal is repeated inline below.
    """
    root, subs, objs, obls, advs = extract_core(doc)
    tense, is_q, is_neg = detect_tense(root), detect_polarity(doc), detect_neg(doc)
    # Without spaCy there is no parse; fall back to a copula guess.
    vlem = lemma_of(root, src_lang) if USE_SPACY else ("ser" if "?" in getattr(doc,"text","") else "estar")
    vcode = code_es(vlem, "Kōmín-CJK") if src_lang=="Español" else code_en(vlem, "Kōmín-CJK")
    P_SUBJ, P_OBJ = "ᵖ", "ᵒ"; NEG_M, Q_FIN = "̆", "?"
    TAM = TAM_KOMI.get(tense,"Ⓟ")
    if semi_lossless: TAM = TAM + f"[{detect_person(root, src_lang) or person_hint}]"

    def realize_np(tokens, particle):
        # Map argument tokens to conlang codes with their role particle.
        outs=[]
        for t in tokens:
            if remove_pronouns:
                txt = (getattr(t,"text","") or "").lower()
                if (src_lang=="Español" and txt in PRON_ES) or (src_lang=="English" and txt in PRON_EN): continue
            lem = lemma_of(t, src_lang) if USE_SPACY else getattr(t,"text","")
            outs.append((code_es(lem,"Kōmín-CJK") if src_lang=="Español" else code_en(lem,"Kōmín-CJK")) + particle)
        return outs

    S = realize_np(subs, P_SUBJ); O = realize_np(objs+obls, P_OBJ)
    ADV=[]
    for a in advs:
        lem = lemma_of(a, src_lang) if USE_SPACY else getattr(a,"text","")
        ADV.append(code_es(lem,"Kōmín-CJK") if src_lang=="Español" else code_en(lem,"Kōmín-CJK"))

    # Zero copula as in realize_minimax; otherwise the marked verb goes last.
    parts = S+O+ADV if (zero_copula and not semi_lossless and vlem in ("ser","estar","be") and tense=="Pres" and not is_neg and not is_q) else S+O+ADV+[vcode+TAM+("̆" if is_neg else "")]
    out = " ".join(parts)
    if is_q: out += " " + Q_FIN
    return out
256
-
257
# ------------ Sidecars (exact compression) ------------
# A sidecar is ' §(<ascii85>)' appended to the conlang text; it stores the
# zlib-compressed original so decoding can recover it bit-for-bit.
# BUGFIX: the previous character class listed only a subset of the Ascii85
# alphabet (it missed ", ', comma, period, /, :, [, \, ], etc.), so valid
# sidecars could silently fail to match.  base64.a85encode(adobe=False)
# emits characters in the range '!'..'u', i.e. [!-u].
SIDECAR_B85_RE = re.compile(r"\s?§\((?P<b85>[!-u]+)\)$")


def b85_enc_raw(s: str) -> str:
    """zlib-compress *s* (level 9) and Ascii85-encode the result."""
    return base64.a85encode(zlib.compress(s.encode("utf-8"), 9), adobe=False).decode("ascii")


def b85_dec_raw(b85s: str) -> str:
    """Inverse of b85_enc_raw: Ascii85-decode then zlib-decompress."""
    return zlib.decompress(base64.a85decode(b85s.encode("ascii"), adobe=False)).decode("utf-8")


def attach_sidecar_b85(conlang_text: str, original_text: str) -> str:
    """Append the compressed original as a trailing ' §(...)' sidecar."""
    return f"{conlang_text} §({b85_enc_raw(original_text)})"


def extract_sidecar_b85(text: str) -> Optional[str]:
    """Return the exact original stored in a trailing sidecar, else None."""
    m = SIDECAR_B85_RE.search(text)
    if not m:
        return None
    try:
        return b85_dec_raw(m.group("b85"))
    except Exception:
        # Corrupt blob: treat as "no sidecar" rather than crash.
        return None


def strip_sidecar_b85(text: str) -> str:
    """Remove a trailing ' §(...)' sidecar, if present."""
    return SIDECAR_B85_RE.sub("", text).rstrip()
268
def custom_sidecar_enc(conlang_text: str, original_text: str) -> str:
    """Append ' ~<blob>' holding the zlib-compressed original (exact mode)."""
    blob = to_custom_b64(zlib.compress(original_text.encode("utf-8"), 9), ALPHA_MINI64)
    return f"{conlang_text} ~{blob}"


def extract_custom_sidecar(text: str) -> Optional[str]:
    """Decode the blob after the LAST '~'; None when absent or invalid.

    '~' is also the OOV-token prefix, so only the final '~' is treated as a
    sidecar candidate; a blob that fails to decompress yields None.
    """
    if '~' in text:
        core, blob = text.rsplit('~', 1)
        try:
            return zlib.decompress(from_custom_b64(blob, ALPHA_MINI64)).decode("utf-8")
        except Exception:
            return None
    return None


def strip_custom_sidecar(text: str) -> str:
    """Drop a trailing '~' sidecar, keeping everything before the LAST '~'.

    BUGFIX: this used text.split('~')[0], which cut at the FIRST '~' and
    therefore truncated any '~'-prefixed OOV token in the conlang text;
    rsplit matches extract_custom_sidecar's interpretation.
    """
    return text.rsplit('~', 1)[0].rstrip() if '~' in text else text
278
-
279
# ------------ Simple word-by-word encode/decode ------------
def encode_simple(text: str, src_lang: str, target: str) -> str:
    """Replace every word in *text* with its conlang code (OOV -> reversible blob)."""
    if not text.strip():
        return ""

    def encode_word(match):
        word = match.group(0)
        if src_lang == "Español":
            key = norm_es(word)
            code = ES2MINI.get(key) if target == "Minimax-ASCII" else ES2KOMI.get(key)
            if code:
                return code
        else:
            key = norm_en(word)
            table = EN2MINI if target == "Minimax-ASCII" else EN2KOMI
            if table and key in table:
                return table[key]
        # Unknown word: emit a reversible out-of-vocabulary blob.
        return enc_oov_minimax(word) if target == "Minimax-ASCII" else enc_oov_komin(word)

    return WORD_RE.sub(encode_word, text)
292
-
293
def pluralize_es(word: str) -> str:
    """Pluralize a Spanish noun with simple heuristics plus exceptions."""
    exceptions = {"uno": "unos", "buen": "buenos", "hombre": "hombres"}
    if word in exceptions:
        return exceptions[word]
    if word.endswith("z"):
        # luz -> luces
        return word[:-1] + "ces"
    # BUGFIX: "u" was missing from the vowel tuple, so "tribu" became
    # "tribues" instead of "tribus"; unstressed vowels take plain -s.
    if word.endswith(("a", "e", "i", "o", "u")):
        return word + "s"
    return word + "es"


def pluralize_en(word: str) -> str:
    """Pluralize an English noun with simple heuristics plus exceptions."""
    exceptions = {"man": "men", "woman": "women", "child": "children"}
    if word in exceptions:
        return exceptions[word]
    if word.endswith("y") and len(word) > 1 and word[-2] not in "aeiou":
        # city -> cities
        return word[:-1] + "ies"
    if word.endswith(("s", "sh", "ch", "x", "z")):
        return word + "es"
    return word + "s"


def pluralize(word: str, tgt_lang: str) -> str:
    """Dispatch to the Spanish or English pluralizer by target language."""
    return pluralize_es(word) if tgt_lang == "Español" else pluralize_en(word)
306
-
307
# Matches a Minimax verb token '<code>·<tail>' where the tail encodes tense
# (P/T/F), optional person (1/2/3 + s/p), negation (N) and question (Q).
# BUGFIX: the class previously read [PTFNQ12sp] -- '3' was missing, so
# third-person tails such as 'P3s' were never recognized as verbs.
mini_tail_re = re.compile(r"^(?P<stem>.+?)·(?P<tail>[PTFNQ123sp]+)$")
308
-
309
def decode_simple(text: str, source: str, tgt_lang: str) -> str:
    """Decode a conlang string back to Spanish/English (semi-lossless path).

    Minimax input is scanned for a verb token ('code·tail'), which is
    re-conjugated; '[PL]'-tagged tokens are re-pluralized.  Kōmín input is a
    plain reverse-lexicon lookup.  Used only when no exact sidecar is present.
    """
    if not text.strip(): return ""
    code2es = MINI2ES if source=="Minimax-ASCII" else KOMI2ES
    code2en = MINI2EN if source=="Minimax-ASCII" else KOMI2EN
    if source=="Kōmín-CJK":
        # NOTE(review): both replace() calls below are identity no-ops; they
        # look like mojibake for full-width '？'/space -- confirm intent.
        text = text.replace("?","?").replace(" "," ")
        return " ".join([code2es.get(w,w) for w in text.split() if w!="?"])
    tokens = text.split()
    if not tokens: return ""
    lemma_tokens, pl_flags = [], []
    verb_idx=-1; verb_lemma=None; verb_tense="Pres"; verb_person="3s"; has_q=False; is_neg=False
    # BUGFIX: pl_flags used to receive TWO entries per token (one unconditional
    # append here plus one per branch below), drifting out of sync with
    # lemma_tokens so plural flags applied to the wrong words.  It is now
    # appended exactly once per token, in lockstep with lemma_tokens.
    for part in tokens:
        look = part.replace("[PL]","")
        had_pl = "[PL]" in part
        m = mini_tail_re.match(look)
        if m:
            verb_idx = len(lemma_tokens); stem=m.group("stem"); tail=m.group("tail")
            vlem_es = code2es.get(stem); vlem_en = code2en.get(stem) if code2en else None
            vlem = vlem_es if tgt_lang=="Español" else (vlem_en or vlem_es or stem)
            if not vlem: vlem = dec_oov_minimax(stem) if is_oov_minimax(stem) else stem
            lemma_tokens.append(vlem); pl_flags.append(False)
            if tail:
                if tail[0] in "PTF":
                    verb_tense = {"P":"Pres","T":"Past","F":"Fut"}[tail[0]]; pos=1
                    # Optional person digit + s/p number marker.
                    if len(tail)>pos and tail[pos] in "123":
                        pos+=1; verb_person = tail[pos-1] + (tail[pos] if len(tail)>pos and tail[pos] in "sp" else "s")
                        if len(tail)>pos and tail[pos] in "sp": pos+=1
                    is_neg = "N" in tail[pos:]; has_q = "Q" in tail[pos:]
            verb_lemma = vlem; continue
        w_es = code2es.get(look); w_en = code2en.get(look) if code2en else None
        w = w_es if tgt_lang=="Español" else (w_en or w_es or look)
        if not w: w = dec_oov_minimax(look) if is_oov_minimax(look) else look
        lemma_tokens.append(w); pl_flags.append(had_pl)
    out_parts=[]
    for idx, lem in enumerate(lemma_tokens):
        if idx==verb_idx:
            # Re-conjugate the verb from the recovered tense/person features.
            v_conj = _es_conj(verb_lemma, verb_tense, verb_person) if tgt_lang=="Español" else _en_conj(verb_lemma, verb_tense, verb_person)
            if is_neg: v_conj = ("no " if tgt_lang=="Español" else "not ") + v_conj
            out_parts.append(v_conj)
        else:
            out_parts.append(pluralize(lem, tgt_lang) if pl_flags[idx] else lem)
    out_text = " ".join(out_parts)
    if has_q:
        start_q = "¿" if tgt_lang=="Español" else ""
        out_text = f"{start_q}{out_text.capitalize()}?"
    return out_text
353
-
354
# ------------ Minimal conjugators ------------
def _es_conj_regular(lemma, tense, person):
    """Conjugate a regular Spanish -ar/-er/-ir verb; otherwise return lemma."""
    if not lemma.endswith(("ar", "er", "ir")):
        return lemma
    stem, conj_class = lemma[:-2], lemma[-2:]
    present = {
        "ar": {"1s":"o","2s":"as","3s":"a","1p":"amos","2p":"áis","3p":"an"},
        "er": {"1s":"o","2s":"es","3s":"e","1p":"emos","2p":"éis","3p":"en"},
        "ir": {"1s":"o","2s":"es","3s":"e","1p":"imos","2p":"ís","3p":"en"},
    }
    preterite = {
        "ar": {"1s":"é","2s":"aste","3s":"ó","1p":"amos","2p":"asteis","3p":"aron"},
        "er": {"1s":"í","2s":"iste","3s":"ió","1p":"imos","2p":"isteis","3p":"ieron"},
        "ir": {"1s":"í","2s":"iste","3s":"ió","1p":"imos","2p":"isteis","3p":"ieron"},
    }
    future = {"1s":"é","2s":"ás","3s":"á","1p":"emos","2p":"éis","3p":"án"}
    if tense == "Pres":
        endings = present[conj_class]
        return stem + endings.get(person, endings["3s"])
    if tense == "Past":
        endings = preterite[conj_class]
        return stem + endings.get(person, endings["3s"])
    # Future (and any other value) attaches to the full infinitive.
    return lemma + future.get(person, future["3s"])
368
def _es_conj(lemma, tense, person):
    """Conjugate Spanish: irregular ser/estar/ir from tables, else regular rules."""
    irregular = {
        "ser": {
            "Pres": {"1s":"soy","2s":"eres","3s":"es","1p":"somos","2p":"sois","3p":"son"},
            "Past": {"1s":"fui","2s":"fuiste","3s":"fue","1p":"fuimos","2p":"fuisteis","3p":"fueron"},
            "Fut":  {"1s":"seré","2s":"serás","3s":"será","1p":"seremos","2p":"seréis","3p":"serán"},
        },
        "estar": {
            "Pres": {"1s":"estoy","2s":"estás","3s":"está","1p":"estamos","2p":"estáis","3p":"están"},
            "Past": {"1s":"estuve","2s":"estuviste","3s":"estuvo","1p":"estuvimos","2p":"estuvisteis","3p":"estuvieron"},
            "Fut":  {"1s":"estaré","2s":"estarás","3s":"estará","1p":"estaremos","2p":"estaréis","3p":"estarán"},
        },
        "ir": {
            "Pres": {"1s":"voy","2s":"vas","3s":"va","1p":"vamos","2p":"vais","3p":"van"},
            "Past": {"1s":"fui","2s":"fuiste","3s":"fue","1p":"fuimos","2p":"fuisteis","3p":"fueron"},
            "Fut":  {"1s":"iré","2s":"irás","3s":"irá","1p":"iremos","2p":"iréis","3p":"irán"},
        },
    }
    table = irregular.get(lemma)
    if table is None:
        return _es_conj_regular(lemma, tense, person)
    forms = table[tense]
    # Unknown person falls back to third singular, as in the regular rules.
    return forms.get(person, forms["3s"])
385
def _en_conj(lemma, tense, person):
    """Conjugate English: irregular be/have/go/do, else regular -s/-ed rules."""
    if lemma == "be":
        if tense == "Pres":
            forms = {"1s":"am","2s":"are","3s":"is","1p":"are","2p":"are","3p":"are"}
            return forms.get(person, "is")
        if tense == "Past":
            forms = {"1s":"was","2s":"were","3s":"was","1p":"were","2p":"were","3p":"were"}
            return forms.get(person, "was")
        return "be"
    if lemma == "have":
        if tense == "Pres":
            return "has" if person == "3s" else "have"
        return "had" if tense == "Past" else "have"
    if lemma == "go":
        if tense == "Past":
            return "went"
        return "goes" if (tense == "Pres" and person == "3s") else "go"
    if lemma == "do":
        if tense == "Past":
            return "did"
        return "does" if (tense == "Pres" and person == "3s") else "do"
    # Regular verbs: consonant+y triggers the -ies/-ied spelling change.
    consonant_y = lemma.endswith("y") and (len(lemma) < 2 or lemma[-2] not in "aeiou")
    if tense == "Pres":
        if person != "3s":
            return lemma
        if consonant_y:
            return lemma[:-1] + "ies"
        if lemma.endswith(("s", "sh", "ch", "x", "z", "o")):
            return lemma + "es"
        return lemma + "s"
    if tense == "Past":
        if lemma.endswith("e"):
            return lemma + "d"
        if consonant_y:
            return lemma[:-1] + "ied"
        return lemma + "ed"
    # Future (and anything else) is the bare form ("will" is added upstream).
    return lemma
411
-
412
# ------------ Main routes ------------
def _build_with_spacy(text: str, src_lang: str, target: str,
                      drop_articles: bool, zero_copula: bool, semi_lossless: bool,
                      remove_pronouns: bool) -> str:
    """Parse with spaCy and realize in the requested conlang.

    Note: the realizers are always invoked with semi_lossless=True here,
    regardless of the semi_lossless argument (kept for signature stability).
    """
    pipeline = nlp_es if src_lang == "Español" else nlp_en
    parsed = pipeline(text)
    realize = realize_minimax if target == "Minimax-ASCII" else realize_komin
    return realize(parsed, src_lang, drop_articles, zero_copula,
                   semi_lossless=True, remove_pronouns=remove_pronouns)
422
-
423
def build_sentence(text: str, src_lang: str, target: str,
                   drop_articles: bool, zero_copula: bool, mode: str,
                   max_comp_exact: bool = False, remove_pronouns: bool = False) -> str:
    """ES/EN -> conlang; optionally append the exact-compression '~' sidecar."""
    if not text.strip():
        return ""
    if USE_SPACY:
        core = _build_with_spacy(text, src_lang, target, drop_articles, zero_copula, True, remove_pronouns)
    else:
        # No parser available: optionally strip pronouns textually, then
        # fall back to word-by-word encoding.
        if remove_pronouns:
            pronouns = PRON_ES if src_lang == "Español" else PRON_EN
            pieces = re.findall(r"\w+|[^\w\s]+", text)
            text = " ".join([p for p in pieces if p.lower() not in pronouns])
        core = encode_simple(text, src_lang, target)
    if max_comp_exact:
        # The sidecar stores the (possibly pronoun-stripped) input text.
        return custom_sidecar_enc(core, text)
    return core
436
-
437
def universal_translate(text: str, src: str, tgt: str,
                        drop_articles: bool, zero_copula: bool,
                        mode: str, max_comp_exact: bool = False,
                        remove_pronouns: bool = False) -> str:
    """Route a translation between any pair of {Español, English, Minimax-ASCII, Kōmín-CJK}.

    Natural -> conlang builds a sentence (optionally with an exact sidecar);
    conlang -> natural prefers an embedded sidecar and falls back to the
    semi-lossless decoder; conlang -> conlang re-encodes via Spanish lemmas.
    """
    if not text.strip(): return ""
    if src == tgt: return text
    if src in ("Español","English") and tgt in ("Minimax-ASCII","Kōmín-CJK"):
        return build_sentence(text, src, tgt, drop_articles, zero_copula, mode, max_comp_exact, remove_pronouns)
    if src in ("Minimax-ASCII","Kōmín-CJK") and tgt in ("Español","English"):
        # Exact recovery first: '~' sidecar, then '§(...)' sidecar.
        orig = extract_custom_sidecar(text)
        if orig is not None: return orig
        orig = extract_sidecar_b85(text)
        if orig is not None: return orig
        return decode_simple(strip_custom_sidecar(strip_sidecar_b85(text)), src, tgt)
    if src in ("Español","English") and tgt in ("Español","English"):
        return translate_natural(text, src, tgt)
    if src in ("Minimax-ASCII","Kōmín-CJK") and tgt in ("Minimax-ASCII","Kōmín-CJK"):
        orig = extract_custom_sidecar(text)
        if orig is not None:
            # Sidecar present: re-encode the core and carry the exact
            # original over to the new conlang's sidecar.
            core = strip_custom_sidecar(text)
            es_lemmas = decode_simple(core, src, "Español")
            words = re.findall(r"\w+|[^\w\s]+", es_lemmas); out=[]
            for w in words:
                if re.fullmatch(r"\w+", w):
                    code = ES2MINI.get(norm_es(w)) if tgt=="Minimax-ASCII" else ES2KOMI.get(norm_es(w))
                    out.append(code or (enc_oov_minimax(w) if tgt=="Minimax-ASCII" else enc_oov_komin(w)))
                else: out.append(w)
            return custom_sidecar_enc(" ".join(out), orig)
        # No sidecar: decode to Spanish lemmas, then re-encode each word.
        es_lemmas = decode_simple(text, src, "Español")
        words = re.findall(r"\w+|[^\w\s]+", es_lemmas); out=[]
        for w in words:
            if re.fullmatch(r"\w+", w):
                code = ES2MINI.get(norm_es(w)) if tgt=="Minimax-ASCII" else ES2KOMI.get(norm_es(w))
                out.append(code or (enc_oov_minimax(w) if tgt=="Minimax-ASCII" else enc_oov_komin(w)))
            else: out.append(w)
        return " ".join(out)
    return "[No soportado]"
474
-
475
def translate_natural(text: str, src_lang: str, tgt_lang: str) -> str:
    """Lemma-level ES<->EN translation via the master lexicon (needs spaCy).

    Without spaCy the text is returned unchanged; unknown lemmas pass
    through as their normalized lemma.
    """
    if not text.strip():
        return ""
    if not USE_SPACY:
        return text
    pipeline = nlp_es if src_lang == "Español" else nlp_en
    mapping = ES2EN_LEMMA if src_lang == "Español" else EN2ES_LEMMA
    rendered = []
    for token in pipeline(text):
        if not getattr(token, "is_alpha", False):
            # Punctuation and numbers are copied verbatim.
            rendered.append(getattr(token, "text", ""))
            continue
        lemma = lemma_of(token, src_lang)
        translated = mapping.get(lemma)
        rendered.append(translated if translated else lemma)
    return " ".join(rendered)
488
-
489
def round_trip(text, src, tgt, mode, max_comp_exact):
    """Translate forward then back; returns (conlang, recovered) for comparison."""
    forward = universal_translate(text, src, tgt, True, False, mode, max_comp_exact, False)
    recovered = universal_translate(forward, tgt, src, True, False, mode, max_comp_exact, False)
    return forward, recovered
493
-
494
  # =====================================================================================
495
  # ========================= UI bilingüe y explicaciones claras ========================
496
  # =====================================================================================
497
 
498
  ALL_LANGS = ["Español","English","Minimax-ASCII","Kōmín-CJK"]
499
 
500
- # Secciones de ayuda (ES/EN) todas en el MISMO nivel, como acordeones
501
- COMPACT_ES = """
502
- **📏 Compactación orientativa (haz clic para desplegar)**
503
- - Sin casillas: **0%**
504
- - Omitir artículos: **~10–15%**
505
- - Cópula cero (presente afirm.): **~5–10%**
506
- - Ambas (artículos + cópula): **~15–20%**
507
- - Máx. Compresión Exacta: **~40–60%** en textos >100 caracteres (con `~...`). En textos muy cortos puede no reducir.
508
- """
509
- COMPACT_EN = """
510
- **📏 Typical compaction (click to expand)**
511
- - No options: **0%**
512
- - Drop articles: **~10–15%**
513
- - Zero copula (present affirmative): **~5–10%**
514
- - Both (articles + copula): **~15–20%**
515
- - Max Exact Compression: **~40–60%** for >100 chars (`~...`). Very short texts may not shrink.
516
- """
517
-
 
 
518
  EXPLAIN_TAB_TRANSLATE_ES = """
519
- **🔁 Traducir (haz clic para desplegar)**
520
- Convierte el *Texto* al *Destino*. Funciona para **cualquier combinación**: Español, English, Minimax-ASCII, Kōmín-CJK.
521
- - **Máx. Compresión Exacta** añade `~...` con el original comprimido para poder **recuperarlo exactamente** al decodificar.
522
- - **Omitir artículos / Cópula cero / Quitar pronombres** se aplican **solo cuando el destino es conlang** (Minimax/Kōmín).
523
  """
 
524
  EXPLAIN_TAB_BUILD_ES = """
525
- **🛠️ Construir (ES/EN → Conlang) (haz clic para desplegar)**
526
- Fuerza la salida **en conlang** desde Español o Inglés aplicando reglas de fraseo (orden, partículas/TAM) y tus **checkbox**.
527
- Útil para ver cómo quedaría la frase **directamente en Minimax/Kōmín** sin ambigüedad de direcciones.
528
  """
 
529
  EXPLAIN_TAB_DECODE_ES = """
530
- **🗝️ Decodificar (Conlang → ES/EN) (haz clic para desplegar)**
531
- Convierte **Minimax/Kōmín** a **Español o Inglés**.
532
- - Si hay `~...`, devuelve el **original exacto**.
533
- - Sin `~...`, la vuelta es **semi-lossless** usando el léxico y pistas simples.
534
  """
 
535
  EXPLAIN_TAB_ROUNDTRIP_ES = """
536
- **🔄 Prueba ida→vuelta (haz clic para desplegar)**
537
- Ejecuta **(ES/EN → Conlang) → (Conlang → ES/EN)** para comprobar **reversibilidad**.
538
- Con **Máx. Compresión Exacta**, la vuelta coincide **bit a bit** con la entrada.
539
  """
 
540
  EXPLAIN_CHECKBOX_ES = """
541
- **☑️ ¿Qué hace cada checkbox? (haz clic para desplegar)**
542
- - **Omitir artículos**: quita *el/la/los/las* (ES) y *a/an/the* (EN) → **~10–15%**.
543
- - **Cópula cero (presente afirm.)**: esconde *ser/estar/be* cuando suena natural → **~5–10%** extra.
544
- - **Quitar pronombres**: elimina pronombres de sujeto/objeto **evidentes** (ahorro variable).
545
- - **Máx. Compresión Exacta**: añade `~...` con zlib para recuperación exacta (**~40–60%** en >100 caracteres).
546
- """
547
 
548
- LEXICON_BUILD_ES = """
549
- **ℹ️ Léxico (OMW Minimax/Kōmín) (haz clic para desplegar)**
550
- 1) Desde **OMW/WordNet 1.4** se extraen **lemas ES** y sus **equivalentes EN** por sinset.
551
- 2) Se normalizan y ordenan por **frecuencia** (wordfreq).
552
- 3) Opcional: **spaCy** refina lemas; **Argos** puede rellenar EN faltantes.
553
- 4) Se asignan **códigos compactos** con alfabetos barajados por **SEED** hasta `MAXLEN_MINI`/`MAXLEN_CJK`.
554
- 5) Se exportan: `lexicon_minimax.json`, `lexicon_komin.json`, `lexicon_master.json` (+TSV).
555
- **Vista previa** de `lexicon_master.json` abajo.
 
 
 
556
  """
557
 
558
- # (EN) versiones cortas
559
  EXPLAIN_TAB_TRANSLATE_EN = """
560
- **🔁 Translate (click to expand)** Converts *Text* to *Target* (any pair: Spanish/English/Minimax/Kōmín).
561
- With **Max Exact Compression**, appends `~...` to recover the **exact original**. Checkboxes apply when **target is conlang**.
 
 
562
  """
 
563
  EXPLAIN_TAB_BUILD_EN = """
564
- **🛠️ Build (ES/EN → Conlang) (click to expand)** — Forces conlang output (Minimax/Kōmín) with phrasing rules and your checkboxes.
 
565
  """
 
566
  EXPLAIN_TAB_DECODE_EN = """
567
- **🗝️ Decode (Conlang → ES/EN) (click to expand)** — If `~...` is present, returns the **bit-perfect original**; otherwise semi-lossless.
 
 
 
568
  """
 
569
  EXPLAIN_TAB_ROUNDTRIP_EN = """
570
- **🔄 Round-trip (click to expand)** — Runs (ES/EN → Conlang) → (Conlang → ES/EN) to verify reversibility.
 
571
  """
 
572
  EXPLAIN_CHECKBOX_EN = """
573
- **☑️ Checkboxes (click to expand)**
574
- - **Drop articles**: ~10–15%
575
- - **Zero copula (present affirm.)**: ~5–10% extra
576
- - **Remove pronouns**: variable
577
- - **Max Exact Compression**: ~40–60% for >100 chars (`~...`), exact recovery.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
578
  """
 
579
  LEXICON_BUILD_EN = """
580
- **ℹ️ Lexicon (OMW → Minimax/Kōmín) (click to expand)** — OMW/WordNet ES lemmas + EN counterparts, normalized & frequency-sorted; optional spaCy/Argos; codes assigned with SEED-shuffled alphabets up to MAXLEN; exports JSON/TSV. Preview below.
 
 
 
 
 
 
 
 
581
  """
582
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
583
  def master_preview(n: int = 20) -> List[List[Any]]:
584
  try:
585
  entries = (MASTER_OBJ or {}).get("entries", [])
@@ -595,16 +181,22 @@ def master_preview(n: int = 20) -> List[List[Any]]:
595
  def make_group_es():
596
  with gr.Group(visible=True) as g:
597
  gr.Markdown("# 🌐 Universal Conlang Translator · Compresión Exacta (ES)")
598
- # Acordeones de EXPLICACIÓN — todos al MISMO nivel
599
  with gr.Row():
600
  with gr.Column():
601
- with gr.Accordion(EXPLAIN_TAB_TRANSLATE_ES, open=False): pass
602
- with gr.Accordion(EXPLAIN_TAB_BUILD_ES, open=False): pass
603
- with gr.Accordion(EXPLAIN_TAB_DECODE_ES, open=False): pass
604
- with gr.Accordion(EXPLAIN_TAB_ROUNDTRIP_ES, open=False): pass
 
 
 
 
605
  with gr.Column():
606
- with gr.Accordion(EXPLAIN_CHECKBOX_ES, open=False): gr.Markdown(COMPACT_ES)
607
- with gr.Accordion(LEXICON_BUILD_ES, open=False):
 
 
608
  n_rows = gr.Slider(5, 100, value=20, step=5, label="Filas a mostrar")
609
  table = gr.Dataframe(headers=["lemma_es","lemma_en","minimax","komin"], row_count=1, interactive=False)
610
  gr.Button("Actualizar vista").click(lambda n: master_preview(int(n)), [n_rows], [table])
@@ -626,14 +218,20 @@ def make_group_es():
626
  btn_tr = gr.Button("🚀 Traducir", variant="primary")
627
  btn_tr_cl = gr.Button("🧹 Limpiar")
628
  uni_out = gr.Textbox(lines=6, label="Traducción", show_copy_button=True)
 
 
 
 
 
 
629
 
630
- btn_tr.click(universal_translate,
631
  [uni_text, uni_src, uni_tgt, uni_drop, uni_zero, uni_mode, uni_maxc, uni_rmpr],
632
- [uni_out])
633
  btn_tr_cl.click(lambda: ("",""), None, [uni_text, uni_out])
634
 
635
- with gr.Accordion("Ayuda rápida (¿qué hace este botón?)", open=False):
636
- gr.Markdown(EXPLAIN_TAB_TRANSLATE_ES + "\n\n" + COMPACT_ES)
637
 
638
  with gr.Tab("🛠️ Construir (ES/EN → Conlang)"):
639
  with gr.Row():
@@ -650,14 +248,20 @@ def make_group_es():
650
  btn_b = gr.Button("🏗️ Construir", variant="primary")
651
  btn_b_cl = gr.Button("🧹 Limpiar")
652
  out = gr.Textbox(lines=6, label="Salida", show_copy_button=True)
 
653
 
654
- btn_b.click(build_sentence,
 
 
 
 
 
655
  [text_in, src_lang, target, drop_articles, zero_copula, mode_build, max_comp_build, rm_pron_build],
656
- [out])
657
  btn_b_cl.click(lambda: ("",""), None, [text_in, out])
658
 
659
- with gr.Accordion("Ayuda rápida (¿qué hace este botón?)", open=False):
660
- gr.Markdown(EXPLAIN_TAB_BUILD_ES + "\n\n" + COMPACT_ES)
661
 
662
  with gr.Tab("🗝️ Decodificar (Conlang → ES/EN)"):
663
  with gr.Row():
@@ -680,7 +284,7 @@ def make_group_es():
680
  btn_d.click(decode_lossless_aware, [code_in, src_code, tgt_lang], [out3])
681
  btn_d_cl.click(lambda: ("",""), None, [code_in, out3])
682
 
683
- with gr.Accordion("Ayuda rápida (¿qué hace este botón?)", open=False):
684
  gr.Markdown(EXPLAIN_TAB_DECODE_ES)
685
 
686
  with gr.Tab("🔄 Prueba ida→vuelta"):
@@ -699,7 +303,7 @@ def make_group_es():
699
  btn_rt.click(round_trip, [rt_text, rt_src, rt_tgt, rt_mode, rt_max_comp], [rt_out_conlang, rt_out_back])
700
  btn_rt_cl.click(lambda: ("","",""), None, [rt_text, rt_out_conlang, rt_out_back])
701
 
702
- with gr.Accordion("Ayuda rápida (¿qué hace este botón?)", open=False):
703
  gr.Markdown(EXPLAIN_TAB_ROUNDTRIP_ES)
704
  return g
705
 
@@ -708,13 +312,19 @@ def make_group_en():
708
  gr.Markdown("# 🌐 Universal Conlang Translator · Max Exact Compression (EN)")
709
  with gr.Row():
710
  with gr.Column():
711
- with gr.Accordion(EXPLAIN_TAB_TRANSLATE_EN, open=False): pass
712
- with gr.Accordion(EXPLAIN_TAB_BUILD_EN, open=False): pass
713
- with gr.Accordion(EXPLAIN_TAB_DECODE_EN, open=False): pass
714
- with gr.Accordion(EXPLAIN_TAB_ROUNDTRIP_EN, open=False): pass
 
 
 
 
715
  with gr.Column():
716
- with gr.Accordion(EXPLAIN_CHECKBOX_EN, open=False): gr.Markdown(COMPACT_EN)
717
- with gr.Accordion(LEXICON_BUILD_EN, open=False):
 
 
718
  n_rows = gr.Slider(5, 100, value=20, step=5, label="Rows to show")
719
  table = gr.Dataframe(headers=["lemma_es","lemma_en","minimax","komin"], row_count=1, interactive=False)
720
  gr.Button("Refresh").click(lambda n: master_preview(int(n)), [n_rows], [table])
@@ -735,14 +345,20 @@ def make_group_en():
735
  btn_tr = gr.Button("🚀 Translate", variant="primary")
736
  btn_tr_cl = gr.Button("🧹 Clear")
737
  uni_out = gr.Textbox(lines=6, label="Translation", show_copy_button=True)
 
 
 
 
 
 
738
 
739
- btn_tr.click(universal_translate,
740
  [uni_text, uni_src, uni_tgt, uni_drop, uni_zero, uni_mode, uni_maxc, uni_rmpr],
741
- [uni_out])
742
  btn_tr_cl.click(lambda: ("",""), None, [uni_text, uni_out])
743
 
744
- with gr.Accordion("Quick help (what does this button do?)", open=False):
745
- gr.Markdown(EXPLAIN_TAB_TRANSLATE_EN + "\n\n" + COMPACT_EN)
746
 
747
  with gr.Tab("🛠️ Build (ES/EN → Conlang)"):
748
  with gr.Row():
@@ -759,14 +375,20 @@ def make_group_en():
759
  btn_b = gr.Button("🏗️ Build", variant="primary")
760
  btn_b_cl = gr.Button("🧹 Clear")
761
  out = gr.Textbox(lines=6, label="Output", show_copy_button=True)
 
 
 
 
 
 
762
 
763
- btn_b.click(build_sentence,
764
  [text_in, src_lang, target, drop_articles, zero_copula, mode_build, max_comp_build, rm_pron_build],
765
- [out])
766
  btn_b_cl.click(lambda: ("",""), None, [text_in, out])
767
 
768
- with gr.Accordion("Quick help (what does this button do?)", open=False):
769
- gr.Markdown(EXPLAIN_TAB_BUILD_EN + "\n\n" + COMPACT_EN)
770
 
771
  with gr.Tab("🗝️ Decode (Conlang → ES/EN)"):
772
  with gr.Row():
@@ -775,7 +397,7 @@ def make_group_en():
775
  code_in = gr.Textbox(lines=3, label="Conlang text (may include `~...`)", show_copy_button=True)
776
  out3 = gr.Textbox(lines=6, label="Output", show_copy_button=True)
777
 
778
- def decode_lossless_aware(text, src, tgt):
779
  orig = extract_custom_sidecar(text)
780
  if orig is not None: return orig
781
  orig = extract_sidecar_b85(text)
@@ -786,10 +408,10 @@ def make_group_en():
786
  btn_d = gr.Button("🔓 Decode", variant="primary")
787
  btn_d_cl = gr.Button("🧹 Clear")
788
 
789
- btn_d.click(decode_lossless_aware, [code_in, src_code, tgt_lang], [out3])
790
  btn_d_cl.click(lambda: ("",""), None, [code_in, out3])
791
 
792
- with gr.Accordion("Quick help (what does this button do?)", open=False):
793
  gr.Markdown(EXPLAIN_TAB_DECODE_EN)
794
 
795
  with gr.Tab("🔄 Round-trip"):
@@ -808,7 +430,7 @@ def make_group_en():
808
  btn_rt.click(round_trip, [rt_text, rt_src, rt_tgt, rt_mode, rt_max_comp], [rt_out_conlang, rt_out_back])
809
  btn_rt_cl.click(lambda: ("","",""), None, [rt_text, rt_out_conlang, rt_out_back])
810
 
811
- with gr.Accordion("Quick help (what does this button do?)", open=False):
812
  gr.Markdown(EXPLAIN_TAB_ROUNDTRIP_EN)
813
  return g
814
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  # =====================================================================================
2
  # ========================= UI bilingüe y explicaciones claras ========================
3
  # =====================================================================================
4
 
5
  ALL_LANGS = ["Español","English","Minimax-ASCII","Kōmín-CJK"]
6
 
7
+ # ---- Bloques de explicación (cortos para TÍTULO + largos para CONTENIDO) ----
8
+ ACC_TITLES_ES = {
9
+ "translate": "🔁 Traducir — ¿Qué hace? (haz clic para desplegar)",
10
+ "build": "🛠️ Construir (ES/EN → Conlang) — ¿Qué hace?",
11
+ "decode": "🗝️ Decodificar (Conlang → ES/EN) — ¿Qué hace?",
12
+ "roundtrip": "🔄 Prueba ida→vuelta ¿Qué hace?",
13
+ "checkbox": "☑️ Opciones y compactación (artículos, cópula, pronombres, exacta)",
14
+ "lexicon": "ℹ️ Léxico (OMW Minimax/Kōmín) explicación y vista previa"
15
+ }
16
+
17
+ ACC_TITLES_EN = {
18
+ "translate": "🔁 Translate — What does it do? (click to expand)",
19
+ "build": "🛠️ Build (ES/EN → Conlang) — What does it do?",
20
+ "decode": "🗝️ Decode (Conlang → ES/EN) — What does it do?",
21
+ "roundtrip": "🔄 Round-trip What does it do?",
22
+ "checkbox": "☑️ Options & compaction (articles, copula, pronouns, exact)",
23
+ "lexicon": "ℹ️ Lexicon (OMW → Minimax/Kōmín) — explainer & preview"
24
+ }
25
+
26
+ # Contenidos (Markdown) — ya limpios (se verán dentro del Accordion)
27
  EXPLAIN_TAB_TRANSLATE_ES = """
28
+ Convierte el **Texto** al **Destino**. Funciona para cualquier combinación: Español, English, Minimax-ASCII y Kōmín-CJK.
29
+
30
+ - Si activas **Máx. Compresión Exacta**, añade un remolque `~...` con el **original comprimido** para recuperarlo **exactamente** al decodificar.
31
+ - Los **checkbox** (Omitir artículos / Cópula cero / Quitar pronombres) **solo aplican** cuando el **Destino es un conlang** (Minimax o Kōmín).
32
  """
33
+
34
  EXPLAIN_TAB_BUILD_ES = """
35
+ Fuerza la salida **en conlang** (Minimax o Kōmín) desde Español o Inglés.
36
+ Aplica reglas de fraseo (orden, partículas/TAM) y las opciones de **compactación**.
 
37
  """
38
+
39
  EXPLAIN_TAB_DECODE_ES = """
40
+ Convierte **Minimax/Kōmín** a **Español o Inglés**.
41
+
42
+ - Si el texto trae `~...`, devuelve el **original exacto**.
43
+ - Si no hay `~...`, la reconstrucción es **semi-lossless** con léxico y pistas simples.
44
  """
45
+
46
  EXPLAIN_TAB_ROUNDTRIP_ES = """
47
+ Ejecuta **(ES/EN → Conlang) → (Conlang → ES/EN)** para comprobar **reversibilidad**.
48
+ Con **Máx. Compresión Exacta**, la vuelta coincide **bit a bit**.
 
49
  """
50
+
51
  EXPLAIN_CHECKBOX_ES = """
52
+ **Qué hace cada opción:**
 
 
 
 
 
53
 
54
+ - **Omitir artículos** (el/la/los/las; a/an/the): ahorro típico **~10–15%**.
55
+ - **Cópula cero (presente afirm.)**: omite *ser/estar/be* cuando suena natural → **~5–10%** extra.
56
+ - **Quitar pronombres**: elimina pronombres de sujeto/objeto evidentes → ahorro **variable**.
57
+ - **Máx. Compresión Exacta**: añade `~...` (zlib) para recuperación exacta. En >100 caracteres, **~40–60%**; en textos cortos puede no reducir.
58
+
59
+ **Referencia orientativa:**
60
+ - Sin casillas: **0%**
61
+ - Solo artículos: **~10–15%**
62
+ - Solo cópula: **~5–10%**
63
+ - Artículos + cópula: **~15–20%**
64
+ - Con exacta: **~40–60%** (si el texto es suficientemente largo)
65
  """
66
 
 
67
  EXPLAIN_TAB_TRANSLATE_EN = """
68
+ Converts **Text** to **Target**. Works for any pair: Spanish, English, Minimax-ASCII, Kōmín-CJK.
69
+
70
+ - **Max Exact Compression** appends `~...` with the **exact original** for perfect recovery.
71
+ - Checkboxes (Drop articles / Zero copula / Remove pronouns) apply **only when the Target is a conlang**.
72
  """
73
+
74
  EXPLAIN_TAB_BUILD_EN = """
75
+ Forces **conlang output** (Minimax or Kōmín) from Spanish/English.
76
+ Applies phrasing rules (order, particles/TAM) and **compaction** options.
77
  """
78
+
79
  EXPLAIN_TAB_DECODE_EN = """
80
+ Converts **Minimax/Kōmín** to **Spanish/English**.
81
+
82
+ - If `~...` is present, returns the **bit-perfect original**.
83
+ - Otherwise, reconstructs **semi-losslessly** using the lexicon.
84
  """
85
+
86
  EXPLAIN_TAB_ROUNDTRIP_EN = """
87
+ Runs **(ES/EN → Conlang) → (Conlang → ES/EN)** to verify **reversibility**.
88
+ With **Max Exact Compression**, the return matches bit-for-bit.
89
  """
90
+
91
  EXPLAIN_CHECKBOX_EN = """
92
+ **What each option does:**
93
+
94
+ - **Drop articles**: **~10–15%**.
95
+ - **Zero copula (present affirmative)**: **~5–10%** extra.
96
+ - **Remove pronouns**: variable savings.
97
+ - **Max Exact Compression**: `~...` (zlib) for exact recovery. For >100 chars, **~40–60%**; very short texts may not shrink.
98
+
99
+ **Reference (approx):**
100
+ - No options: **0%**
101
+ - Articles only: **~10–15%**
102
+ - Copula only: **~5–10%**
103
+ - Articles + Copula: **~15–20%**
104
+ - With exact: **~40–60%** (if text is long enough)
105
+ """
106
+
107
+ LEXICON_BUILD_ES = """
108
+ Se construyó así:
109
+
110
+ 1. De **OMW/WordNet 1.4** se extraen **lemas ES** y sus **equivalentes EN** por sinset.
111
+ 2. Normalización y orden por **frecuencia** (*wordfreq*).
112
+ 3. Opcional: **spaCy** refina lemas; **Argos** puede rellenar EN faltantes.
113
+ 4. Asignación de **códigos compactos** con alfabetos barajados por **SEED** hasta `MAXLEN_MINI`/`MAXLEN_CJK`.
114
+ 5. Exporta: `lexicon_minimax.json`, `lexicon_komin.json`, `lexicon_master.json` (+ TSV).
115
+
116
+ **Vista previa** de `lexicon_master.json` (elige cuántas filas ver) aquí abajo.
117
  """
118
+
119
  LEXICON_BUILD_EN = """
120
+ Built as follows:
121
+
122
+ 1. From **OMW/WordNet 1.4**, gather **ES lemmas** and **EN counterparts** by synset.
123
+ 2. Normalize and sort by **frequency** (*wordfreq*).
124
+ 3. Optional: **spaCy** refines lemmas; **Argos** may fill missing EN.
125
+ 4. Assign **compact codes** with **SEED-shuffled** alphabets up to `MAXLEN_MINI`/`MAXLEN_CJK`.
126
+ 5. Exports: `lexicon_minimax.json`, `lexicon_komin.json`, `lexicon_master.json` (+ TSV).
127
+
128
+ **Preview** of `lexicon_master.json` below.
129
  """
130
 
131
+ # ---------- Utilidad: cálculo de compactación ----------
132
+ def _pct_comp(original: str, result: str) -> float:
133
+ if not original: return 0.0
134
+ return max(0.0, 100.0 * (1.0 - (len(result) / len(original))))
135
+
136
+ def compaction_report_es(text, src, tgt, drop, zero, rm, maxc) -> str:
137
+ if not text.strip(): return "—"
138
+ if tgt not in ("Minimax-ASCII","Kōmín-CJK"):
139
+ return "La compactación aplica cuando el **Destino** es Minimax/Kōmín."
140
+ # Base (sin casillas, sin sidecar)
141
+ base = build_sentence(text, src, tgt, False, False, "Semi-lossless", False, False)
142
+ # Actual (con opciones, sin sidecar)
143
+ curr = build_sentence(text, src, tgt, drop, zero, "Semi-lossless", False, rm)
144
+ # Si el usuario marcó exacta, también medimos con sidecar
145
+ curr_exact = build_sentence(text, src, tgt, drop, zero, "Semi-lossless", True, rm) if maxc else None
146
+ p_base = _pct_comp(text, base)
147
+ p_curr = _pct_comp(text, curr)
148
+ msg = f"**Base (sin casillas):** {p_base:.1f}% · **Con tus opciones:** {p_curr:.1f}%"
149
+ if curr_exact is not None:
150
+ p_exact = _pct_comp(text, curr_exact)
151
+ msg += f" · **Con sidecar `~...`:** {p_exact:.1f}%"
152
+ return msg
153
+
154
+ def compaction_report_en(text, src, tgt, drop, zero, rm, maxc) -> str:
155
+ if not text.strip(): return "—"
156
+ if tgt not in ("Minimax-ASCII","Kōmín-CJK"):
157
+ return "Compaction applies when **Target** is Minimax/Kōmín."
158
+ base = build_sentence(text, src, tgt, False, False, "Semi-lossless", False, False)
159
+ curr = build_sentence(text, src, tgt, drop, zero, "Semi-lossless", False, rm)
160
+ curr_exact = build_sentence(text, src, tgt, drop, zero, "Semi-lossless", True, rm) if maxc else None
161
+ p_base = _pct_comp(text, base)
162
+ p_curr = _pct_comp(text, curr)
163
+ msg = f"**Base (no options):** {p_base:.1f}% · **With your options:** {p_curr:.1f}%"
164
+ if curr_exact is not None:
165
+ p_exact = _pct_comp(text, curr_exact)
166
+ msg += f" · **With `~...` sidecar:** {p_exact:.1f}%"
167
+ return msg
168
+
169
  def master_preview(n: int = 20) -> List[List[Any]]:
170
  try:
171
  entries = (MASTER_OBJ or {}).get("entries", [])
 
181
  def make_group_es():
182
  with gr.Group(visible=True) as g:
183
  gr.Markdown("# 🌐 Universal Conlang Translator · Compresión Exacta (ES)")
184
+ # Acordeones de explicación — MISMO nivel y con contenido Markdown dentro
185
  with gr.Row():
186
  with gr.Column():
187
+ with gr.Accordion(ACC_TITLES_ES["translate"], open=False):
188
+ gr.Markdown(EXPLAIN_TAB_TRANSLATE_ES)
189
+ with gr.Accordion(ACC_TITLES_ES["build"], open=False):
190
+ gr.Markdown(EXPLAIN_TAB_BUILD_ES)
191
+ with gr.Accordion(ACC_TITLES_ES["decode"], open=False):
192
+ gr.Markdown(EXPLAIN_TAB_DECODE_ES)
193
+ with gr.Accordion(ACC_TITLES_ES["roundtrip"], open=False):
194
+ gr.Markdown(EXPLAIN_TAB_ROUNDTRIP_ES)
195
  with gr.Column():
196
+ with gr.Accordion(ACC_TITLES_ES["checkbox"], open=False):
197
+ gr.Markdown(EXPLAIN_CHECKBOX_ES)
198
+ with gr.Accordion(ACC_TITLES_ES["lexicon"], open=False):
199
+ gr.Markdown(LEXICON_BUILD_ES)
200
  n_rows = gr.Slider(5, 100, value=20, step=5, label="Filas a mostrar")
201
  table = gr.Dataframe(headers=["lemma_es","lemma_en","minimax","komin"], row_count=1, interactive=False)
202
  gr.Button("Actualizar vista").click(lambda n: master_preview(int(n)), [n_rows], [table])
 
218
  btn_tr = gr.Button("🚀 Traducir", variant="primary")
219
  btn_tr_cl = gr.Button("🧹 Limpiar")
220
  uni_out = gr.Textbox(lines=6, label="Traducción", show_copy_button=True)
221
+ comp_out = gr.Markdown("") # indicador de compactación
222
+
223
+ def do_translate(text, src, tgt, drop, zero, mode, maxc, rm):
224
+ res = universal_translate(text, src, tgt, drop, zero, mode, maxc, rm)
225
+ rep = compaction_report_es(text, src, tgt, drop, zero, rm, maxc)
226
+ return res, rep
227
 
228
+ btn_tr.click(do_translate,
229
  [uni_text, uni_src, uni_tgt, uni_drop, uni_zero, uni_mode, uni_maxc, uni_rmpr],
230
+ [uni_out, comp_out])
231
  btn_tr_cl.click(lambda: ("",""), None, [uni_text, uni_out])
232
 
233
+ with gr.Accordion("Ayuda rápida", open=False):
234
+ gr.Markdown(EXPLAIN_TAB_TRANSLATE_ES)
235
 
236
  with gr.Tab("🛠️ Construir (ES/EN → Conlang)"):
237
  with gr.Row():
 
248
  btn_b = gr.Button("🏗️ Construir", variant="primary")
249
  btn_b_cl = gr.Button("🧹 Limpiar")
250
  out = gr.Textbox(lines=6, label="Salida", show_copy_button=True)
251
+ comp_out_b = gr.Markdown("")
252
 
253
+ def do_build(text, src, tgt, drop, zero, mode, maxc, rm):
254
+ res = build_sentence(text, src, tgt, drop, zero, mode, maxc, rm)
255
+ rep = compaction_report_es(text, src, tgt, drop, zero, rm, maxc)
256
+ return res, rep
257
+
258
+ btn_b.click(do_build,
259
  [text_in, src_lang, target, drop_articles, zero_copula, mode_build, max_comp_build, rm_pron_build],
260
+ [out, comp_out_b])
261
  btn_b_cl.click(lambda: ("",""), None, [text_in, out])
262
 
263
+ with gr.Accordion("Ayuda rápida", open=False):
264
+ gr.Markdown(EXPLAIN_TAB_BUILD_ES)
265
 
266
  with gr.Tab("🗝️ Decodificar (Conlang → ES/EN)"):
267
  with gr.Row():
 
284
  btn_d.click(decode_lossless_aware, [code_in, src_code, tgt_lang], [out3])
285
  btn_d_cl.click(lambda: ("",""), None, [code_in, out3])
286
 
287
+ with gr.Accordion("Ayuda rápida", open=False):
288
  gr.Markdown(EXPLAIN_TAB_DECODE_ES)
289
 
290
  with gr.Tab("🔄 Prueba ida→vuelta"):
 
303
  btn_rt.click(round_trip, [rt_text, rt_src, rt_tgt, rt_mode, rt_max_comp], [rt_out_conlang, rt_out_back])
304
  btn_rt_cl.click(lambda: ("","",""), None, [rt_text, rt_out_conlang, rt_out_back])
305
 
306
+ with gr.Accordion("Ayuda rápida", open=False):
307
  gr.Markdown(EXPLAIN_TAB_ROUNDTRIP_ES)
308
  return g
309
 
 
312
  gr.Markdown("# 🌐 Universal Conlang Translator · Max Exact Compression (EN)")
313
  with gr.Row():
314
  with gr.Column():
315
+ with gr.Accordion(ACC_TITLES_EN["translate"], open=False):
316
+ gr.Markdown(EXPLAIN_TAB_TRANSLATE_EN)
317
+ with gr.Accordion(ACC_TITLES_EN["build"], open=False):
318
+ gr.Markdown(EXPLAIN_TAB_BUILD_EN)
319
+ with gr.Accordion(ACC_TITLES_EN["decode"], open=False):
320
+ gr.Markdown(EXPLAIN_TAB_DECODE_EN)
321
+ with gr.Accordion(ACC_TITLES_EN["roundtrip"], open=False):
322
+ gr.Markdown(EXPLAIN_TAB_ROUNDTRIP_EN)
323
  with gr.Column():
324
+ with gr.Accordion(ACC_TITLES_EN["checkbox"], open=False):
325
+ gr.Markdown(EXPLAIN_CHECKBOX_EN)
326
+ with gr.Accordion(ACC_TITLES_EN["lexicon"], open=False):
327
+ gr.Markdown(LEXICON_BUILD_EN)
328
  n_rows = gr.Slider(5, 100, value=20, step=5, label="Rows to show")
329
  table = gr.Dataframe(headers=["lemma_es","lemma_en","minimax","komin"], row_count=1, interactive=False)
330
  gr.Button("Refresh").click(lambda n: master_preview(int(n)), [n_rows], [table])
 
345
  btn_tr = gr.Button("🚀 Translate", variant="primary")
346
  btn_tr_cl = gr.Button("🧹 Clear")
347
  uni_out = gr.Textbox(lines=6, label="Translation", show_copy_button=True)
348
+ comp_out = gr.Markdown("")
349
+
350
+ def do_translate_en(text, src, tgt, drop, zero, mode, maxc, rm):
351
+ res = universal_translate(text, src, tgt, drop, zero, mode, maxc, rm)
352
+ rep = compaction_report_en(text, src, tgt, drop, zero, rm, maxc)
353
+ return res, rep
354
 
355
+ btn_tr.click(do_translate_en,
356
  [uni_text, uni_src, uni_tgt, uni_drop, uni_zero, uni_mode, uni_maxc, uni_rmpr],
357
+ [uni_out, comp_out])
358
  btn_tr_cl.click(lambda: ("",""), None, [uni_text, uni_out])
359
 
360
+ with gr.Accordion("Quick help", open=False):
361
+ gr.Markdown(EXPLAIN_TAB_TRANSLATE_EN)
362
 
363
  with gr.Tab("🛠️ Build (ES/EN → Conlang)"):
364
  with gr.Row():
 
375
  btn_b = gr.Button("🏗️ Build", variant="primary")
376
  btn_b_cl = gr.Button("🧹 Clear")
377
  out = gr.Textbox(lines=6, label="Output", show_copy_button=True)
378
+ comp_out_b = gr.Markdown("")
379
+
380
+ def do_build_en(text, src, tgt, drop, zero, mode, maxc, rm):
381
+ res = build_sentence(text, src, tgt, drop, zero, mode, maxc, rm)
382
+ rep = compaction_report_en(text, src, tgt, drop, zero, rm, maxc)
383
+ return res, rep
384
 
385
+ btn_b.click(do_build_en,
386
  [text_in, src_lang, target, drop_articles, zero_copula, mode_build, max_comp_build, rm_pron_build],
387
+ [out, comp_out_b])
388
  btn_b_cl.click(lambda: ("",""), None, [text_in, out])
389
 
390
+ with gr.Accordion("Quick help", open=False):
391
+ gr.Markdown(EXPLAIN_TAB_BUILD_EN)
392
 
393
  with gr.Tab("🗝️ Decode (Conlang → ES/EN)"):
394
  with gr.Row():
 
397
  code_in = gr.Textbox(lines=3, label="Conlang text (may include `~...`)", show_copy_button=True)
398
  out3 = gr.Textbox(lines=6, label="Output", show_copy_button=True)
399
 
400
+ def decode_lossless_aware_en(text, src, tgt):
401
  orig = extract_custom_sidecar(text)
402
  if orig is not None: return orig
403
  orig = extract_sidecar_b85(text)
 
408
  btn_d = gr.Button("🔓 Decode", variant="primary")
409
  btn_d_cl = gr.Button("🧹 Clear")
410
 
411
+ btn_d.click(decode_lossless_aware_en, [code_in, src_code, tgt_lang], [out3])
412
  btn_d_cl.click(lambda: ("",""), None, [code_in, out3])
413
 
414
+ with gr.Accordion("Quick help", open=False):
415
  gr.Markdown(EXPLAIN_TAB_DECODE_EN)
416
 
417
  with gr.Tab("🔄 Round-trip"):
 
430
  btn_rt.click(round_trip, [rt_text, rt_src, rt_tgt, rt_mode, rt_max_comp], [rt_out_conlang, rt_out_back])
431
  btn_rt_cl.click(lambda: ("","",""), None, [rt_text, rt_out_conlang, rt_out_back])
432
 
433
+ with gr.Accordion("Quick help", open=False):
434
  gr.Markdown(EXPLAIN_TAB_ROUNDTRIP_EN)
435
  return g
436