# app.py — Universal Conlang Translator (Max Exact Compression)
# Required files in the repo root:
# - lexicon_minimax.json
# - lexicon_komin.json
# - lexicon_master.json
#
# requirements.txt (for HF Spaces):
# gradio>=4.36.0
# spacy>=3.7.4
# es_core_news_sm @ https://github.com/explosion/spacy-models/releases/download/es_core_news_sm-3.7.0/es_core_news_sm-3.7.0-py3-none-any.whl
# en_core_web_sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl
import os, re, json, base64, zlib
from typing import Dict, Optional, List, Any
import gradio as gr
# ------------ Expected files ------------
LEX_MINI = "lexicon_minimax.json"
LEX_KOMI = "lexicon_komin.json"
LEX_MASTER = "lexicon_master.json"
# ------------ Normalization ------------
WORD_RE = re.compile(r"[A-Za-zÁÉÍÓÚÜÑáéíóúüñ]+", re.UNICODE)
STRIP = str.maketrans("ÁÉÍÓÚÜÑáéíóúüñ", "AEIOUUNaeiouun")
def norm_es(w: str) -> str: return re.sub(r"[^a-záéíóúüñ]", "", (w or "").lower()).translate(STRIP)
def norm_en(w: str) -> str: return re.sub(r"[^a-z]", "", (w or "").lower())
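# Usage sketch (illustrative values, relying only on the rules above):
#   norm_es("Canción") -> "cancion"   # lowercased, accents stripped, ñ -> n
#   norm_en("Don't!")  -> "dont"      # only [a-z] survives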
# ------------ Lexicon loading ------------
def load_json(path: str):
    if not os.path.exists(path): return None
    with open(path, "r", encoding="utf-8") as f: return json.load(f)

def load_lexicons():
    mm = load_json(LEX_MINI) or {}
    kk = load_json(LEX_KOMI) or {}
    master = load_json(LEX_MASTER) or {}
    es2mini = mm.get("mapping", {})
    es2komi = kk.get("mapping", {})
    mini2es = {v: k for k, v in es2mini.items()}
    komi2es = {v: k for k, v in es2komi.items()}
    es2en_lemma: Dict[str, str] = {}
    en2es_lemma: Dict[str, str] = {}
    en2mini, en2komi = {}, {}
    mini2en, komi2en = {}, {}
    if isinstance(master, dict) and "entries" in master:
        for e in master["entries"]:
            es = norm_es(str(e.get("lemma_es", ""))); en = norm_en(str(e.get("lemma_en", "")))
            mi = str(e.get("minimax", "")); ko = str(e.get("komin", ""))
            if es and en:
                es2en_lemma.setdefault(es, en); en2es_lemma.setdefault(en, es)
            if en and mi: en2mini.setdefault(en, mi)
            if en and ko: en2komi.setdefault(en, ko)
    mini2en = {v: k for k, v in en2mini.items()}
    komi2en = {v: k for k, v in en2komi.items()}
    return (es2mini, es2komi, mini2es, komi2es, en2mini, en2komi, mini2en, komi2en, es2en_lemma, en2es_lemma, master)
(ES2MINI, ES2KOMI, MINI2ES, KOMI2ES, EN2MINI, EN2KOMI, MINI2EN, KOMI2EN, ES2EN_LEMMA, EN2ES_LEMMA, MASTER_OBJ) = load_lexicons()
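# Assumed shape of lexicon_master.json (a sketch, not authoritative — the
# loader above relies only on these keys; the codes shown are made up):
#   {"entries": [
#       {"lemma_es": "casa", "lemma_en": "house", "minimax": "ks", "komin": "家"},
#       ...
#   ]}
# lexicon_minimax.json / lexicon_komin.json are each expected to carry a flat
# {"mapping": {<es lemma>: <code>, ...}} table.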
# ------------ Pronouns ------------
PRON_ES = {"yo","tú","vos","usted","él","ella","nosotros","vosotros","ustedes","ellos","ellas","me","te","se","nos","os"}
PRON_EN = {"i","you","he","she","it","we","they","me","him","her","us","them"}
# ------------ Reversible OOV (semi-lossless) ------------
ALPHA_MINI64 = "@ptkmnslraeiouy0123456789><=:/!?.+-_*#bcdfghjvqwxzACEGHIJKLMNOPRS"[:64]
CJK_BASE = (
"天地人日月山川雨風星火水木土金石光影花草鳥犬猫魚"
"東西南北中外上下午夜明暗手口目耳心言書家道路門"
"大小長短早晚高低新古青紅白黒金銀銅玉米茶酒米"
"文学楽音画体気電海空森林雪雲砂島橋城村国自由静"
)
# CJK_BASE repeats a few characters (e.g. 金); deduplicate so the 64-symbol
# alphabet is a bijection and str.maketrans decoding stays unambiguous.
ALPHA_CJK64 = "".join(dict.fromkeys(CJK_BASE))[:64]
def to_custom_b64(b: bytes, alphabet: str) -> str:
    std = base64.b64encode(b).decode("ascii")
    trans = str.maketrans("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/", alphabet)
    return std.translate(trans).rstrip("=")

def from_custom_b64(s: str, alphabet: str) -> bytes:
    trans = str.maketrans(alphabet, "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
    std = s.translate(trans); pad = "=" * ((4 - len(std) % 4) % 4)
    return base64.b64decode(std + pad)
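# Round-trip sketch: re-alphabetized base64 is still base64, so for any payload
#   from_custom_b64(to_custom_b64(b"hola", ALPHA_MINI64), ALPHA_MINI64) == b"hola"
# holds as long as the alphabet has 64 distinct symbols.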
def enc_oov_minimax(token: str) -> str: return "~" + to_custom_b64(token.encode("utf-8"), ALPHA_MINI64)

def dec_oov_minimax(code: str) -> str:
    try: return from_custom_b64(code[1:], ALPHA_MINI64).decode("utf-8")
    except Exception: return code

def enc_oov_komin(token: str) -> str: return "「" + to_custom_b64(token.encode("utf-8"), ALPHA_CJK64) + "」"

def dec_oov_komin(code: str) -> str:
    try: return from_custom_b64(code[1:-1], ALPHA_CJK64).decode("utf-8")
    except Exception: return code

def is_oov_minimax(code: str) -> bool: return code.startswith("~") and len(code) > 1

def is_oov_komin(code: str) -> bool: return len(code) >= 2 and code.startswith("「") and code.endswith("」")
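# OOV round-trip sketch (exact codes depend on the alphabet constants):
#   code = enc_oov_minimax("quetzal")            # "~…" over ALPHA_MINI64
#   dec_oov_minimax(code) == "quetzal"           # True
#   dec_oov_komin(enc_oov_komin("ñu")) == "ñu"   # True; payload wrapped in 「…」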
# ------------ Optional spaCy ------------
USE_SPACY = False
try:
    import spacy
    try:
        nlp_es = spacy.load("es_core_news_sm"); nlp_en = spacy.load("en_core_web_sm"); USE_SPACY = True
    except Exception:
        nlp_es = nlp_en = None
except Exception:
    nlp_es = nlp_en = None

def lemma_of(tok, src_lang: str) -> str:
    if src_lang == "Español":
        return norm_es(tok.lemma_ if getattr(tok, "lemma_", "") else tok.text)
    else:
        return norm_en(tok.lemma_ if getattr(tok, "lemma_", "") else tok.text)
# ------------ Simple detection & helpers ------------
def detect_polarity(doc) -> bool: return "?" in getattr(doc, "text", "")

def detect_neg(doc) -> bool:
    for t in doc:
        if getattr(t, "dep_", "") == "neg" or getattr(t, "lower_", "").lower() in ("no", "not", "n't"):
            return True
    return False

def detect_tense(root):
    m = str(getattr(root, "morph", ""))
    if "Tense=Past" in m: return "Past"
    if "Tense=Fut" in m: return "Fut"
    if "Tense=Pres" in m: return "Pres"
    for c in getattr(root, "children", []):
        if getattr(c, "pos_", "") == "AUX":
            cm = str(getattr(c, "morph", ""))
            if "Tense=Past" in cm: return "Past"
            if getattr(c, "lower_", "").lower() == "will": return "Fut"
    return "Pres"

def extract_core(doc):
    tokens = list(doc)
    root = next((t for t in tokens if getattr(t, "dep_", "") == "ROOT" and getattr(t, "pos_", "") in ("VERB", "AUX")), tokens[0] if tokens else doc)
    subs, objs, obls, advs = [], [], [], []
    for t in getattr(root, "children", []):
        dep = getattr(t, "dep_", ""); pos = getattr(t, "pos_", "")
        if dep in ("nsubj", "nsubj:pass", "csubj"): subs.append(t)
        elif dep in ("obj", "dobj", "iobj"): objs.append(t)
        elif dep in ("obl", "pobj"): obls.append(t)
        elif dep in ("advmod", "advcl") and pos == "ADV": advs.append(t)
    for arr in (subs, objs, obls, advs): arr.sort(key=lambda x: getattr(x, "i", 0))
    return root, subs, objs, obls, advs
def _person_of_doc(doc, src_lang: str) -> Optional[str]:
    try:
        tokens = list(doc)
        root = next((t for t in tokens if getattr(t, "dep_", "") == "ROOT"), tokens[0])
        subj = next((t for t in getattr(root, "children", []) if getattr(t, "dep_", "").startswith("nsubj")), None)
        if subj is None: return None
        plur = ("Number=Plur" in str(getattr(subj, "morph", ""))) if src_lang == "Español" else (getattr(subj, "tag_", "") in ("NNS", "NNPS"))
        low = getattr(subj, "lower_", "").lower()
        if src_lang == "Español":
            if low in ("yo",): return "1p" if plur else "1s"
            if low in ("tú", "vos"): return "2p" if plur else "2s"
            if low in ("usted", "él", "ella"): return "3p" if plur else "3s"
            lem = lemma_of(subj, "Español")
            if lem in ("yo", "nosotros"): return "1p" if plur else "1s"
            if lem in ("tú", "vosotros"): return "2p" if plur else "2s"
            return "3p" if plur else "3s"
        else:
            # Pronoun tags are PRP (never NNS/NNPS), so plural pronouns need
            # explicit handling rather than the tag-based heuristic.
            if low == "i": return "1s"
            if low == "we": return "1p"
            if low == "you": return "2p" if plur else "2s"
            if low in ("he", "she", "it"): return "3s"
            if low == "they": return "3p"
            return "3p" if plur else "3s"
    except Exception:
        return None
def detect_person(root, src_lang: str) -> Optional[str]:
    m = str(getattr(root, "morph", "")); person_str, number_str = "3", "s"
    if "Person=" in m:
        for feat in m.split("|"):
            if feat.startswith("Person="): person_str = feat.split("=")[1]
            elif feat.startswith("Number="): number_str = "p" if feat.split("=")[1] == "Plur" else "s"
        return person_str + number_str
    return _person_of_doc(root.doc, src_lang)
# ------------ Mapping & phrase builders ------------
def code_es(lemma: str, target: str) -> str:
    lemma = norm_es(lemma)
    if target == "Minimax-ASCII": return ES2MINI.get(lemma) or enc_oov_minimax(lemma)
    return ES2KOMI.get(lemma) or enc_oov_komin(lemma)

def code_en(lemma: str, target: str) -> str:
    lemma = norm_en(lemma)
    if target == "Minimax-ASCII": return (EN2MINI.get(lemma) if EN2MINI else None) or enc_oov_minimax(lemma)
    return (EN2KOMI.get(lemma) if EN2KOMI else None) or enc_oov_komin(lemma)
TAM_MINI = {"Pres":"P","Past":"T","Fut":"F","UNK":"P"}
TAM_KOMI = {"Pres":"Ⓟ","Past":"Ⓣ","Fut":"Ⓕ","UNK":"Ⓟ"}
def realize_minimax(doc, src_lang: str, drop_articles=True, zero_copula=True, semi_lossless=False, person_hint="2s", remove_pronouns=False):
    root, subs, objs, obls, advs = extract_core(doc)
    tense = detect_tense(root); is_q, is_neg = detect_polarity(doc), detect_neg(doc)
    vlem = lemma_of(root, src_lang) if USE_SPACY else ("ser" if "?" in getattr(doc, "text", "") else "estar")
    vcode = code_es(vlem, "Minimax-ASCII") if src_lang == "Español" else code_en(vlem, "Minimax-ASCII")
    tail = TAM_MINI.get(tense, "P")
    if semi_lossless: tail += (detect_person(root, src_lang) or person_hint)
    if is_neg: tail += "N"
    if is_q: tail += "Q"
    if tail: vcode = f"{vcode}·{tail}"
    def realize_np(tokens):
        outs = []
        for t in tokens:
            if remove_pronouns:
                txt = (getattr(t, "text", "") or "").lower()
                if (src_lang == "Español" and txt in PRON_ES) or (src_lang == "English" and txt in PRON_EN): continue
            lem = lemma_of(t, src_lang) if USE_SPACY else getattr(t, "text", "")
            outs.append(code_es(lem, "Minimax-ASCII") if src_lang == "Español" else code_en(lem, "Minimax-ASCII"))
        return outs
    S = realize_np(subs); O = realize_np(objs) + realize_np(obls)
    ADV = []
    for a in advs:
        lem = lemma_of(a, src_lang) if USE_SPACY else getattr(a, "text", "")
        ADV.append(code_es(lem, "Minimax-ASCII") if src_lang == "Español" else code_en(lem, "Minimax-ASCII"))
    parts = S + O + ADV if (zero_copula and not semi_lossless and vlem in ("ser", "estar", "be") and tense == "Pres" and not is_neg and not is_q) else [vcode] + S + O + ADV
    return " ".join(p for p in parts if p)
def realize_komin(doc, src_lang: str, drop_articles=True, zero_copula=True, semi_lossless=False, person_hint="2s", remove_pronouns=False):
    root, subs, objs, obls, advs = extract_core(doc)
    tense, is_q, is_neg = detect_tense(root), detect_polarity(doc), detect_neg(doc)
    vlem = lemma_of(root, src_lang) if USE_SPACY else ("ser" if "?" in getattr(doc, "text", "") else "estar")
    vcode = code_es(vlem, "Kōmín-CJK") if src_lang == "Español" else code_en(vlem, "Kōmín-CJK")
    P_SUBJ, P_OBJ = "ᵖ", "ᵒ"; Q_FIN = "?"
    TAM = TAM_KOMI.get(tense, "Ⓟ")
    if semi_lossless: TAM = TAM + f"[{detect_person(root, src_lang) or person_hint}]"
    def realize_np(tokens, particle):
        outs = []
        for t in tokens:
            if remove_pronouns:
                txt = (getattr(t, "text", "") or "").lower()
                if (src_lang == "Español" and txt in PRON_ES) or (src_lang == "English" and txt in PRON_EN): continue
            lem = lemma_of(t, src_lang) if USE_SPACY else getattr(t, "text", "")
            outs.append((code_es(lem, "Kōmín-CJK") if src_lang == "Español" else code_en(lem, "Kōmín-CJK")) + particle)
        return outs
    S = realize_np(subs, P_SUBJ); O = realize_np(objs + obls, P_OBJ)
    ADV = []
    for a in advs:
        lem = lemma_of(a, src_lang) if USE_SPACY else getattr(a, "text", "")
        ADV.append(code_es(lem, "Kōmín-CJK") if src_lang == "Español" else code_en(lem, "Kōmín-CJK"))
    parts = S + O + ADV + [vcode + TAM]
    out = " ".join(parts)
    if is_q: out += " " + Q_FIN
    return out
# ------------ Sidecars (exact compression) ------------
# Match the trailing "§(…)" sidecar; [!-u] spans the full ASCII85 symbol range
# (the previous explicit class missed several characters a85encode can emit).
SIDECAR_B85_RE = re.compile(r"\s?§\((?P<b85>[!-u]+)\)$")
def b85_enc_raw(s: str) -> str: return base64.a85encode(zlib.compress(s.encode("utf-8"), 9), adobe=False).decode("ascii")
def b85_dec_raw(b85s: str) -> str: return zlib.decompress(base64.a85decode(b85s.encode("ascii"), adobe=False)).decode("utf-8")
def attach_sidecar_b85(conlang_text: str, original_text: str) -> str: return f"{conlang_text} §({b85_enc_raw(original_text)})"
def extract_sidecar_b85(text: str) -> Optional[str]:
    m = SIDECAR_B85_RE.search(text)
    if not m: return None
    try: return b85_dec_raw(m.group("b85"))
    except Exception: return None
def strip_sidecar_b85(text: str) -> str: return SIDECAR_B85_RE.sub("", text).rstrip()
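# Sidecar round-trip sketch (ASCII85 + zlib; the encoded blob is illustrative):
#   tagged = attach_sidecar_b85("k·P2sQ", "¿Estás bien?")   # -> "k·P2sQ §(…)"
#   extract_sidecar_b85(tagged) -> "¿Estás bien?"
#   strip_sidecar_b85(tagged)   -> "k·P2sQ"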
def custom_sidecar_enc(conlang_text: str, original_text: str) -> str:
    blob = to_custom_b64(zlib.compress(original_text.encode("utf-8"), 9), ALPHA_MINI64)
    return f"{conlang_text} ~{blob}"

def extract_custom_sidecar(text: str) -> Optional[str]:
    if '~' in text:
        _, blob = text.rsplit('~', 1)
        try: return zlib.decompress(from_custom_b64(blob, ALPHA_MINI64)).decode("utf-8")
        except Exception: return None
    return None

def strip_custom_sidecar(text: str) -> str:
    # Split on the LAST '~' to mirror extract_custom_sidecar; OOV tokens also
    # start with '~', so splitting on the first one could truncate the text.
    return text.rsplit('~', 1)[0].rstrip() if '~' in text else text
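# Custom-sidecar round-trip sketch (zlib + the Minimax base64 alphabet):
#   tagged = custom_sidecar_enc("k·P2sQ", "¿Estás bien?")   # -> "k·P2sQ ~…"
#   extract_custom_sidecar(tagged) -> "¿Estás bien?"
#   strip_custom_sidecar(tagged)   -> "k·P2sQ"
# Caveat: '~' doubles as the OOV prefix, so a sentence whose last token is an
# OOV word (with no sidecar attached) fails the zlib check on extraction and
# returns None, which callers treat as "no sidecar".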
# ------------ Simple encode / decode ------------
def encode_simple(text: str, src_lang: str, target: str) -> str:
    if not text.strip(): return ""
    def repl_es(m):
        key = norm_es(m.group(0))
        code = ES2MINI.get(key) if target == "Minimax-ASCII" else ES2KOMI.get(key)
        return code or (enc_oov_minimax(m.group(0)) if target == "Minimax-ASCII" else enc_oov_komin(m.group(0)))
    def repl_en(m):
        key = norm_en(m.group(0)); table = EN2MINI if target == "Minimax-ASCII" else EN2KOMI
        if table and key in table: return table[key]
        return enc_oov_minimax(m.group(0)) if target == "Minimax-ASCII" else enc_oov_komin(m.group(0))
    repl = repl_es if src_lang == "Español" else repl_en
    return WORD_RE.sub(repl, text)
def pluralize_es(word: str) -> str:
    exceptions = {"uno": "unos", "buen": "buenos", "hombre": "hombres"}
    if word in exceptions: return exceptions[word]
    if word.endswith("z"): return word[:-1] + "ces"
    if word.endswith(("a", "e", "i", "o", "u")): return word + "s"
    return word + "es"

def pluralize_en(word: str) -> str:
    exceptions = {"man": "men", "woman": "women", "child": "children"}
    if word in exceptions: return exceptions[word]
    if word.endswith("y") and len(word) > 1 and word[-2] not in "aeiou": return word[:-1] + "ies"
    if word.endswith(("s", "sh", "ch", "x", "z")): return word + "es"
    return word + "s"

def pluralize(word: str, tgt_lang: str) -> str: return pluralize_es(word) if tgt_lang == "Español" else pluralize_en(word)
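# Pluralizer sketch:
#   pluralize("luz", "Español")  -> "luces"   # -z -> -ces
#   pluralize("city", "English") -> "cities"  # consonant + y -> -ies
#   pluralize("box", "English")  -> "boxes"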
# Verb tail: tense (P/T/F), optional person+number (1-3, s/p), then N/Q flags.
mini_tail_re = re.compile(r"^(?P<stem>.+?)·(?P<tail>[PTFNQ123sp]+)$")
def decode_simple(text: str, source: str, tgt_lang: str) -> str:
    if not text.strip(): return ""
    code2es = MINI2ES if source == "Minimax-ASCII" else KOMI2ES
    code2en = MINI2EN if source == "Minimax-ASCII" else KOMI2EN
    if source == "Kōmín-CJK":
        # Normalize fullwidth punctuation/space, then map codes back to lemmas.
        # Limitation: role particles (ᵖ/ᵒ) and TAM bubbles are not stripped
        # before lookup, so marked tokens may fall through unchanged.
        text = text.replace("？", "?").replace("\u3000", " ")
        table = code2es if tgt_lang == "Español" else (code2en or code2es)
        outs = []
        for w in text.split():
            if w == "?": continue
            if is_oov_komin(w): outs.append(dec_oov_komin(w))
            else: outs.append(table.get(w) or code2es.get(w, w))
        return " ".join(outs)
    tokens = text.split()
    if not tokens: return ""
    lemma_tokens, pl_flags = [], []
    verb_idx = -1; verb_lemma = None; verb_tense = "Pres"; verb_person = "3s"; has_q = False; is_neg = False
    for part in tokens:
        look = part.replace("[PL]", "")
        had_pl = "[PL]" in part  # flag is appended below, exactly once per emitted token
        m = mini_tail_re.match(look)
        if m:
            verb_idx = len(lemma_tokens); stem = m.group("stem"); tail = m.group("tail")
            vlem_es = code2es.get(stem); vlem_en = code2en.get(stem) if code2en else None
            vlem = vlem_es if tgt_lang == "Español" else (vlem_en or vlem_es or stem)
            if not vlem: vlem = dec_oov_minimax(stem) if is_oov_minimax(stem) else stem
            lemma_tokens.append(vlem); pl_flags.append(False)
            if tail[0] in "PTF":
                verb_tense = {"P": "Pres", "T": "Past", "F": "Fut"}[tail[0]]; pos = 1
                if len(tail) > pos and tail[pos] in "123":
                    pos += 1
                    verb_person = tail[pos-1] + (tail[pos] if len(tail) > pos and tail[pos] in "sp" else "s")
                    if len(tail) > pos and tail[pos] in "sp": pos += 1
                is_neg = "N" in tail[pos:]; has_q = "Q" in tail[pos:]
            verb_lemma = vlem
            continue
        w_es = code2es.get(look); w_en = code2en.get(look) if code2en else None
        w = w_es if tgt_lang == "Español" else (w_en or w_es or look)
        if not w: w = dec_oov_minimax(look) if is_oov_minimax(look) else look
        lemma_tokens.append(w); pl_flags.append(had_pl)
    out_parts = []
    for idx, lem in enumerate(lemma_tokens):
        if idx == verb_idx:
            v = _es_conj(verb_lemma, verb_tense, verb_person) if tgt_lang == "Español" else _en_conj(verb_lemma, verb_tense, verb_person)
            if is_neg: v = ("no " if tgt_lang == "Español" else "not ") + v
            out_parts.append(v)
        else:
            out_parts.append(pluralize(lem, tgt_lang) if pl_flags[idx] else lem)
    out_text = " ".join(out_parts)
    if has_q:
        start_q = "¿" if tgt_lang == "Español" else ""
        out_text = f"{start_q}{out_text.capitalize()}?"
    return out_text
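# Decode sketch (lexicon-independent because the token is OOV-encoded):
#   decode_simple(enc_oov_minimax("gato"), "Minimax-ASCII", "Español") -> "gato"
# A verb tail such as "<code>·T3pN" comes back conjugated (past, 3rd person
# plural) and negated.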
# ------------ Minimal conjugators ------------
def _es_conj_regular(lemma, tense, person):
    if not lemma.endswith(("ar", "er", "ir")): return lemma
    stem, vtype = lemma[:-2], lemma[-2:]
    pres = {"ar": {"1s": "o", "2s": "as", "3s": "a", "1p": "amos", "2p": "áis", "3p": "an"},
            "er": {"1s": "o", "2s": "es", "3s": "e", "1p": "emos", "2p": "éis", "3p": "en"},
            "ir": {"1s": "o", "2s": "es", "3s": "e", "1p": "imos", "2p": "ís", "3p": "en"}}
    pret = {"ar": {"1s": "é", "2s": "aste", "3s": "ó", "1p": "amos", "2p": "asteis", "3p": "aron"},
            "er": {"1s": "í", "2s": "iste", "3s": "ió", "1p": "imos", "2p": "isteis", "3p": "ieron"},
            "ir": {"1s": "í", "2s": "iste", "3s": "ió", "1p": "imos", "2p": "isteis", "3p": "ieron"}}
    fut = {"1s": "é", "2s": "ás", "3s": "á", "1p": "emos", "2p": "éis", "3p": "án"}
    if tense == "Pres": return stem + pres[vtype].get(person, pres[vtype]["3s"])
    if tense == "Past": return stem + pret[vtype].get(person, pret[vtype]["3s"])
    return lemma + fut.get(person, fut["3s"])
def _es_conj(lemma, tense, person):
    if lemma == "ser":
        tab = {"Pres": {"1s": "soy", "2s": "eres", "3s": "es", "1p": "somos", "2p": "sois", "3p": "son"},
               "Past": {"1s": "fui", "2s": "fuiste", "3s": "fue", "1p": "fuimos", "2p": "fuisteis", "3p": "fueron"},
               "Fut": {"1s": "seré", "2s": "serás", "3s": "será", "1p": "seremos", "2p": "seréis", "3p": "serán"}}
        return tab[tense].get(person, tab[tense]["3s"])
    if lemma == "estar":
        tab = {"Pres": {"1s": "estoy", "2s": "estás", "3s": "está", "1p": "estamos", "2p": "estáis", "3p": "están"},
               "Past": {"1s": "estuve", "2s": "estuviste", "3s": "estuvo", "1p": "estuvimos", "2p": "estuvisteis", "3p": "estuvieron"},
               "Fut": {"1s": "estaré", "2s": "estarás", "3s": "estará", "1p": "estaremos", "2p": "estaréis", "3p": "estarán"}}
        return tab[tense].get(person, tab[tense]["3s"])
    if lemma == "ir":
        tab = {"Pres": {"1s": "voy", "2s": "vas", "3s": "va", "1p": "vamos", "2p": "vais", "3p": "van"},
               "Past": {"1s": "fui", "2s": "fuiste", "3s": "fue", "1p": "fuimos", "2p": "fuisteis", "3p": "fueron"},
               "Fut": {"1s": "iré", "2s": "irás", "3s": "irá", "1p": "iremos", "2p": "iréis", "3p": "irán"}}
        return tab[tense].get(person, tab[tense]["3s"])
    return _es_conj_regular(lemma, tense, person)
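# Conjugation sketch:
#   _es_conj("hablar", "Pres", "1s") -> "hablo"
#   _es_conj("comer", "Past", "3p")  -> "comieron"
#   _es_conj("ser", "Fut", "2s")     -> "serás"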
def _en_conj(lemma, tense, person):
    if lemma == "be":
        if tense == "Pres": return {"1s": "am", "2s": "are", "3s": "is", "1p": "are", "2p": "are", "3p": "are"}.get(person, "is")
        if tense == "Past": return {"1s": "was", "2s": "were", "3s": "was", "1p": "were", "2p": "were", "3p": "were"}.get(person, "was")
        return "be"
    if lemma == "have":
        if tense == "Pres": return "has" if person == "3s" else "have"
        if tense == "Past": return "had"
        return "have"
    if lemma == "go":
        if tense == "Past": return "went"
        return "goes" if (tense == "Pres" and person == "3s") else "go"
    if lemma == "do":
        if tense == "Past": return "did"
        return "does" if (tense == "Pres" and person == "3s") else "do"
    if tense == "Pres":
        if person == "3s":
            if lemma.endswith("y") and (len(lemma) < 2 or lemma[-2] not in "aeiou"): return lemma[:-1] + "ies"
            if lemma.endswith(("s", "sh", "ch", "x", "z", "o")): return lemma + "es"
            return lemma + "s"
        return lemma
    if tense == "Past":
        if lemma.endswith("e"): return lemma + "d"
        if lemma.endswith("y") and (len(lemma) < 2 or lemma[-2] not in "aeiou"): return lemma[:-1] + "ied"
        return lemma + "ed"
    return lemma
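# English conjugator sketch:
#   _en_conj("carry", "Past", "3s") -> "carried"
#   _en_conj("watch", "Pres", "3s") -> "watches"
#   _en_conj("be", "Past", "1p")    -> "were"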
# ================= Build / translate helpers =================
def _build_with_spacy(text: str, src_lang: str, target: str, drop_articles: bool, zero_copula: bool, semi_lossless: bool, remove_pronouns: bool) -> str:
    nlp = nlp_es if src_lang == "Español" else nlp_en
    doc = nlp(text)
    if target == "Minimax-ASCII":
        return realize_minimax(doc, src_lang, drop_articles, zero_copula, semi_lossless, remove_pronouns=remove_pronouns)
    else:
        return realize_komin(doc, src_lang, drop_articles, zero_copula, semi_lossless, remove_pronouns=remove_pronouns)

def build_sentence(text: str, src_lang: str, target: str, drop_articles: bool, zero_copula: bool, mode: str, max_comp_exact: bool = False, remove_pronouns: bool = False) -> str:
    if not text.strip(): return ""
    semi = True  # always semi-lossless when building (keeps the verb tail, disables zero copula)
    core = _build_with_spacy(text, src_lang, target, drop_articles, zero_copula and not semi, semi, remove_pronouns) if USE_SPACY else encode_simple(text, src_lang, target)
    if max_comp_exact:
        return custom_sidecar_enc(core, text)
    return core

def universal_translate(text: str, src: str, tgt: str, drop_articles: bool, zero_copula: bool, mode: str, max_comp_exact: bool = False, remove_pronouns: bool = False) -> str:
    if not text.strip(): return ""
    if src == tgt: return text
    # Natural → conlang
    if src in ("Español", "English") and tgt in ("Minimax-ASCII", "Kōmín-CJK"):
        return build_sentence(text, src, tgt, drop_articles, zero_copula, mode, max_comp_exact, remove_pronouns)
    # Conlang → natural (honors sidecars)
    if src in ("Minimax-ASCII", "Kōmín-CJK") and tgt in ("Español", "English"):
        orig = extract_custom_sidecar(text)
        if orig is not None: return orig
        orig = extract_sidecar_b85(text)
        if orig is not None: return orig
        return decode_simple(strip_custom_sidecar(strip_sidecar_b85(text)), src, tgt)
    # Natural ↔ natural (naive pass-through)
    if src in ("Español", "English") and tgt in ("Español", "English"):
        return text
    # Conlang ↔ conlang (via Spanish lemmas)
    if src in ("Minimax-ASCII", "Kōmín-CJK") and tgt in ("Minimax-ASCII", "Kōmín-CJK"):
        core = strip_custom_sidecar(text)
        es_lemmas = decode_simple(core, src, "Español")
        words = re.findall(r"\w+|[^\w\s]+", es_lemmas)
        out = []
        for w in words:
            if re.fullmatch(r"\w+", w):
                code = ES2MINI.get(norm_es(w)) if tgt == "Minimax-ASCII" else ES2KOMI.get(norm_es(w))
                if not code:
                    code = enc_oov_minimax(w) if tgt == "Minimax-ASCII" else enc_oov_komin(w)
                out.append(code)
            else:
                out.append(w)
        out_text = " ".join(out)
        sidecar = extract_custom_sidecar(text)
        if sidecar is not None:
            return custom_sidecar_enc(out_text, sidecar)
        return out_text
    return "[No soportado]"
# =====================================================================================
# ===================== Bilingual UI and plain-language explainers ====================
# =====================================================================================
ALL_LANGS = ["Español","English","Minimax-ASCII","Kōmín-CJK"]
# ---------- Explainers ----------
EXPLAIN_TAB_TRANSLATE_ES = """
**¿Qué hace “Traducir”?**
Convierte lo que escribes en **Texto** al **Destino** que elijas (ES/EN/Minimax/Kōmín).
- Con **Máx. Compresión Exacta**, añade un final ~... con el **original comprimido** para recuperarlo tal cual al decodificar.
- Las casillas de **compactación** (artículos, cópula, pronombres) **sólo se aplican si el Destino es conlang**.
"""
EXPLAIN_TAB_BUILD_ES = """
**¿Qué hace “Construir (ES/EN → Conlang)”?**
Obliga a que la salida sea **Minimax** o **Kōmín** (desde ES/EN). Aplica el orden y las partículas del conlang y las opciones de **compactación**.
"""
EXPLAIN_TAB_DECODE_ES = """
**¿Qué hace “Decodificar (Conlang → ES/EN)”?**
Convierte de **Minimax/Kōmín** a **Español/Inglés**.
- Si el texto trae ~..., devolvemos el **original exacto**.
- Si no, reconstruimos lo más fiel posible con el **diccionario**.
"""
EXPLAIN_TAB_ROUNDTRIP_ES = """
**¿Qué hace “Prueba ida→vuelta”?**
Hace el camino **(ES/EN → Conlang) → (Conlang → ES/EN)** para comprobar la **reversibilidad**.
Con **exacta**, la vuelta coincide **bit a bit**.
"""
EXPLAIN_CHECKBOX_ES = """
**Opciones de compactación (para conlang):**
- **Omitir artículos** (*el/la/los/las*; *a/an/the*): ahorro típico **~10–15%**.
- **Cópula cero** (presente afirmativo): oculta *ser/estar/be* → **~5–10%** extra.
- **Quitar pronombres**: suprime pronombres obvios → ahorro **variable**.
- **Máx. Compresión Exacta**: añade ~... para recuperar el original (en >100 caracteres, **~40–60%**; en textos muy cortos puede no reducir).
**Guía rápida:** sin casillas **0%**; artículos+cópula **~15–20%**.
"""
# What are the conlangs?
EXPLAIN_CONLANGS_ES = """
### ¿Qué son Minimax-ASCII y Kōmín-CJK?
Piensa en **dos “idiomas comprimidos”** que sirven para escribir frases de ES/EN con menos caracteres y, además,
**poder volver al original**. Son como “zip para texto”, pero legibles.
---
#### 1) Minimax-ASCII (compacto y tecleable)
- Usa **sólo ASCII**, así que funciona en cualquier sitio (correo, móvil, código).
- Cada **palabra** se cambia por un **código corto** (por frecuencia, lo común es más corto).
- Los **verbos** llevan una colita con marcas:
- **·P / ·T / ·F** → Presente / Pasado / Futuro
- **1s, 2p, 3s…** → Persona y número (1=yo/nosotros, 2=tú/vosotros, 3=él/ellos; s=singular, p=plural)
- **N** → negación; **Q** → pregunta
- **Ejemplo**: “**¿Estás bien?**” → `k·P2sQ` (estar, Presente, 2ª persona, pregunta)
**Cuándo usarlo**: si quieres **máxima compatibilidad** y **tamaño pequeño** sin símbolos raros.
---
#### 2) Kōmín-CJK (visual y ultracorto)
- Usa ideogramas CJK para **aún más compresión** y un aspecto muy limpio.
- Añade **partículas**:
- `ᵖ` marca el **sujeto**, `ᵒ` marca el **objeto**.
- El verbo lleva un **círculo de tiempo**:
- **Ⓟ / Ⓣ / Ⓕ** → Presente / Pasado / Futuro
- Las **preguntas** suelen acabar en **?**.
- **Ejemplo**: “**Los estudiantes leen libros.**” → `学生ᵖ 书ᵒ 读Ⓟ`
**Cuándo usarlo**: si buscas **máxima compresión** y no te importa usar caracteres CJK.
---
#### ¿Y si falta una palabra?
- Si una palabra no está en el diccionario, se guarda **de forma reversible**:
- En **Minimax**: `~A9f...` (base64 propio).
- En **Kōmín**: `「...」`.
Así **no se pierde nada**.
#### “Compresión exacta” (el `~...`)
- Opcionalmente se añade un **sidecar** `~...` con el **original comprimido**.
- Si existe, al decodificar se recupera el **original al 100%** (puntuación, mayúsculas, etc.).
- En textos largos ahorra mucho, con **ida/vuelta perfecta**.
---
#### Mini-glosario
- **Código**: forma corta de una palabra (p. ej., `g` para “que”).
- **Partícula**: marca de función (sujeto `ᵖ`, objeto `ᵒ`).
- **Cola verbal** (Minimax): `·P/·T/·F`, persona (`1s`, `3p`), `N`, `Q`.
- **Sidecar**: `~...` con el original comprimido para **reconstruir exacto**.
> Resumen: Minimax-ASCII = **universal y tecleable**. Kōmín-CJK = **más corto y visual**. Ambos son **reversibles** y aceptan **sidecar exacto**.
"""
# EN
EXPLAIN_TAB_TRANSLATE_EN = "Converts **Text → Target** (ES/EN/Minimax/Kōmín). With **Max Exact**, adds ~... to recover the **exact original**. Compaction checkboxes apply only when **Target is conlang**."
EXPLAIN_TAB_BUILD_EN = "Forces **conlang output** (Minimax/Kōmín) from ES/EN, applying phrasing rules and compaction options."
EXPLAIN_TAB_DECODE_EN = "Converts **Minimax/Kōmín → ES/EN**. If ~... exists, returns the bit-perfect original; else semi-lossless."
EXPLAIN_TAB_ROUNDTRIP_EN = "Runs **(ES/EN→Conlang)→(Conlang→ES/EN)** to verify reversibility; with exact, it’s bit-for-bit."
EXPLAIN_CHECKBOX_EN = "Drop articles ~10–15%, Zero copula ~5–10% extra, Remove pronouns variable, Max Exact 40–60% for >100 chars."
EXPLAIN_CONLANGS_EN = """
### What are Minimax-ASCII and Kōmín-CJK?
Think of **two “compressed languages”** that let you write ES/EN sentences with fewer characters while you can still
**recover the original**. Like a human-readable “zip” for text.
---
#### 1) Minimax-ASCII (compact & typeable)
- Uses **ASCII only**, so it works everywhere (email, phones, code editors).
- Each **word** becomes a **short code** (high-frequency words get the shortest codes).
- **Verbs** get a small **tail**:
- **·P / ·T / ·F** → Present / Past / Future
- **1s, 2p, 3s…** → Person & number (1=I/we, 2=you, 3=he/they; s=singular, p=plural)
- **N** → negation; **Q** → question
- **Example**: “**Are you okay?**” → `k·P2sQ` (be, Present, 2nd person, question)
**When to use**: you want **maximum compatibility** and **small size** without special symbols.
---
#### 2) Kōmín-CJK (visual & ultra-short)
- Uses CJK ideograms for **even tighter compression** and a clean visual look.
- Adds **particles**:
- `ᵖ` marks the **subject**, `ᵒ` marks the **object**.
- Verb shows a **time bubble**:
- **Ⓟ / Ⓣ / Ⓕ** → Present / Past / Future
- **Questions** usually end with **?**.
- **Example**: “**Students read books.**” → `学生ᵖ 书ᵒ 读Ⓟ`
**When to use**: you want **maximum compression** and you’re fine with CJK.
---
#### Unknown words?
- If a word isn’t in the lexicon, it’s kept **reversibly**:
- In **Minimax**: `~A9f...` (custom base64).
- In **Kōmín**: `「...」`.
Nothing is lost.
#### “Exact compression” (the `~...` sidecar)
- Optionally appends `~...` with the **compressed original**.
- If present, decoding reproduces the **exact original** (punctuation, casing, etc.).
- Great for longer texts: big savings with **perfect round-trip**.
---
#### Tiny glossary
- **Code**: short form for a word (e.g., `g` for “that/que”).
- **Particle**: role marker (subject `ᵖ`, object `ᵒ`).
- **Verb tail** (Minimax): `·P/·T/·F`, person (`1s`, `3p`), `N`, `Q`.
- **Sidecar**: `~...` holding the compressed original for **bit-perfect recovery**.
> TL;DR: Minimax-ASCII = **universal & typeable**. Kōmín-CJK = **shortest & visual**. Both are **reversible** and support the **exact sidecar**.
"""
# Lexicon (friendly explainer)
LEXICON_FRIENDLY_ES = """
**¿De dónde sale el “diccionario” (léxico) y para qué sirve?**
- Usamos **WordNet (OMW)** para listar palabras españolas y sus equivalentes en inglés.
- Limpiamos y ordenamos por **frecuencia de uso**.
- Asignamos un **código corto** a cada lema para **Minimax** y para **Kōmín**.
- Guardamos tres archivos que la app usa al traducir:
- lexicon_minimax.json (ES → Minimax)
- lexicon_komin.json (ES → Kōmín)
- lexicon_master.json (ES + EN + ambos códigos)
**Así** podemos convertir tus frases en **códigos compactos** y volver a texto entendible.
"""
LEXICON_FRIENDLY_EN = "We use **WordNet (OMW)**, pair ES words with EN, sort by frequency, assign short codes (Minimax/Kōmín), and save three JSONs so the app can encode/decode compactly."
# ---------- Compaction utilities & preview ----------
def _pct_comp(original: str, result: str) -> float:
    if not original: return 0.0
    return max(0.0, 100.0 * (1.0 - (len(result) / len(original))))
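# E.g. _pct_comp("abcdefghij", "abcd") -> 60.0 (ten chars down to four);
# outputs longer than the input clamp to 0.0 rather than going negative.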
def compaction_line_es(text, src, tgt, drop, zero, rm, maxc) -> str:
    if not text.strip(): return "—"
    if tgt not in ("Minimax-ASCII", "Kōmín-CJK"):
        return "La compactación aplica cuando el **Destino** es Minimax/Kōmín."
    base = build_sentence(text, src, tgt, False, False, "Semi-lossless", False, False)
    curr = build_sentence(text, src, tgt, drop, zero, "Semi-lossless", False, rm)
    msg = f"**Base (sin casillas):** {_pct_comp(text, base):.1f}% · **Con tus opciones:** {_pct_comp(text, curr):.1f}%"
    if maxc:
        curr_exact = build_sentence(text, src, tgt, drop, zero, "Semi-lossless", True, rm)
        msg += f" · **Con sidecar ~...:** {_pct_comp(text, curr_exact):.1f}%"
    return msg

def compaction_line_en(text, src, tgt, drop, zero, rm, maxc) -> str:
    if not text.strip(): return "—"
    if tgt not in ("Minimax-ASCII", "Kōmín-CJK"):
        return "Compaction applies when **Target** is Minimax/Kōmín."
    base = build_sentence(text, src, tgt, False, False, "Semi-lossless", False, False)
    curr = build_sentence(text, src, tgt, drop, zero, "Semi-lossless", False, rm)
    msg = f"**Base (no options):** {_pct_comp(text, base):.1f}% · **With your options:** {_pct_comp(text, curr):.1f}%"
    if maxc:
        curr_exact = build_sentence(text, src, tgt, drop, zero, "Semi-lossless", True, rm)
        msg += f" · **With ~... sidecar:** {_pct_comp(text, curr_exact):.1f}%"
    return msg

def master_preview(n: int = 20) -> List[List[Any]]:
    try:
        entries = (MASTER_OBJ or {}).get("entries", [])
        head = entries[:max(0, int(n))]
        rows = [["lemma_es", "lemma_en", "minimax", "komin"]]
        for e in head:
            rows.append([e.get("lemma_es", ""), e.get("lemma_en", ""), e.get("minimax", ""), e.get("komin", "")])
        return rows
    except Exception:
        return [["lemma_es", "lemma_en", "minimax", "komin"], ["(no data)", "", "", ""]]
# ---------- Panels (one visible per "mode") ----------
def make_panel_translate(lang="ES"):
    with gr.Group(visible=True) as g:
        with gr.Accordion(("🔁 Traducir — ayuda" if lang == "ES" else "🔁 Translate — help"), open=False):
            gr.Markdown(EXPLAIN_TAB_TRANSLATE_ES if lang == "ES" else EXPLAIN_TAB_TRANSLATE_EN)
        with gr.Row():
            src = gr.Dropdown(ALL_LANGS, value=("Español" if lang == "ES" else "English"), label=("Fuente" if lang == "ES" else "Source"))
            tgt = gr.Dropdown(ALL_LANGS, value="Minimax-ASCII", label=("Destino" if lang == "ES" else "Target"))
        text = gr.Textbox(lines=3, label=("Texto" if lang == "ES" else "Text"), placeholder=("Ej.: Hola, ¿cómo estás?" if lang == "ES" else "e.g., Hello, how are you?"), show_copy_button=True)
        with gr.Row():
            drop = gr.Checkbox(True, label=("Omitir artículos (ES/EN → conlang)" if lang == "ES" else "Drop articles (ES/EN → conlang)"))
            zero = gr.Checkbox(False, label=("Cópula cero (presente afirm.)" if lang == "ES" else "Zero copula (present affirmative)"))
            rmpr = gr.Checkbox(False, label=("Quitar pronombres" if lang == "ES" else "Remove pronouns"))
            exact = gr.Checkbox(False, label=("Máx. Compresión Exacta (sidecar ~...)" if lang == "ES" else "Max Exact Compression (sidecar ~...)"))
        mode_hidden = gr.Dropdown(["Semi-lossless"], value="Semi-lossless", visible=False)
        out = gr.Textbox(lines=6, label=("Traducción" if lang == "ES" else "Translation"), show_copy_button=True)
        comp = gr.Markdown("")
        def run(text, s, t, d, z, m, e, r):
            if not text.strip(): return "", ""
            res = universal_translate(text, s, t, d, z, m, e, r)
            rep = (compaction_line_es if lang == "ES" else compaction_line_en)(text, s, t, d, z, r, e)
            return res, rep
        for c in [text, src, tgt, drop, zero, rmpr, exact]:
            c.change(run, [text, src, tgt, drop, zero, mode_hidden, exact, rmpr], [out, comp])
    return g
def make_panel_build(lang="ES"):
    with gr.Group(visible=False) as g:
        with gr.Accordion(("🛠️ Construir — ayuda" if lang == "ES" else "🛠️ Build — help"), open=False):
            gr.Markdown(EXPLAIN_TAB_BUILD_ES if lang == "ES" else EXPLAIN_TAB_BUILD_EN)
        with gr.Row():
            src = gr.Dropdown(["Español", "English"], value=("Español" if lang == "ES" else "English"), label=("Fuente" if lang == "ES" else "Source"))
            tgt = gr.Dropdown(["Minimax-ASCII", "Kōmín-CJK"], value="Minimax-ASCII", label="Conlang")
        text = gr.Textbox(lines=3, label=("Frase" if lang == "ES" else "Sentence"), show_copy_button=True)
        with gr.Row():
            drop = gr.Checkbox(True, label=("Omitir artículos" if lang == "ES" else "Drop articles"))
            zero = gr.Checkbox(False, label=("Cópula cero (presente afirm.)" if lang == "ES" else "Zero copula (present affirmative)"))
            rmpr = gr.Checkbox(False, label=("Quitar pronombres" if lang == "ES" else "Remove pronouns"))
            exact = gr.Checkbox(False, label=("Máx. Compresión Exacta" if lang == "ES" else "Max Exact Compression"))
        mode_hidden = gr.Dropdown(["Semi-lossless"], value="Semi-lossless", visible=False)
        out = gr.Textbox(lines=6, label=("Salida" if lang == "ES" else "Output"), show_copy_button=True)
        comp = gr.Markdown("")
        def run(text, s, t, d, z, m, e, r):
            if not text.strip(): return "", ""
            res = build_sentence(text, s, t, d, z, m, e, r)
            rep = (compaction_line_es if lang == "ES" else compaction_line_en)(text, s, t, d, z, r, e)
            return res, rep
        for c in [text, src, tgt, drop, zero, rmpr, exact]:
            c.change(run, [text, src, tgt, drop, zero, mode_hidden, exact, rmpr], [out, comp])
    return g
def make_panel_decode(lang="ES"):
    with gr.Group(visible=False) as g:
        with gr.Accordion(("🗝️ Decodificar — ayuda" if lang == "ES" else "🗝️ Decode — help"), open=False):
            gr.Markdown(EXPLAIN_TAB_DECODE_ES if lang == "ES" else EXPLAIN_TAB_DECODE_EN)
        with gr.Row():
            src = gr.Dropdown(["Minimax-ASCII", "Kōmín-CJK"], value="Minimax-ASCII", label=("Fuente" if lang == "ES" else "Source"))
            tgt = gr.Dropdown(["Español", "English"], value=("Español" if lang == "ES" else "English"), label=("Destino" if lang == "ES" else "Target"))
        text = gr.Textbox(lines=3, label=("Texto en conlang (puede incluir ~...)" if lang == "ES" else "Conlang text (may include ~...)"), show_copy_button=True)
        out = gr.Textbox(lines=6, label=("Salida" if lang == "ES" else "Output"), show_copy_button=True)
        def run(t, s, d):
            if not t.strip(): return ""
            orig = extract_custom_sidecar(t)
            if orig is not None: return orig
            orig = extract_sidecar_b85(t)
            if orig is not None: return orig
            return decode_simple(strip_custom_sidecar(strip_sidecar_b85(t)), s, d)
        for c in [text, src, tgt]:
            c.change(run, [text, src, tgt], [out])
    return g
def make_panel_roundtrip(lang="ES"):
    with gr.Group(visible=False) as g:
        with gr.Accordion(("🔄 Prueba ida→vuelta — ayuda" if lang == "ES" else "🔄 Round-trip — help"), open=False):
            gr.Markdown(EXPLAIN_TAB_ROUNDTRIP_ES if lang == "ES" else EXPLAIN_TAB_ROUNDTRIP_EN)
        with gr.Row():
            src = gr.Dropdown(["Español", "English"], value=("Español" if lang == "ES" else "English"), label=("Fuente" if lang == "ES" else "Source"))
            tgt = gr.Dropdown(["Minimax-ASCII", "Kōmín-CJK"], value="Minimax-ASCII", label="Conlang")
        text = gr.Textbox(lines=3, label=("Frase" if lang == "ES" else "Sentence"), show_copy_button=True)
        exact = gr.Checkbox(False, label=("Máx. Compresión Exacta" if lang == "ES" else "Max Exact Compression"))
        mode_hidden = gr.Dropdown(["Semi-lossless"], value="Semi-lossless", visible=False)
        out1 = gr.Textbox(lines=3, label=("Conlang (ida)" if lang == "ES" else "Outward (conlang)"), show_copy_button=True)
        out2 = gr.Textbox(lines=3, label=("Vuelta" if lang == "ES" else "Back"), show_copy_button=True)
        def run(t, s, c, m, e):
            if not t.strip(): return "", ""
            conlang = universal_translate(t, s, c, True, False, m, e, False)
            back = universal_translate(conlang, c, s, True, False, m, e, False)
            return conlang, back
        for c in [text, src, tgt, exact]:
            c.change(run, [text, src, tgt, mode_hidden, exact], [out1, out2])
    return g
# ---------- Page (ES/EN), with "modes" as BUTTONS (Radio) ----------
with gr.Blocks(title="Universal Conlang Translator", theme=gr.themes.Soft()) as demo:
    # --- Dictionary downloads (PDF) ---
    gr.Markdown("### 📥 Diccionarios (PDF)")
    # Small CSS tweak to visually shrink the download buttons
    gr.HTML("""
    <style>
    #btn_pdf_es button, #btn_pdf_en button {
        font-size: 0.85rem;
        padding: 0.4rem 0.8rem;
    }
    </style>
    """)
    with gr.Row():
        gr.DownloadButton(
            label="⬇️ Español >> Minimax-ASCII y Kōmín-CJK (PDF)",
            value="dictionary_ES_to_Minimax_Komin.pdf",
            elem_id="btn_pdf_es",
            variant="secondary",
        )
        gr.DownloadButton(
            label="⬇️ English >> Minimax-ASCII y Kōmín-CJK (PDF)",
            value="dictionary_EN_to_Minimax_Komin.pdf",
            elem_id="btn_pdf_en",
            variant="secondary",
        )
    gr.Markdown("## 🌍 Idioma / Language")
    lang = gr.Radio(["ES", "EN"], value="ES", label="Selecciona / Select")
    # Explainer accordions (same level)
    acc_conlangs_es = gr.Accordion("🧩 ¿Qué son Minimax-ASCII y Kōmín-CJK? (ES)", open=False, visible=True)
    with acc_conlangs_es: gr.Markdown(EXPLAIN_CONLANGS_ES)
    acc_conlangs_en = gr.Accordion("🧩 What are Minimax-ASCII and Kōmín-CJK? (EN)", open=False, visible=False)
    with acc_conlangs_en: gr.Markdown(EXPLAIN_CONLANGS_EN)
    acc_modes_es = gr.Accordion("📖 ¿Qué hace cada botón / modo? (ES)", open=False, visible=True)
    with acc_modes_es: gr.Markdown(
        "- **🔁 Traducir**: Texto → Destino (ES/EN/Minimax/Kōmín), con opciones de compactación y % mostrado.\n"
        "- **🛠️ Construir**: Obliga salida en conlang (Minimax/Kōmín) desde ES/EN.\n"
        "- **🗝️ Decodificar**: Conlang → ES/EN (si hay ~..., devuelve el original exacto).\n"
        "- **🔄 Prueba ida→vuelta**: Comprueba reversibilidad."
    )
    acc_modes_en = gr.Accordion("📖 What does each button / mode do? (EN)", open=False, visible=False)
    with acc_modes_en: gr.Markdown(
        "- **🔁 Translate**: Text → Target (ES/EN/Minimax/Kōmín) with compaction and %.\n"
        "- **🛠️ Build**: Force conlang output from ES/EN.\n"
        "- **🗝️ Decode**: Conlang → ES/EN (if ~..., exact original).\n"
        "- **🔄 Round-trip**: Check reversibility."
    )
    acc_intro_es = gr.Accordion("☑️ Opciones y compactación — guía rápida (ES)", open=False, visible=True)
    with acc_intro_es: gr.Markdown(EXPLAIN_CHECKBOX_ES)
    acc_intro_en = gr.Accordion("☑️ Options & compaction — quick guide (EN)", open=False, visible=False)
    with acc_intro_en: gr.Markdown(EXPLAIN_CHECKBOX_EN)
    acc_lex_es = gr.Accordion("ℹ️ Léxico — explicación y vista previa (ES)", open=False, visible=True)
    with acc_lex_es:
        gr.Markdown(LEXICON_FRIENDLY_ES)
        n_rows_es = gr.Slider(5, 100, value=20, step=5, label="Filas a mostrar")
        table_es = gr.Dataframe(headers=["lemma_es", "lemma_en", "minimax", "komin"], row_count=1, interactive=False)
        gr.Button("Actualizar vista").click(lambda n: master_preview(int(n)), [n_rows_es], [table_es])
    acc_lex_en = gr.Accordion("ℹ️ Lexicon — explainer & preview (EN)", open=False, visible=False)
    with acc_lex_en:
        gr.Markdown(LEXICON_FRIENDLY_EN)
        n_rows_en = gr.Slider(5, 100, value=20, step=5, label="Rows to show")
        table_en = gr.Dataframe(headers=["lemma_es", "lemma_en", "minimax", "komin"], row_count=1, interactive=False)
        gr.Button("Refresh").click(lambda n: master_preview(int(n)), [n_rows_en], [table_en])
    # Usage mode as BUTTONS (Radio)
    gr.Markdown("### 🧭 Modo de uso (elige uno)")
    mode = gr.Radio(
        choices=[
            "🔁 Traducir / Translate",
            "🛠️ Construir (ES/EN → Conlang) / Build",
            "🗝️ Decodificar (Conlang → ES/EN) / Decode",
            "🔄 Prueba ida→vuelta / Round-trip",
        ],
        value="🔁 Traducir / Translate",
        label=None,
    )
    # Panels per mode and per language
    gr.Markdown("### 🧪 Área de trabajo")
    panel_tr_es = make_panel_translate("ES"); panel_bu_es = make_panel_build("ES")
    panel_de_es = make_panel_decode("ES"); panel_rt_es = make_panel_roundtrip("ES")
    panel_tr_en = make_panel_translate("EN"); panel_bu_en = make_panel_build("EN")
    panel_de_en = make_panel_decode("EN"); panel_rt_en = make_panel_roundtrip("EN")
    def _vis(yes): return gr.update(visible=bool(yes))
    def _mode_to_flags(mode_str):
        order = [
            "🔁 Traducir / Translate",
            "🛠️ Construir (ES/EN → Conlang) / Build",
            "🗝️ Decodificar (Conlang → ES/EN) / Decode",
            "🔄 Prueba ida→vuelta / Round-trip",
        ]
        chosen = mode_str if mode_str in order else order[0]
        return [chosen == o for o in order], chosen
    # Visibility logic
    def switch_everything(lang_code, tr, bu, de, rt):
        tr2, bu2, de2, rt2 = False, False, False, False
        if tr or (not bu and not de and not rt): tr2 = True
        elif bu: bu2 = True
        elif de: de2 = True
        else: rt2 = True
        is_en = (lang_code == "EN")
        vis_es = not is_en; vis_en = is_en
        updates = [
            _vis(vis_es), _vis(vis_en),  # conlang explainers ES/EN
            _vis(vis_es), _vis(vis_en),  # mode explainers ES/EN
            _vis(vis_es), _vis(vis_en),  # intro ES/EN
            _vis(vis_es), _vis(vis_en),  # lexicon ES/EN
        ]
        updates += [
            _vis(vis_es and tr2), _vis(vis_es and bu2), _vis(vis_es and de2), _vis(vis_es and rt2),
            _vis(vis_en and tr2), _vis(vis_en and bu2), _vis(vis_en and de2), _vis(vis_en and rt2),
        ]
        return updates
    def _on_lang_or_mode(lang_code, mode_str):
        flags, chosen = _mode_to_flags(mode_str)
        tr, bu, de, rt = flags
        return switch_everything(lang_code, tr, bu, de, rt)
    # Event wiring (the three triggers share the same output list)
    _outputs = [
        acc_conlangs_es, acc_conlangs_en,
        acc_modes_es, acc_modes_en,
        acc_intro_es, acc_intro_en,
        acc_lex_es, acc_lex_en,
        panel_tr_es, panel_bu_es, panel_de_es, panel_rt_es,
        panel_tr_en, panel_bu_en, panel_de_en, panel_rt_en,
    ]
    lang.change(_on_lang_or_mode, [lang, mode], _outputs)
    mode.change(_on_lang_or_mode, [lang, mode], _outputs)
    # --- Force the correct initial state when the Space loads ---
    demo.load(_on_lang_or_mode, [lang, mode], _outputs)

if __name__ == "__main__":
    demo.launch()