Update app.py
Browse files
app.py
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
# app.py — Universal Conlang Translator (Max Compresión Exacta)
|
| 2 |
# Archivos requeridos en la raíz:
|
| 3 |
-
#
|
| 4 |
-
#
|
| 5 |
-
#
|
| 6 |
#
|
| 7 |
# requirements.txt (para HF Spaces):
|
| 8 |
# gradio>=4.36.0
|
|
@@ -15,8 +15,8 @@ from typing import Dict, Optional, List, Any
|
|
| 15 |
import gradio as gr
|
| 16 |
|
| 17 |
# ------------ Archivos esperados ------------
|
| 18 |
-
LEX_MINI
|
| 19 |
-
LEX_KOMI
|
| 20 |
LEX_MASTER = "lexicon_master.json"
|
| 21 |
|
| 22 |
# ------------ Normalización ------------
|
|
@@ -56,13 +56,9 @@ def load_lexicons():
|
|
| 56 |
|
| 57 |
mini2en = {v:k for k,v in en2mini.items()}
|
| 58 |
komi2en = {v:k for k,v in en2komi.items()}
|
| 59 |
-
return (es2mini, es2komi, mini2es, komi2es,
|
| 60 |
-
en2mini, en2komi, mini2en, komi2en,
|
| 61 |
-
es2en_lemma, en2es_lemma, master)
|
| 62 |
|
| 63 |
-
(ES2MINI, ES2KOMI, MINI2ES, KOMI2ES,
|
| 64 |
-
EN2MINI, EN2KOMI, MINI2EN, KOMI2EN,
|
| 65 |
-
ES2EN_LEMMA, EN2ES_LEMMA, MASTER_OBJ) = load_lexicons()
|
| 66 |
|
| 67 |
# ------------ Pronombres ------------
|
| 68 |
PRON_ES = {"yo","tú","vos","usted","él","ella","nosotros","vosotros","ustedes","ellos","ellas","me","te","se","nos","os"}
|
|
@@ -86,17 +82,11 @@ def from_custom_b64(s: str, alphabet: str) -> bytes:
|
|
| 86 |
trans = str.maketrans(alphabet, "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
|
| 87 |
std = s.translate(trans); pad = "=" * ((4 - len(std) % 4) % 4)
|
| 88 |
return base64.b64decode(std + pad)
|
| 89 |
-
|
| 90 |
-
# --- PATCH: evitar OOV vacío ---
|
| 91 |
-
def enc_oov_minimax(token: str) -> str:
|
| 92 |
-
t = token if token else "_" # evita cadena vacía
|
| 93 |
-
return "~" + to_custom_b64(t.encode("utf-8"), ALPHA_MINI64)
|
| 94 |
def dec_oov_minimax(code: str) -> str:
|
| 95 |
try: return from_custom_b64(code[1:], ALPHA_MINI64).decode("utf-8")
|
| 96 |
except Exception: return code
|
| 97 |
-
def enc_oov_komin(token: str) -> str:
|
| 98 |
-
t = token if token else "_" # evita cadena vacía
|
| 99 |
-
return "「" + to_custom_b64(t.encode("utf-8"), ALPHA_CJK64) + "」"
|
| 100 |
def dec_oov_komin(code: str) -> str:
|
| 101 |
try: return from_custom_b64(code[1:-1], ALPHA_CJK64).decode("utf-8")
|
| 102 |
except Exception: return code
|
|
@@ -114,18 +104,11 @@ try:
|
|
| 114 |
except Exception:
|
| 115 |
nlp_es = nlp_en = None
|
| 116 |
|
| 117 |
-
# --- PATCH: lemma_of siempre devuelve algo no vacío ---
|
| 118 |
def lemma_of(tok, src_lang: str) -> str:
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
return
|
| 123 |
-
# Fallback 1: normalizar el texto original
|
| 124 |
-
txt = norm_es(getattr(tok, "text", "")) if src_lang == "Español" else norm_en(getattr(tok, "text", ""))
|
| 125 |
-
if txt:
|
| 126 |
-
return txt
|
| 127 |
-
# Fallback 2: verbo seguro para no dejar vacío
|
| 128 |
-
return "ser" if src_lang == "Español" else "be"
|
| 129 |
|
| 130 |
# ------------ Detección simple y helpers ------------
|
| 131 |
def detect_polarity(doc) -> bool: return "?" in getattr(doc,"text","")
|
|
@@ -202,14 +185,10 @@ def code_en(lemma: str, target: str) -> str:
|
|
| 202 |
TAM_MINI = {"Pres":"P","Past":"T","Fut":"F","UNK":"P"}
|
| 203 |
TAM_KOMI = {"Pres":"Ⓟ","Past":"Ⓣ","Fut":"Ⓕ","UNK":"Ⓟ"}
|
| 204 |
|
| 205 |
-
def realize_minimax(doc, src_lang: str, drop_articles=True, zero_copula=True,
|
| 206 |
-
semi_lossless=False, person_hint="2s", remove_pronouns=False):
|
| 207 |
root, subs, objs, obls, advs = extract_core(doc)
|
| 208 |
tense = detect_tense(root); is_q, is_neg = detect_polarity(doc), detect_neg(doc)
|
| 209 |
-
vlem
|
| 210 |
-
# --- PATCH: si vlem viene vacío, fallback a verbo seguro ---
|
| 211 |
-
if not vlem:
|
| 212 |
-
vlem = "ser" if src_lang == "Español" else "be"
|
| 213 |
vcode = code_es(vlem, "Minimax-ASCII") if src_lang=="Español" else code_en(vlem, "Minimax-ASCII")
|
| 214 |
tail = TAM_MINI.get(tense, "P")
|
| 215 |
if semi_lossless: tail += (detect_person(root, src_lang) or person_hint)
|
|
@@ -236,14 +215,10 @@ def realize_minimax(doc, src_lang: str, drop_articles=True, zero_copula=True,
|
|
| 236 |
parts = S+O+ADV if (zero_copula and not semi_lossless and vlem in ("ser","estar","be") and tense=="Pres" and not is_neg and not is_q) else [vcode]+S+O+ADV
|
| 237 |
return " ".join(p for p in parts if p)
|
| 238 |
|
| 239 |
-
def realize_komin(doc, src_lang: str, drop_articles=True, zero_copula=True,
|
| 240 |
-
semi_lossless=False, person_hint="2s", remove_pronouns=False):
|
| 241 |
root, subs, objs, obls, advs = extract_core(doc)
|
| 242 |
tense, is_q, is_neg = detect_tense(root), detect_polarity(doc), detect_neg(doc)
|
| 243 |
-
vlem
|
| 244 |
-
# --- PATCH: si vlem viene vacío, fallback a verbo seguro ---
|
| 245 |
-
if not vlem:
|
| 246 |
-
vlem = "ser" if src_lang == "Español" else "be"
|
| 247 |
vcode = code_es(vlem, "Kōmín-CJK") if src_lang=="Español" else code_en(vlem, "Kōmín-CJK")
|
| 248 |
P_SUBJ, P_OBJ = "ᵖ", "ᵒ"; Q_FIN = "?"
|
| 249 |
TAM = TAM_KOMI.get(tense,"Ⓟ")
|
|
@@ -270,7 +245,7 @@ def realize_komin(doc, src_lang: str, drop_articles=True, zero_copula=True,
|
|
| 270 |
return out
|
| 271 |
|
| 272 |
# ------------ Sidecars (compresión exacta) ------------
|
| 273 |
-
SIDECAR_B85_RE = re.compile(r"\s?§\((?P<b85>[A-Za-z0-9!#$%&()*+\-;<=>?@^_
|
| 274 |
def b85_enc_raw(s: str) -> str: return base64.a85encode(zlib.compress(s.encode("utf-8"), 9), adobe=False).decode("ascii")
|
| 275 |
def b85_dec_raw(b85s: str) -> str: return zlib.decompress(base64.a85decode(b85s.encode("ascii"), adobe=False)).decode("utf-8")
|
| 276 |
def attach_sidecar_b85(conlang_text: str, original_text: str) -> str: return f"{conlang_text} §({b85_enc_raw(original_text)})"
|
|
@@ -426,9 +401,7 @@ def _en_conj(lemma, tense, person):
|
|
| 426 |
return lemma
|
| 427 |
|
| 428 |
# ================= Helper de construcción/translate =================
|
| 429 |
-
def _build_with_spacy(text: str, src_lang: str, target: str,
|
| 430 |
-
drop_articles: bool, zero_copula: bool,
|
| 431 |
-
semi_lossless: bool, remove_pronouns: bool) -> str:
|
| 432 |
nlp = nlp_es if src_lang=="Español" else nlp_en
|
| 433 |
doc = nlp(text)
|
| 434 |
if target == "Minimax-ASCII":
|
|
@@ -436,19 +409,15 @@ def _build_with_spacy(text: str, src_lang: str, target: str,
|
|
| 436 |
else:
|
| 437 |
return realize_komin(doc, src_lang, drop_articles, zero_copula, semi_lossless, remove_pronouns=remove_pronouns)
|
| 438 |
|
| 439 |
-
def build_sentence(text: str, src_lang: str, target: str,
|
| 440 |
-
drop_articles: bool, zero_copula: bool,
|
| 441 |
-
mode: str, max_comp_exact: bool = False, remove_pronouns: bool = False) -> str:
|
| 442 |
if not text.strip(): return ""
|
| 443 |
-
semi = True
|
| 444 |
core = _build_with_spacy(text, src_lang, target, drop_articles, zero_copula and not semi, semi, remove_pronouns) if USE_SPACY else encode_simple(text, src_lang, target)
|
| 445 |
if max_comp_exact:
|
| 446 |
return custom_sidecar_enc(core, text)
|
| 447 |
return core
|
| 448 |
|
| 449 |
-
def universal_translate(text: str, src: str, tgt: str,
|
| 450 |
-
drop_articles: bool, zero_copula: bool,
|
| 451 |
-
mode: str, max_comp_exact: bool = False, remove_pronouns: bool = False) -> str:
|
| 452 |
if not text.strip(): return ""
|
| 453 |
if src == tgt: return text
|
| 454 |
|
|
@@ -499,7 +468,7 @@ ALL_LANGS = ["Español","English","Minimax-ASCII","Kōmín-CJK"]
|
|
| 499 |
EXPLAIN_TAB_TRANSLATE_ES = """
|
| 500 |
**¿Qué hace “Traducir”?**
|
| 501 |
Convierte lo que escribes en **Texto** al **Destino** que elijas (ES/EN/Minimax/Kōmín).
|
| 502 |
-
- Con **Máx. Compresión Exacta**, añade un final
|
| 503 |
- Las casillas de **compactación** (artículos, cópula, pronombres) **sólo se aplican si el Destino es conlang**.
|
| 504 |
"""
|
| 505 |
EXPLAIN_TAB_BUILD_ES = """
|
|
@@ -509,7 +478,7 @@ Obliga a que la salida sea **Minimax** o **Kōmín** (desde ES/EN). Aplica el or
|
|
| 509 |
EXPLAIN_TAB_DECODE_ES = """
|
| 510 |
**¿Qué hace “Decodificar (Conlang → ES/EN)”?**
|
| 511 |
Convierte de **Minimax/Kōmín** a **Español/Inglés**.
|
| 512 |
-
- Si el texto trae
|
| 513 |
- Si no, reconstruimos lo más fiel posible con el **diccionario**.
|
| 514 |
"""
|
| 515 |
EXPLAIN_TAB_ROUNDTRIP_ES = """
|
|
@@ -522,28 +491,28 @@ EXPLAIN_CHECKBOX_ES = """
|
|
| 522 |
- **Omitir artículos** (*el/la/los/las*; *a/an/the*): ahorro típico **~10–15%**.
|
| 523 |
- **Cópula cero** (presente afirmativo): oculta *ser/estar/be* → **~5–10%** extra.
|
| 524 |
- **Quitar pronombres**: suprime pronombres obvios → ahorro **variable**.
|
| 525 |
-
- **Máx. Compresión Exacta**: añade
|
| 526 |
**Guía rápida:** sin casillas **0%**; artículos+cópula **~15–20%**.
|
| 527 |
"""
|
| 528 |
# ¿Qué son los lenguajes?
|
| 529 |
EXPLAIN_CONLANGS_ES = """
|
| 530 |
**¿Qué son Minimax-ASCII y Kōmín-CJK?**
|
| 531 |
-
- **Minimax-ASCII**: versión compacta que usa sólo caracteres comunes (ASCII). Añade marcas como
|
| 532 |
-
- **Kōmín-CJK**: versión visual con partículas (ej.: sujeto
|
| 533 |
-
Ambos son “**conlangs**” pensados para **ahorrar espacio** y permitir **decodificación** a ES/EN (exacta si hay
|
| 534 |
"""
|
| 535 |
|
| 536 |
# EN
|
| 537 |
-
EXPLAIN_TAB_TRANSLATE_EN = "Converts **Text → Target** (ES/EN/Minimax/Kōmín). With **Max Exact**, adds
|
| 538 |
EXPLAIN_TAB_BUILD_EN = "Forces **conlang output** (Minimax/Kōmín) from ES/EN, applying phrasing rules and compaction options."
|
| 539 |
-
EXPLAIN_TAB_DECODE_EN = "Converts **Minimax/Kōmín → ES/EN**. If
|
| 540 |
EXPLAIN_TAB_ROUNDTRIP_EN = "Runs **(ES/EN→Conlang)→(Conlang→ES/EN)** to verify reversibility; with exact, it’s bit-for-bit."
|
| 541 |
EXPLAIN_CHECKBOX_EN = "Drop articles ~10–15%, Zero copula ~5–10% extra, Remove pronouns variable, Max Exact 40–60% for >100 chars."
|
| 542 |
EXPLAIN_CONLANGS_EN = """
|
| 543 |
**What are Minimax-ASCII and Kōmín-CJK?**
|
| 544 |
-
- **Minimax-ASCII**: compact ASCII codes with
|
| 545 |
-
- **Kōmín-CJK**: visual style using particles (subject
|
| 546 |
-
Both are conlangs for **space-saving** and **decoding** back to ES/EN (bit-perfect when
|
| 547 |
"""
|
| 548 |
|
| 549 |
# Léxico (amigable)
|
|
@@ -553,9 +522,9 @@ LEXICON_FRIENDLY_ES = """
|
|
| 553 |
- Limpiamos y ordenamos por **frecuencia de uso**.
|
| 554 |
- Asignamos un **código corto** a cada lema para **Minimax** y para **Kōmín**.
|
| 555 |
- Guardamos tres archivos que la app usa al traducir:
|
| 556 |
-
-
|
| 557 |
-
-
|
| 558 |
-
-
|
| 559 |
**Así** podemos convertir tus frases en **códigos compactos** y volver a texto entendible.
|
| 560 |
"""
|
| 561 |
LEXICON_FRIENDLY_EN = "We use **WordNet (OMW)**, pair ES words with EN, sort by frequency, assign short codes (Minimax/Kōmín), and save three JSONs so the app can encode/decode compactly."
|
|
@@ -574,7 +543,7 @@ def compaction_line_es(text, src, tgt, drop, zero, rm, maxc) -> str:
|
|
| 574 |
msg = f"**Base (sin casillas):** {_pct_comp(text, base):.1f}% · **Con tus opciones:** {_pct_comp(text, curr):.1f}%"
|
| 575 |
if maxc:
|
| 576 |
curr_exact = build_sentence(text, src, tgt, drop, zero, "Semi-lossless", True, rm)
|
| 577 |
-
msg += f" · **Con sidecar
|
| 578 |
return msg
|
| 579 |
|
| 580 |
def compaction_line_en(text, src, tgt, drop, zero, rm, maxc) -> str:
|
|
@@ -586,7 +555,7 @@ def compaction_line_en(text, src, tgt, drop, zero, rm, maxc) -> str:
|
|
| 586 |
msg = f"**Base (no options):** {_pct_comp(text, base):.1f}% · **With your options:** {_pct_comp(text, curr):.1f}%"
|
| 587 |
if maxc:
|
| 588 |
curr_exact = build_sentence(text, src, tgt, drop, zero, "Semi-lossless", True, rm)
|
| 589 |
-
msg += f" · **With
|
| 590 |
return msg
|
| 591 |
|
| 592 |
def master_preview(n: int = 20) -> List[List[Any]]:
|
|
@@ -608,14 +577,12 @@ def make_panel_translate(lang="ES"):
|
|
| 608 |
with gr.Row():
|
| 609 |
src = gr.Dropdown(ALL_LANGS, value=("Español" if lang=="ES" else "English"), label=("Fuente" if lang=="ES" else "Source"))
|
| 610 |
tgt = gr.Dropdown(ALL_LANGS, value="Minimax-ASCII", label=("Destino" if lang=="ES" else "Target"))
|
| 611 |
-
text = gr.Textbox(lines=3, label=("Texto" if lang=="ES" else "Text"),
|
| 612 |
-
placeholder=("Ej.: Hola, ¿cómo estás?" if lang=="ES" else "e.g., Hello, how are you?"),
|
| 613 |
-
show_copy_button=True)
|
| 614 |
with gr.Row():
|
| 615 |
-
drop = gr.Checkbox(True,
|
| 616 |
zero = gr.Checkbox(False, label=("Cópula cero (presente afirm.)" if lang=="ES" else "Zero copula (present affirmative)"))
|
| 617 |
rmpr = gr.Checkbox(False, label=("Quitar pronombres" if lang=="ES" else "Remove pronouns"))
|
| 618 |
-
exact = gr.Checkbox(False, label=("Máx. Compresión Exacta (sidecar
|
| 619 |
mode_hidden = gr.Dropdown(["Semi-lossless"], value="Semi-lossless", visible=False)
|
| 620 |
out = gr.Textbox(lines=6, label=("Traducción" if lang=="ES" else "Translation"), show_copy_button=True)
|
| 621 |
comp = gr.Markdown("")
|
|
@@ -660,7 +627,7 @@ def make_panel_decode(lang="ES"):
|
|
| 660 |
with gr.Row():
|
| 661 |
src = gr.Dropdown(["Minimax-ASCII","Kōmín-CJK"], value="Minimax-ASCII", label=("Fuente" if lang=="ES" else "Source"))
|
| 662 |
tgt = gr.Dropdown(["Español","English"], value=("Español" if lang=="ES" else "English"), label=("Destino" if lang=="ES" else "Target"))
|
| 663 |
-
text = gr.Textbox(lines=3, label=("Texto en conlang (puede incluir
|
| 664 |
out = gr.Textbox(lines=6, label=("Salida" if lang=="ES" else "Output"), show_copy_button=True)
|
| 665 |
def run(t, s, d):
|
| 666 |
if not t.strip(): return ""
|
|
@@ -709,14 +676,14 @@ with gr.Blocks(title="Universal Conlang Translator", theme=gr.themes.Soft()) as
|
|
| 709 |
with acc_modes_es: gr.Markdown(
|
| 710 |
"- **🔁 Traducir**: Texto → Destino (ES/EN/Minimax/Kōmín), con opciones de compactación y % mostrado.\n"
|
| 711 |
"- **🛠️ Construir**: Obliga salida en conlang (Minimax/Kōmín) desde ES/EN.\n"
|
| 712 |
-
"- **🗝️ Decodificar**: Conlang → ES/EN (si hay
|
| 713 |
"- **🔄 Prueba ida→vuelta**: Comprueba reversibilidad."
|
| 714 |
)
|
| 715 |
acc_modes_en = gr.Accordion("📖 What does each button / mode do? (EN)", open=False, visible=False)
|
| 716 |
with acc_modes_en: gr.Markdown(
|
| 717 |
"- **🔁 Translate**: Text → Target (ES/EN/Minimax/Kōmín) with compaction and %.\n"
|
| 718 |
"- **🛠️ Build**: Force conlang output from ES/EN.\n"
|
| 719 |
-
"- **🗝️ Decode**: Conlang → ES/EN (if
|
| 720 |
"- **🔄 Round-trip**: Check reversibility."
|
| 721 |
)
|
| 722 |
|
|
@@ -847,3 +814,4 @@ if __name__ == "__main__":
|
|
| 847 |
|
| 848 |
|
| 849 |
|
|
|
|
|
|
| 1 |
# app.py — Universal Conlang Translator (Max Compresión Exacta)
|
| 2 |
# Archivos requeridos en la raíz:
|
| 3 |
+
# - lexicon_minimax.json
|
| 4 |
+
# - lexicon_komin.json
|
| 5 |
+
# - lexicon_master.json
|
| 6 |
#
|
| 7 |
# requirements.txt (para HF Spaces):
|
| 8 |
# gradio>=4.36.0
|
|
|
|
| 15 |
import gradio as gr
|
| 16 |
|
| 17 |
# ------------ Archivos esperados ------------
|
| 18 |
+
LEX_MINI = "lexicon_minimax.json"
|
| 19 |
+
LEX_KOMI = "lexicon_komin.json"
|
| 20 |
LEX_MASTER = "lexicon_master.json"
|
| 21 |
|
| 22 |
# ------------ Normalización ------------
|
|
|
|
| 56 |
|
| 57 |
mini2en = {v:k for k,v in en2mini.items()}
|
| 58 |
komi2en = {v:k for k,v in en2komi.items()}
|
| 59 |
+
return (es2mini, es2komi, mini2es, komi2es, en2mini, en2komi, mini2en, komi2en, es2en_lemma, en2es_lemma, master)
|
|
|
|
|
|
|
| 60 |
|
| 61 |
+
(ES2MINI, ES2KOMI, MINI2ES, KOMI2ES, EN2MINI, EN2KOMI, MINI2EN, KOMI2EN, ES2EN_LEMMA, EN2ES_LEMMA, MASTER_OBJ) = load_lexicons()
|
|
|
|
|
|
|
| 62 |
|
| 63 |
# ------------ Pronombres ------------
|
| 64 |
PRON_ES = {"yo","tú","vos","usted","él","ella","nosotros","vosotros","ustedes","ellos","ellas","me","te","se","nos","os"}
|
|
|
|
| 82 |
trans = str.maketrans(alphabet, "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
|
| 83 |
std = s.translate(trans); pad = "=" * ((4 - len(std) % 4) % 4)
|
| 84 |
return base64.b64decode(std + pad)
|
| 85 |
+
def enc_oov_minimax(token: str) -> str:
    """Encode an out-of-vocabulary token as '~' plus custom base64 (Minimax alphabet)."""
    payload = token.encode("utf-8")
    return "~" + to_custom_b64(payload, ALPHA_MINI64)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
def dec_oov_minimax(code: str) -> str:
    """Decode a '~'-prefixed OOV code; on any failure return the input unchanged."""
    try:
        decoded = from_custom_b64(code[1:], ALPHA_MINI64)
        return decoded.decode("utf-8")
    except Exception:
        return code
|
| 89 |
+
def enc_oov_komin(token: str) -> str:
    """Encode an out-of-vocabulary token wrapped in 「…」 using the CJK custom base64 alphabet."""
    encoded = to_custom_b64(token.encode("utf-8"), ALPHA_CJK64)
    return "「" + encoded + "」"
|
|
|
|
|
|
|
| 90 |
def dec_oov_komin(code: str) -> str:
    """Decode a 「…」-wrapped OOV code; on any failure return the input unchanged."""
    try:
        decoded = from_custom_b64(code[1:-1], ALPHA_CJK64)
        return decoded.decode("utf-8")
    except Exception:
        return code
|
|
|
|
| 104 |
except Exception:
|
| 105 |
nlp_es = nlp_en = None
|
| 106 |
|
|
|
|
| 107 |
def lemma_of(tok, src_lang: str) -> str:
    """Return the normalized lemma of *tok* for the given source language.

    Falls back to the token's surface text when spaCy provides no lemma.
    """
    raw = getattr(tok, "lemma_", "") or tok.text
    normalize = norm_es if src_lang == "Español" else norm_en
    return normalize(raw)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
|
| 113 |
# ------------ Detección simple y helpers ------------
|
| 114 |
def detect_polarity(doc) -> bool:
    """Return True when the document's text contains a question mark."""
    text = getattr(doc, "text", "")
    return text.count("?") > 0
|
|
|
|
| 185 |
TAM_MINI = {"Pres":"P","Past":"T","Fut":"F","UNK":"P"}
|
| 186 |
TAM_KOMI = {"Pres":"Ⓟ","Past":"Ⓣ","Fut":"Ⓕ","UNK":"Ⓟ"}
|
| 187 |
|
| 188 |
+
def realize_minimax(doc, src_lang: str, drop_articles=True, zero_copula=True, semi_lossless=False, person_hint="2s", remove_pronouns=False):
|
|
|
|
| 189 |
root, subs, objs, obls, advs = extract_core(doc)
|
| 190 |
tense = detect_tense(root); is_q, is_neg = detect_polarity(doc), detect_neg(doc)
|
| 191 |
+
vlem = lemma_of(root, src_lang) if USE_SPACY else ("ser" if "?" in getattr(doc,"text","") else "estar")
|
|
|
|
|
|
|
|
|
|
| 192 |
vcode = code_es(vlem, "Minimax-ASCII") if src_lang=="Español" else code_en(vlem, "Minimax-ASCII")
|
| 193 |
tail = TAM_MINI.get(tense, "P")
|
| 194 |
if semi_lossless: tail += (detect_person(root, src_lang) or person_hint)
|
|
|
|
| 215 |
parts = S+O+ADV if (zero_copula and not semi_lossless and vlem in ("ser","estar","be") and tense=="Pres" and not is_neg and not is_q) else [vcode]+S+O+ADV
|
| 216 |
return " ".join(p for p in parts if p)
|
| 217 |
|
| 218 |
+
def realize_komin(doc, src_lang: str, drop_articles=True, zero_copula=True, semi_lossless=False, person_hint="2s", remove_pronouns=False):
|
|
|
|
| 219 |
root, subs, objs, obls, advs = extract_core(doc)
|
| 220 |
tense, is_q, is_neg = detect_tense(root), detect_polarity(doc), detect_neg(doc)
|
| 221 |
+
vlem = lemma_of(root, src_lang) if USE_SPACY else ("ser" if "?" in getattr(doc,"text","") else "estar")
|
|
|
|
|
|
|
|
|
|
| 222 |
vcode = code_es(vlem, "Kōmín-CJK") if src_lang=="Español" else code_en(vlem, "Kōmín-CJK")
|
| 223 |
P_SUBJ, P_OBJ = "ᵖ", "ᵒ"; Q_FIN = "?"
|
| 224 |
TAM = TAM_KOMI.get(tense,"Ⓟ")
|
|
|
|
| 245 |
return out
|
| 246 |
|
| 247 |
# ------------ Sidecars (compresión exacta) ------------
|
| 248 |
+
# Matches an optional space then a trailing "§(<payload>)" sidecar; the named
# group 'b85' captures the payload — presumably the Ascii85 text produced by
# b85_enc_raw (NOTE(review): character class looks Ascii85-like; confirm it
# covers the full alphabet emitted by base64.a85encode).
SIDECAR_B85_RE = re.compile(r"\s?§\((?P<b85>[A-Za-z0-9!#$%&()*+\-;<=>?@^_{|}~]+)\)$")
|
| 249 |
def b85_enc_raw(s: str) -> str:
    """Compress *s* with zlib (level 9) and return it Ascii85-encoded (non-Adobe)."""
    compressed = zlib.compress(s.encode("utf-8"), 9)
    return base64.a85encode(compressed, adobe=False).decode("ascii")
|
| 250 |
def b85_dec_raw(b85s: str) -> str:
    """Inverse of b85_enc_raw: Ascii85-decode (non-Adobe) then zlib-decompress to text."""
    raw = base64.a85decode(b85s.encode("ascii"), adobe=False)
    return zlib.decompress(raw).decode("utf-8")
|
| 251 |
def attach_sidecar_b85(conlang_text: str, original_text: str) -> str:
    """Append a " §(...)" sidecar carrying the compressed original text."""
    sidecar = b85_enc_raw(original_text)
    return conlang_text + " §(" + sidecar + ")"
|
|
|
|
| 401 |
return lemma
|
| 402 |
|
| 403 |
# ================= Helper de construcción/translate =================
|
| 404 |
+
def _build_with_spacy(text: str, src_lang: str, target: str, drop_articles: bool, zero_copula: bool, semi_lossless: bool, remove_pronouns: bool) -> str:
|
|
|
|
|
|
|
| 405 |
nlp = nlp_es if src_lang=="Español" else nlp_en
|
| 406 |
doc = nlp(text)
|
| 407 |
if target == "Minimax-ASCII":
|
|
|
|
| 409 |
else:
|
| 410 |
return realize_komin(doc, src_lang, drop_articles, zero_copula, semi_lossless, remove_pronouns=remove_pronouns)
|
| 411 |
|
| 412 |
+
def build_sentence(text: str, src_lang: str, target: str, drop_articles: bool, zero_copula: bool, mode: str, max_comp_exact: bool = False, remove_pronouns: bool = False) -> str:
    """Encode *text* (ES/EN) into the target conlang.

    Returns "" for blank input. When max_comp_exact is set, the result also
    carries an exact-recovery sidecar produced by custom_sidecar_enc.
    """
    if not text.strip():
        return ""
    # Construction always runs in semi-lossless mode.
    semi = True
    if USE_SPACY:
        core = _build_with_spacy(text, src_lang, target, drop_articles, zero_copula and not semi, semi, remove_pronouns)
    else:
        core = encode_simple(text, src_lang, target)
    return custom_sidecar_enc(core, text) if max_comp_exact else core
|
| 419 |
|
| 420 |
+
def universal_translate(text: str, src: str, tgt: str, drop_articles: bool, zero_copula: bool, mode: str, max_comp_exact: bool = False, remove_pronouns: bool = False) -> str:
|
|
|
|
|
|
|
| 421 |
if not text.strip(): return ""
|
| 422 |
if src == tgt: return text
|
| 423 |
|
|
|
|
| 468 |
EXPLAIN_TAB_TRANSLATE_ES = """
|
| 469 |
**¿Qué hace “Traducir”?**
|
| 470 |
Convierte lo que escribes en **Texto** al **Destino** que elijas (ES/EN/Minimax/Kōmín).
|
| 471 |
+
- Con **Máx. Compresión Exacta**, añade un final ~... con el **original comprimido** para recuperarlo tal cual al decodificar.
|
| 472 |
- Las casillas de **compactación** (artículos, cópula, pronombres) **sólo se aplican si el Destino es conlang**.
|
| 473 |
"""
|
| 474 |
EXPLAIN_TAB_BUILD_ES = """
|
|
|
|
| 478 |
EXPLAIN_TAB_DECODE_ES = """
|
| 479 |
**¿Qué hace “Decodificar (Conlang → ES/EN)”?**
|
| 480 |
Convierte de **Minimax/Kōmín** a **Español/Inglés**.
|
| 481 |
+
- Si el texto trae ~..., devolvemos el **original exacto**.
|
| 482 |
- Si no, reconstruimos lo más fiel posible con el **diccionario**.
|
| 483 |
"""
|
| 484 |
EXPLAIN_TAB_ROUNDTRIP_ES = """
|
|
|
|
| 491 |
- **Omitir artículos** (*el/la/los/las*; *a/an/the*): ahorro típico **~10–15%**.
|
| 492 |
- **Cópula cero** (presente afirmativo): oculta *ser/estar/be* → **~5–10%** extra.
|
| 493 |
- **Quitar pronombres**: suprime pronombres obvios → ahorro **variable**.
|
| 494 |
+
- **Máx. Compresión Exacta**: añade ~... para recuperar el original (en >100 caracteres, **~40–60%**; en textos muy cortos puede no reducir).
|
| 495 |
**Guía rápida:** sin casillas **0%**; artículos+cópula **~15–20%**.
|
| 496 |
"""
|
| 497 |
# ¿Qué son los lenguajes?
|
| 498 |
EXPLAIN_CONLANGS_ES = """
|
| 499 |
**¿Qué son Minimax-ASCII y Kōmín-CJK?**
|
| 500 |
+
- **Minimax-ASCII**: versión compacta que usa sólo caracteres comunes (ASCII). Añade marcas como ·P/·T/·F, persona (1s,2p…), negación N y pregunta Q.
|
| 501 |
+
- **Kōmín-CJK**: versión visual con partículas (ej.: sujeto ᵖ, objeto ᵒ) y un circulito de tiempo Ⓟ/Ⓣ/Ⓕ. Puede terminar en ?.
|
| 502 |
+
Ambos son “**conlangs**” pensados para **ahorrar espacio** y permitir **decodificación** a ES/EN (exacta si hay ~...).
|
| 503 |
"""
|
| 504 |
|
| 505 |
# EN
|
| 506 |
+
EXPLAIN_TAB_TRANSLATE_EN = "Converts **Text → Target** (ES/EN/Minimax/Kōmín). With **Max Exact**, adds ~... to recover the **exact original**. Compaction checkboxes apply only when **Target is conlang**."
|
| 507 |
EXPLAIN_TAB_BUILD_EN = "Forces **conlang output** (Minimax/Kōmín) from ES/EN, applying phrasing rules and compaction options."
|
| 508 |
+
EXPLAIN_TAB_DECODE_EN = "Converts **Minimax/Kōmín → ES/EN**. If ~... exists, returns the bit-perfect original; else semi-lossless."
|
| 509 |
EXPLAIN_TAB_ROUNDTRIP_EN = "Runs **(ES/EN→Conlang)→(Conlang→ES/EN)** to verify reversibility; with exact, it’s bit-for-bit."
|
| 510 |
EXPLAIN_CHECKBOX_EN = "Drop articles ~10–15%, Zero copula ~5–10% extra, Remove pronouns variable, Max Exact 40–60% for >100 chars."
|
| 511 |
EXPLAIN_CONLANGS_EN = """
|
| 512 |
**What are Minimax-ASCII and Kōmín-CJK?**
|
| 513 |
+
- **Minimax-ASCII**: compact ASCII codes with ·P/·T/·F, person (1s,2p…), N for negation and Q for question.
|
| 514 |
+
- **Kōmín-CJK**: visual style using particles (subject ᵖ, object ᵒ) and time bubbles Ⓟ/Ⓣ/Ⓕ, may end in ?.
|
| 515 |
+
Both are conlangs for **space-saving** and **decoding** back to ES/EN (bit-perfect when ~... is present).
|
| 516 |
"""
|
| 517 |
|
| 518 |
# Léxico (amigable)
|
|
|
|
| 522 |
- Limpiamos y ordenamos por **frecuencia de uso**.
|
| 523 |
- Asignamos un **código corto** a cada lema para **Minimax** y para **Kōmín**.
|
| 524 |
- Guardamos tres archivos que la app usa al traducir:
|
| 525 |
+
- lexicon_minimax.json (ES → Minimax)
|
| 526 |
+
- lexicon_komin.json (ES → Kōmín)
|
| 527 |
+
- lexicon_master.json (ES + EN + ambos códigos)
|
| 528 |
**Así** podemos convertir tus frases en **códigos compactos** y volver a texto entendible.
|
| 529 |
"""
|
| 530 |
LEXICON_FRIENDLY_EN = "We use **WordNet (OMW)**, pair ES words with EN, sort by frequency, assign short codes (Minimax/Kōmín), and save three JSONs so the app can encode/decode compactly."
|
|
|
|
| 543 |
msg = f"**Base (sin casillas):** {_pct_comp(text, base):.1f}% · **Con tus opciones:** {_pct_comp(text, curr):.1f}%"
|
| 544 |
if maxc:
|
| 545 |
curr_exact = build_sentence(text, src, tgt, drop, zero, "Semi-lossless", True, rm)
|
| 546 |
+
msg += f" · **Con sidecar ~...:** {_pct_comp(text, curr_exact):.1f}%"
|
| 547 |
return msg
|
| 548 |
|
| 549 |
def compaction_line_en(text, src, tgt, drop, zero, rm, maxc) -> str:
|
|
|
|
| 555 |
msg = f"**Base (no options):** {_pct_comp(text, base):.1f}% · **With your options:** {_pct_comp(text, curr):.1f}%"
|
| 556 |
if maxc:
|
| 557 |
curr_exact = build_sentence(text, src, tgt, drop, zero, "Semi-lossless", True, rm)
|
| 558 |
+
msg += f" · **With ~... sidecar:** {_pct_comp(text, curr_exact):.1f}%"
|
| 559 |
return msg
|
| 560 |
|
| 561 |
def master_preview(n: int = 20) -> List[List[Any]]:
|
|
|
|
| 577 |
with gr.Row():
|
| 578 |
src = gr.Dropdown(ALL_LANGS, value=("Español" if lang=="ES" else "English"), label=("Fuente" if lang=="ES" else "Source"))
|
| 579 |
tgt = gr.Dropdown(ALL_LANGS, value="Minimax-ASCII", label=("Destino" if lang=="ES" else "Target"))
|
| 580 |
+
text = gr.Textbox(lines=3, label=("Texto" if lang=="ES" else "Text"), placeholder=("Ej.: Hola, ¿cómo estás?" if lang=="ES" else "e.g., Hello, how are you?"), show_copy_button=True)
|
|
|
|
|
|
|
| 581 |
with gr.Row():
|
| 582 |
+
drop = gr.Checkbox(True, label=("Omitir artículos (ES/EN → conlang)" if lang=="ES" else "Drop articles (ES/EN → conlang)"))
|
| 583 |
zero = gr.Checkbox(False, label=("Cópula cero (presente afirm.)" if lang=="ES" else "Zero copula (present affirmative)"))
|
| 584 |
rmpr = gr.Checkbox(False, label=("Quitar pronombres" if lang=="ES" else "Remove pronouns"))
|
| 585 |
+
exact = gr.Checkbox(False, label=("Máx. Compresión Exacta (sidecar ~...)" if lang=="ES" else "Max Exact Compression (sidecar ~...)"))
|
| 586 |
mode_hidden = gr.Dropdown(["Semi-lossless"], value="Semi-lossless", visible=False)
|
| 587 |
out = gr.Textbox(lines=6, label=("Traducción" if lang=="ES" else "Translation"), show_copy_button=True)
|
| 588 |
comp = gr.Markdown("")
|
|
|
|
| 627 |
with gr.Row():
|
| 628 |
src = gr.Dropdown(["Minimax-ASCII","Kōmín-CJK"], value="Minimax-ASCII", label=("Fuente" if lang=="ES" else "Source"))
|
| 629 |
tgt = gr.Dropdown(["Español","English"], value=("Español" if lang=="ES" else "English"), label=("Destino" if lang=="ES" else "Target"))
|
| 630 |
+
text = gr.Textbox(lines=3, label=("Texto en conlang (puede incluir ~...)" if lang=="ES" else "Conlang text (may include ~...)"), show_copy_button=True)
|
| 631 |
out = gr.Textbox(lines=6, label=("Salida" if lang=="ES" else "Output"), show_copy_button=True)
|
| 632 |
def run(t, s, d):
|
| 633 |
if not t.strip(): return ""
|
|
|
|
| 676 |
with acc_modes_es: gr.Markdown(
|
| 677 |
"- **🔁 Traducir**: Texto → Destino (ES/EN/Minimax/Kōmín), con opciones de compactación y % mostrado.\n"
|
| 678 |
"- **🛠️ Construir**: Obliga salida en conlang (Minimax/Kōmín) desde ES/EN.\n"
|
| 679 |
+
"- **🗝️ Decodificar**: Conlang → ES/EN (si hay ~..., devuelve el original exacto).\n"
|
| 680 |
"- **🔄 Prueba ida→vuelta**: Comprueba reversibilidad."
|
| 681 |
)
|
| 682 |
acc_modes_en = gr.Accordion("📖 What does each button / mode do? (EN)", open=False, visible=False)
|
| 683 |
with acc_modes_en: gr.Markdown(
|
| 684 |
"- **🔁 Translate**: Text → Target (ES/EN/Minimax/Kōmín) with compaction and %.\n"
|
| 685 |
"- **🛠️ Build**: Force conlang output from ES/EN.\n"
|
| 686 |
+
"- **🗝️ Decode**: Conlang → ES/EN (if ~..., exact original).\n"
|
| 687 |
"- **🔄 Round-trip**: Check reversibility."
|
| 688 |
)
|
| 689 |
|
|
|
|
| 814 |
|
| 815 |
|
| 816 |
|
| 817 |
+
|