Update app.py
Browse files
app.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
# app.py — Universal Conlang Translator (Max Compresión Exacta)
|
| 2 |
# Archivos necesarios en la raíz:
|
| 3 |
# - lexicon_minimax.json
|
| 4 |
# - lexicon_komin.json
|
|
@@ -119,7 +119,7 @@ def dec_oov_komin(code: str) -> str:
|
|
| 119 |
def is_oov_minimax(code: str) -> bool:
|
| 120 |
return code.startswith("~") and len(code) > 1
|
| 121 |
def is_oov_komin(code: str) -> bool:
|
| 122 |
-
return len(code) >= 2 and code.startswith("「")
|
| 123 |
|
| 124 |
# ------------ spaCy opcional ------------
|
| 125 |
USE_SPACY = False
|
|
@@ -140,7 +140,7 @@ def lemma_of(tok, src_lang: str) -> str:
|
|
| 140 |
else:
|
| 141 |
return norm_en(tok.lemma_ if tok.lemma_ else tok.text)
|
| 142 |
|
| 143 |
-
# ------------
|
| 144 |
def detect_polarity(doc) -> bool:
|
| 145 |
return "?" in doc.text
|
| 146 |
|
|
@@ -185,7 +185,7 @@ def extract_core(doc):
|
|
| 185 |
objs.append(t)
|
| 186 |
elif t.dep_ in ("obl","pobj"):
|
| 187 |
obls.append(t)
|
| 188 |
-
elif t.dep_ in ("advmod","advcl")
|
| 189 |
advs.append(t)
|
| 190 |
subs.sort(key=lambda x: x.i); objs.sort(key=lambda x: x.i)
|
| 191 |
obls.sort(key=lambda x: x.i); advs.sort(key=lambda x: x.i)
|
|
@@ -263,7 +263,7 @@ def realize_minimax(doc, src_lang: str, drop_articles=True, zero_copula=True, se
|
|
| 263 |
O = realize_np(objs) + realize_np(obls)
|
| 264 |
ADV=[code_es(lemma_of(a, src_lang), "Minimax-ASCII") if src_lang=="Español" else code_en(lemma_of(a, src_lang), "Minimax-ASCII") for a in advs] if USE_SPACY else []
|
| 265 |
|
| 266 |
-
if zero_copula
|
| 267 |
parts = S + O + ADV
|
| 268 |
else:
|
| 269 |
parts = [vcode] + S + O + ADV
|
|
@@ -298,7 +298,7 @@ def realize_komin(doc, src_lang: str, drop_articles=True, zero_copula=True, semi
|
|
| 298 |
|
| 299 |
v_form = vcode + TAM + (NEG_M if is_neg else "")
|
| 300 |
|
| 301 |
-
if zero_copula
|
| 302 |
parts = S + O + ADV
|
| 303 |
else:
|
| 304 |
parts = S + O + ADV + [v_form]
|
|
@@ -306,7 +306,7 @@ def realize_komin(doc, src_lang: str, drop_articles=True, zero_copula=True, semi
|
|
| 306 |
if is_q: out += " " + Q_FIN
|
| 307 |
return out
|
| 308 |
|
| 309 |
-
# ------------ Sidecars
|
| 310 |
SIDECAR_B85_RE = re.compile(r"\s?§\((?P<b85>[A-Za-z0-9!#$%&()*+\-;<=>?@^_`{|}~]+)\)$")
|
| 311 |
|
| 312 |
def b85_enc_raw(s: str) -> str:
|
|
@@ -360,7 +360,7 @@ def encode_simple(text: str, src_lang: str, target: str) -> str:
|
|
| 360 |
def repl_en(m):
|
| 361 |
key = norm_en(m.group(0))
|
| 362 |
table = EN2MINI if target=="Minimax-ASCII" else EN2KOMI
|
| 363 |
-
if table
|
| 364 |
return table[key]
|
| 365 |
return enc_oov_minimax(m.group(0)) if target=="Minimax-ASCII" else enc_oov_komin(m.group(0))
|
| 366 |
repl = repl_es if src_lang=="Español" else repl_en
|
|
@@ -466,6 +466,7 @@ def decode_simple(text: str, source: str, tgt_lang: str) -> str:
|
|
| 466 |
v_conj = ("no " if tgt_lang == "Español" else "not ") + v_conj
|
| 467 |
out_parts.append(v_conj)
|
| 468 |
continue
|
|
|
|
| 469 |
out_parts.append(pluralize(lem, tgt_lang) if pl_flags[idx] else lem)
|
| 470 |
|
| 471 |
out_text = " ".join(out_parts)
|
|
@@ -503,7 +504,7 @@ def _es_conj(lemma, tense, person):
|
|
| 503 |
if lemma == "estar":
|
| 504 |
tab = {
|
| 505 |
"Pres":{"1s":"estoy","2s":"estás","3s":"está","1p":"estamos","2p":"estáis","3p":"están"},
|
| 506 |
-
"Past":{"1s":"
|
| 507 |
"Fut":{"1s":"estaré","2s":"estarás","3s":"estará","1p":"estaremos","2p":"estaréis","3p":"estarán"},
|
| 508 |
}; return tab[tense].get(person, tab[tense]["3s"])
|
| 509 |
if lemma == "ir":
|
|
@@ -530,11 +531,11 @@ def _en_conj(lemma, tense, person):
|
|
| 530 |
return "goes" if (tense=="Pres" and person=="3s") else "go"
|
| 531 |
if lemma == "do":
|
| 532 |
if tense == "Past": return "did"
|
| 533 |
-
return "does" if (tense=="Pres"
|
| 534 |
|
| 535 |
if tense == "Pres":
|
| 536 |
if person == "3s":
|
| 537 |
-
if lemma.endswith("y")
|
| 538 |
return lemma[:-1] + "ies"
|
| 539 |
if lemma.endswith(("s","sh","ch","x","z","o")):
|
| 540 |
return lemma + "es"
|
|
@@ -542,7 +543,7 @@ def _en_conj(lemma, tense, person):
|
|
| 542 |
return lemma
|
| 543 |
elif tense == "Past":
|
| 544 |
if lemma.endswith("e"): return lemma + "d"
|
| 545 |
-
if lemma.endswith("y")
|
| 546 |
return lemma + "ed"
|
| 547 |
else:
|
| 548 |
return lemma
|
|
@@ -561,7 +562,7 @@ def build_sentence(text: str, src_lang: str, target: str,
|
|
| 561 |
drop_articles: bool, zero_copula: bool, mode: str, max_comp_exact: bool = False) -> str:
|
| 562 |
if not text.strip(): return ""
|
| 563 |
semi = True # siempre semi-lossless
|
| 564 |
-
core = _build_with_spacy(text, src_lang, target, drop_articles, zero_copula
|
| 565 |
if max_comp_exact:
|
| 566 |
return custom_sidecar_enc(core, text)
|
| 567 |
return core
|
|
@@ -577,7 +578,7 @@ def universal_translate(text: str, src: str, tgt: str,
|
|
| 577 |
orig = extract_custom_sidecar(text)
|
| 578 |
if orig is not None: return orig
|
| 579 |
orig = extract_sidecar_b85(text)
|
| 580 |
-
if orig
|
| 581 |
return decode_simple(strip_custom_sidecar(strip_sidecar_b85(text)), src, tgt)
|
| 582 |
if src in ("Español","English") and tgt in ("Español","English"):
|
| 583 |
return translate_natural(text, src, tgt)
|
|
@@ -630,306 +631,120 @@ def round_trip(text, src, tgt, mode, max_comp_exact):
|
|
| 630 |
back = universal_translate(conlang, tgt, src, True, False, mode, max_comp_exact)
|
| 631 |
return conlang, back
|
| 632 |
|
| 633 |
-
#
|
| 634 |
-
# ========================== UI bilingüe con selector global ==========================
|
| 635 |
-
# =====================================================================================
|
| 636 |
|
| 637 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 638 |
|
| 639 |
-
|
| 640 |
-
|
| 641 |
-
|
| 642 |
-
Traduce entre **Español / Inglés** y dos lenguajes construidos:
|
| 643 |
-
- **Minimax-ASCII** (compacto y solo ASCII)
|
| 644 |
-
- **Kōmín-CJK** (estilo CJK con partículas)
|
| 645 |
-
|
| 646 |
-
También **comprime sin perder información** si activas **Máx. Compresión Exacta** (`~...` guarda el original).
|
| 647 |
-
Al **decodificar**, si existe ese `~...`, recuperas el texto **exacto**.
|
| 648 |
-
|
| 649 |
-
### 🧠 ¿Por qué me sirve?
|
| 650 |
-
- Para **reducir** tamaño de mensajes/notas.
|
| 651 |
-
- Para **codificar/decodificar** de forma legible y reversible.
|
| 652 |
-
- Para jugar con **conlangs** simples.
|
| 653 |
-
|
| 654 |
-
### ⚙️ Opciones (puedes ignorarlas al principio)
|
| 655 |
-
- **Omitir artículos**: quita *el/la/los/las* o *a/an/the*. Ahorra ~10–15%.
|
| 656 |
-
- **Cópula cero** (presente afirmativo): oculta *ser/estar/be* cuando suena natural. +~5–10%.
|
| 657 |
-
- **Máx. Compresión Exacta**: añade `~...` con el original comprimido (mejor en textos medianos/largos).
|
| 658 |
-
"""
|
| 659 |
-
FAQ_ES = """
|
| 660 |
-
### ❓ Preguntas rápidas
|
| 661 |
-
- **¿Se pierde info?** No, con **Máx. Compresión Exacta** el `~...` guarda el original.
|
| 662 |
-
- **¿Sin spaCy?** Funciona igual (modo léxico). Con spaCy suena más natural.
|
| 663 |
-
- **Privacidad**: todo corre dentro de este Space.
|
| 664 |
-
"""
|
| 665 |
-
TUTORIAL_ES = """
|
| 666 |
-
### 🏁 Empezar (3 pasos)
|
| 667 |
-
1. Elige **Fuente** y **Destino**.
|
| 668 |
-
2. Escribe tu frase.
|
| 669 |
-
3. Pulsa **Traducir**.
|
| 670 |
|
| 671 |
-
|
| 672 |
-
"""
|
| 673 |
|
| 674 |
-
|
| 675 |
-
|
| 676 |
-
|
| 677 |
-
|
| 678 |
-
|
| 679 |
-
- **Kōmín-CJK** (CJK-style with particles)
|
| 680 |
|
| 681 |
-
|
| 682 |
-
|
| 683 |
-
|
| 684 |
-
|
| 685 |
-
|
| 686 |
-
- **
|
| 687 |
-
|
| 688 |
-
- **Privacy**: everything runs inside this Space.
|
| 689 |
-
"""
|
| 690 |
-
TUTORIAL_EN = """
|
| 691 |
-
### 🏁 Quick start (3 steps)
|
| 692 |
-
1. Pick **Source** and **Target**.
|
| 693 |
-
2. Type your sentence.
|
| 694 |
-
3. Click **Translate**.
|
| 695 |
|
| 696 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 697 |
"""
|
| 698 |
|
| 699 |
-
|
| 700 |
-
with gr.Group(visible=True) as group:
|
| 701 |
-
gr.Markdown("# 🌐 Universal Conlang Translator · Compresión Exacta (ES)")
|
| 702 |
-
with gr.Row():
|
| 703 |
-
with gr.Column(scale=1):
|
| 704 |
-
with gr.Accordion("Resumen (ES)", open=True):
|
| 705 |
-
gr.Markdown(EXPLAIN_ES)
|
| 706 |
-
with gr.Accordion("FAQ (ES)", open=False):
|
| 707 |
-
gr.Markdown(FAQ_ES)
|
| 708 |
-
with gr.Column(scale=1):
|
| 709 |
-
with gr.Accordion("Tutorial (ES)", open=True):
|
| 710 |
-
gr.Markdown(TUTORIAL_ES)
|
| 711 |
-
gr.Markdown("**Consejo:** Los mensajes muy cortos pueden no reducirse por la cabecera del `~...`.")
|
| 712 |
-
with gr.Tab("🔁 Traducir"):
|
| 713 |
-
with gr.Row():
|
| 714 |
-
uni_src = gr.Dropdown(ALL_LANGS, value="Español", label="Fuente")
|
| 715 |
-
uni_tgt = gr.Dropdown(ALL_LANGS, value="Minimax-ASCII", label="Destino")
|
| 716 |
-
uni_text = gr.Textbox(lines=3, label="Texto", placeholder="Ej.: Hola, ¿cómo estás?", show_copy_button=True)
|
| 717 |
-
with gr.Row():
|
| 718 |
-
uni_drop = gr.Checkbox(value=True, label="Omitir artículos (ES/EN → conlang)")
|
| 719 |
-
uni_zero = gr.Checkbox(value=False, label="Cópula cero (presente afirm.)")
|
| 720 |
-
uni_max_comp = gr.Checkbox(value=False, label="Máx. Compresión Exacta (sidecar `~...`)")
|
| 721 |
-
uni_mode = gr.Dropdown(["Semi-lossless"], value="Semi-lossless", visible=False)
|
| 722 |
-
with gr.Row():
|
| 723 |
-
btn_translate = gr.Button("🚀 Traducir", variant="primary")
|
| 724 |
-
btn_reset = gr.Button("🧹 Limpiar")
|
| 725 |
-
uni_out = gr.Textbox(lines=6, label="Traducción", show_copy_button=True)
|
| 726 |
-
|
| 727 |
-
btn_translate.click(
|
| 728 |
-
universal_translate,
|
| 729 |
-
[uni_text, uni_src, uni_tgt, uni_drop, uni_zero, uni_mode, uni_max_comp],
|
| 730 |
-
[uni_out]
|
| 731 |
-
)
|
| 732 |
-
btn_reset.click(lambda: "", None, [uni_text, uni_out])
|
| 733 |
-
|
| 734 |
-
gr.Markdown("### 🔎 Ejemplos (clic para autocompletar)")
|
| 735 |
-
ex1 = gr.Button("ES→Minimax: «Hola, ¿cómo estás?»")
|
| 736 |
-
ex2 = gr.Button("EN→Kōmín: «This system keeps messages compact.»")
|
| 737 |
-
ex3 = gr.Button("ES→Minimax (con compresión): «El clima hoy es excelente para pasear.»")
|
| 738 |
-
ex4 = gr.Button("EN→Kōmín (con compresión): «Please decode this later with the sidecar.»")
|
| 739 |
-
|
| 740 |
-
ex1.click(lambda: ("Hola, ¿cómo estás?", "Español", "Minimax-ASCII"), None, [uni_text, uni_src, uni_tgt])
|
| 741 |
-
ex2.click(lambda: ("This system keeps messages compact.", "English", "Kōmín-CJK"), None, [uni_text, uni_src, uni_tgt])
|
| 742 |
-
ex3.click(lambda: ("El clima hoy es excelente para pasear.", "Español", "Minimax-ASCII"), None, [uni_text, uni_src, uni_tgt])
|
| 743 |
-
ex4.click(lambda: ("Please decode this later with the sidecar.", "English", "Kōmín-CJK"), None, [uni_text, uni_src, uni_tgt])
|
| 744 |
-
|
| 745 |
-
with gr.Tab("🛠️ Construir (ES/EN → Conlang)"):
|
| 746 |
-
with gr.Row():
|
| 747 |
-
src_lang = gr.Dropdown(["Español","English"], value="Español", label="Fuente")
|
| 748 |
-
target = gr.Dropdown(["Minimax-ASCII","Kōmín-CJK"], value="Minimax-ASCII", label="Conlang")
|
| 749 |
-
text_in = gr.Textbox(lines=3, label="Frase", show_copy_button=True)
|
| 750 |
-
with gr.Row():
|
| 751 |
-
drop_articles = gr.Checkbox(value=True, label="Omitir artículos")
|
| 752 |
-
zero_copula = gr.Checkbox(value=False, label="Cópula cero (presente afirm.)")
|
| 753 |
-
max_comp_build = gr.Checkbox(value=False, label="Máx. Compresión Exacta")
|
| 754 |
-
mode_build = gr.Dropdown(["Semi-lossless"], value="Semi-lossless", visible=False)
|
| 755 |
-
with gr.Row():
|
| 756 |
-
btn_build = gr.Button("🏗️ Construir", variant="primary")
|
| 757 |
-
btn_build_clear = gr.Button("🧹 Limpiar")
|
| 758 |
-
out = gr.Textbox(lines=6, label="Salida", show_copy_button=True)
|
| 759 |
-
|
| 760 |
-
btn_build.click(
|
| 761 |
-
build_sentence,
|
| 762 |
-
[text_in, src_lang, target, drop_articles, zero_copula, mode_build, max_comp_build],
|
| 763 |
-
[out]
|
| 764 |
-
)
|
| 765 |
-
btn_build_clear.click(lambda: "", None, [text_in, out])
|
| 766 |
-
|
| 767 |
-
with gr.Tab("🗝️ Decodificar (Conlang → ES/EN)"):
|
| 768 |
-
with gr.Row():
|
| 769 |
-
src_code = gr.Dropdown(["Minimax-ASCII","Kōmín-CJK"], value="Minimax-ASCII", label="Fuente")
|
| 770 |
-
tgt_lang = gr.Dropdown(["Español","English"], value="Español", label="Destino")
|
| 771 |
-
code_in = gr.Textbox(lines=3, label="Texto en conlang (puede incluir `~...`)", show_copy_button=True)
|
| 772 |
-
out3 = gr.Textbox(lines=6, label="Salida", show_copy_button=True)
|
| 773 |
-
|
| 774 |
-
def decode_lossless_aware(text, src, tgt):
|
| 775 |
-
orig = extract_custom_sidecar(text)
|
| 776 |
-
if orig is not None: return orig
|
| 777 |
-
orig = extract_sidecar_b85(text)
|
| 778 |
-
if orig is not None: return orig
|
| 779 |
-
return decode_simple(strip_custom_sidecar(strip_sidecar_b85(text)), src, tgt)
|
| 780 |
-
|
| 781 |
-
with gr.Row():
|
| 782 |
-
btn_decode = gr.Button("🔓 Decodificar", variant="primary")
|
| 783 |
-
btn_decode_clear = gr.Button("🧹 Limpiar")
|
| 784 |
-
|
| 785 |
-
btn_decode.click(decode_lossless_aware, [code_in, src_code, tgt_lang], [out3])
|
| 786 |
-
btn_decode_clear.click(lambda: "", None, [code_in, out3])
|
| 787 |
-
|
| 788 |
-
gr.Markdown("> **Tip:** si ves `~...`, la decodificación será 100% exacta.")
|
| 789 |
-
|
| 790 |
-
with gr.Tab("🔄 Prueba ida→vuelta"):
|
| 791 |
-
with gr.Row():
|
| 792 |
-
rt_src = gr.Dropdown(["Español","English"], value="Español", label="Fuente")
|
| 793 |
-
rt_tgt = gr.Dropdown(["Minimax-ASCII","Kōmín-CJK"], value="Minimax-ASCII", label="Conlang")
|
| 794 |
-
rt_text = gr.Textbox(lines=3, label="Frase", show_copy_button=True)
|
| 795 |
-
rt_max_comp = gr.Checkbox(value=False, label="Máx. Compresión Exacta")
|
| 796 |
-
rt_mode = gr.Dropdown(["Semi-lossless"], value="Semi-lossless", visible=False)
|
| 797 |
-
rt_out_conlang = gr.Textbox(lines=3, label="Conlang (ida)", show_copy_button=True)
|
| 798 |
-
rt_out_back = gr.Textbox(lines=3, label="Vuelta", show_copy_button=True)
|
| 799 |
-
with gr.Row():
|
| 800 |
-
btn_rt = gr.Button("▶️ Probar", variant="primary")
|
| 801 |
-
btn_rt_clear = gr.Button("🧹 Limpiar")
|
| 802 |
-
|
| 803 |
-
btn_rt.click(round_trip, [rt_text, rt_src, rt_tgt, rt_mode, rt_max_comp], [rt_out_conlang, rt_out_back])
|
| 804 |
-
btn_rt_clear.click(lambda: "", None, [rt_text, rt_out_conlang, rt_out_back])
|
| 805 |
-
|
| 806 |
-
gr.Markdown("---")
|
| 807 |
-
gr.Markdown("Hecho con ❤️ · **spaCy** (opcional) · Todo se ejecuta en este Space.")
|
| 808 |
-
return group
|
| 809 |
-
|
| 810 |
-
def make_group_en():
|
| 811 |
-
with gr.Group(visible=False) as group:
|
| 812 |
-
gr.Markdown("# 🌐 Universal Conlang Translator · Max Exact Compression (EN)")
|
| 813 |
-
with gr.Row():
|
| 814 |
-
with gr.Column(scale=1):
|
| 815 |
-
with gr.Accordion("Summary (EN)", open=True):
|
| 816 |
-
gr.Markdown(EXPLAIN_EN)
|
| 817 |
-
with gr.Accordion("FAQ (EN)", open=False):
|
| 818 |
-
gr.Markdown(FAQ_EN)
|
| 819 |
-
with gr.Column(scale=1):
|
| 820 |
-
with gr.Accordion("Tutorial (EN)", open=True):
|
| 821 |
-
gr.Markdown(TUTORIAL_EN)
|
| 822 |
-
gr.Markdown("**Tip:** Very short messages may not shrink due to the `~...` header.")
|
| 823 |
-
with gr.Tab("🔁 Translate"):
|
| 824 |
-
with gr.Row():
|
| 825 |
-
uni_src = gr.Dropdown(ALL_LANGS, value="English", label="Source")
|
| 826 |
-
uni_tgt = gr.Dropdown(ALL_LANGS, value="Minimax-ASCII", label="Target")
|
| 827 |
-
uni_text = gr.Textbox(lines=3, label="Text", placeholder="e.g., Hello, how are you?", show_copy_button=True)
|
| 828 |
-
with gr.Row():
|
| 829 |
-
uni_drop = gr.Checkbox(value=True, label="Drop articles (ES/EN → conlang)")
|
| 830 |
-
uni_zero = gr.Checkbox(value=False, label="Zero copula (present affirmative)")
|
| 831 |
-
uni_max_comp = gr.Checkbox(value=False, label="Max Exact Compression (sidecar `~...`)")
|
| 832 |
-
uni_mode = gr.Dropdown(["Semi-lossless"], value="Semi-lossless", visible=False)
|
| 833 |
-
with gr.Row():
|
| 834 |
-
btn_translate = gr.Button("🚀 Translate", variant="primary")
|
| 835 |
-
btn_reset = gr.Button("🧹 Clear")
|
| 836 |
-
uni_out = gr.Textbox(lines=6, label="Translation", show_copy_button=True)
|
| 837 |
-
|
| 838 |
-
btn_translate.click(
|
| 839 |
-
universal_translate,
|
| 840 |
-
[uni_text, uni_src, uni_tgt, uni_drop, uni_zero, uni_mode, uni_max_comp],
|
| 841 |
-
[uni_out]
|
| 842 |
-
)
|
| 843 |
-
btn_reset.click(lambda: "", None, [uni_text, uni_out])
|
| 844 |
-
|
| 845 |
-
gr.Markdown("### 🔎 Examples (click to autofill)")
|
| 846 |
-
ex1 = gr.Button("EN→Minimax: “Hello, how are you?”")
|
| 847 |
-
ex2 = gr.Button("ES→Kōmín: “Este sistema mantiene los mensajes compactos.”")
|
| 848 |
-
ex3 = gr.Button("EN→Minimax (compressed): “The weather today is perfect for a walk.”")
|
| 849 |
-
ex4 = gr.Button("ES→Kōmín (compressed): “Por favor decodifica esto luego con el sidecar.”")
|
| 850 |
-
|
| 851 |
-
ex1.click(lambda: ("Hello, how are you?", "English", "Minimax-ASCII"), None, [uni_text, uni_src, uni_tgt])
|
| 852 |
-
ex2.click(lambda: ("Este sistema mantiene los mensajes compactos.", "Español", "Kōmín-CJK"), None, [uni_text, uni_src, uni_tgt])
|
| 853 |
-
ex3.click(lambda: ("The weather today is perfect for a walk.", "English", "Minimax-ASCII"), None, [uni_text, uni_src, uni_tgt])
|
| 854 |
-
ex4.click(lambda: ("Por favor decodifica esto luego con el sidecar.", "Español", "Kōmín-CJK"), None, [uni_text, uni_src, uni_tgt])
|
| 855 |
-
|
| 856 |
-
with gr.Tab("🛠️ Build (ES/EN → Conlang)"):
|
| 857 |
-
with gr.Row():
|
| 858 |
-
src_lang = gr.Dropdown(["Español","English"], value="English", label="Source")
|
| 859 |
-
target = gr.Dropdown(["Minimax-ASCII","Kōmín-CJK"], value="Minimax-ASCII", label="Conlang")
|
| 860 |
-
text_in = gr.Textbox(lines=3, label="Sentence", show_copy_button=True)
|
| 861 |
-
with gr.Row():
|
| 862 |
-
drop_articles = gr.Checkbox(value=True, label="Drop articles")
|
| 863 |
-
zero_copula = gr.Checkbox(value=False, label="Zero copula (present affirmative)")
|
| 864 |
-
max_comp_build = gr.Checkbox(value=False, label="Max Exact Compression")
|
| 865 |
-
mode_build = gr.Dropdown(["Semi-lossless"], value="Semi-lossless", visible=False)
|
| 866 |
-
with gr.Row():
|
| 867 |
-
btn_build = gr.Button("🏗️ Build", variant="primary")
|
| 868 |
-
btn_build_clear = gr.Button("🧹 Clear")
|
| 869 |
-
out = gr.Textbox(lines=6, label="Output", show_copy_button=True)
|
| 870 |
-
|
| 871 |
-
btn_build.click(
|
| 872 |
-
build_sentence,
|
| 873 |
-
[text_in, src_lang, target, drop_articles, zero_copula, mode_build, max_comp_build],
|
| 874 |
-
[out]
|
| 875 |
-
)
|
| 876 |
-
btn_build_clear.click(lambda: "", None, [text_in, out])
|
| 877 |
-
|
| 878 |
-
with gr.Tab("🗝️ Decode (Conlang → ES/EN)"):
|
| 879 |
-
with gr.Row():
|
| 880 |
-
src_code = gr.Dropdown(["Minimax-ASCII","Kōmín-CJK"], value="Minimax-ASCII", label="Source")
|
| 881 |
-
tgt_lang = gr.Dropdown(["Español","English"], value="English", label="Target")
|
| 882 |
-
code_in = gr.Textbox(lines=3, label="Conlang text (may include `~...`)", show_copy_button=True)
|
| 883 |
-
out3 = gr.Textbox(lines=6, label="Output", show_copy_button=True)
|
| 884 |
-
|
| 885 |
-
def decode_lossless_aware(text, src, tgt):
|
| 886 |
-
orig = extract_custom_sidecar(text)
|
| 887 |
-
if orig is not None: return orig
|
| 888 |
-
orig = extract_sidecar_b85(text)
|
| 889 |
-
if orig is not None: return orig
|
| 890 |
-
return decode_simple(strip_custom_sidecar(strip_sidecar_b85(text)), src, tgt)
|
| 891 |
-
|
| 892 |
-
with gr.Row():
|
| 893 |
-
btn_decode = gr.Button("🔓 Decode", variant="primary")
|
| 894 |
-
btn_decode_clear = gr.Button("🧹 Clear")
|
| 895 |
-
|
| 896 |
-
btn_decode.click(decode_lossless_aware, [code_in, src_code, tgt_lang], [out3])
|
| 897 |
-
btn_decode_clear.click(lambda: "", None, [code_in, out3])
|
| 898 |
-
|
| 899 |
-
gr.Markdown("> **Tip:** if you see `~...`, decoding will be bit-perfect.")
|
| 900 |
-
|
| 901 |
-
with gr.Tab("🔄 Round-trip"):
|
| 902 |
-
with gr.Row():
|
| 903 |
-
rt_src = gr.Dropdown(["Español","English"], value="English", label="Source")
|
| 904 |
-
rt_tgt = gr.Dropdown(["Minimax-ASCII","Kōmín-CJK"], value="Minimax-ASCII", label="Conlang")
|
| 905 |
-
rt_text = gr.Textbox(lines=3, label="Sentence", show_copy_button=True)
|
| 906 |
-
rt_max_comp = gr.Checkbox(value=False, label="Max Exact Compression")
|
| 907 |
-
rt_mode = gr.Dropdown(["Semi-lossless"], value="Semi-lossless", visible=False)
|
| 908 |
-
rt_out_conlang = gr.Textbox(lines=3, label="Outward (conlang)", show_copy_button=True)
|
| 909 |
-
rt_out_back = gr.Textbox(lines=3, label="Back", show_copy_button=True)
|
| 910 |
-
with gr.Row():
|
| 911 |
-
btn_rt = gr.Button("▶️ Test", variant="primary")
|
| 912 |
-
btn_rt_clear = gr.Button("🧹 Clear")
|
| 913 |
-
|
| 914 |
-
btn_rt.click(round_trip, [rt_text, rt_src, rt_tgt, rt_mode, rt_max_comp], [rt_out_conlang, rt_out_back])
|
| 915 |
-
btn_rt_clear.click(lambda: "", None, [rt_text, rt_out_conlang, rt_out_back])
|
| 916 |
-
|
| 917 |
-
gr.Markdown("---")
|
| 918 |
-
gr.Markdown("Made with ❤️ · **spaCy** (optional) · Everything runs inside this Space.")
|
| 919 |
-
return group
|
| 920 |
|
| 921 |
with gr.Blocks(title="Universal Conlang Translator", theme=gr.themes.Soft()) as demo:
|
| 922 |
-
gr.Markdown("
|
| 923 |
-
|
| 924 |
-
group_es = make_group_es()
|
| 925 |
-
group_en = make_group_en()
|
| 926 |
-
|
| 927 |
-
def switch_lang(code):
|
| 928 |
-
if code == "EN":
|
| 929 |
-
return gr.update(visible=False), gr.update(visible=True)
|
| 930 |
-
return gr.update(visible=True), gr.update(visible=False)
|
| 931 |
|
| 932 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 933 |
|
| 934 |
if __name__ == "__main__":
|
| 935 |
demo.launch()
|
|
|
|
|
|
| 1 |
+
# app.py — Universal Conlang Translator (Max Compresión Exacta)
|
| 2 |
# Archivos necesarios en la raíz:
|
| 3 |
# - lexicon_minimax.json
|
| 4 |
# - lexicon_komin.json
|
|
|
|
| 119 |
def is_oov_minimax(code: str) -> bool:
|
| 120 |
return code.startswith("~") and len(code) > 1
|
| 121 |
def is_oov_komin(code: str) -> bool:
|
| 122 |
+
return len(code) >= 2 and code.startswith("「") and code.endswith("」")
|
| 123 |
|
| 124 |
# ------------ spaCy opcional ------------
|
| 125 |
USE_SPACY = False
|
|
|
|
| 140 |
else:
|
| 141 |
return norm_en(tok.lemma_ if tok.lemma_ else tok.text)
|
| 142 |
|
| 143 |
+
# ------------ Análisis básico ------------
|
| 144 |
def detect_polarity(doc) -> bool:
|
| 145 |
return "?" in doc.text
|
| 146 |
|
|
|
|
| 185 |
objs.append(t)
|
| 186 |
elif t.dep_ in ("obl","pobj"):
|
| 187 |
obls.append(t)
|
| 188 |
+
elif t.dep_ in ("advmod","advcl") and t.pos_ == "ADV":
|
| 189 |
advs.append(t)
|
| 190 |
subs.sort(key=lambda x: x.i); objs.sort(key=lambda x: x.i)
|
| 191 |
obls.sort(key=lambda x: x.i); advs.sort(key=lambda x: x.i)
|
|
|
|
| 263 |
O = realize_np(objs) + realize_np(obls)
|
| 264 |
ADV=[code_es(lemma_of(a, src_lang), "Minimax-ASCII") if src_lang=="Español" else code_en(lemma_of(a, src_lang), "Minimax-ASCII") for a in advs] if USE_SPACY else []
|
| 265 |
|
| 266 |
+
if zero_copula and not semi_lossless and vlem in ("ser","estar","be") and tense=="Pres" and not is_neg and not is_q:
|
| 267 |
parts = S + O + ADV
|
| 268 |
else:
|
| 269 |
parts = [vcode] + S + O + ADV
|
|
|
|
| 298 |
|
| 299 |
v_form = vcode + TAM + (NEG_M if is_neg else "")
|
| 300 |
|
| 301 |
+
if zero_copula and not semi_lossless and vlem in ("ser","estar","be") and tense=="Pres" and not is_neg and not is_q:
|
| 302 |
parts = S + O + ADV
|
| 303 |
else:
|
| 304 |
parts = S + O + ADV + [v_form]
|
|
|
|
| 306 |
if is_q: out += " " + Q_FIN
|
| 307 |
return out
|
| 308 |
|
| 309 |
+
# ------------ Sidecars (compresión exacta) ------------
|
| 310 |
SIDECAR_B85_RE = re.compile(r"\s?§\((?P<b85>[A-Za-z0-9!#$%&()*+\-;<=>?@^_`{|}~]+)\)$")
|
| 311 |
|
| 312 |
def b85_enc_raw(s: str) -> str:
|
|
|
|
| 360 |
def repl_en(m):
|
| 361 |
key = norm_en(m.group(0))
|
| 362 |
table = EN2MINI if target=="Minimax-ASCII" else EN2KOMI
|
| 363 |
+
if table and key in table:
|
| 364 |
return table[key]
|
| 365 |
return enc_oov_minimax(m.group(0)) if target=="Minimax-ASCII" else enc_oov_komin(m.group(0))
|
| 366 |
repl = repl_es if src_lang=="Español" else repl_en
|
|
|
|
| 466 |
v_conj = ("no " if tgt_lang == "Español" else "not ") + v_conj
|
| 467 |
out_parts.append(v_conj)
|
| 468 |
continue
|
| 469 |
+
# Restante
|
| 470 |
out_parts.append(pluralize(lem, tgt_lang) if pl_flags[idx] else lem)
|
| 471 |
|
| 472 |
out_text = " ".join(out_parts)
|
|
|
|
| 504 |
if lemma == "estar":
|
| 505 |
tab = {
|
| 506 |
"Pres":{"1s":"estoy","2s":"estás","3s":"está","1p":"estamos","2p":"estáis","3p":"están"},
|
| 507 |
+
"Past":{"1s":"estuve","2s":"estuviste","3s":"estuvo","1p":"estuvimos","2p":"estuvisteis","3p":"estuvieron"},
|
| 508 |
"Fut":{"1s":"estaré","2s":"estarás","3s":"estará","1p":"estaremos","2p":"estaréis","3p":"estarán"},
|
| 509 |
}; return tab[tense].get(person, tab[tense]["3s"])
|
| 510 |
if lemma == "ir":
|
|
|
|
| 531 |
return "goes" if (tense=="Pres" and person=="3s") else "go"
|
| 532 |
if lemma == "do":
|
| 533 |
if tense == "Past": return "did"
|
| 534 |
+
return "does" if (tense=="Pres" and person=="3s") else "do"
|
| 535 |
|
| 536 |
if tense == "Pres":
|
| 537 |
if person == "3s":
|
| 538 |
+
if lemma.endswith("y") and (len(lemma)<2 or lemma[-2] not in "aeiou"):
|
| 539 |
return lemma[:-1] + "ies"
|
| 540 |
if lemma.endswith(("s","sh","ch","x","z","o")):
|
| 541 |
return lemma + "es"
|
|
|
|
| 543 |
return lemma
|
| 544 |
elif tense == "Past":
|
| 545 |
if lemma.endswith("e"): return lemma + "d"
|
| 546 |
+
if lemma.endswith("y") and (len(lemma)<2 or lemma[-2] not in "aeiou"): return lemma[:-1] + "ied"
|
| 547 |
return lemma + "ed"
|
| 548 |
else:
|
| 549 |
return lemma
|
|
|
|
| 562 |
drop_articles: bool, zero_copula: bool, mode: str, max_comp_exact: bool = False) -> str:
|
| 563 |
if not text.strip(): return ""
|
| 564 |
semi = True # siempre semi-lossless
|
| 565 |
+
core = _build_with_spacy(text, src_lang, target, drop_articles, zero_copula and not semi, semi_lossless=semi) if USE_SPACY else encode_simple(text, src_lang, target)
|
| 566 |
if max_comp_exact:
|
| 567 |
return custom_sidecar_enc(core, text)
|
| 568 |
return core
|
|
|
|
| 578 |
orig = extract_custom_sidecar(text)
|
| 579 |
if orig is not None: return orig
|
| 580 |
orig = extract_sidecar_b85(text)
|
| 581 |
+
if orig is not None: return orig
|
| 582 |
return decode_simple(strip_custom_sidecar(strip_sidecar_b85(text)), src, tgt)
|
| 583 |
if src in ("Español","English") and tgt in ("Español","English"):
|
| 584 |
return translate_natural(text, src, tgt)
|
|
|
|
| 631 |
back = universal_translate(conlang, tgt, src, True, False, mode, max_comp_exact)
|
| 632 |
return conlang, back
|
| 633 |
|
| 634 |
+
# ====================== UI simple (pestañas + 3 casillas) ======================
|
|
|
|
|
|
|
| 635 |
|
| 636 |
+
EXPLAIN_TOP = """
|
| 637 |
+
## ¿Qué hace esta app? (versión fácil)
|
| 638 |
+
Convierte frases entre **Español / Inglés** y dos lenguajes inventados (conlangs):
|
| 639 |
+
- **Minimax-ASCII**: versión compacta con letras normales (ASCII).
|
| 640 |
+
- **Kōmín-CJK**: versión compacta con símbolos al estilo asiático.
|
| 641 |
|
| 642 |
+
### ¿Para qué sirve?
|
| 643 |
+
- Para **acortar** mensajes manteniendo el sentido.
|
| 644 |
+
- Para **codificar** y luego **recuperar** el texto original si quieres.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 645 |
|
| 646 |
+
---
|
|
|
|
| 647 |
|
| 648 |
+
## Los 4 botones (pestañas)
|
| 649 |
+
1) **Traducir** — Cambia de **cualquier** sistema a **cualquier** otro (ES, EN, Minimax o Kōmín).
|
| 650 |
+
2) **Construir (ES/EN → Conlang)** — Toma tu frase natural y crea su versión **compacta** (Minimax/Kōmín) con opciones.
|
| 651 |
+
3) **Decodificar (Conlang → ES/EN)** — Pega Minimax/Kōmín y te devuelve Español o Inglés. Si trae `~...`, recupera el **original exacto**.
|
| 652 |
+
4) **Prueba ida→vuelta** — Hace “ir” al conlang y “volver” a tu idioma para comprobar el resultado.
|
|
|
|
| 653 |
|
| 654 |
+
---
|
| 655 |
+
|
| 656 |
+
## Las 3 casillas (opciones simples)
|
| 657 |
+
- **Omitir artículos**: quita “el/la/los/las” o “a/an/the” → suele ahorrar ~10–15%.
|
| 658 |
+
- **Cópula cero (presente afirm.)**: elimina “ser/estar/be” cuando suena natural → +~5–10%.
|
| 659 |
+
- **Máx. Compresión Exacta**: añade `~...` con tu texto **comprimido**. Al decodificar, si está, vuelves al original **tal cual**.
|
| 660 |
+
Nota: en frases **muy cortas** puede no reducir tamaño por la cabecera `~...`.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 661 |
|
| 662 |
+
---
|
| 663 |
+
|
| 664 |
+
## Consejos rápidos
|
| 665 |
+
- Si no sabes qué elegir, usa **Traducir**.
|
| 666 |
+
- Para **compactar**, usa **Construir** y marca lo que quieras.
|
| 667 |
+
- Si te pasan algo en conlang, usa **Decodificar**.
|
| 668 |
+
- ¿Dudas? Prueba **ida→vuelta**.
|
| 669 |
"""
|
| 670 |
|
| 671 |
+
ALL_LANGS = ["Español","English","Minimax-ASCII","Kōmín-CJK"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 672 |
|
| 673 |
with gr.Blocks(title="Universal Conlang Translator", theme=gr.themes.Soft()) as demo:
|
| 674 |
+
gr.Markdown("# 🌐 Universal Conlang Translator")
|
| 675 |
+
gr.Markdown(EXPLAIN_TOP)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 676 |
|
| 677 |
+
# --- Traducir (universal) ---
|
| 678 |
+
with gr.Tab("Traducir / Translate"):
|
| 679 |
+
with gr.Row():
|
| 680 |
+
uni_src = gr.Dropdown(ALL_LANGS, value="Español", label="Fuente / Source")
|
| 681 |
+
uni_tgt = gr.Dropdown(ALL_LANGS, value="Minimax-ASCII", label="Destino / Target")
|
| 682 |
+
uni_text = gr.Textbox(lines=3, label="Texto / Text", value="", show_copy_button=True)
|
| 683 |
+
with gr.Row():
|
| 684 |
+
uni_drop = gr.Checkbox(value=True, label="Omitir artículos / Drop articles (ES/EN→conlang)")
|
| 685 |
+
uni_zero = gr.Checkbox(value=False, label="Cópula cero / Zero copula (present aff.)")
|
| 686 |
+
uni_max_comp = gr.Checkbox(value=False, label="Máx. Compresión Exacta / Max Exact Compression")
|
| 687 |
+
uni_mode = gr.Dropdown(["Semi-lossless"], value="Semi-lossless", visible=False)
|
| 688 |
+
uni_out = gr.Textbox(lines=6, label="Traducción / Translation", show_copy_button=True)
|
| 689 |
+
gr.Button("Traducir / Translate").click(
|
| 690 |
+
universal_translate,
|
| 691 |
+
[uni_text, uni_src, uni_tgt, uni_drop, uni_zero, uni_mode, uni_max_comp],
|
| 692 |
+
[uni_out]
|
| 693 |
+
)
|
| 694 |
+
|
| 695 |
+
# --- Construir (ES/EN → Conlang) ---
|
| 696 |
+
with gr.Tab("Construir (ES/EN → Conlang) / Build"):
|
| 697 |
+
with gr.Row():
|
| 698 |
+
src_lang = gr.Dropdown(["Español","English"], value="Español", label="Fuente / Source")
|
| 699 |
+
target = gr.Dropdown(["Minimax-ASCII","Kōmín-CJK"], value="Minimax-ASCII", label="Conlang")
|
| 700 |
+
text_in = gr.Textbox(lines=3, label="Frase / Sentence", value="", show_copy_button=True)
|
| 701 |
+
with gr.Row():
|
| 702 |
+
drop_articles = gr.Checkbox(value=True, label="Omitir artículos / Drop articles")
|
| 703 |
+
zero_copula = gr.Checkbox(value=False, label="Cópula cero / Zero copula (present aff.)")
|
| 704 |
+
max_comp_build = gr.Checkbox(value=False, label="Máx. Compresión Exacta / Max Exact Compression")
|
| 705 |
+
mode_build = gr.Dropdown(["Semi-lossless"], value="Semi-lossless", visible=False)
|
| 706 |
+
out = gr.Textbox(lines=6, label="Salida / Output", show_copy_button=True)
|
| 707 |
+
gr.Button("Construir / Build").click(
|
| 708 |
+
build_sentence,
|
| 709 |
+
[text_in, src_lang, target, drop_articles, zero_copula, mode_build, max_comp_build],
|
| 710 |
+
[out]
|
| 711 |
+
)
|
| 712 |
+
|
| 713 |
+
# --- Decodificar (Conlang → ES/EN) ---
|
| 714 |
+
with gr.Tab("Decodificar (Conlang → ES/EN) / Decode"):
|
| 715 |
+
with gr.Row():
|
| 716 |
+
src_code = gr.Dropdown(["Minimax-ASCII","Kōmín-CJK"], value="Minimax-ASCII", label="Fuente / Source")
|
| 717 |
+
tgt_lang = gr.Dropdown(["Español","English"], value="Español", label="Destino / Target")
|
| 718 |
+
code_in = gr.Textbox(lines=3, label="Texto en conlang (puede incluir `~...`) / Conlang text (may include `~...`)", show_copy_button=True)
|
| 719 |
+
out3 = gr.Textbox(lines=6, label="Salida / Output", show_copy_button=True)
|
| 720 |
+
|
| 721 |
+
def decode_lossless_aware(text, src, tgt):
|
| 722 |
+
orig = extract_custom_sidecar(text)
|
| 723 |
+
if orig is not None: return orig
|
| 724 |
+
orig = extract_sidecar_b85(text)
|
| 725 |
+
if orig is not None: return orig
|
| 726 |
+
return decode_simple(strip_custom_sidecar(strip_sidecar_b85(text)), src, tgt)
|
| 727 |
+
|
| 728 |
+
gr.Button("Decodificar / Decode").click(
|
| 729 |
+
decode_lossless_aware, [code_in, src_code, tgt_lang], [out3]
|
| 730 |
+
)
|
| 731 |
+
|
| 732 |
+
# --- Round-trip ---
|
| 733 |
+
with gr.Tab("Prueba ida→vuelta / Round-trip"):
|
| 734 |
+
with gr.Row():
|
| 735 |
+
rt_src = gr.Dropdown(["Español","English"], value="Español", label="Fuente / Source")
|
| 736 |
+
rt_tgt = gr.Dropdown(["Minimax-ASCII","Kōmín-CJK"], value="Minimax-ASCII", label="Conlang")
|
| 737 |
+
rt_text = gr.Textbox(lines=3, label="Frase / Sentence", value="", show_copy_button=True)
|
| 738 |
+
rt_max_comp = gr.Checkbox(value=False, label="Máx. Compresión Exacta / Max Exact Compression")
|
| 739 |
+
rt_mode = gr.Dropdown(["Semi-lossless"], value="Semi-lossless", visible=False)
|
| 740 |
+
rt_out_conlang = gr.Textbox(lines=3, label="Conlang (ida) / Outward", show_copy_button=True)
|
| 741 |
+
rt_out_back = gr.Textbox(lines=3, label="Vuelta / Back", show_copy_button=True)
|
| 742 |
+
gr.Button("Probar / Test").click(
|
| 743 |
+
round_trip,
|
| 744 |
+
[rt_text, rt_src, rt_tgt, rt_mode, rt_max_comp],
|
| 745 |
+
[rt_out_conlang, rt_out_back]
|
| 746 |
+
)
|
| 747 |
|
| 748 |
if __name__ == "__main__":
|
| 749 |
demo.launch()
|
| 750 |
+
|