Update app.py
Browse files
app.py
CHANGED
|
@@ -11,7 +11,7 @@
|
|
| 11 |
# en_core_web_sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl
|
| 12 |
|
| 13 |
import os, re, json, base64, zlib
|
| 14 |
-
from typing import Dict, Optional, List, Any
|
| 15 |
import gradio as gr
|
| 16 |
|
| 17 |
# ------------ Archivos esperados ------------
|
|
@@ -370,7 +370,7 @@ def _es_conj(lemma, tense, person):
|
|
| 370 |
return tab[tense].get(person, tab[tense]["3s"])
|
| 371 |
if lemma=="estar":
|
| 372 |
tab={"Pres":{"1s":"estoy","2s":"estás","3s":"está","1p":"estamos","2p":"estáis","3p":"están"},
|
| 373 |
-
"Past":{"1s":"
|
| 374 |
"Fut":{"1s":"estaré","2s":"estarás","3s":"estará","1p":"estaremos","2p":"estaréis","3p":"estarán"}}
|
| 375 |
return tab[tense].get(person, tab[tense]["3s"])
|
| 376 |
if lemma=="ir":
|
|
@@ -406,6 +406,70 @@ def _en_conj(lemma, tense, person):
|
|
| 406 |
return lemma+"ed"
|
| 407 |
return lemma
|
| 408 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 409 |
# =====================================================================================
|
| 410 |
# ========================= UI bilingüe y explicaciones claras ========================
|
| 411 |
# =====================================================================================
|
|
@@ -466,7 +530,7 @@ EXPLAIN_TAB_ROUNDTRIP_EN = "Runs **(ES/EN→Conlang)→(Conlang→ES/EN)** to ve
|
|
| 466 |
EXPLAIN_CHECKBOX_EN = "Drop articles ~10–15%, Zero copula ~5–10% extra, Remove pronouns variable, Max Exact 40–60% for >100 chars."
|
| 467 |
LEXICON_FRIENDLY_EN = "We use **WordNet (OMW)**, pair ES words with EN, clean & sort by frequency, assign short **codes** (Minimax/Kōmín), and save three JSONs so the app can encode/decode compactly."
|
| 468 |
|
| 469 |
-
# >>>
|
| 470 |
EXPLAIN_MODES_ES = """
|
| 471 |
**¿Qué hace cada botón / modo?** *(Marca uno en “🧭 Modo de uso”)*
|
| 472 |
|
|
@@ -632,18 +696,16 @@ def make_panel_roundtrip(lang="ES"):
|
|
| 632 |
c.change(run, [text, src, tgt, mode_hidden, exact], [out1, out2])
|
| 633 |
return g
|
| 634 |
|
| 635 |
-
# ---------- Página (ES/EN),
|
| 636 |
with gr.Blocks(title="Universal Conlang Translator", theme=gr.themes.Soft()) as demo:
|
| 637 |
gr.Markdown("## 🌍 Idioma / Language")
|
| 638 |
lang = gr.Radio(["ES","EN"], value="ES", label="Selecciona / Select")
|
| 639 |
|
| 640 |
-
#
|
| 641 |
acc_modes_es = gr.Accordion("📖 ¿Qué hace cada botón / modo? (ES)", open=False, visible=True)
|
| 642 |
-
with acc_modes_es:
|
| 643 |
-
gr.Markdown(EXPLAIN_MODES_ES)
|
| 644 |
acc_modes_en = gr.Accordion("📖 What does each button / mode do? (EN)", open=False, visible=False)
|
| 645 |
-
with acc_modes_en:
|
| 646 |
-
gr.Markdown(EXPLAIN_MODES_EN)
|
| 647 |
|
| 648 |
# Acordeones intro + léxico (mismo nivel)
|
| 649 |
acc_intro_es = gr.Accordion("☑️ Opciones y compactación — guía rápida (ES)", open=False, visible=True)
|
|
@@ -665,14 +727,20 @@ with gr.Blocks(title="Universal Conlang Translator", theme=gr.themes.Soft()) as
|
|
| 665 |
table_en = gr.Dataframe(headers=["lemma_es","lemma_en","minimax","komin"], row_count=1, interactive=False)
|
| 666 |
gr.Button("Refresh").click(lambda n: master_preview(int(n)), [n_rows_en], [table_en])
|
| 667 |
|
| 668 |
-
#
|
| 669 |
gr.Markdown("### 🧭 Modo de uso (marca **uno**)")
|
| 670 |
-
|
| 671 |
-
|
| 672 |
-
|
| 673 |
-
|
| 674 |
-
|
| 675 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 676 |
|
| 677 |
# Paneles por modo y por idioma
|
| 678 |
gr.Markdown("### 🧪 Área de trabajo")
|
|
@@ -683,9 +751,19 @@ with gr.Blocks(title="Universal Conlang Translator", theme=gr.themes.Soft()) as
|
|
| 683 |
|
| 684 |
def _vis(yes):
    """Return a Gradio update that toggles a component's visibility.

    The component is shown when `yes` is truthy and hidden otherwise.
    """
    show = True if yes else False
    return gr.update(visible=show)
|
| 685 |
|
| 686 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 687 |
def switch_everything(lang_code, tr, bu, de, rt):
|
| 688 |
-
# forzar exclusividad (si varias marcadas, prioriza la primera en orden tr>bu>de>rt)
|
| 689 |
tr2, bu2, de2, rt2 = False, False, False, False
|
| 690 |
if tr or (not bu and not de and not rt): tr2 = True
|
| 691 |
elif bu: bu2 = True
|
|
@@ -693,49 +771,49 @@ with gr.Blocks(title="Universal Conlang Translator", theme=gr.themes.Soft()) as
|
|
| 693 |
else: rt2 = True
|
| 694 |
|
| 695 |
is_en = (lang_code == "EN")
|
| 696 |
-
# Accordions ES/EN
|
| 697 |
vis_es = not is_en; vis_en = is_en
|
| 698 |
updates = [
|
| 699 |
-
_vis(vis_es), _vis(vis_en), #
|
| 700 |
-
_vis(vis_es), _vis(vis_en), # intro
|
| 701 |
-
_vis(vis_es), _vis(vis_en), #
|
| 702 |
]
|
| 703 |
-
# Panels ES
|
| 704 |
updates += [
|
| 705 |
-
_vis(vis_es and tr2), _vis(vis_es and bu2), _vis(vis_es and de2), _vis(vis_es and rt2)
|
|
|
|
| 706 |
]
|
| 707 |
-
# Panels EN
|
| 708 |
-
updates += [
|
| 709 |
-
_vis(vis_en and tr2), _vis(vis_en and bu2), _vis(vis_en and de2), _vis(vis_en and rt2)
|
| 710 |
-
]
|
| 711 |
-
# Checkbox state (exclusivo)
|
| 712 |
updates += [tr2, bu2, de2, rt2]
|
| 713 |
return updates
|
| 714 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 715 |
lang.change(
|
| 716 |
-
|
| 717 |
-
[lang, cb_tr, cb_bu, cb_de, cb_rt],
|
| 718 |
[
|
| 719 |
-
acc_modes_es, acc_modes_en,
|
| 720 |
acc_intro_es, acc_intro_en, acc_lex_es, acc_lex_en,
|
| 721 |
panel_tr_es, panel_bu_es, panel_de_es, panel_rt_es,
|
| 722 |
panel_tr_en, panel_bu_en, panel_de_en, panel_rt_en,
|
| 723 |
-
cb_tr, cb_bu, cb_de, cb_rt
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 724 |
],
|
| 725 |
)
|
| 726 |
-
|
| 727 |
-
for box in (cb_tr, cb_bu, cb_de, cb_rt):
|
| 728 |
-
box.change(
|
| 729 |
-
switch_everything,
|
| 730 |
-
[lang, cb_tr, cb_bu, cb_de, cb_rt],
|
| 731 |
-
[
|
| 732 |
-
acc_modes_es, acc_modes_en, # >>> NUEVO <<<
|
| 733 |
-
acc_intro_es, acc_intro_en, acc_lex_es, acc_lex_en,
|
| 734 |
-
panel_tr_es, panel_bu_es, panel_de_es, panel_rt_es,
|
| 735 |
-
panel_tr_en, panel_bu_en, panel_de_en, panel_rt_en,
|
| 736 |
-
cb_tr, cb_bu, cb_de, cb_rt
|
| 737 |
-
],
|
| 738 |
-
)
|
| 739 |
|
| 740 |
if __name__ == "__main__":
|
| 741 |
demo.launch()
|
|
|
|
| 11 |
# en_core_web_sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl
|
| 12 |
|
| 13 |
import os, re, json, base64, zlib
|
| 14 |
+
from typing import Dict, Optional, List, Any
|
| 15 |
import gradio as gr
|
| 16 |
|
| 17 |
# ------------ Archivos esperados ------------
|
|
|
|
| 370 |
return tab[tense].get(person, tab[tense]["3s"])
|
| 371 |
if lemma=="estar":
|
| 372 |
tab={"Pres":{"1s":"estoy","2s":"estás","3s":"está","1p":"estamos","2p":"estáis","3p":"están"},
|
| 373 |
+
"Past":{"1s":"estuviste","2s":"estuviste","3s":"estuvo","1p":"estuvimos","2p":"estuvisteis","3p":"estuvieron"},
|
| 374 |
"Fut":{"1s":"estaré","2s":"estarás","3s":"estará","1p":"estaremos","2p":"estaréis","3p":"estarán"}}
|
| 375 |
return tab[tense].get(person, tab[tense]["3s"])
|
| 376 |
if lemma=="ir":
|
|
|
|
| 406 |
return lemma+"ed"
|
| 407 |
return lemma
|
| 408 |
|
| 409 |
+
# ================= Helper de construcción/translate =================
|
| 410 |
+
def _build_with_spacy(text: str, src_lang: str, target: str,
                      drop_articles: bool, zero_copula: bool,
                      semi_lossless: bool, remove_pronouns: bool) -> str:
    """Parse `text` with a spaCy pipeline and realize it in the target conlang.

    The Spanish pipeline (`nlp_es`) is used when `src_lang` is "Español";
    any other value selects the English pipeline (`nlp_en`). The parsed
    document is handed to `realize_minimax` when `target` is
    "Minimax-ASCII" and to `realize_komin` for every other target, with all
    option flags forwarded unchanged.
    """
    if src_lang == "Español":
        parsed = nlp_es(text)
    else:
        parsed = nlp_en(text)

    # Both realizers take the same arguments, so pick one and call it once.
    realize = realize_minimax if target == "Minimax-ASCII" else realize_komin
    return realize(parsed, src_lang, drop_articles, zero_copula, semi_lossless,
                   remove_pronouns=remove_pronouns)
|
| 419 |
+
|
| 420 |
+
def build_sentence(text: str, src_lang: str, target: str,
                   drop_articles: bool, zero_copula: bool,
                   mode: str, max_comp_exact: bool = False, remove_pronouns: bool = False) -> str:
    """Encode natural-language `text` into the `target` conlang.

    Args:
        text: Source sentence. `None`, empty or whitespace-only input yields "".
        src_lang: Source language label, forwarded to the encoder.
        target: Target conlang label, forwarded to the encoder.
        drop_articles: Forwarded to the spaCy-based builder.
        zero_copula: Accepted for interface compatibility. Construction is
            always semi-lossless, so the value forwarded to the spaCy-based
            builder is always False.
        mode: Accepted for interface compatibility; not read by this function.
        max_comp_exact: When true, the encoded text is wrapped together with
            the original text by `custom_sidecar_enc`.
        remove_pronouns: Forwarded to the spaCy-based builder.

    Returns:
        The encoded sentence, optionally carrying the sidecar.
    """
    # Guard against None as well as blank strings so a cleared input field
    # cannot raise AttributeError on .strip().
    if not text or not text.strip():
        return ""

    semi = True  # construction is always semi-lossless
    if USE_SPACY:
        core = _build_with_spacy(text, src_lang, target, drop_articles,
                                 zero_copula and not semi, semi, remove_pronouns)
    else:
        core = encode_simple(text, src_lang, target)

    if max_comp_exact:
        return custom_sidecar_enc(core, text)
    return core
|
| 429 |
+
|
| 430 |
+
def universal_translate(text: str, src: str, tgt: str,
                        drop_articles: bool, zero_copula: bool,
                        mode: str, max_comp_exact: bool = False, remove_pronouns: bool = False) -> str:
    """Route `text` from the `src` system to the `tgt` system.

    Supported routes:
      * natural -> conlang: delegated to `build_sentence`.
      * conlang -> natural: returns the original text stored in a custom or
        base85 sidecar when one is present, otherwise decodes the stripped text.
      * natural -> natural: the text is returned unchanged.
      * conlang -> conlang: decodes to Spanish lemmas and re-encodes each word
        with the target code table, carrying over a custom sidecar if present.

    Args:
        text: Input text. `None`, empty or whitespace-only input yields "".
        src: Source label ("Español", "English", "Minimax-ASCII", "Kōmín-CJK").
        tgt: Target label, same vocabulary as `src`.
        drop_articles, zero_copula, mode, max_comp_exact, remove_pronouns:
            Only used on the natural -> conlang route, where they are passed
            to `build_sentence`.

    Returns:
        The converted text, or "[No soportado]" for an unknown route.
    """
    # Guard against None as well as blank strings (avoids AttributeError).
    if not text or not text.strip():
        return ""
    if src == tgt:
        return text

    natural = ("Español", "English")
    conlang = ("Minimax-ASCII", "Kōmín-CJK")

    # Natural -> Conlang
    if src in natural and tgt in conlang:
        return build_sentence(text, src, tgt, drop_articles, zero_copula,
                              mode, max_comp_exact, remove_pronouns)

    # Conlang -> Natural (sidecars hold the exact original, so they win)
    if src in conlang and tgt in natural:
        orig = extract_custom_sidecar(text)
        if orig is not None:
            return orig
        orig = extract_sidecar_b85(text)
        if orig is not None:
            return orig
        return decode_simple(strip_custom_sidecar(strip_sidecar_b85(text)), src, tgt)

    # Natural <-> Natural: nothing is translated, the text passes through.
    if src in natural and tgt in natural:
        return text

    # Conlang <-> Conlang: pivot through Spanish lemmas.
    if src in conlang and tgt in conlang:
        core = strip_custom_sidecar(text)
        es_lemmas = decode_simple(core, src, "Español")

        # The target never changes inside the loop, so choose the code table
        # and the out-of-vocabulary encoder once.
        to_minimax = tgt == "Minimax-ASCII"
        table = ES2MINI if to_minimax else ES2KOMI
        encode_oov = enc_oov_minimax if to_minimax else enc_oov_komin

        out = []
        for w in re.findall(r"\w+|[^\w\s]+", es_lemmas):
            if re.fullmatch(r"\w+", w):
                # Known lemma -> table code; anything else -> OOV encoding.
                out.append(table.get(norm_es(w)) or encode_oov(w))
            else:
                # Punctuation runs are copied verbatim.
                out.append(w)
        out_text = " ".join(out)

        # Extract the sidecar once and reuse the result.
        sidecar = extract_custom_sidecar(text)
        if sidecar is not None:
            return custom_sidecar_enc(out_text, sidecar)
        return out_text

    return "[No soportado]"
|
| 472 |
+
|
| 473 |
# =====================================================================================
|
| 474 |
# ========================= UI bilingüe y explicaciones claras ========================
|
| 475 |
# =====================================================================================
|
|
|
|
| 530 |
EXPLAIN_CHECKBOX_EN = "Drop articles ~10–15%, Zero copula ~5–10% extra, Remove pronouns variable, Max Exact 40–60% for >100 chars."
|
| 531 |
LEXICON_FRIENDLY_EN = "We use **WordNet (OMW)**, pair ES words with EN, clean & sort by frequency, assign short **codes** (Minimax/Kōmín), and save three JSONs so the app can encode/decode compactly."
|
| 532 |
|
| 533 |
+
# >>> Explicación directa de los “botones / modos”
|
| 534 |
EXPLAIN_MODES_ES = """
|
| 535 |
**¿Qué hace cada botón / modo?** *(Marca uno en “🧭 Modo de uso”)*
|
| 536 |
|
|
|
|
| 696 |
c.change(run, [text, src, tgt, mode_hidden, exact], [out1, out2])
|
| 697 |
return g
|
| 698 |
|
| 699 |
+
# ---------- Página (ES/EN), modos como CheckboxGroup (selección única) ----------
|
| 700 |
with gr.Blocks(title="Universal Conlang Translator", theme=gr.themes.Soft()) as demo:
|
| 701 |
gr.Markdown("## 🌍 Idioma / Language")
|
| 702 |
lang = gr.Radio(["ES","EN"], value="ES", label="Selecciona / Select")
|
| 703 |
|
| 704 |
+
# Acordeones “¿qué hace cada botón?” (ES/EN)
|
| 705 |
acc_modes_es = gr.Accordion("📖 ¿Qué hace cada botón / modo? (ES)", open=False, visible=True)
|
| 706 |
+
with acc_modes_es: gr.Markdown(EXPLAIN_MODES_ES)
|
|
|
|
| 707 |
acc_modes_en = gr.Accordion("📖 What does each button / mode do? (EN)", open=False, visible=False)
|
| 708 |
+
with acc_modes_en: gr.Markdown(EXPLAIN_MODES_EN)
|
|
|
|
| 709 |
|
| 710 |
# Acordeones intro + léxico (mismo nivel)
|
| 711 |
acc_intro_es = gr.Accordion("☑️ Opciones y compactación — guía rápida (ES)", open=False, visible=True)
|
|
|
|
| 727 |
table_en = gr.Dataframe(headers=["lemma_es","lemma_en","minimax","komin"], row_count=1, interactive=False)
|
| 728 |
gr.Button("Refresh").click(lambda n: master_preview(int(n)), [n_rows_en], [table_en])
|
| 729 |
|
| 730 |
+
# Modo de uso (CheckboxGroup selección única)
|
| 731 |
gr.Markdown("### 🧭 Modo de uso (marca **uno**)")
|
| 732 |
+
cb_modes = gr.CheckboxGroup(
|
| 733 |
+
choices=[
|
| 734 |
+
"🔁 Traducir / Translate",
|
| 735 |
+
"🛠️ Construir (ES/EN → Conlang) / Build",
|
| 736 |
+
"🗝️ Decodificar (Conlang → ES/EN) / Decode",
|
| 737 |
+
"🔄 Prueba ida→vuelta / Round-trip",
|
| 738 |
+
],
|
| 739 |
+
value=["🔁 Traducir / Translate"],
|
| 740 |
+
label=None,
|
| 741 |
+
)
|
| 742 |
+
# Estados para mantener la API existente (tr, bu, de, rt)
|
| 743 |
+
cb_tr = gr.State(True); cb_bu = gr.State(False); cb_de = gr.State(False); cb_rt = gr.State(False)
|
| 744 |
|
| 745 |
# Paneles por modo y por idioma
|
| 746 |
gr.Markdown("### 🧪 Área de trabajo")
|
|
|
|
| 751 |
|
| 752 |
def _vis(yes):
    """Return a Gradio update that toggles a component's visibility.

    The component is shown when `yes` is truthy and hidden otherwise.
    """
    show = True if yes else False
    return gr.update(visible=show)
|
| 753 |
|
| 754 |
+
def _modes_to_flags(selected_list):
    """Collapse a checkbox-group selection into exactly one active mode.

    Args:
        selected_list: Labels currently ticked in the mode selector; may be
            None or empty, and may contain labels that are not known modes.

    Returns:
        A pair `(flags, normalized)`: `flags` is a list of four booleans in
        the order translate, build, decode, round-trip with exactly one True,
        and `normalized` is a one-element list holding the chosen label.

    The last valid label in `selected_list` wins. Picking by fixed priority
    instead would pin the selector to "Translate": it is the initial value,
    any selection that still contains it resolves back to it, and an empty
    selection defaults to it, so no other mode could ever be reached.
    NOTE(review): this assumes the CheckboxGroup lists a newly ticked choice
    after the ones that were already selected - confirm against the Gradio
    version in use.
    """
    order = [
        "🔁 Traducir / Translate",
        "🛠️ Construir (ES/EN → Conlang) / Build",
        "🗝️ Decodificar (Conlang → ES/EN) / Decode",
        "🔄 Prueba ida→vuelta / Round-trip",
    ]
    # Ignore anything that is not a known mode label.
    valid = [label for label in (selected_list or []) if label in order]
    # Nothing valid selected -> fall back to the first mode (translate).
    chosen = valid[-1] if valid else order[0]
    return [chosen == o for o in order], [chosen]
|
| 764 |
+
|
| 765 |
+
# No tocamos tu lógica de visibilidad: sólo la usamos
|
| 766 |
def switch_everything(lang_code, tr, bu, de, rt):
|
|
|
|
| 767 |
tr2, bu2, de2, rt2 = False, False, False, False
|
| 768 |
if tr or (not bu and not de and not rt): tr2 = True
|
| 769 |
elif bu: bu2 = True
|
|
|
|
| 771 |
else: rt2 = True
|
| 772 |
|
| 773 |
is_en = (lang_code == "EN")
|
|
|
|
| 774 |
vis_es = not is_en; vis_en = is_en
|
| 775 |
updates = [
|
| 776 |
+
_vis(vis_es), _vis(vis_en), # modos ES/EN
|
| 777 |
+
_vis(vis_es), _vis(vis_en), # intro
|
| 778 |
+
_vis(vis_es), _vis(vis_en), # léxico
|
| 779 |
]
|
|
|
|
| 780 |
updates += [
|
| 781 |
+
_vis(vis_es and tr2), _vis(vis_es and bu2), _vis(vis_es and de2), _vis(vis_es and rt2),
|
| 782 |
+
_vis(vis_en and tr2), _vis(vis_en and bu2), _vis(vis_en and de2), _vis(vis_en and rt2),
|
| 783 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 784 |
updates += [tr2, bu2, de2, rt2]
|
| 785 |
return updates
|
| 786 |
|
| 787 |
+
def _lang_or_modes(lang_code, selected_list, tr, bu, de, rt):
    """Event handler shared by the language radio and the mode selector.

    Args:
        lang_code: Current value of the language radio.
        selected_list: Current selection of the mode checkbox group.
        tr, bu, de, rt: Previous per-mode state values. They are received
            because they are wired as event inputs, but are not read here;
            the flags are recomputed from `selected_list`.

    Returns:
        A list with one entry per wired output: the visibility updates and
        the four mode flags produced by `switch_everything`, followed by the
        normalized single-item selection for the checkbox group.
    """
    flags, normalized = _modes_to_flags(selected_list)
    # switch_everything already ends its result with the four exclusive
    # flags. Appending `flags` again would return more values than there are
    # outputs and shift the checkbox-group value onto a bool.
    return switch_everything(lang_code, *flags) + [normalized]
|
| 791 |
+
|
| 792 |
+
# Reacciones
|
| 793 |
lang.change(
|
| 794 |
+
_lang_or_modes,
|
| 795 |
+
[lang, cb_modes, cb_tr, cb_bu, cb_de, cb_rt],
|
| 796 |
[
|
| 797 |
+
acc_modes_es, acc_modes_en,
|
| 798 |
acc_intro_es, acc_intro_en, acc_lex_es, acc_lex_en,
|
| 799 |
panel_tr_es, panel_bu_es, panel_de_es, panel_rt_es,
|
| 800 |
panel_tr_en, panel_bu_en, panel_de_en, panel_rt_en,
|
| 801 |
+
cb_tr, cb_bu, cb_de, cb_rt,
|
| 802 |
+
cb_modes,
|
| 803 |
+
],
|
| 804 |
+
)
|
| 805 |
+
cb_modes.change(
|
| 806 |
+
_lang_or_modes,
|
| 807 |
+
[lang, cb_modes, cb_tr, cb_bu, cb_de, cb_rt],
|
| 808 |
+
[
|
| 809 |
+
acc_modes_es, acc_modes_en,
|
| 810 |
+
acc_intro_es, acc_intro_en, acc_lex_es, acc_lex_en,
|
| 811 |
+
panel_tr_es, panel_bu_es, panel_de_es, panel_rt_es,
|
| 812 |
+
panel_tr_en, panel_bu_en, panel_de_en, panel_rt_en,
|
| 813 |
+
cb_tr, cb_bu, cb_de, cb_rt,
|
| 814 |
+
cb_modes,
|
| 815 |
],
|
| 816 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 817 |
|
| 818 |
if __name__ == "__main__":
|
| 819 |
demo.launch()
|