LoloSemper committed on
Commit
a805b35
·
verified ·
1 Parent(s): b0819d2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +124 -46
app.py CHANGED
@@ -11,7 +11,7 @@
11
  # en_core_web_sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl
12
 
13
  import os, re, json, base64, zlib
14
- from typing import Dict, Optional, List, Any # <- FIX: List/Any importados
15
  import gradio as gr
16
 
17
  # ------------ Archivos esperados ------------
@@ -370,7 +370,7 @@ def _es_conj(lemma, tense, person):
370
  return tab[tense].get(person, tab[tense]["3s"])
371
  if lemma=="estar":
372
  tab={"Pres":{"1s":"estoy","2s":"estás","3s":"está","1p":"estamos","2p":"estáis","3p":"están"},
373
- "Past":{"1s":"estuve","2s":"estuviste","3s":"estuvo","1p":"estuvimos","2p":"estuvisteis","3p":"estuvieron"},
374
  "Fut":{"1s":"estaré","2s":"estarás","3s":"estará","1p":"estaremos","2p":"estaréis","3p":"estarán"}}
375
  return tab[tense].get(person, tab[tense]["3s"])
376
  if lemma=="ir":
@@ -406,6 +406,70 @@ def _en_conj(lemma, tense, person):
406
  return lemma+"ed"
407
  return lemma
408
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
409
  # =====================================================================================
410
  # ========================= UI bilingüe y explicaciones claras ========================
411
  # =====================================================================================
@@ -466,7 +530,7 @@ EXPLAIN_TAB_ROUNDTRIP_EN = "Runs **(ES/EN→Conlang)→(Conlang→ES/EN)** to ve
466
  EXPLAIN_CHECKBOX_EN = "Drop articles ~10–15%, Zero copula ~5–10% extra, Remove pronouns variable, Max Exact 40–60% for >100 chars."
467
  LEXICON_FRIENDLY_EN = "We use **WordNet (OMW)**, pair ES words with EN, clean & sort by frequency, assign short **codes** (Minimax/Kōmín), and save three JSONs so the app can encode/decode compactly."
468
 
469
- # >>> NUEVO <<< ——— Explicación directa de los “botones / modos” (ES/EN)
470
  EXPLAIN_MODES_ES = """
471
  **¿Qué hace cada botón / modo?** *(Marca uno en “🧭 Modo de uso”)*
472
 
@@ -632,18 +696,16 @@ def make_panel_roundtrip(lang="ES"):
632
  c.change(run, [text, src, tgt, mode_hidden, exact], [out1, out2])
633
  return g
634
 
635
- # ---------- Página (ES/EN), con “modos como CHECKBOX (mutuamente excluyentes) ----------
636
  with gr.Blocks(title="Universal Conlang Translator", theme=gr.themes.Soft()) as demo:
637
  gr.Markdown("## 🌍 Idioma / Language")
638
  lang = gr.Radio(["ES","EN"], value="ES", label="Selecciona / Select")
639
 
640
- # >>> NUEVO <<< — Acordeones con explicación de los botones/modos (mismo nivel)
641
  acc_modes_es = gr.Accordion("📖 ¿Qué hace cada botón / modo? (ES)", open=False, visible=True)
642
- with acc_modes_es:
643
- gr.Markdown(EXPLAIN_MODES_ES)
644
  acc_modes_en = gr.Accordion("📖 What does each button / mode do? (EN)", open=False, visible=False)
645
- with acc_modes_en:
646
- gr.Markdown(EXPLAIN_MODES_EN)
647
 
648
  # Acordeones intro + léxico (mismo nivel)
649
  acc_intro_es = gr.Accordion("☑️ Opciones y compactación — guía rápida (ES)", open=False, visible=True)
@@ -665,14 +727,20 @@ with gr.Blocks(title="Universal Conlang Translator", theme=gr.themes.Soft()) as
665
  table_en = gr.Dataframe(headers=["lemma_es","lemma_en","minimax","komin"], row_count=1, interactive=False)
666
  gr.Button("Refresh").click(lambda n: master_preview(int(n)), [n_rows_en], [table_en])
667
 
668
- # “Modos” como CHECKBOX (mutuamente excluyentes)
669
  gr.Markdown("### 🧭 Modo de uso (marca **uno**)")
670
- row_modes = gr.Row()
671
- with row_modes:
672
- cb_tr = gr.Checkbox(True, label="🔁 Traducir / Translate")
673
- cb_bu = gr.Checkbox(False, label="🛠️ Construir (ES/EN → Conlang) / Build")
674
- cb_de = gr.Checkbox(False, label="🗝️ Decodificar (Conlang → ES/EN) / Decode")
675
- cb_rt = gr.Checkbox(False, label="🔄 Prueba ida→vuelta / Round-trip")
 
 
 
 
 
 
676
 
677
  # Paneles por modo y por idioma
678
  gr.Markdown("### 🧪 Área de trabajo")
@@ -683,9 +751,19 @@ with gr.Blocks(title="Universal Conlang Translator", theme=gr.themes.Soft()) as
683
 
684
  def _vis(yes): return gr.update(visible=bool(yes))
685
 
686
- # Enforce: sólo 1 checkbox activo + visibilidad de paneles/accordions por idioma y modo
 
 
 
 
 
 
 
 
 
 
 
687
  def switch_everything(lang_code, tr, bu, de, rt):
688
- # forzar exclusividad (si varias marcadas, prioriza la primera en orden tr>bu>de>rt)
689
  tr2, bu2, de2, rt2 = False, False, False, False
690
  if tr or (not bu and not de and not rt): tr2 = True
691
  elif bu: bu2 = True
@@ -693,49 +771,49 @@ with gr.Blocks(title="Universal Conlang Translator", theme=gr.themes.Soft()) as
693
  else: rt2 = True
694
 
695
  is_en = (lang_code == "EN")
696
- # Accordions ES/EN
697
  vis_es = not is_en; vis_en = is_en
698
  updates = [
699
- _vis(vis_es), _vis(vis_en), # >>> NUEVO <<< acc_modes_es, acc_modes_en
700
- _vis(vis_es), _vis(vis_en), # intro accordions
701
- _vis(vis_es), _vis(vis_en), # lexicon accordions
702
  ]
703
- # Panels ES
704
  updates += [
705
- _vis(vis_es and tr2), _vis(vis_es and bu2), _vis(vis_es and de2), _vis(vis_es and rt2)
 
706
  ]
707
- # Panels EN
708
- updates += [
709
- _vis(vis_en and tr2), _vis(vis_en and bu2), _vis(vis_en and de2), _vis(vis_en and rt2)
710
- ]
711
- # Checkbox state (exclusivo)
712
  updates += [tr2, bu2, de2, rt2]
713
  return updates
714
 
 
 
 
 
 
 
715
  lang.change(
716
- switch_everything,
717
- [lang, cb_tr, cb_bu, cb_de, cb_rt],
718
  [
719
- acc_modes_es, acc_modes_en, # >>> NUEVO <<<
720
  acc_intro_es, acc_intro_en, acc_lex_es, acc_lex_en,
721
  panel_tr_es, panel_bu_es, panel_de_es, panel_rt_es,
722
  panel_tr_en, panel_bu_en, panel_de_en, panel_rt_en,
723
- cb_tr, cb_bu, cb_de, cb_rt
 
 
 
 
 
 
 
 
 
 
 
 
 
724
  ],
725
  )
726
-
727
- for box in (cb_tr, cb_bu, cb_de, cb_rt):
728
- box.change(
729
- switch_everything,
730
- [lang, cb_tr, cb_bu, cb_de, cb_rt],
731
- [
732
- acc_modes_es, acc_modes_en, # >>> NUEVO <<<
733
- acc_intro_es, acc_intro_en, acc_lex_es, acc_lex_en,
734
- panel_tr_es, panel_bu_es, panel_de_es, panel_rt_es,
735
- panel_tr_en, panel_bu_en, panel_de_en, panel_rt_en,
736
- cb_tr, cb_bu, cb_de, cb_rt
737
- ],
738
- )
739
 
740
  if __name__ == "__main__":
741
  demo.launch()
 
11
  # en_core_web_sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl
12
 
13
  import os, re, json, base64, zlib
14
+ from typing import Dict, Optional, List, Any
15
  import gradio as gr
16
 
17
  # ------------ Archivos esperados ------------
 
370
  return tab[tense].get(person, tab[tense]["3s"])
371
  if lemma=="estar":
372
  tab={"Pres":{"1s":"estoy","2s":"estás","3s":"está","1p":"estamos","2p":"estáis","3p":"están"},
373
+ "Past":{"1s":"estuve","2s":"estuviste","3s":"estuvo","1p":"estuvimos","2p":"estuvisteis","3p":"estuvieron"},
374
  "Fut":{"1s":"estaré","2s":"estarás","3s":"estará","1p":"estaremos","2p":"estaréis","3p":"estarán"}}
375
  return tab[tense].get(person, tab[tense]["3s"])
376
  if lemma=="ir":
 
406
  return lemma+"ed"
407
  return lemma
408
 
409
+ # ================= Helper de construcción/translate =================
410
def _build_with_spacy(text: str, src_lang: str, target: str,
                      drop_articles: bool, zero_copula: bool,
                      semi_lossless: bool, remove_pronouns: bool) -> str:
    """Parse *text* with a spaCy pipeline and realize it in the target conlang.

    The Spanish pipeline (``nlp_es``) is used when ``src_lang`` is
    ``"Español"``; any other value falls back to ``nlp_en``. The parsed
    document is handed to ``realize_minimax`` when ``target`` is
    ``"Minimax-ASCII"`` and to ``realize_komin`` otherwise, with the option
    flags forwarded unchanged.
    """
    if src_lang == "Español":
        parser = nlp_es
    else:
        parser = nlp_en
    parsed = parser(text)

    # Both realizers take the same arguments, so pick one and call it once.
    realize = realize_minimax if target == "Minimax-ASCII" else realize_komin
    return realize(parsed, src_lang, drop_articles, zero_copula, semi_lossless,
                   remove_pronouns=remove_pronouns)
419
+
420
def build_sentence(text: str, src_lang: str, target: str,
                   drop_articles: bool, zero_copula: bool,
                   mode: str, max_comp_exact: bool = False, remove_pronouns: bool = False) -> str:
    """Encode a natural-language sentence into the requested conlang.

    Blank (empty or whitespace-only) input yields ``""``. When ``USE_SPACY``
    is truthy the sentence goes through ``_build_with_spacy``; otherwise it is
    encoded with ``encode_simple``. With ``max_comp_exact`` set, the encoded
    text is passed together with the original text to ``custom_sidecar_enc``
    and that result is returned instead.

    ``mode`` is accepted for signature compatibility but is not read here.
    """
    if not text.strip():
        return ""

    semi_lossless = True  # construction always runs semi-lossless

    if USE_SPACY:
        # While semi_lossless is True, `zero_copula and not semi_lossless` is
        # always falsy, so the zero-copula option never reaches the realizer.
        encoded = _build_with_spacy(
            text, src_lang, target, drop_articles,
            zero_copula and not semi_lossless,
            semi_lossless, remove_pronouns,
        )
    else:
        encoded = encode_simple(text, src_lang, target)

    return custom_sidecar_enc(encoded, text) if max_comp_exact else encoded
429
+
430
def universal_translate(text: str, src: str, tgt: str,
                        drop_articles: bool, zero_copula: bool,
                        mode: str, max_comp_exact: bool = False, remove_pronouns: bool = False) -> str:
    """Translate *text* between the natural languages and the conlangs.

    Dispatch, in order:

    * blank input -> ``""``; identical ``src``/``tgt`` -> *text* unchanged;
    * natural -> conlang: delegated to ``build_sentence``;
    * conlang -> natural: if a custom or b85 sidecar is present its payload is
      returned as-is, otherwise both sidecars are stripped and the remainder
      is decoded with ``decode_simple``;
    * natural -> natural: *text* is returned untouched (no translation is
      performed in this function);
    * conlang -> conlang: decoded to Spanish lemmas, then each word token is
      re-encoded through the target table (OOV encoder as fallback);
      punctuation runs are kept. A custom sidecar found on the input is
      re-attached to the output via ``custom_sidecar_enc``;
    * anything else -> ``"[No soportado]"``.
    """
    natural = ("Español", "English")
    conlang = ("Minimax-ASCII", "Kōmín-CJK")

    if not text.strip():
        return ""
    if src == tgt:
        return text

    # Natural -> Conlang
    if src in natural and tgt in conlang:
        return build_sentence(text, src, tgt, drop_articles, zero_copula,
                              mode, max_comp_exact, remove_pronouns)

    # Conlang -> Natural (a sidecar, when present, carries the exact original)
    if src in conlang and tgt in natural:
        original = extract_custom_sidecar(text)
        if original is not None:
            return original
        original = extract_sidecar_b85(text)
        if original is not None:
            return original
        return decode_simple(strip_custom_sidecar(strip_sidecar_b85(text)), src, tgt)

    # Natural <-> Natural: passthrough only
    if src in natural and tgt in natural:
        return text

    # Conlang <-> Conlang, pivoting through Spanish lemmas
    if src in conlang and tgt in conlang:
        # NOTE(review): only the custom sidecar is stripped here, whereas the
        # conlang -> natural branch also strips the b85 one — confirm whether
        # a b85 sidecar can reach this path.
        core = strip_custom_sidecar(text)
        es_lemmas = decode_simple(core, src, "Español")

        # The target never changes inside the loop, so choose the lookup
        # table and the OOV encoder once.
        to_minimax = tgt == "Minimax-ASCII"
        table = ES2MINI if to_minimax else ES2KOMI
        encode_oov = enc_oov_minimax if to_minimax else enc_oov_komin

        pieces = []
        for token in re.findall(r"\w+|[^\w\s]+", es_lemmas):
            if re.fullmatch(r"\w+", token):
                pieces.append(table.get(norm_es(token)) or encode_oov(token))
            else:
                pieces.append(token)
        out_text = " ".join(pieces)

        # Extract the sidecar once instead of re-parsing the input twice.
        payload = extract_custom_sidecar(text)
        if payload is not None:
            return custom_sidecar_enc(out_text, payload)
        return out_text

    return "[No soportado]"
472
+
473
  # =====================================================================================
474
  # ========================= UI bilingüe y explicaciones claras ========================
475
  # =====================================================================================
 
530
  EXPLAIN_CHECKBOX_EN = "Drop articles ~10–15%, Zero copula ~5–10% extra, Remove pronouns variable, Max Exact 40–60% for >100 chars."
531
  LEXICON_FRIENDLY_EN = "We use **WordNet (OMW)**, pair ES words with EN, clean & sort by frequency, assign short **codes** (Minimax/Kōmín), and save three JSONs so the app can encode/decode compactly."
532
 
533
+ # >>> Explicación directa de los “botones / modos”
534
  EXPLAIN_MODES_ES = """
535
  **¿Qué hace cada botón / modo?** *(Marca uno en “🧭 Modo de uso”)*
536
 
 
696
  c.change(run, [text, src, tgt, mode_hidden, exact], [out1, out2])
697
  return g
698
 
699
+ # ---------- Página (ES/EN), modos como CheckboxGroup (selección única) ----------
700
  with gr.Blocks(title="Universal Conlang Translator", theme=gr.themes.Soft()) as demo:
701
  gr.Markdown("## 🌍 Idioma / Language")
702
  lang = gr.Radio(["ES","EN"], value="ES", label="Selecciona / Select")
703
 
704
+ # Acordeones “¿qué hace cada botón?” (ES/EN)
705
  acc_modes_es = gr.Accordion("📖 ¿Qué hace cada botón / modo? (ES)", open=False, visible=True)
706
+ with acc_modes_es: gr.Markdown(EXPLAIN_MODES_ES)
 
707
  acc_modes_en = gr.Accordion("📖 What does each button / mode do? (EN)", open=False, visible=False)
708
+ with acc_modes_en: gr.Markdown(EXPLAIN_MODES_EN)
 
709
 
710
  # Acordeones intro + léxico (mismo nivel)
711
  acc_intro_es = gr.Accordion("☑️ Opciones y compactación — guía rápida (ES)", open=False, visible=True)
 
727
  table_en = gr.Dataframe(headers=["lemma_es","lemma_en","minimax","komin"], row_count=1, interactive=False)
728
  gr.Button("Refresh").click(lambda n: master_preview(int(n)), [n_rows_en], [table_en])
729
 
730
+ # Modo de uso (CheckboxGroup selección única)
731
  gr.Markdown("### 🧭 Modo de uso (marca **uno**)")
732
+ cb_modes = gr.CheckboxGroup(
733
+ choices=[
734
+ "🔁 Traducir / Translate",
735
+ "🛠️ Construir (ES/EN → Conlang) / Build",
736
+ "🗝️ Decodificar (Conlang → ES/EN) / Decode",
737
+ "🔄 Prueba ida→vuelta / Round-trip",
738
+ ],
739
+ value=["🔁 Traducir / Translate"],
740
+ label=None,
741
+ )
742
+ # Estados para mantener la API existente (tr, bu, de, rt)
743
+ cb_tr = gr.State(True); cb_bu = gr.State(False); cb_de = gr.State(False); cb_rt = gr.State(False)
744
 
745
  # Paneles por modo y por idioma
746
  gr.Markdown("### 🧪 Área de trabajo")
 
751
 
752
  def _vis(yes): return gr.update(visible=bool(yes))
753
 
754
+ def _modes_to_flags(selected_list):
755
+ sel = selected_list or []
756
+ order = [
757
+ "🔁 Traducir / Translate",
758
+ "🛠️ Construir (ES/EN → Conlang) / Build",
759
+ "🗝️ Decodificar (Conlang → ES/EN) / Decode",
760
+ "🔄 Prueba ida→vuelta / Round-trip",
761
+ ]
762
+ chosen = next((o for o in order if o in sel), order[0])
763
+ return [chosen == o for o in order], [chosen]
764
+
765
+ # No tocamos tu lógica de visibilidad: sólo la usamos
766
  def switch_everything(lang_code, tr, bu, de, rt):
 
767
  tr2, bu2, de2, rt2 = False, False, False, False
768
  if tr or (not bu and not de and not rt): tr2 = True
769
  elif bu: bu2 = True
 
771
  else: rt2 = True
772
 
773
  is_en = (lang_code == "EN")
 
774
  vis_es = not is_en; vis_en = is_en
775
  updates = [
776
+ _vis(vis_es), _vis(vis_en), # modos ES/EN
777
+ _vis(vis_es), _vis(vis_en), # intro
778
+ _vis(vis_es), _vis(vis_en), # léxico
779
  ]
 
780
  updates += [
781
+ _vis(vis_es and tr2), _vis(vis_es and bu2), _vis(vis_es and de2), _vis(vis_es and rt2),
782
+ _vis(vis_en and tr2), _vis(vis_en and bu2), _vis(vis_en and de2), _vis(vis_en and rt2),
783
  ]
 
 
 
 
 
784
  updates += [tr2, bu2, de2, rt2]
785
  return updates
786
 
787
def _lang_or_modes(lang_code, selected_list, tr, bu, de, rt):
    """Event handler shared by the language radio and the mode selector.

    Normalizes the mode selection to a single active mode, recomputes the
    visibility updates for that mode and language, and appends the normalized
    selection so the mode selector itself is reset to one item.

    ``tr``, ``bu``, ``de`` and ``rt`` (the stored flag states) are received
    because they are wired as inputs, but they are not read: the flags are
    always re-derived from ``selected_list``.

    Returns one value per wired output: everything ``switch_everything``
    returns (visibility updates followed by the four mode flags) plus the
    normalized selection list as the final item.
    """
    flags, normalized = _modes_to_flags(selected_list)
    updates = switch_everything(lang_code, *flags)
    # switch_everything already ends its result with the four mode flags.
    # Appending `flags` again would shift a bool into the slot meant for the
    # mode selector and return more values than there are outputs.
    return updates + [normalized]
791
+
792
+ # Reacciones
793
  lang.change(
794
+ _lang_or_modes,
795
+ [lang, cb_modes, cb_tr, cb_bu, cb_de, cb_rt],
796
  [
797
+ acc_modes_es, acc_modes_en,
798
  acc_intro_es, acc_intro_en, acc_lex_es, acc_lex_en,
799
  panel_tr_es, panel_bu_es, panel_de_es, panel_rt_es,
800
  panel_tr_en, panel_bu_en, panel_de_en, panel_rt_en,
801
+ cb_tr, cb_bu, cb_de, cb_rt,
802
+ cb_modes,
803
+ ],
804
+ )
805
+ cb_modes.change(
806
+ _lang_or_modes,
807
+ [lang, cb_modes, cb_tr, cb_bu, cb_de, cb_rt],
808
+ [
809
+ acc_modes_es, acc_modes_en,
810
+ acc_intro_es, acc_intro_en, acc_lex_es, acc_lex_en,
811
+ panel_tr_es, panel_bu_es, panel_de_es, panel_rt_es,
812
+ panel_tr_en, panel_bu_en, panel_de_en, panel_rt_en,
813
+ cb_tr, cb_bu, cb_de, cb_rt,
814
+ cb_modes,
815
  ],
816
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
817
 
818
  if __name__ == "__main__":
819
  demo.launch()