LoloSemper commited on
Commit
92099bc
·
verified ·
1 Parent(s): 01292b5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +225 -285
app.py CHANGED
@@ -412,108 +412,87 @@ def _en_conj(lemma, tense, person):
412
 
413
  ALL_LANGS = ["Español","English","Minimax-ASCII","Kōmín-CJK"]
414
 
415
- # ---- Títulos y contenidos de acordeones (resumen) ----
416
- ACC_TITLES_ES = {
417
- "translate": "🔁 Traducir — ¿Qué hace? (despliega)",
418
- "build": "🛠️ Construir (ES/EN → Conlang) — ¿Qué hace?",
419
- "decode": "🗝️ Decodificar (Conlang → ES/EN) — ¿Qué hace?",
420
- "roundtrip": "🔄 Prueba ida→vuelta — ¿Qué hace?",
421
- "checkbox": "☑️ Opciones y compactación",
422
- "lexicon": "ℹ️ Léxico (OMW → Minimax/Kōmín) — explicación y vista previa"
423
- }
424
- ACC_TITLES_EN = {
425
- "translate": "🔁 Translate — What does it do? (expand)",
426
- "build": "🛠️ Build (ES/EN → Conlang) — What does it do?",
427
- "decode": "🗝️ Decode (Conlang → ES/EN) — What does it do?",
428
- "roundtrip": "🔄 Round-trip — What does it do?",
429
- "checkbox": "☑️ Options & compaction",
430
- "lexicon": "ℹ️ Lexicon (OMW → Minimax/Kōmín) — explainer & preview"
431
- }
432
-
433
  EXPLAIN_TAB_TRANSLATE_ES = """
434
- Convierte el **Texto** al **Destino** (ES/EN/Minimax/Kōmín).
435
- - **Máx. Compresión Exacta** añade `~...` para recuperar el **original exacto**.
436
- - Los **checkbox** (artículos/cópula/pronombres) **solo aplican** cuando el **Destino es conlang**.
 
437
  """
438
  EXPLAIN_TAB_BUILD_ES = """
439
- Fuerza salida **en conlang** (Minimax/Kōmín) desde Español o Inglés, con reglas de fraseo y opciones de compactación.
 
440
  """
441
  EXPLAIN_TAB_DECODE_ES = """
442
- Convierte **Minimax/Kōmín → ES/EN**. Si hay `~...`, devuelve el **original exacto**; si no, hace reconstrucción **semi-lossless**.
 
 
 
443
  """
444
  EXPLAIN_TAB_ROUNDTRIP_ES = """
445
- Ejecuta **(ES/EN→Conlang)→(Conlang→ES/EN)** para comprobar **reversibilidad**. Con exacta, la vuelta es **bit a bit**.
 
 
446
  """
447
  EXPLAIN_CHECKBOX_ES = """
448
- - **Omitir artículos**: **~10–15%**
449
- - **Cópula cero (presente afirm.)**: **~5–10%** extra
450
- - **Quitar pronombres**: ahorro variable
451
- - **Máx. Compresión Exacta**: **~40–60%** en >100 caracteres (sidecar `~...`)
452
- **Referencia:** sin casillas **0%**; artículos+cópula **~15–20%**.
 
453
  """
454
 
455
- EXPLAIN_TAB_TRANSLATE_EN = """
456
- Converts **Text → Target** (ES/EN/Minimax/Kōmín). **Max Exact Compression** adds `~...` for bit-perfect recovery. Checkboxes apply when **Target is a conlang**.
457
- """
458
- EXPLAIN_TAB_BUILD_EN = """Forces **conlang output** (Minimax/Kōmín) with phrasing rules and compaction options."""
459
- EXPLAIN_TAB_DECODE_EN = """Converts **Minimax/Kōmín → ES/EN**. If `~...` exists, returns the exact original; otherwise semi-lossless."""
460
- EXPLAIN_TAB_ROUNDTRIP_EN = """Runs **(ES/EN→Conlang)→(Conlang→ES/EN)** to verify reversibility."""
461
- EXPLAIN_CHECKBOX_EN = """
462
- - **Drop articles**: **~10–15%**
463
- - **Zero copula (present affirm.)**: **~5–10%** extra
464
- - **Remove pronouns**: variable
465
- - **Max Exact Compression**: **~40–60%** for >100 chars (`~...`)
466
- Reference: no options **0%**; articles+copula **~15–20%**.
 
467
  """
468
 
469
- LEXICON_BUILD_ES = """
470
- **Cómo se construyó el léxico**
471
- 1) OMW/WordNet: lemas **ES** y equivalentes **EN** por sinset.
472
- 2) Normaliza y ordena por **frecuencia** (*wordfreq*).
473
- 3) (Opcional) **spaCy** refina; **Argos** puede rellenar EN.
474
- 4) Asigna **códigos** con alfabetos barajados por **SEED** hasta `MAXLEN`.
475
- 5) Exporta: `lexicon_minimax.json`, `lexicon_komin.json`, `lexicon_master.json` (+TSV).
476
- """
477
- LEXICON_BUILD_EN = """
478
- **How the lexicon was built**
479
- 1) OMW/WordNet ES lemmas + EN counterparts per synset.
480
- 2) Normalize & sort by **frequency** (*wordfreq*).
481
- 3) (Optional) **spaCy** refine; **Argos** may fill EN.
482
- 4) Assign **codes** with **SEED-shuffled** alphabets up to `MAXLEN`.
483
- 5) Exports: `lexicon_minimax.json`, `lexicon_komin.json`, `lexicon_master.json` (+TSV).
484
- """
485
 
486
- # ---------- Utilidad: cálculo de compactación ----------
487
  def _pct_comp(original: str, result: str) -> float:
488
  if not original: return 0.0
489
  return max(0.0, 100.0 * (1.0 - (len(result) / len(original))))
490
 
491
- def compaction_report_es(text, src, tgt, drop, zero, rm, maxc) -> str:
492
  if not text.strip(): return "—"
493
  if tgt not in ("Minimax-ASCII","Kōmín-CJK"):
494
  return "La compactación aplica cuando el **Destino** es Minimax/Kōmín."
495
  base = build_sentence(text, src, tgt, False, False, "Semi-lossless", False, False)
496
  curr = build_sentence(text, src, tgt, drop, zero, "Semi-lossless", False, rm)
497
- p_base = _pct_comp(text, base); p_curr = _pct_comp(text, curr)
498
- msg = f"**Base (sin casillas):** {p_base:.1f}% · **Con tus opciones:** {p_curr:.1f}%"
499
  if maxc:
500
  curr_exact = build_sentence(text, src, tgt, drop, zero, "Semi-lossless", True, rm)
501
- p_exact = _pct_comp(text, curr_exact)
502
- msg += f" · **Con sidecar `~...`:** {p_exact:.1f}%"
503
  return msg
504
 
505
- def compaction_report_en(text, src, tgt, drop, zero, rm, maxc) -> str:
506
  if not text.strip(): return "—"
507
  if tgt not in ("Minimax-ASCII","Kōmín-CJK"):
508
  return "Compaction applies when **Target** is Minimax/Kōmín."
509
  base = build_sentence(text, src, tgt, False, False, "Semi-lossless", False, False)
510
  curr = build_sentence(text, src, tgt, drop, zero, "Semi-lossless", False, rm)
511
- p_base = _pct_comp(text, base); p_curr = _pct_comp(text, curr)
512
- msg = f"**Base (no options):** {p_base:.1f}% · **With your options:** {p_curr:.1f}%"
513
  if maxc:
514
  curr_exact = build_sentence(text, src, tgt, drop, zero, "Semi-lossless", True, rm)
515
- p_exact = _pct_comp(text, curr_exact)
516
- msg += f" · **With `~...` sidecar:** {p_exact:.1f}%"
517
  return msg
518
 
519
  def master_preview(n: int = 20) -> List[List[Any]]:
@@ -527,233 +506,193 @@ def master_preview(n: int = 20) -> List[List[Any]]:
527
  except Exception:
528
  return [["lemma_es","lemma_en","minimax","komin"], ["(no data)","","",""]]
529
 
530
- # ========================= Grupos ES / EN =========================
531
- def make_group_es():
532
  with gr.Group(visible=True) as g:
533
- gr.Markdown("# 🌐 Universal Conlang Translator · Compresión Exacta (ES)")
534
- # Acordeones de explicación — MISMO nivel
 
 
 
 
 
 
535
  with gr.Row():
536
- with gr.Column():
537
- with gr.Accordion(ACC_TITLES_ES["translate"], open=False): gr.Markdown(EXPLAIN_TAB_TRANSLATE_ES)
538
- with gr.Accordion(ACC_TITLES_ES["build"], open=False): gr.Markdown(EXPLAIN_TAB_BUILD_ES)
539
- with gr.Accordion(ACC_TITLES_ES["decode"], open=False): gr.Markdown(EXPLAIN_TAB_DECODE_ES)
540
- with gr.Accordion(ACC_TITLES_ES["roundtrip"], open=False): gr.Markdown(EXPLAIN_TAB_ROUNDTRIP_ES)
541
- with gr.Column():
542
- with gr.Accordion(ACC_TITLES_ES["checkbox"], open=False):
543
- gr.Markdown(EXPLAIN_CHECKBOX_ES)
544
- with gr.Accordion(ACC_TITLES_ES["lexicon"], open=False):
545
- gr.Markdown(LEXICON_BUILD_ES)
546
- n_rows = gr.Slider(5, 100, value=20, step=5, label="Filas a mostrar")
547
- table = gr.Dataframe(headers=["lemma_es","lemma_en","minimax","komin"], row_count=1, interactive=False)
548
- gr.Button("Actualizar vista").click(lambda n: master_preview(int(n)), [n_rows], [table])
549
-
550
- # ==== Tabs reactivas (sin botones) ====
551
- with gr.Tab("🔁 Traducir"):
552
- with gr.Row():
553
- uni_src = gr.Dropdown(ALL_LANGS, value="Español", label="Fuente")
554
- uni_tgt = gr.Dropdown(ALL_LANGS, value="Minimax-ASCII", label="Destino")
555
- uni_text = gr.Textbox(lines=3, label="Texto", placeholder="Ej.: Hola, ¿cómo estás?", show_copy_button=True)
556
- with gr.Row():
557
- uni_drop = gr.Checkbox(True, label="Omitir artículos (ES/EN → conlang)")
558
- uni_zero = gr.Checkbox(False, label="Cópula cero (presente afirm.)")
559
- uni_rmpr = gr.Checkbox(False, label="Quitar pronombres")
560
- uni_maxc = gr.Checkbox(False, label="Máx. Compresión Exacta (sidecar `~...`)")
561
- uni_mode = gr.Dropdown(["Semi-lossless"], value="Semi-lossless", visible=False)
562
- uni_out = gr.Textbox(lines=6, label="Traducción", show_copy_button=True)
563
- comp_out = gr.Markdown("")
564
-
565
- def do_translate(text, src, tgt, drop, zero, mode, maxc, rm):
566
- if not text.strip(): return "", ""
567
- res = universal_translate(text, src, tgt, drop, zero, mode, maxc, rm)
568
- rep = compaction_report_es(text, src, tgt, drop, zero, rm, maxc)
569
- return res, rep
570
-
571
- for c in [uni_text, uni_src, uni_tgt, uni_drop, uni_zero, uni_rmpr, uni_maxc]:
572
- c.change(do_translate,
573
- [uni_text, uni_src, uni_tgt, uni_drop, uni_zero, uni_mode, uni_maxc, uni_rmpr],
574
- [uni_out, comp_out])
575
-
576
- with gr.Tab("🛠️ Construir (ES/EN → Conlang)"):
577
- with gr.Row():
578
- src_lang = gr.Dropdown(["Español","English"], value="Español", label="Fuente")
579
- target = gr.Dropdown(["Minimax-ASCII","Kōmín-CJK"], value="Minimax-ASCII", label="Conlang")
580
- text_in = gr.Textbox(lines=3, label="Frase", show_copy_button=True)
581
- with gr.Row():
582
- drop_articles = gr.Checkbox(True, label="Omitir artículos")
583
- zero_copula = gr.Checkbox(False, label="Cópula cero (presente afirm.)")
584
- rm_pron_build = gr.Checkbox(False, label="Quitar pronombres")
585
- max_comp_build = gr.Checkbox(False, label="Máx. Compresión Exacta")
586
- mode_build = gr.Dropdown(["Semi-lossless"], value="Semi-lossless", visible=False)
587
- out = gr.Textbox(lines=6, label="Salida", show_copy_button=True)
588
- comp_out_b = gr.Markdown("")
589
-
590
- def do_build(text, src, tgt, drop, zero, mode, maxc, rm):
591
- if not text.strip(): return "", ""
592
- res = build_sentence(text, src, tgt, drop, zero, mode, maxc, rm)
593
- rep = compaction_report_es(text, src, tgt, drop, zero, rm, maxc)
594
- return res, rep
595
-
596
- for c in [text_in, src_lang, target, drop_articles, zero_copula, rm_pron_build, max_comp_build]:
597
- c.change(do_build,
598
- [text_in, src_lang, target, drop_articles, zero_copula, mode_build, max_comp_build, rm_pron_build],
599
- [out, comp_out_b])
600
-
601
- with gr.Tab("🗝️ Decodificar (Conlang → ES/EN)"):
602
- with gr.Row():
603
- src_code = gr.Dropdown(["Minimax-ASCII","Kōmín-CJK"], value="Minimax-ASCII", label="Fuente")
604
- tgt_lang = gr.Dropdown(["Español","English"], value="Español", label="Destino")
605
- code_in = gr.Textbox(lines=3, label="Texto en conlang (puede incluir `~...`)", show_copy_button=True)
606
- out3 = gr.Textbox(lines=6, label="Salida", show_copy_button=True)
607
-
608
- def decode_lossless_aware(text, src, tgt):
609
- if not text.strip(): return ""
610
- orig = extract_custom_sidecar(text)
611
- if orig is not None: return orig
612
- orig = extract_sidecar_b85(text)
613
- if orig is not None: return orig
614
- return decode_simple(strip_custom_sidecar(strip_sidecar_b85(text)), src, tgt)
615
-
616
- for c in [code_in, src_code, tgt_lang]:
617
- c.change(decode_lossless_aware, [code_in, src_code, tgt_lang], [out3])
618
-
619
- with gr.Tab("🔄 Prueba ida→vuelta"):
620
- with gr.Row():
621
- rt_src = gr.Dropdown(["Español","English"], value="Español", label="Fuente")
622
- rt_tgt = gr.Dropdown(["Minimax-ASCII","Kōmín-CJK"], value="Minimax-ASCII", label="Conlang")
623
- rt_text = gr.Textbox(lines=3, label="Frase", show_copy_button=True)
624
- rt_max_comp = gr.Checkbox(False, label="Máx. Compresión Exacta")
625
- rt_mode = gr.Dropdown(["Semi-lossless"], value="Semi-lossless", visible=False)
626
- rt_out_conlang = gr.Textbox(lines=3, label="Conlang (ida)", show_copy_button=True)
627
- rt_out_back = gr.Textbox(lines=3, label="Vuelta", show_copy_button=True)
628
-
629
- def do_roundtrip(text, src, tgt, mode, maxc):
630
- if not text.strip(): return "", ""
631
- conlang = universal_translate(text, src, tgt, True, False, mode, maxc, False)
632
- back = universal_translate(conlang, tgt, src, True, False, mode, maxc, False)
633
- return conlang, back
634
-
635
- for c in [rt_text, rt_src, rt_tgt, rt_max_comp]:
636
- c.change(do_roundtrip, [rt_text, rt_src, rt_tgt, rt_mode, rt_max_comp], [rt_out_conlang, rt_out_back])
637
  return g
638
 
639
- def make_group_en():
640
  with gr.Group(visible=False) as g:
641
- gr.Markdown("# 🌐 Universal Conlang Translator · Max Exact Compression (EN)")
 
642
  with gr.Row():
643
- with gr.Column():
644
- with gr.Accordion(ACC_TITLES_EN["translate"], open=False): gr.Markdown(EXPLAIN_TAB_TRANSLATE_EN)
645
- with gr.Accordion(ACC_TITLES_EN["build"], open=False): gr.Markdown(EXPLAIN_TAB_BUILD_EN)
646
- with gr.Accordion(ACC_TITLES_EN["decode"], open=False): gr.Markdown(EXPLAIN_TAB_DECODE_EN)
647
- with gr.Accordion(ACC_TITLES_EN["roundtrip"], open=False): gr.Markdown(EXPLAIN_TAB_ROUNDTRIP_EN)
648
- with gr.Column():
649
- with gr.Accordion(ACC_TITLES_EN["checkbox"], open=False): gr.Markdown(EXPLAIN_CHECKBOX_EN)
650
- with gr.Accordion(ACC_TITLES_EN["lexicon"], open=False):
651
- gr.Markdown(LEXICON_BUILD_EN)
652
- n_rows = gr.Slider(5, 100, value=20, step=5, label="Rows to show")
653
- table = gr.Dataframe(headers=["lemma_es","lemma_en","minimax","komin"], row_count=1, interactive=False)
654
- gr.Button("Refresh").click(lambda n: master_preview(int(n)), [n_rows], [table])
655
-
656
- with gr.Tab("🔁 Translate"):
657
- with gr.Row():
658
- uni_src = gr.Dropdown(ALL_LANGS, value="English", label="Source")
659
- uni_tgt = gr.Dropdown(ALL_LANGS, value="Minimax-ASCII", label="Target")
660
- uni_text = gr.Textbox(lines=3, label="Text", placeholder="e.g., Hello, how are you?", show_copy_button=True)
661
- with gr.Row():
662
- uni_drop = gr.Checkbox(True, label="Drop articles (ES/EN → conlang)")
663
- uni_zero = gr.Checkbox(False, label="Zero copula (present affirm.)")
664
- uni_rmpr = gr.Checkbox(False, label="Remove pronouns")
665
- uni_maxc = gr.Checkbox(False, label="Max Exact Compression (sidecar `~...`)")
666
- uni_mode = gr.Dropdown(["Semi-lossless"], value="Semi-lossless", visible=False)
667
- uni_out = gr.Textbox(lines=6, label="Translation", show_copy_button=True)
668
- comp_out = gr.Markdown("")
669
-
670
- def do_translate_en(text, src, tgt, drop, zero, mode, maxc, rm):
671
- if not text.strip(): return "", ""
672
- res = universal_translate(text, src, tgt, drop, zero, mode, maxc, rm)
673
- rep = compaction_report_en(text, src, tgt, drop, zero, rm, maxc)
674
- return res, rep
675
-
676
- for c in [uni_text, uni_src, uni_tgt, uni_drop, uni_zero, uni_rmpr, uni_maxc]:
677
- c.change(do_translate_en,
678
- [uni_text, uni_src, uni_tgt, uni_drop, uni_zero, uni_mode, uni_maxc, uni_rmpr],
679
- [uni_out, comp_out])
680
-
681
- with gr.Tab("🛠️ Build (ES/EN → Conlang)"):
682
- with gr.Row():
683
- src_lang = gr.Dropdown(["Español","English"], value="English", label="Source")
684
- target = gr.Dropdown(["Minimax-ASCII","Kōmín-CJK"], value="Minimax-ASCII", label="Conlang")
685
- text_in = gr.Textbox(lines=3, label="Sentence", show_copy_button=True)
686
- with gr.Row():
687
- drop_articles = gr.Checkbox(True, label="Drop articles")
688
- zero_copula = gr.Checkbox(False, label="Zero copula (present affirm.)")
689
- rm_pron_build = gr.Checkbox(False, label="Remove pronouns")
690
- max_comp_build = gr.Checkbox(False, label="Max Exact Compression")
691
- mode_build = gr.Dropdown(["Semi-lossless"], value="Semi-lossless", visible=False)
692
- out = gr.Textbox(lines=6, label="Output", show_copy_button=True)
693
- comp_out_b = gr.Markdown("")
694
-
695
- def do_build_en(text, src, tgt, drop, zero, mode, maxc, rm):
696
- if not text.strip(): return "", ""
697
- res = build_sentence(text, src, tgt, drop, zero, mode, maxc, rm)
698
- rep = compaction_report_en(text, src, tgt, drop, zero, rm, maxc)
699
- return res, rep
700
-
701
- for c in [text_in, src_lang, target, drop_articles, zero_copula, rm_pron_build, max_comp_build]:
702
- c.change(do_build_en,
703
- [text_in, src_lang, target, drop_articles, zero_copula, mode_build, max_comp_build, rm_pron_build],
704
- [out, comp_out_b])
705
-
706
- with gr.Tab("🗝️ Decode (Conlang → ES/EN)"):
707
- with gr.Row():
708
- src_code = gr.Dropdown(["Minimax-ASCII","Kōmín-CJK"], value="Minimax-ASCII", label="Source")
709
- tgt_lang = gr.Dropdown(["Español","English"], value="English", label="Target")
710
- code_in = gr.Textbox(lines=3, label="Conlang text (may include `~...`)", show_copy_button=True)
711
- out3 = gr.Textbox(lines=6, label="Output", show_copy_button=True)
712
-
713
- def decode_lossless_aware_en(text, src, tgt):
714
- if not text.strip(): return ""
715
- orig = extract_custom_sidecar(text)
716
- if orig is not None: return orig
717
- orig = extract_sidecar_b85(text)
718
- if orig is not None: return orig
719
- return decode_simple(strip_custom_sidecar(strip_sidecar_b85(text)), src, tgt)
720
-
721
- for c in [code_in, src_code, tgt_lang]:
722
- c.change(decode_lossless_aware_en, [code_in, src_code, tgt_lang], [out3])
723
-
724
- with gr.Tab("🔄 Round-trip"):
725
- with gr.Row():
726
- rt_src = gr.Dropdown(["Español","English"], value="English", label="Source")
727
- rt_tgt = gr.Dropdown(["Minimax-ASCII","Kōmín-CJK"], value="Minimax-ASCII", label="Conlang")
728
- rt_text = gr.Textbox(lines=3, label="Sentence", show_copy_button=True)
729
- rt_max_comp = gr.Checkbox(False, label="Max Exact Compression")
730
- rt_mode = gr.Dropdown(["Semi-lossless"], value="Semi-lossless", visible=False)
731
- rt_out_conlang = gr.Textbox(lines=3, label="Outward (conlang)", show_copy_button=True)
732
- rt_out_back = gr.Textbox(lines=3, label="Back", show_copy_button=True)
733
-
734
- def do_roundtrip_en(text, src, tgt, mode, maxc):
735
- if not text.strip(): return "", ""
736
- conlang = universal_translate(text, src, tgt, True, False, mode, maxc, False)
737
- back = universal_translate(conlang, tgt, src, True, False, mode, maxc, False)
738
- return conlang, back
739
-
740
- for c in [rt_text, rt_src, rt_tgt, rt_max_comp]:
741
- c.change(do_roundtrip_en, [rt_text, rt_src, rt_tgt, rt_mode, rt_max_comp], [rt_out_conlang, rt_out_back])
742
  return g
743
 
744
- # ================================ App ================================
745
- with gr.Blocks(title="Universal Conlang Translator", theme=gr.themes.Soft()) as demo:
746
- gr.Markdown("## 🌍 Idioma / Language")
747
- lang_select = gr.Radio(["ES","EN"], value="ES", label="Selecciona / Select")
748
- group_es = make_group_es()
749
- group_en = make_group_en()
 
 
 
 
 
 
 
 
 
 
 
 
 
750
 
751
- def switch_lang(code):
752
- if code == "EN":
753
- return gr.update(visible=False), gr.update(visible=True)
754
- return gr.update(visible=True), gr.update(visible=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
755
 
756
- lang_select.change(switch_lang, [lang_select], [group_es, group_en])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
757
 
758
  if __name__ == "__main__":
759
  demo.launch()
@@ -764,3 +703,4 @@ if __name__ == "__main__":
764
 
765
 
766
 
 
 
412
 
413
  ALL_LANGS = ["Español","English","Minimax-ASCII","Kōmín-CJK"]
414
 
415
+ # ---------- Explicaciones en lenguaje llano ----------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
416
  EXPLAIN_TAB_TRANSLATE_ES = """
417
+ **¿Qué hace “Traducir”?**
418
+ Convierte lo que escribes en **Texto** al **Destino** que elijas. Funciona con Español, Inglés y los dos **conlangs** (Minimax/Kōmín).
419
+ - Si marcas **Máx. Compresión Exacta**, añade un final `~...` con el **original comprimido** para poder **recuperarlo tal cual** al decodificar.
420
+ - Los checkbox de **compactación** (artículos, cópula, pronombres) **sólo se aplican cuando el destino es conlang**.
421
  """
422
  EXPLAIN_TAB_BUILD_ES = """
423
+ **¿Qué hace “Construir (ES/EN → Conlang)”?**
424
+ Obliga a que la salida sea **Minimax o Kōmín** (desde ES/EN). Aplica el orden y las partículas propias del conlang y las opciones de **compactación**.
425
  """
426
  EXPLAIN_TAB_DECODE_ES = """
427
+ **¿Qué hace “Decodificar (Conlang → ES/EN)”?**
428
+ Convierte de **Minimax/Kōmín** a **Español/Inglés**.
429
+ - Si el texto trae `~...`, devolvemos el **original exacto**.
430
+ - Si no, reconstruimos lo más fiel posible usando el **diccionario**.
431
  """
432
  EXPLAIN_TAB_ROUNDTRIP_ES = """
433
+ **¿Qué hace “Prueba ida→vuelta”?**
434
+ Hace el camino completo: **(ES/EN → Conlang) → (Conlang → ES/EN)** para comprobar que lo que sale **vuelve bien**.
435
+ Con **exacta**, la vuelta coincide **bit a bit**.
436
  """
437
  EXPLAIN_CHECKBOX_ES = """
438
+ **Opciones de compactación (para conlang):**
439
+ - **Omitir artículos** (*el/la/los/las*; *a/an/the*): ahorro típico **~10–15%**.
440
+ - **Cópula cero** (presente afirmativo): oculta *ser/estar/be* → **~5–10%** extra.
441
+ - **Quitar pronombres**: suprime pronombres obvios → ahorro **variable**.
442
+ - **Máx. Compresión Exacta**: añade `~...` para recuperar el original. En textos >100 caracteres: **~40–60%** (en muy cortos puede no reducir).
443
+ **Guía rápida:** sin casillas **0%**; artículos+cópula **~15–20%**.
444
  """
445
 
446
+ # Léxico en lenguaje muy sencillo
447
+ LEXICON_FRIENDLY_ES = """
448
+ **¿De dónde sale el “diccionario” (léxico) y para qué sirve?**
449
+ - Partimos de una base pública llamada **WordNet** (versión multilingüe OMW).
450
+ - Para cada palabra española buscamos su **equivalente en inglés**.
451
+ - Limpiamos y ordenamos las palabras por **frecuencia de uso** (las comunes primero).
452
+ - A cada lema le damos un **código corto** (para Minimax o para Kōmín). Los códigos cortos ayudan a **ahorrar espacio**.
453
+ - Guardamos todo en **tres archivos** que la app usa al traducir:
454
+ - `lexicon_minimax.json` (ES → códigos Minimax)
455
+ - `lexicon_komin.json` (ES → códigos Kōmín)
456
+ - `lexicon_master.json` (ES + EN + ambos códigos)
457
+
458
+ **Conclusión:** este diccionario permite que tus frases se conviertan a **códigos compactos**, y también volver de esos códigos a una frase entendible.
459
  """
460
 
461
+ # Versiones EN breves
462
+ EXPLAIN_TAB_TRANSLATE_EN = "Converts **Text → Target** (ES/EN/Minimax/Kōmín). With **Max Exact**, adds `~...` to recover the **exact original**. Compaction checkboxes apply only when **Target is conlang**."
463
+ EXPLAIN_TAB_BUILD_EN = "Forces **conlang output** (Minimax/Kōmín) from ES/EN, applying phrasing rules and compaction options."
464
+ EXPLAIN_TAB_DECODE_EN = "Converts **Minimax/Kōmín → ES/EN**. If `~...` exists, returns the bit-perfect original; else semi-lossless."
465
+ EXPLAIN_TAB_ROUNDTRIP_EN = "Runs **(ES/EN→Conlang)→(Conlang→ES/EN)** to verify reversibility; with exact, it’s bit-perfect."
466
+ EXPLAIN_CHECKBOX_EN = "Drop articles ~10–15%, Zero copula ~5–10% extra, Remove pronouns variable, Max Exact 40–60% for >100 chars."
467
+ LEXICON_FRIENDLY_EN = "We use **WordNet (OMW)**, pair ES words with EN, clean & sort by frequency, assign short **codes** (Minimax/Kōmín), and save three JSONs so the app can encode/decode compactly."
 
 
 
 
 
 
 
 
 
468
 
469
+ # ---------- Utilidades de compactación y vista previa ----------
470
  def _pct_comp(original: str, result: str) -> float:
471
  if not original: return 0.0
472
  return max(0.0, 100.0 * (1.0 - (len(result) / len(original))))
473
 
474
+ def compaction_line_es(text, src, tgt, drop, zero, rm, maxc) -> str:
475
  if not text.strip(): return "—"
476
  if tgt not in ("Minimax-ASCII","Kōmín-CJK"):
477
  return "La compactación aplica cuando el **Destino** es Minimax/Kōmín."
478
  base = build_sentence(text, src, tgt, False, False, "Semi-lossless", False, False)
479
  curr = build_sentence(text, src, tgt, drop, zero, "Semi-lossless", False, rm)
480
+ msg = f"**Base (sin casillas):** {_pct_comp(text, base):.1f}% · **Con tus opciones:** {_pct_comp(text, curr):.1f}%"
 
481
  if maxc:
482
  curr_exact = build_sentence(text, src, tgt, drop, zero, "Semi-lossless", True, rm)
483
+ msg += f" · **Con sidecar `~...`:** {_pct_comp(text, curr_exact):.1f}%"
 
484
  return msg
485
 
486
+ def compaction_line_en(text, src, tgt, drop, zero, rm, maxc) -> str:
487
  if not text.strip(): return "—"
488
  if tgt not in ("Minimax-ASCII","Kōmín-CJK"):
489
  return "Compaction applies when **Target** is Minimax/Kōmín."
490
  base = build_sentence(text, src, tgt, False, False, "Semi-lossless", False, False)
491
  curr = build_sentence(text, src, tgt, drop, zero, "Semi-lossless", False, rm)
492
+ msg = f"**Base (no options):** {_pct_comp(text, base):.1f}% · **With your options:** {_pct_comp(text, curr):.1f}%"
 
493
  if maxc:
494
  curr_exact = build_sentence(text, src, tgt, drop, zero, "Semi-lossless", True, rm)
495
+ msg += f" · **With `~...` sidecar:** {_pct_comp(text, curr_exact):.1f}%"
 
496
  return msg
497
 
498
  def master_preview(n: int = 20) -> List[List[Any]]:
 
506
  except Exception:
507
  return [["lemma_es","lemma_en","minimax","komin"], ["(no data)","","",""]]
508
 
509
+ # ---------- Paneles (uno visible según “modo”) ----------
510
+ def make_panel_translate(lang="ES"):
511
  with gr.Group(visible=True) as g:
512
+ with gr.Accordion(("🔁 Traducir — ayuda" if lang=="ES" else "🔁 Translate — help"), open=False):
513
+ gr.Markdown(EXPLAIN_TAB_TRANSLATE_ES if lang=="ES" else EXPLAIN_TAB_TRANSLATE_EN)
514
+ with gr.Row():
515
+ src = gr.Dropdown(ALL_LANGS, value=("Español" if lang=="ES" else "English"), label=("Fuente" if lang=="ES" else "Source"))
516
+ tgt = gr.Dropdown(ALL_LANGS, value="Minimax-ASCII", label=("Destino" if lang=="ES" else "Target"))
517
+ text = gr.Textbox(lines=3, label=("Texto" if lang=="ES" else "Text"),
518
+ placeholder=("Ej.: Hola, ¿cómo estás?" if lang=="ES" else "e.g., Hello, how are you?"),
519
+ show_copy_button=True)
520
  with gr.Row():
521
+ drop = gr.Checkbox(True, label=("Omitir artículos (ES/EN → conlang)" if lang=="ES" else "Drop articles (ES/EN → conlang)"))
522
+ zero = gr.Checkbox(False, label=("Cópula cero (presente afirm.)" if lang=="ES" else "Zero copula (present affirmative)"))
523
+ rmpr = gr.Checkbox(False, label=("Quitar pronombres" if lang=="ES" else "Remove pronouns"))
524
+ exact = gr.Checkbox(False, label=("Máx. Compresión Exacta (sidecar `~...`)" if lang=="ES" else "Max Exact Compression (sidecar `~...`)"))
525
+ mode_hidden = gr.Dropdown(["Semi-lossless"], value="Semi-lossless", visible=False)
526
+ out = gr.Textbox(lines=6, label=("Traducción" if lang=="ES" else "Translation"), show_copy_button=True)
527
+ comp = gr.Markdown("")
528
+ def run(text, s, t, d, z, m, e, r):
529
+ if not text.strip(): return "", ""
530
+ res = universal_translate(text, s, t, d, z, m, e, r)
531
+ rep = (compaction_line_es if lang=="ES" else compaction_line_en)(text, s, t, d, z, r, e)
532
+ return res, rep
533
+ for c in [text, src, tgt, drop, zero, rmpr, exact]:
534
+ c.change(run, [text, src, tgt, drop, zero, mode_hidden, exact, rmpr], [out, comp])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
535
  return g
536
 
537
+ def make_panel_build(lang="ES"):
538
  with gr.Group(visible=False) as g:
539
+ with gr.Accordion(("🛠️ Construir — ayuda" if lang=="ES" else "🛠️ Build — help"), open=False):
540
+ gr.Markdown(EXPLAIN_TAB_BUILD_ES if lang=="ES" else EXPLAIN_TAB_BUILD_EN)
541
  with gr.Row():
542
+ src = gr.Dropdown(["Español","English"], value=("Español" if lang=="ES" else "English"), label=("Fuente" if lang=="ES" else "Source"))
543
+ tgt = gr.Dropdown(["Minimax-ASCII","Kōmín-CJK"], value="Minimax-ASCII", label="Conlang")
544
+ text = gr.Textbox(lines=3, label=("Frase" if lang=="ES" else "Sentence"), show_copy_button=True)
545
+ with gr.Row():
546
+ drop = gr.Checkbox(True, label=("Omitir artículos" if lang=="ES" else "Drop articles"))
547
+ zero = gr.Checkbox(False, label=("Cópula cero (presente afirm.)" if lang=="ES" else "Zero copula (present affirmative)"))
548
+ rmpr = gr.Checkbox(False, label=("Quitar pronombres" if lang=="ES" else "Remove pronouns"))
549
+ exact = gr.Checkbox(False, label=("Máx. Compresión Exacta" if lang=="ES" else "Max Exact Compression"))
550
+ mode_hidden = gr.Dropdown(["Semi-lossless"], value="Semi-lossless", visible=False)
551
+ out = gr.Textbox(lines=6, label=("Salida" if lang=="ES" else "Output"), show_copy_button=True)
552
+ comp = gr.Markdown("")
553
+ def run(text, s, t, d, z, m, e, r):
554
+ if not text.strip(): return "", ""
555
+ res = build_sentence(text, s, t, d, z, m, e, r)
556
+ rep = (compaction_line_es if lang=="ES" else compaction_line_en)(text, s, t, d, z, r, e)
557
+ return res, rep
558
+ for c in [text, src, tgt, drop, zero, rmpr, exact]:
559
+ c.change(run, [text, src, tgt, drop, zero, mode_hidden, exact, rmpr], [out, comp])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
560
  return g
561
 
562
+ def make_panel_decode(lang="ES"):
563
+ with gr.Group(visible=False) as g:
564
+ with gr.Accordion(("🗝️ Decodificar — ayuda" if lang=="ES" else "🗝️ Decode — help"), open=False):
565
+ gr.Markdown(EXPLAIN_TAB_DECODE_ES if lang=="ES" else EXPLAIN_TAB_DECODE_EN)
566
+ with gr.Row():
567
+ src = gr.Dropdown(["Minimax-ASCII","Kōmín-CJK"], value="Minimax-ASCII", label=("Fuente" if lang=="ES" else "Source"))
568
+ tgt = gr.Dropdown(["Español","English"], value=("Español" if lang=="ES" else "English"), label=("Destino" if lang=="ES" else "Target"))
569
+ text = gr.Textbox(lines=3, label=("Texto en conlang (puede incluir `~...`)" if lang=="ES" else "Conlang text (may include `~...`)"), show_copy_button=True)
570
+ out = gr.Textbox(lines=6, label=("Salida" if lang=="ES" else "Output"), show_copy_button=True)
571
+ def run(t, s, d):
572
+ if not t.strip(): return ""
573
+ orig = extract_custom_sidecar(t)
574
+ if orig is not None: return orig
575
+ orig = extract_sidecar_b85(t)
576
+ if orig is not None: return orig
577
+ return decode_simple(strip_custom_sidecar(strip_sidecar_b85(t)), s, d)
578
+ for c in [text, src, tgt]:
579
+ c.change(run, [text, src, tgt], [out])
580
+ return g
581
 
582
+ def make_panel_roundtrip(lang="ES"):
583
+ with gr.Group(visible=False) as g:
584
+ with gr.Accordion(("🔄 Prueba ida→vuelta — ayuda" if lang=="ES" else "🔄 Round-trip — help"), open=False):
585
+ gr.Markdown(EXPLAIN_TAB_ROUNDTRIP_ES if lang=="ES" else EXPLAIN_TAB_ROUNDTRIP_EN)
586
+ with gr.Row():
587
+ src = gr.Dropdown(["Español","English"], value=("Español" if lang=="ES" else "English"), label=("Fuente" if lang=="ES" else "Source"))
588
+ tgt = gr.Dropdown(["Minimax-ASCII","Kōmín-CJK"], value="Minimax-ASCII", label="Conlang")
589
+ text = gr.Textbox(lines=3, label=("Frase" if lang=="ES" else "Sentence"), show_copy_button=True)
590
+ exact = gr.Checkbox(False, label=("Máx. Compresión Exacta" if lang=="ES" else "Max Exact Compression"))
591
+ mode_hidden = gr.Dropdown(["Semi-lossless"], value="Semi-lossless", visible=False)
592
+ out1 = gr.Textbox(lines=3, label=("Conlang (ida)" if lang=="ES" else "Outward (conlang)"), show_copy_button=True)
593
+ out2 = gr.Textbox(lines=3, label=("Vuelta" if lang=="ES" else "Back"), show_copy_button=True)
594
+ def run(t, s, c, m, e):
595
+ if not t.strip(): return "", ""
596
+ conlang = universal_translate(t, s, c, True, False, m, e, False)
597
+ back = universal_translate(conlang, c, s, True, False, m, e, False)
598
+ return conlang, back
599
+ for c in [text, src, tgt, exact]:
600
+ c.change(run, [text, src, tgt, mode_hidden, exact], [out1, out2])
601
+ return g
602
 
603
+ # ---------- Página (ES/EN), con “modos” como CHECKBOX (mutuamente excluyentes) ----------
604
with gr.Blocks(title="Universal Conlang Translator", theme=gr.themes.Soft()) as demo:
    gr.Markdown("## 🌍 Idioma / Language")
    lang = gr.Radio(["ES","EN"], value="ES", label="Selecciona / Select")

    # Intro + lexicon accordions (same nesting level); one copy per UI language,
    # only the copy matching the selected language is visible.
    acc_intro_es = gr.Accordion("☑️ Opciones y compactación — guía rápida (ES)", open=False, visible=True)
    with acc_intro_es:
        gr.Markdown(EXPLAIN_CHECKBOX_ES)
    acc_intro_en = gr.Accordion("☑️ Options & compaction — quick guide (EN)", open=False, visible=False)
    with acc_intro_en:
        gr.Markdown(EXPLAIN_CHECKBOX_EN)

    acc_lex_es = gr.Accordion("ℹ️ Léxico — explicación y vista previa (ES)", open=False, visible=True)
    with acc_lex_es:
        gr.Markdown(LEXICON_FRIENDLY_ES)
        n_rows_es = gr.Slider(5, 100, value=20, step=5, label="Filas a mostrar")
        table_es = gr.Dataframe(headers=["lemma_es","lemma_en","minimax","komin"], row_count=1, interactive=False)
        gr.Button("Actualizar vista").click(lambda n: master_preview(int(n)), [n_rows_es], [table_es])

    acc_lex_en = gr.Accordion("ℹ️ Lexicon — explainer & preview (EN)", open=False, visible=False)
    with acc_lex_en:
        gr.Markdown(LEXICON_FRIENDLY_EN)
        n_rows_en = gr.Slider(5, 100, value=20, step=5, label="Rows to show")
        table_en = gr.Dataframe(headers=["lemma_es","lemma_en","minimax","komin"], row_count=1, interactive=False)
        gr.Button("Refresh").click(lambda n: master_preview(int(n)), [n_rows_en], [table_en])

    # "Modes" rendered as checkboxes that behave as a mutually exclusive group.
    gr.Markdown("### 🧭 Modo de uso (marca **uno**)")
    row_modes = gr.Row()
    with row_modes:
        cb_tr = gr.Checkbox(True, label="🔁 Traducir / Translate")
        cb_bu = gr.Checkbox(False, label="🛠️ Construir (ES/EN → Conlang) / Build")
        cb_de = gr.Checkbox(False, label="🗝️ Decodificar (Conlang → ES/EN) / Decode")
        cb_rt = gr.Checkbox(False, label="🔄 Prueba ida→vuelta / Round-trip")

    # Remembers which mode is currently active. Without it the handler cannot
    # tell which box the user just ticked, and a fixed priority order would
    # always snap back to "Translate" (it starts checked and wins every tie).
    mode_state = gr.State("tr")

    # One panel per (mode, language) pair.
    gr.Markdown("### 🧪 Área de trabajo")
    panel_tr_es = make_panel_translate("ES"); panel_bu_es = make_panel_build("ES")
    panel_de_es = make_panel_decode("ES"); panel_rt_es = make_panel_roundtrip("ES")
    panel_tr_en = make_panel_translate("EN"); panel_bu_en = make_panel_build("EN")
    panel_de_en = make_panel_decode("EN"); panel_rt_en = make_panel_roundtrip("EN")

    # Mode keys, in the same order as the checkboxes and the panel outputs.
    _MODES = ("tr", "bu", "de", "rt")

    def _vis(yes):
        """Return a Gradio update that only sets ``visible``."""
        return gr.update(visible=bool(yes))

    def switch_everything(lang_code, tr, bu, de, rt, current="tr"):
        """Resolve the active mode and compute every visibility/checkbox update.

        Args:
            lang_code: ``"EN"`` shows the English widgets; anything else shows
                the Spanish ones.
            tr, bu, de, rt: current values of the four mode checkboxes.
            current: key of the mode that was active before this event.

        Returns:
            A list matching ``switch_outputs``: 4 accordion updates, 4 ES panel
            updates, 4 EN panel updates, 4 checkbox booleans, and the new mode
            key for ``mode_state``.
        """
        flags = dict(zip(_MODES, (tr, bu, de, rt)))
        if current not in flags:
            current = "tr"
        # A box that is checked but is not the active mode is the one the user
        # just ticked: switch to it. If there is none (the user unticked the
        # active box, or only the language changed) keep the active mode so
        # exactly one box is always checked.
        newly_checked = [m for m in _MODES if flags[m] and m != current]
        mode = newly_checked[0] if newly_checked else current

        is_en = (lang_code == "EN")
        vis_es = not is_en
        vis_en = is_en
        updates = [
            _vis(vis_es), _vis(vis_en),  # intro accordions
            _vis(vis_es), _vis(vis_en),  # lexicon accordions
        ]
        # Panels: only the one matching both the language and the mode is shown.
        updates += [_vis(vis_es and mode == m) for m in _MODES]
        updates += [_vis(vis_en and mode == m) for m in _MODES]
        # Checkbox values (exclusive) and the remembered mode.
        updates += [mode == m for m in _MODES]
        updates.append(mode)
        return updates

    switch_inputs = [lang, cb_tr, cb_bu, cb_de, cb_rt, mode_state]
    switch_outputs = [
        acc_intro_es, acc_intro_en, acc_lex_es, acc_lex_en,
        panel_tr_es, panel_bu_es, panel_de_es, panel_rt_es,
        panel_tr_en, panel_bu_en, panel_de_en, panel_rt_en,
        cb_tr, cb_bu, cb_de, cb_rt,
        mode_state,
    ]

    lang.change(switch_everything, switch_inputs, switch_outputs)
    for box in (cb_tr, cb_bu, cb_de, cb_rt):
        box.change(switch_everything, switch_inputs, switch_outputs)

    # Panels are built hidden, so sync visibility once when the page loads;
    # otherwise the default mode's panel only appears after the first event.
    demo.load(switch_everything, switch_inputs, switch_outputs)
696
 
697
# Start the Gradio app only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    demo.launch()
 
703
 
704
 
705
 
706
+