Update app.py
Browse files
app.py
CHANGED
|
@@ -119,7 +119,7 @@ def dec_oov_komin(code: str) -> str:
|
|
| 119 |
def is_oov_minimax(code: str) -> bool:
    """Return True when *code* is an out-of-vocabulary Minimax-ASCII token.

    OOV tokens are marked with a leading "~" followed by at least one
    payload character; a bare "~" is not a valid token.
    """
    if not code.startswith("~"):
        return False
    return len(code) > 1
|
| 121 |
def is_oov_komin(code: str) -> bool:
    """Return True when *code* is an out-of-vocabulary Komin token.

    Komin OOV tokens are fully bracketed as 「payload」.  The original check
    only tested the opening bracket, so an unterminated fragment like "「x"
    was misclassified as a valid OOV code; require the closing bracket too.
    """
    return len(code) >= 2 and code.startswith("「") and code.endswith("」")
|
| 123 |
|
| 124 |
# ------------ spaCy opcional ------------
|
| 125 |
USE_SPACY = False
|
|
@@ -185,7 +185,7 @@ def extract_core(doc):
|
|
| 185 |
objs.append(t)
|
| 186 |
elif t.dep_ in ("obl","pobj"):
|
| 187 |
obls.append(t)
|
| 188 |
-
elif t.dep_ in ("advmod","advcl")
|
| 189 |
advs.append(t)
|
| 190 |
subs.sort(key=lambda x: x.i); objs.sort(key=lambda x: x.i)
|
| 191 |
obls.sort(key=lambda x: x.i); advs.sort(key=lambda x: x.i)
|
|
@@ -196,7 +196,7 @@ def _person_of_doc(doc, src_lang: str) -> Optional[str]:
|
|
| 196 |
root = next((t for t in doc if t.dep_=="ROOT"), doc[0])
|
| 197 |
subj = next((t for t in root.children if t.dep_.startswith("nsubj")), None)
|
| 198 |
if subj is None: return None
|
| 199 |
-
plur = ("Number=Plur" in str(subj.morph)) if src_lang=="Español" else (subj.tag_ in ("
|
| 200 |
low = subj.lower_
|
| 201 |
if src_lang=="Español":
|
| 202 |
if low in ("yo",): return "1p" if plur else "1s"
|
|
@@ -263,7 +263,7 @@ def realize_minimax(doc, src_lang: str, drop_articles=True, zero_copula=True, se
|
|
| 263 |
O = realize_np(objs) + realize_np(obls)
|
| 264 |
ADV=[code_es(lemma_of(a, src_lang), "Minimax-ASCII") if src_lang=="Español" else code_en(lemma_of(a, src_lang), "Minimax-ASCII") for a in advs] if USE_SPACY else []
|
| 265 |
|
| 266 |
-
if zero_copula
|
| 267 |
parts = S + O + ADV
|
| 268 |
else:
|
| 269 |
parts = [vcode] + S + O + ADV
|
|
@@ -298,7 +298,7 @@ def realize_komin(doc, src_lang: str, drop_articles=True, zero_copula=True, semi
|
|
| 298 |
|
| 299 |
v_form = vcode + TAM + (NEG_M if is_neg else "")
|
| 300 |
|
| 301 |
-
if zero_copula
|
| 302 |
parts = S + O + ADV
|
| 303 |
else:
|
| 304 |
parts = S + O + ADV + [v_form]
|
|
@@ -360,7 +360,7 @@ def encode_simple(text: str, src_lang: str, target: str) -> str:
|
|
| 360 |
def repl_en(m):
|
| 361 |
key = norm_en(m.group(0))
|
| 362 |
table = EN2MINI if target=="Minimax-ASCII" else EN2KOMI
|
| 363 |
-
if table
|
| 364 |
return table[key]
|
| 365 |
return enc_oov_minimax(m.group(0)) if target=="Minimax-ASCII" else enc_oov_komin(m.group(0))
|
| 366 |
repl = repl_es if src_lang=="Español" else repl_en
|
|
@@ -386,7 +386,7 @@ def pluralize(word: str, tgt_lang: str) -> str:
|
|
| 386 |
mini_tail_re = re.compile(r"^(?P<stem>.+?)·(?P<tail>[PTFNQ12sp]+)$")
|
| 387 |
|
| 388 |
def decode_simple(text: str, source: str, tgt_lang: str) -> str:
|
| 389 |
-
if not text.strip():
|
| 390 |
return ""
|
| 391 |
code2es = MINI2ES if source=="Minimax-ASCII" else KOMI2ES
|
| 392 |
code2en = MINI2EN if source=="Minimax-ASCII" else KOMI2EN
|
|
@@ -466,7 +466,7 @@ def decode_simple(text: str, source: str, tgt_lang: str) -> str:
|
|
| 466 |
v_conj = ("no " if tgt_lang == "Español" else "not ") + v_conj
|
| 467 |
out_parts.append(v_conj)
|
| 468 |
continue
|
| 469 |
-
|
| 470 |
out_parts.append(pluralize(lem, tgt_lang) if pl_flags[idx] else lem)
|
| 471 |
|
| 472 |
out_text = " ".join(out_parts)
|
|
@@ -531,11 +531,11 @@ def _en_conj(lemma, tense, person):
|
|
| 531 |
return "goes" if (tense=="Pres" and person=="3s") else "go"
|
| 532 |
if lemma == "do":
|
| 533 |
if tense == "Past": return "did"
|
| 534 |
-
return "does" if (tense=="Pres"
|
| 535 |
|
| 536 |
if tense == "Pres":
|
| 537 |
if person == "3s":
|
| 538 |
-
if lemma.endswith("y")
|
| 539 |
return lemma[:-1] + "ies"
|
| 540 |
if lemma.endswith(("s","sh","ch","x","z","o")):
|
| 541 |
return lemma + "es"
|
|
@@ -543,7 +543,7 @@ def _en_conj(lemma, tense, person):
|
|
| 543 |
return lemma
|
| 544 |
elif tense == "Past":
|
| 545 |
if lemma.endswith("e"): return lemma + "d"
|
| 546 |
-
if lemma.endswith("y")
|
| 547 |
return lemma + "ed"
|
| 548 |
else:
|
| 549 |
return lemma
|
|
@@ -562,7 +562,7 @@ def build_sentence(text: str, src_lang: str, target: str,
|
|
| 562 |
drop_articles: bool, zero_copula: bool, mode: str, max_comp_exact: bool = False) -> str:
|
| 563 |
if not text.strip(): return ""
|
| 564 |
semi = True # siempre semi-lossless
|
| 565 |
-
core = _build_with_spacy(text, src_lang, target, drop_articles, zero_copula
|
| 566 |
if max_comp_exact:
|
| 567 |
return custom_sidecar_enc(core, text)
|
| 568 |
return core
|
|
@@ -578,7 +578,7 @@ def universal_translate(text: str, src: str, tgt: str,
|
|
| 578 |
orig = extract_custom_sidecar(text)
|
| 579 |
if orig is not None: return orig
|
| 580 |
orig = extract_sidecar_b85(text)
|
| 581 |
-
if orig
|
| 582 |
return decode_simple(strip_custom_sidecar(strip_sidecar_b85(text)), src, tgt)
|
| 583 |
if src in ("Español","English") and tgt in ("Español","English"):
|
| 584 |
return translate_natural(text, src, tgt)
|
|
|
|
| 119 |
def is_oov_minimax(code: str) -> bool:
    """True iff *code* is a Minimax-ASCII OOV token: "~" plus a non-empty payload."""
    return len(code) > 1 and code[0] == "~"
|
| 121 |
def is_oov_komin(code: str) -> bool:
    """True iff *code* is a Komin OOV token bracketed as 「payload」."""
    if len(code) < 2:
        return False
    return code.startswith("「") and code.endswith("」")
|
| 123 |
|
| 124 |
# ------------ spaCy opcional ------------
|
| 125 |
USE_SPACY = False
|
|
|
|
| 185 |
objs.append(t)
|
| 186 |
elif t.dep_ in ("obl","pobj"):
|
| 187 |
obls.append(t)
|
| 188 |
+
elif t.dep_ in ("advmod","advcl") and t.pos_ == "ADV":
|
| 189 |
advs.append(t)
|
| 190 |
subs.sort(key=lambda x: x.i); objs.sort(key=lambda x: x.i)
|
| 191 |
obls.sort(key=lambda x: x.i); advs.sort(key=lambda x: x.i)
|
|
|
|
| 196 |
root = next((t for t in doc if t.dep_=="ROOT"), doc[0])
|
| 197 |
subj = next((t for t in root.children if t.dep_.startswith("nsubj")), None)
|
| 198 |
if subj is None: return None
|
| 199 |
+
plur = ("Number=Plur" in str(subj.morph)) if src_lang=="Español" else (subj.tag_ in ("NSS","NNPS","NNS"))
|
| 200 |
low = subj.lower_
|
| 201 |
if src_lang=="Español":
|
| 202 |
if low in ("yo",): return "1p" if plur else "1s"
|
|
|
|
| 263 |
O = realize_np(objs) + realize_np(obls)
|
| 264 |
ADV=[code_es(lemma_of(a, src_lang), "Minimax-ASCII") if src_lang=="Español" else code_en(lemma_of(a, src_lang), "Minimax-ASCII") for a in advs] if USE_SPACY else []
|
| 265 |
|
| 266 |
+
if zero_copula and not semi_lossless and vlem in ("ser","estar","be") and tense=="Pres" and not is_neg and not is_q:
|
| 267 |
parts = S + O + ADV
|
| 268 |
else:
|
| 269 |
parts = [vcode] + S + O + ADV
|
|
|
|
| 298 |
|
| 299 |
v_form = vcode + TAM + (NEG_M if is_neg else "")
|
| 300 |
|
| 301 |
+
if zero_copula and not semi_lossless and vlem in ("ser","estar","be") and tense=="Pres" and not is_neg and not is_q:
|
| 302 |
parts = S + O + ADV
|
| 303 |
else:
|
| 304 |
parts = S + O + ADV + [v_form]
|
|
|
|
| 360 |
def repl_en(m):
|
| 361 |
key = norm_en(m.group(0))
|
| 362 |
table = EN2MINI if target=="Minimax-ASCII" else EN2KOMI
|
| 363 |
+
if table and key in table:
|
| 364 |
return table[key]
|
| 365 |
return enc_oov_minimax(m.group(0)) if target=="Minimax-ASCII" else enc_oov_komin(m.group(0))
|
| 366 |
repl = repl_es if src_lang=="Español" else repl_en
|
|
|
|
| 386 |
mini_tail_re = re.compile(r"^(?P<stem>.+?)·(?P<tail>[PTFNQ12sp]+)$")
|
| 387 |
|
| 388 |
def decode_simple(text: str, source: str, tgt_lang: str) -> str:
|
| 389 |
+
if not text.strip():
|
| 390 |
return ""
|
| 391 |
code2es = MINI2ES if source=="Minimax-ASCII" else KOMI2ES
|
| 392 |
code2en = MINI2EN if source=="Minimax-ASCII" else KOMI2EN
|
|
|
|
| 466 |
v_conj = ("no " if tgt_lang == "Español" else "not ") + v_conj
|
| 467 |
out_parts.append(v_conj)
|
| 468 |
continue
|
| 469 |
+
# resto
|
| 470 |
out_parts.append(pluralize(lem, tgt_lang) if pl_flags[idx] else lem)
|
| 471 |
|
| 472 |
out_text = " ".join(out_parts)
|
|
|
|
| 531 |
return "goes" if (tense=="Pres" and person=="3s") else "go"
|
| 532 |
if lemma == "do":
|
| 533 |
if tense == "Past": return "did"
|
| 534 |
+
return "does" if (tense=="Pres" and person=="3s") else "do"
|
| 535 |
|
| 536 |
if tense == "Pres":
|
| 537 |
if person == "3s":
|
| 538 |
+
if lemma.endswith("y") and (len(lemma)<2 or lemma[-2] not in "aeiou"):
|
| 539 |
return lemma[:-1] + "ies"
|
| 540 |
if lemma.endswith(("s","sh","ch","x","z","o")):
|
| 541 |
return lemma + "es"
|
|
|
|
| 543 |
return lemma
|
| 544 |
elif tense == "Past":
|
| 545 |
if lemma.endswith("e"): return lemma + "d"
|
| 546 |
+
if lemma.endswith("y") and (len(lemma)<2 or lemma[-2] not in "aeiou"): return lemma[:-1] + "ied"
|
| 547 |
return lemma + "ed"
|
| 548 |
else:
|
| 549 |
return lemma
|
|
|
|
| 562 |
drop_articles: bool, zero_copula: bool, mode: str, max_comp_exact: bool = False) -> str:
|
| 563 |
if not text.strip(): return ""
|
| 564 |
semi = True # siempre semi-lossless
|
| 565 |
+
core = _build_with_spacy(text, src_lang, target, drop_articles, zero_copula and not semi, semi_lossless=semi) if USE_SPACY else encode_simple(text, src_lang, target)
|
| 566 |
if max_comp_exact:
|
| 567 |
return custom_sidecar_enc(core, text)
|
| 568 |
return core
|
|
|
|
| 578 |
orig = extract_custom_sidecar(text)
|
| 579 |
if orig is not None: return orig
|
| 580 |
orig = extract_sidecar_b85(text)
|
| 581 |
+
if orig is not None: return orig
|
| 582 |
return decode_simple(strip_custom_sidecar(strip_sidecar_b85(text)), src, tgt)
|
| 583 |
if src in ("Español","English") and tgt in ("Español","English"):
|
| 584 |
return translate_natural(text, src, tgt)
|