LoloSemper committed on
Commit
38a24a1
·
verified ·
1 Parent(s): 33dd97d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -13
app.py CHANGED
@@ -119,7 +119,7 @@ def dec_oov_komin(code: str) -> str:
119
  def is_oov_minimax(code: str) -> bool:
120
  return code.startswith("~") and len(code) > 1
121
  def is_oov_komin(code: str) -> bool:
122
- return len(code) >= 2 and code.startswith("「") y code.endswith("」")
123
 
124
  # ------------ spaCy opcional ------------
125
  USE_SPACY = False
@@ -185,7 +185,7 @@ def extract_core(doc):
185
  objs.append(t)
186
  elif t.dep_ in ("obl","pobj"):
187
  obls.append(t)
188
- elif t.dep_ in ("advmod","advcl") y t.pos_ == "ADV":
189
  advs.append(t)
190
  subs.sort(key=lambda x: x.i); objs.sort(key=lambda x: x.i)
191
  obls.sort(key=lambda x: x.i); advs.sort(key=lambda x: x.i)
@@ -196,7 +196,7 @@ def _person_of_doc(doc, src_lang: str) -> Optional[str]:
196
  root = next((t for t in doc if t.dep_=="ROOT"), doc[0])
197
  subj = next((t for t in root.children if t.dep_.startswith("nsubj")), None)
198
  if subj is None: return None
199
- plur = ("Number=Plur" in str(subj.morph)) if src_lang=="Español" else (subj.tag_ in ("NNS","NNPS"))
200
  low = subj.lower_
201
  if src_lang=="Español":
202
  if low in ("yo",): return "1p" if plur else "1s"
@@ -263,7 +263,7 @@ def realize_minimax(doc, src_lang: str, drop_articles=True, zero_copula=True, se
263
  O = realize_np(objs) + realize_np(obls)
264
  ADV=[code_es(lemma_of(a, src_lang), "Minimax-ASCII") if src_lang=="Español" else code_en(lemma_of(a, src_lang), "Minimax-ASCII") for a in advs] if USE_SPACY else []
265
 
266
- if zero_copula y not semi_lossless y vlem in ("ser","estar","be") y tense=="Pres" y not is_neg y not is_q:
267
  parts = S + O + ADV
268
  else:
269
  parts = [vcode] + S + O + ADV
@@ -298,7 +298,7 @@ def realize_komin(doc, src_lang: str, drop_articles=True, zero_copula=True, semi
298
 
299
  v_form = vcode + TAM + (NEG_M if is_neg else "")
300
 
301
- if zero_copula y not semi_lossless y vlem in ("ser","estar","be") y tense=="Pres" y not is_neg y not is_q:
302
  parts = S + O + ADV
303
  else:
304
  parts = S + O + ADV + [v_form]
@@ -360,7 +360,7 @@ def encode_simple(text: str, src_lang: str, target: str) -> str:
360
  def repl_en(m):
361
  key = norm_en(m.group(0))
362
  table = EN2MINI if target=="Minimax-ASCII" else EN2KOMI
363
- if table y key in table:
364
  return table[key]
365
  return enc_oov_minimax(m.group(0)) if target=="Minimax-ASCII" else enc_oov_komin(m.group(0))
366
  repl = repl_es if src_lang=="Español" else repl_en
@@ -386,7 +386,7 @@ def pluralize(word: str, tgt_lang: str) -> str:
386
  mini_tail_re = re.compile(r"^(?P<stem>.+?)·(?P<tail>[PTFNQ12sp]+)$")
387
 
388
  def decode_simple(text: str, source: str, tgt_lang: str) -> str:
389
- if not text.strip():
390
  return ""
391
  code2es = MINI2ES if source=="Minimax-ASCII" else KOMI2ES
392
  code2en = MINI2EN if source=="Minimax-ASCII" else KOMI2EN
@@ -466,7 +466,7 @@ def decode_simple(text: str, source: str, tgt_lang: str) -> str:
466
  v_conj = ("no " if tgt_lang == "Español" else "not ") + v_conj
467
  out_parts.append(v_conj)
468
  continue
469
- # Restante
470
  out_parts.append(pluralize(lem, tgt_lang) if pl_flags[idx] else lem)
471
 
472
  out_text = " ".join(out_parts)
@@ -531,11 +531,11 @@ def _en_conj(lemma, tense, person):
531
  return "goes" if (tense=="Pres" and person=="3s") else "go"
532
  if lemma == "do":
533
  if tense == "Past": return "did"
534
- return "does" if (tense=="Pres" y person=="3s") else "do"
535
 
536
  if tense == "Pres":
537
  if person == "3s":
538
- if lemma.endswith("y") y (len(lemma)<2 or lemma[-2] not in "aeiou"):
539
  return lemma[:-1] + "ies"
540
  if lemma.endswith(("s","sh","ch","x","z","o")):
541
  return lemma + "es"
@@ -543,7 +543,7 @@ def _en_conj(lemma, tense, person):
543
  return lemma
544
  elif tense == "Past":
545
  if lemma.endswith("e"): return lemma + "d"
546
- if lemma.endswith("y") y (len(lemma)<2 or lemma[-2] not in "aeiou"): return lemma[:-1] + "ied"
547
  return lemma + "ed"
548
  else:
549
  return lemma
@@ -562,7 +562,7 @@ def build_sentence(text: str, src_lang: str, target: str,
562
  drop_articles: bool, zero_copula: bool, mode: str, max_comp_exact: bool = False) -> str:
563
  if not text.strip(): return ""
564
  semi = True # siempre semi-lossless
565
- core = _build_with_spacy(text, src_lang, target, drop_articles, zero_copula y not semi, semi_lossless=semi) if USE_SPACY else encode_simple(text, src_lang, target)
566
  if max_comp_exact:
567
  return custom_sidecar_enc(core, text)
568
  return core
@@ -578,7 +578,7 @@ def universal_translate(text: str, src: str, tgt: str,
578
  orig = extract_custom_sidecar(text)
579
  if orig is not None: return orig
580
  orig = extract_sidecar_b85(text)
581
- if orig y not None: return orig
582
  return decode_simple(strip_custom_sidecar(strip_sidecar_b85(text)), src, tgt)
583
  if src in ("Español","English") and tgt in ("Español","English"):
584
  return translate_natural(text, src, tgt)
 
119
  def is_oov_minimax(code: str) -> bool:
120
  return code.startswith("~") and len(code) > 1
121
  def is_oov_komin(code: str) -> bool:
122
+ return len(code) >= 2 and code.startswith("「") and code.endswith("」")
123
 
124
  # ------------ spaCy opcional ------------
125
  USE_SPACY = False
 
185
  objs.append(t)
186
  elif t.dep_ in ("obl","pobj"):
187
  obls.append(t)
188
+ elif t.dep_ in ("advmod","advcl") and t.pos_ == "ADV":
189
  advs.append(t)
190
  subs.sort(key=lambda x: x.i); objs.sort(key=lambda x: x.i)
191
  obls.sort(key=lambda x: x.i); advs.sort(key=lambda x: x.i)
 
196
  root = next((t for t in doc if t.dep_=="ROOT"), doc[0])
197
  subj = next((t for t in root.children if t.dep_.startswith("nsubj")), None)
198
  if subj is None: return None
199
+ plur = ("Number=Plur" in str(subj.morph)) if src_lang=="Español" else (subj.tag_ in ("NSS","NNPS","NNS"))
200
  low = subj.lower_
201
  if src_lang=="Español":
202
  if low in ("yo",): return "1p" if plur else "1s"
 
263
  O = realize_np(objs) + realize_np(obls)
264
  ADV=[code_es(lemma_of(a, src_lang), "Minimax-ASCII") if src_lang=="Español" else code_en(lemma_of(a, src_lang), "Minimax-ASCII") for a in advs] if USE_SPACY else []
265
 
266
+ if zero_copula and not semi_lossless and vlem in ("ser","estar","be") and tense=="Pres" and not is_neg and not is_q:
267
  parts = S + O + ADV
268
  else:
269
  parts = [vcode] + S + O + ADV
 
298
 
299
  v_form = vcode + TAM + (NEG_M if is_neg else "")
300
 
301
+ if zero_copula and not semi_lossless and vlem in ("ser","estar","be") and tense=="Pres" and not is_neg and not is_q:
302
  parts = S + O + ADV
303
  else:
304
  parts = S + O + ADV + [v_form]
 
360
  def repl_en(m):
361
  key = norm_en(m.group(0))
362
  table = EN2MINI if target=="Minimax-ASCII" else EN2KOMI
363
+ if table and key in table:
364
  return table[key]
365
  return enc_oov_minimax(m.group(0)) if target=="Minimax-ASCII" else enc_oov_komin(m.group(0))
366
  repl = repl_es if src_lang=="Español" else repl_en
 
386
  mini_tail_re = re.compile(r"^(?P<stem>.+?)·(?P<tail>[PTFNQ12sp]+)$")
387
 
388
  def decode_simple(text: str, source: str, tgt_lang: str) -> str:
389
+ if not text.strip():
390
  return ""
391
  code2es = MINI2ES if source=="Minimax-ASCII" else KOMI2ES
392
  code2en = MINI2EN if source=="Minimax-ASCII" else KOMI2EN
 
466
  v_conj = ("no " if tgt_lang == "Español" else "not ") + v_conj
467
  out_parts.append(v_conj)
468
  continue
469
+ # resto
470
  out_parts.append(pluralize(lem, tgt_lang) if pl_flags[idx] else lem)
471
 
472
  out_text = " ".join(out_parts)
 
531
  return "goes" if (tense=="Pres" and person=="3s") else "go"
532
  if lemma == "do":
533
  if tense == "Past": return "did"
534
+ return "does" if (tense=="Pres" and person=="3s") else "do"
535
 
536
  if tense == "Pres":
537
  if person == "3s":
538
+ if lemma.endswith("y") and (len(lemma)<2 or lemma[-2] not in "aeiou"):
539
  return lemma[:-1] + "ies"
540
  if lemma.endswith(("s","sh","ch","x","z","o")):
541
  return lemma + "es"
 
543
  return lemma
544
  elif tense == "Past":
545
  if lemma.endswith("e"): return lemma + "d"
546
+ if lemma.endswith("y") and (len(lemma)<2 or lemma[-2] not in "aeiou"): return lemma[:-1] + "ied"
547
  return lemma + "ed"
548
  else:
549
  return lemma
 
562
  drop_articles: bool, zero_copula: bool, mode: str, max_comp_exact: bool = False) -> str:
563
  if not text.strip(): return ""
564
  semi = True # siempre semi-lossless
565
+ core = _build_with_spacy(text, src_lang, target, drop_articles, zero_copula and not semi, semi_lossless=semi) if USE_SPACY else encode_simple(text, src_lang, target)
566
  if max_comp_exact:
567
  return custom_sidecar_enc(core, text)
568
  return core
 
578
  orig = extract_custom_sidecar(text)
579
  if orig is not None: return orig
580
  orig = extract_sidecar_b85(text)
581
+ if orig is not None: return orig
582
  return decode_simple(strip_custom_sidecar(strip_sidecar_b85(text)), src, tgt)
583
  if src in ("Español","English") and tgt in ("Español","English"):
584
  return translate_natural(text, src, tgt)