Update app.py
Browse files
app.py
CHANGED
|
@@ -237,7 +237,7 @@ def _person_of_doc(doc, src_lang: str) -> Optional[str]:
|
|
| 237 |
root = next((t for t in doc if t.dep_=="ROOT"), doc[0])
|
| 238 |
subj = next((t for t in root.children if t.dep_.startswith("nsubj")), None)
|
| 239 |
if subj is None: return None
|
| 240 |
-
plur = ("Number=
|
| 241 |
low = subj.lower_
|
| 242 |
if src_lang=="Español":
|
| 243 |
if low in ("yo",): return "1p" if plur else "1s"
|
|
@@ -294,7 +294,7 @@ def realize_minimax(doc, src_lang: str, drop_articles=True, zero_copula=True, se
|
|
| 294 |
if zero_copula and not semi_lossless and vlem in ("ser","estar","be") and tense=="Pres" and not is_neg and not is_q:
|
| 295 |
parts = S + O + ADV
|
| 296 |
else:
|
| 297 |
-
parts = [vcode] +
|
| 298 |
return " ".join(p for p in parts if p)
|
| 299 |
|
| 300 |
def realize_komin(doc, src_lang: str, drop_articles=True, zero_copula=True, semi_lossless=False, person_hint="2s"):
|
|
@@ -515,15 +515,17 @@ def decode_simple(text: str, source: str, tgt_lang: str) -> str:
|
|
| 515 |
w = pluralize(lem, tgt_lang) if pl_flags[idx] else lem
|
| 516 |
if w.lower() in {"hola", "hello", "hi", "hey"}:
|
| 517 |
greeting = w
|
| 518 |
-
elif w.lower() in {"como", "
|
| 519 |
-
wh = w
|
|
|
|
|
|
|
| 520 |
else:
|
| 521 |
out_parts.append(w)
|
| 522 |
|
| 523 |
# Reorden: Greeting + wh + S V O ADV
|
| 524 |
final_out = []
|
| 525 |
if greeting:
|
| 526 |
-
final_out.append(greeting)
|
| 527 |
if wh:
|
| 528 |
final_out.append(wh)
|
| 529 |
final_out += out_parts
|
|
@@ -687,7 +689,9 @@ def universal_translate(text: str, src: str, tgt: str,
|
|
| 687 |
code = ES2MINI.get(norm_es(w)) if tgt=="Minimax-ASCII" else ES2KOMI.get(norm_es(w))
|
| 688 |
if not code:
|
| 689 |
code = enc_oov_minimax(w) if tgt=="Minimax-ASCII" else enc_oov_komin(w)
|
| 690 |
-
|
|
|
|
|
|
|
| 691 |
else:
|
| 692 |
out.append(w)
|
| 693 |
return " ".join(out)
|
|
|
|
| 237 |
root = next((t for t in doc if t.dep_=="ROOT"), doc[0])
|
| 238 |
subj = next((t for t in root.children if t.dep_.startswith("nsubj")), None)
|
| 239 |
if subj is None: return None
|
| 240 |
+
plur = ("Number=Sing" in str(subj.morph)) if src_lang=="Español" else (subj.tag_ in ("NNS","NNPS"))
|
| 241 |
low = subj.lower_
|
| 242 |
if src_lang=="Español":
|
| 243 |
if low in ("yo",): return "1p" if plur else "1s"
|
|
|
|
| 294 |
if zero_copula and not semi_lossless and vlem in ("ser","estar","be") and tense=="Pres" and not is_neg and not is_q:
|
| 295 |
parts = S + O + ADV
|
| 296 |
else:
|
| 297 |
+
parts = S + [vcode] + O + ADV
|
| 298 |
return " ".join(p for p in parts if p)
|
| 299 |
|
| 300 |
def realize_komin(doc, src_lang: str, drop_articles=True, zero_copula=True, semi_lossless=False, person_hint="2s"):
|
|
|
|
| 515 |
w = pluralize(lem, tgt_lang) if pl_flags[idx] else lem
|
| 516 |
if w.lower() in {"hola", "hello", "hi", "hey"}:
|
| 517 |
greeting = w
|
| 518 |
+
elif w.lower() in {"como", "what", "how"} and has_q:
|
| 519 |
+
wh = w + " "
|
| 520 |
+
if tgt_lang == "Español" and w.lower() == "como":
|
| 521 |
+
wh = "cómo "
|
| 522 |
else:
|
| 523 |
out_parts.append(w)
|
| 524 |
|
| 525 |
# Reorden: Greeting + wh + S V O ADV
|
| 526 |
final_out = []
|
| 527 |
if greeting:
|
| 528 |
+
final_out.append(greeting.capitalize() + ",")
|
| 529 |
if wh:
|
| 530 |
final_out.append(wh)
|
| 531 |
final_out += out_parts
|
|
|
|
| 689 |
code = ES2MINI.get(norm_es(w)) if tgt=="Minimax-ASCII" else ES2KOMI.get(norm_es(w))
|
| 690 |
if not code:
|
| 691 |
code = enc_oov_minimax(w) if tgt=="Minimax-ASCII" else enc_oov_komin(w)
|
| 692 |
+
out.append(code)
|
| 693 |
+
else:
|
| 694 |
+
out.append(code)
|
| 695 |
else:
|
| 696 |
out.append(w)
|
| 697 |
return " ".join(out)
|