Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -24,7 +24,7 @@ models = ["Helsinki-NLP", "QUICKMT", "Argos", "Google", "HPLT", "HPLT-OPUS",
|
|
| 24 |
"Helsinki-NLP/opus-mt-tc-bible-big-roa-deu_eng_fra_por_spa", "Helsinki-NLP/opus-mt-tc-bible-big-deu_eng_fra_por_spa-roa", "Helsinki-NLP/opus-mt-tc-bible-big-roa-en",
|
| 25 |
"facebook/nllb-200-distilled-600M", "facebook/nllb-200-distilled-1.3B", "facebook/nllb-200-1.3B", "facebook/nllb-200-3.3B",
|
| 26 |
"facebook/mbart-large-50-many-to-many-mmt", "facebook/mbart-large-50-one-to-many-mmt", "facebook/mbart-large-50-many-to-one-mmt",
|
| 27 |
-
"facebook/m2m100_418M", "facebook/m2m100_1.2B", "Lego-MT/Lego-MT",
|
| 28 |
"bigscience/mt0-small", "bigscience/mt0-base", "bigscience/mt0-large", "bigscience/mt0-xl",
|
| 29 |
"bigscience/bloomz-560m", "bigscience/bloomz-1b1", "bigscience/bloomz-1b7", "bigscience/bloomz-3b",
|
| 30 |
"google-t5/t5-small", "google-t5/t5-base", "google-t5/t5-large",
|
|
@@ -102,6 +102,16 @@ class Translators:
|
|
| 102 |
outputs = tokenizer.batch_decode(output_ids, skip_special_tokens=True)
|
| 103 |
outputs = ''.join(outputs) if isinstance(outputs, list) else outputs
|
| 104 |
return outputs
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 105 |
|
| 106 |
def hplt(self, opus = False):
|
| 107 |
# langs = ['ar', 'bs', 'ca', 'en', 'et', 'eu', 'fi', 'ga', 'gl', 'hi', 'hr', 'is', 'mt', 'nn', 'sq', 'sw', 'zh_hant']
|
|
@@ -109,6 +119,9 @@ class Translators:
|
|
| 109 |
'en-ga', 'en-gl', 'en-hi', 'en-hr', 'en-is', 'en-mt', 'en-nn', 'en-sq', 'en-sw',
|
| 110 |
'en-zh_hant', 'et-en', 'eu-en', 'fi-en', 'ga-en', 'gl-en', 'hi-en', 'hr-en',
|
| 111 |
'is-en', 'mt-en', 'nn-en', 'sq-en', 'sw-en', 'zh_hant-en']
|
|
|
|
|
|
|
|
|
|
| 112 |
if opus:
|
| 113 |
hplt_model = f'HPLT/translate-{self.sl}-{self.tl}-v1.0-hplt_opus' # HPLT/translate-en-hr-v1.0-hplt_opus
|
| 114 |
else:
|
|
@@ -534,6 +547,9 @@ def translate_text(input_text: str, s_language: str, t_language: str, model_name
|
|
| 534 |
|
| 535 |
elif "m2m" in model_name.lower():
|
| 536 |
translated_text = Translators(model_name, sl, tl, input_text).mtom()
|
|
|
|
|
|
|
|
|
|
| 537 |
|
| 538 |
elif "lego" in model_name.lower():
|
| 539 |
translated_text = Translators(model_name, sl, tl, input_text).LegoMT()
|
|
|
|
| 24 |
"Helsinki-NLP/opus-mt-tc-bible-big-roa-deu_eng_fra_por_spa", "Helsinki-NLP/opus-mt-tc-bible-big-deu_eng_fra_por_spa-roa", "Helsinki-NLP/opus-mt-tc-bible-big-roa-en",
|
| 25 |
"facebook/nllb-200-distilled-600M", "facebook/nllb-200-distilled-1.3B", "facebook/nllb-200-1.3B", "facebook/nllb-200-3.3B",
|
| 26 |
"facebook/mbart-large-50-many-to-many-mmt", "facebook/mbart-large-50-one-to-many-mmt", "facebook/mbart-large-50-many-to-one-mmt",
|
| 27 |
+
"facebook/m2m100_418M", "facebook/m2m100_1.2B", "alirezamsh/small100", "Lego-MT/Lego-MT",
|
| 28 |
"bigscience/mt0-small", "bigscience/mt0-base", "bigscience/mt0-large", "bigscience/mt0-xl",
|
| 29 |
"bigscience/bloomz-560m", "bigscience/bloomz-1b1", "bigscience/bloomz-1b7", "bigscience/bloomz-3b",
|
| 30 |
"google-t5/t5-small", "google-t5/t5-base", "google-t5/t5-large",
|
|
|
|
| 102 |
outputs = tokenizer.batch_decode(output_ids, skip_special_tokens=True)
|
| 103 |
outputs = ''.join(outputs) if isinstance(outputs, list) else outputs
|
| 104 |
return outputs
|
| 105 |
+
|
| 106 |
+
def smallonehundred(self):
    """Translate ``self.input_text`` with the SMALL-100 multilingual model.

    Loads the M2M100-architecture checkpoint named by ``self.model_name``
    together with the SMALL-100 tokenizer, sets the target language on the
    tokenizer (SMALL-100 encodes the source language implicitly), and
    returns the decoded translation as a single string.

    Returns:
        str: the translated text (first — and only — batch element).
    """
    from transformers import M2M100ForConditionalGeneration
    # NOTE(review): project-local module shipped alongside the SMALL-100
    # model card; not an installable package.
    from tokenization_small100 import SMALL100Tokenizer

    model = M2M100ForConditionalGeneration.from_pretrained(self.model_name)
    tokenizer = SMALL100Tokenizer.from_pretrained(self.model_name)
    tokenizer.tgt_lang = self.tl
    encoded_sl = tokenizer(self.input_text, return_tensors="pt")
    generated_tokens = model.generate(**encoded_sl, max_length=256, num_beams=5)
    # BUG FIX: original read `...skip_special_tokens=True)0]` — the opening
    # `[` of the subscript was missing, a SyntaxError. batch_decode returns a
    # list of strings; return the single decoded sentence.
    return tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
|
| 115 |
|
| 116 |
def hplt(self, opus = False):
|
| 117 |
# langs = ['ar', 'bs', 'ca', 'en', 'et', 'eu', 'fi', 'ga', 'gl', 'hi', 'hr', 'is', 'mt', 'nn', 'sq', 'sw', 'zh_hant']
|
|
|
|
| 119 |
'en-ga', 'en-gl', 'en-hi', 'en-hr', 'en-is', 'en-mt', 'en-nn', 'en-sq', 'en-sw',
|
| 120 |
'en-zh_hant', 'et-en', 'eu-en', 'fi-en', 'ga-en', 'gl-en', 'hi-en', 'hr-en',
|
| 121 |
'is-en', 'mt-en', 'nn-en', 'sq-en', 'sw-en', 'zh_hant-en']
|
| 122 |
+
lang_map = {"zh": "zh_hant"}
|
| 123 |
+
self.sl = lang_map.get(self.sl, self.sl)
|
| 124 |
+
self.tl = lang_map.get(self.tl, self.tl)
|
| 125 |
if opus:
|
| 126 |
hplt_model = f'HPLT/translate-{self.sl}-{self.tl}-v1.0-hplt_opus' # HPLT/translate-en-hr-v1.0-hplt_opus
|
| 127 |
else:
|
|
|
|
| 547 |
|
| 548 |
elif "m2m" in model_name.lower():
|
| 549 |
translated_text = Translators(model_name, sl, tl, input_text).mtom()
|
| 550 |
+
|
| 551 |
+
elif "small100" in model_name.lower():
|
| 552 |
+
translated_text = Translators(model_name, sl, tl, input_text).smallonehundred()
|
| 553 |
|
| 554 |
elif "lego" in model_name.lower():
|
| 555 |
translated_text = Translators(model_name, sl, tl, input_text).LegoMT()
|