TiberiuCristianLeon committed on
Commit
631e80d
·
verified ·
1 Parent(s): 09be962

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -1
app.py CHANGED
@@ -24,7 +24,7 @@ models = ["Helsinki-NLP", "QUICKMT", "Argos", "Google", "HPLT", "HPLT-OPUS",
24
  "Helsinki-NLP/opus-mt-tc-bible-big-roa-deu_eng_fra_por_spa", "Helsinki-NLP/opus-mt-tc-bible-big-deu_eng_fra_por_spa-roa", "Helsinki-NLP/opus-mt-tc-bible-big-roa-en",
25
  "facebook/nllb-200-distilled-600M", "facebook/nllb-200-distilled-1.3B", "facebook/nllb-200-1.3B", "facebook/nllb-200-3.3B",
26
  "facebook/mbart-large-50-many-to-many-mmt", "facebook/mbart-large-50-one-to-many-mmt", "facebook/mbart-large-50-many-to-one-mmt",
27
- "facebook/m2m100_418M", "facebook/m2m100_1.2B", "Lego-MT/Lego-MT",
28
  "bigscience/mt0-small", "bigscience/mt0-base", "bigscience/mt0-large", "bigscience/mt0-xl",
29
  "bigscience/bloomz-560m", "bigscience/bloomz-1b1", "bigscience/bloomz-1b7", "bigscience/bloomz-3b",
30
  "google-t5/t5-small", "google-t5/t5-base", "google-t5/t5-large",
@@ -102,6 +102,16 @@ class Translators:
102
  outputs = tokenizer.batch_decode(output_ids, skip_special_tokens=True)
103
  outputs = ''.join(outputs) if isinstance(outputs, list) else outputs
104
  return outputs
 
 
 
 
 
 
 
 
 
 
105
 
106
  def hplt(self, opus = False):
107
  # langs = ['ar', 'bs', 'ca', 'en', 'et', 'eu', 'fi', 'ga', 'gl', 'hi', 'hr', 'is', 'mt', 'nn', 'sq', 'sw', 'zh_hant']
@@ -109,6 +119,9 @@ class Translators:
109
  'en-ga', 'en-gl', 'en-hi', 'en-hr', 'en-is', 'en-mt', 'en-nn', 'en-sq', 'en-sw',
110
  'en-zh_hant', 'et-en', 'eu-en', 'fi-en', 'ga-en', 'gl-en', 'hi-en', 'hr-en',
111
  'is-en', 'mt-en', 'nn-en', 'sq-en', 'sw-en', 'zh_hant-en']
 
 
 
112
  if opus:
113
  hplt_model = f'HPLT/translate-{self.sl}-{self.tl}-v1.0-hplt_opus' # HPLT/translate-en-hr-v1.0-hplt_opus
114
  else:
@@ -534,6 +547,9 @@ def translate_text(input_text: str, s_language: str, t_language: str, model_name
534
 
535
  elif "m2m" in model_name.lower():
536
  translated_text = Translators(model_name, sl, tl, input_text).mtom()
 
 
 
537
 
538
  elif "lego" in model_name.lower():
539
  translated_text = Translators(model_name, sl, tl, input_text).LegoMT()
 
24
  "Helsinki-NLP/opus-mt-tc-bible-big-roa-deu_eng_fra_por_spa", "Helsinki-NLP/opus-mt-tc-bible-big-deu_eng_fra_por_spa-roa", "Helsinki-NLP/opus-mt-tc-bible-big-roa-en",
25
  "facebook/nllb-200-distilled-600M", "facebook/nllb-200-distilled-1.3B", "facebook/nllb-200-1.3B", "facebook/nllb-200-3.3B",
26
  "facebook/mbart-large-50-many-to-many-mmt", "facebook/mbart-large-50-one-to-many-mmt", "facebook/mbart-large-50-many-to-one-mmt",
27
+ "facebook/m2m100_418M", "facebook/m2m100_1.2B", "alirezamsh/small100", "Lego-MT/Lego-MT",
28
  "bigscience/mt0-small", "bigscience/mt0-base", "bigscience/mt0-large", "bigscience/mt0-xl",
29
  "bigscience/bloomz-560m", "bigscience/bloomz-1b1", "bigscience/bloomz-1b7", "bigscience/bloomz-3b",
30
  "google-t5/t5-small", "google-t5/t5-base", "google-t5/t5-large",
 
102
  outputs = tokenizer.batch_decode(output_ids, skip_special_tokens=True)
103
  outputs = ''.join(outputs) if isinstance(outputs, list) else outputs
104
  return outputs
105
+
106
+ def smallonehundred(self):
107
+ from transformers import M2M100ForConditionalGeneration
108
+ from tokenization_small100 import SMALL100Tokenizer
109
+ model = M2M100ForConditionalGeneration.from_pretrained(self.model_name)
110
+ tokenizer = SMALL100Tokenizer.from_pretrained(self.model_name)
111
+ tokenizer.tgt_lang = self.tl
112
+ encoded_sl = tokenizer(self.input_text, return_tensors="pt")
113
+ generated_tokens = model.generate(**encoded_sl, max_length=256, num_beams=5)
114
+ return tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
115
 
116
  def hplt(self, opus = False):
117
  # langs = ['ar', 'bs', 'ca', 'en', 'et', 'eu', 'fi', 'ga', 'gl', 'hi', 'hr', 'is', 'mt', 'nn', 'sq', 'sw', 'zh_hant']
 
119
  'en-ga', 'en-gl', 'en-hi', 'en-hr', 'en-is', 'en-mt', 'en-nn', 'en-sq', 'en-sw',
120
  'en-zh_hant', 'et-en', 'eu-en', 'fi-en', 'ga-en', 'gl-en', 'hi-en', 'hr-en',
121
  'is-en', 'mt-en', 'nn-en', 'sq-en', 'sw-en', 'zh_hant-en']
122
+ lang_map = {"zh": "zh_hant"}
123
+ self.sl = lang_map.get(self.sl, self.sl)
124
+ self.tl = lang_map.get(self.tl, self.tl)
125
  if opus:
126
  hplt_model = f'HPLT/translate-{self.sl}-{self.tl}-v1.0-hplt_opus' # HPLT/translate-en-hr-v1.0-hplt_opus
127
  else:
 
547
 
548
  elif "m2m" in model_name.lower():
549
  translated_text = Translators(model_name, sl, tl, input_text).mtom()
550
+
551
+ elif "small100" in model_name.lower():
552
+ translated_text = Translators(model_name, sl, tl, input_text).smallonehundred()
553
 
554
  elif "lego" in model_name.lower():
555
  translated_text = Translators(model_name, sl, tl, input_text).LegoMT()