FastAPIMT

Running

App Files Files Community

TiberiuCristianLeon committed on Oct 28

Commit

2096aea

verified ·

1 Parent(s): 5b0f1e6

Update src/Translate.py

Browse files

Files changed (1) hide show

src/Translate.py +8 -8

src/Translate.py CHANGED Viewed

@@ -66,28 +66,28 @@ class Translators:
         model_name = "facebook/mbart-large-cc25"
         # load tokenizer and model
-        # tokenizer = AutoTokenizer.from_pretrained(model_name)
-        # model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
         # tell tokenizer the source language
-        # tokenizer.src_lang = "en_XX"
-        # tokenizer.tgt_lang = "ro_RO"
         # set the target language as the model's forced BOS token so pipeline will use it implicitly
-        # model.config.forced_bos_token_id = tokenizer.lang_code_to_id["ro_RO"]
         # find the id for the target language and force it at generation
         # forced_bos_token_id = tokenizer.lang_code_to_id["ro_RO"]
         # create the pipeline (pass tokenizer and model explicitly)
         # export langs=ar_AR,cs_CZ,de_DE,en_XX,es_XX,et_EE,fi_FI,fr_XX,gu_IN,hi_IN,it_IT,ja_XX,kk_KZ,ko_KR,lt_LT,lv_LV,my_MM,ne_NP,nl_XX,ro_RO,ru_RU,si_LK,tr_TR,vi_VN,zh_CN
-        # pipe = pipeline("translation", model=model_name, src_lang="en_XX", tgt_lang="ro_RO")
-        pipe = pipeline("translation_en_to_de", model="facebook/mbart-large-cc25")
         # "translation" task was used, instead of "translation_XX_to_YY", defaulting to "translation_en_to_ro"
         # call the pipeline; generation kwargs are forwarded to model.generate
         # src_lang (str, optional) — The language of the input.
         # tgt_lang (str, optional) — The language of the desired output. Might be required for multilingual models. Will not have any effect for single pair translation models
-        src_text = "Check general exterior condition"
         result = pipe(
             src_text,
             num_beams=4,

         model_name = "facebook/mbart-large-cc25"
         # load tokenizer and model
+        tokenizer = AutoTokenizer.from_pretrained(model_name)
+        model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
         # tell tokenizer the source language
+        tokenizer.src_lang = "en_XX"
+        tokenizer.tgt_lang = "ro_RO"
         # set the target language as the model's forced BOS token so pipeline will use it implicitly
+        model.config.forced_bos_token_id = tokenizer.lang_code_to_id["ro_RO"]
         # find the id for the target language and force it at generation
         # forced_bos_token_id = tokenizer.lang_code_to_id["ro_RO"]
         # create the pipeline (pass tokenizer and model explicitly)
         # export langs=ar_AR,cs_CZ,de_DE,en_XX,es_XX,et_EE,fi_FI,fr_XX,gu_IN,hi_IN,it_IT,ja_XX,kk_KZ,ko_KR,lt_LT,lv_LV,my_MM,ne_NP,nl_XX,ro_RO,ru_RU,si_LK,tr_TR,vi_VN,zh_CN
+        pipe = pipeline("translation", model=model, tokenizer=tokenizer, src_lang="en_XX", tgt_lang="ro_RO")
+        # pipe = pipeline("translation_en_to_de", model="facebook/mbart-large-cc25")
         # "translation" task was used, instead of "translation_XX_to_YY", defaulting to "translation_en_to_ro"
         # call the pipeline; generation kwargs are forwarded to model.generate
         # src_lang (str, optional) — The language of the input.
         # tgt_lang (str, optional) — The language of the desired output. Might be required for multilingual models. Will not have any effect for single pair translation models
+        src_text = ["Check general exterior conditions"]
         result = pipe(
             src_text,
             num_beams=4,