Spaces:
Running
Running
Update src/Translate.py
Browse files
- src/Translate.py +8 -8
src/Translate.py
CHANGED
|
@@ -66,28 +66,28 @@ class Translators:
|
|
| 66 |
model_name = "facebook/mbart-large-cc25"
|
| 67 |
|
| 68 |
# load tokenizer and model
|
| 69 |
-
|
| 70 |
-
|
| 71 |
|
| 72 |
# tell tokenizer the source language
|
| 73 |
-
|
| 74 |
-
|
| 75 |
# set the target language as the model's forced BOS token so pipeline will use it implicitly
|
| 76 |
-
|
| 77 |
|
| 78 |
# find the id for the target language and force it at generation
|
| 79 |
# forced_bos_token_id = tokenizer.lang_code_to_id["ro_RO"]
|
| 80 |
|
| 81 |
# create the pipeline (pass tokenizer and model explicitly)
|
| 82 |
# export langs=ar_AR,cs_CZ,de_DE,en_XX,es_XX,et_EE,fi_FI,fr_XX,gu_IN,hi_IN,it_IT,ja_XX,kk_KZ,ko_KR,lt_LT,lv_LV,my_MM,ne_NP,nl_XX,ro_RO,ru_RU,si_LK,tr_TR,vi_VN,zh_CN
|
| 83 |
-
|
| 84 |
-
pipe = pipeline("translation_en_to_de", model="facebook/mbart-large-cc25")
|
| 85 |
# "translation" task was used, instead of "translation_XX_to_YY", defaulting to "translation_en_to_ro"
|
| 86 |
|
| 87 |
# call the pipeline; generation kwargs are forwarded to model.generate
|
| 88 |
# src_lang (str, optional) — The language of the input.
|
| 89 |
# tgt_lang (str, optional) — The language of the desired output. Might be required for multilingual models. Will not have any effect for single pair translation models
|
| 90 |
-
src_text = "Check general exterior
|
| 91 |
result = pipe(
|
| 92 |
src_text,
|
| 93 |
num_beams=4,
|
|
|
|
| 66 |
model_name = "facebook/mbart-large-cc25"
|
| 67 |
|
| 68 |
# load tokenizer and model
|
| 69 |
+
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
| 70 |
+
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
|
| 71 |
|
| 72 |
# tell tokenizer the source language
|
| 73 |
+
tokenizer.src_lang = "en_XX"
|
| 74 |
+
tokenizer.tgt_lang = "ro_RO"
|
| 75 |
# set the target language as the model's forced BOS token so pipeline will use it implicitly
|
| 76 |
+
model.config.forced_bos_token_id = tokenizer.lang_code_to_id["ro_RO"]
|
| 77 |
|
| 78 |
# find the id for the target language and force it at generation
|
| 79 |
# forced_bos_token_id = tokenizer.lang_code_to_id["ro_RO"]
|
| 80 |
|
| 81 |
# create the pipeline (pass tokenizer and model explicitly)
|
| 82 |
# export langs=ar_AR,cs_CZ,de_DE,en_XX,es_XX,et_EE,fi_FI,fr_XX,gu_IN,hi_IN,it_IT,ja_XX,kk_KZ,ko_KR,lt_LT,lv_LV,my_MM,ne_NP,nl_XX,ro_RO,ru_RU,si_LK,tr_TR,vi_VN,zh_CN
|
| 83 |
+
pipe = pipeline("translation", model=model, tokenizer=tokenizer, src_lang="en_XX", tgt_lang="ro_RO")
|
| 84 |
+
# pipe = pipeline("translation_en_to_de", model="facebook/mbart-large-cc25")
|
| 85 |
# "translation" task was used, instead of "translation_XX_to_YY", defaulting to "translation_en_to_ro"
|
| 86 |
|
| 87 |
# call the pipeline; generation kwargs are forwarded to model.generate
|
| 88 |
# src_lang (str, optional) — The language of the input.
|
| 89 |
# tgt_lang (str, optional) — The language of the desired output. Might be required for multilingual models. Will not have any effect for single pair translation models
|
| 90 |
+
src_text = ["Check general exterior conditions"]
|
| 91 |
result = pipe(
|
| 92 |
src_text,
|
| 93 |
num_beams=4,
|