TiberiuCristianLeon committed on
Commit
2096aea
·
verified ·
1 Parent(s): 5b0f1e6

Update src/Translate.py

Browse files
Files changed (1) hide show
  1. src/Translate.py +8 -8
src/Translate.py CHANGED
@@ -66,28 +66,28 @@ class Translators:
66
  model_name = "facebook/mbart-large-cc25"
67
 
68
  # load tokenizer and model
69
- # tokenizer = AutoTokenizer.from_pretrained(model_name)
70
- # model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
71
 
72
  # tell tokenizer the source language
73
- # tokenizer.src_lang = "en_XX"
74
- # tokenizer.tgt_lang = "ro_RO"
75
  # set the target language as the model's forced BOS token so pipeline will use it implicitly
76
- # model.config.forced_bos_token_id = tokenizer.lang_code_to_id["ro_RO"]
77
 
78
  # find the id for the target language and force it at generation
79
  # forced_bos_token_id = tokenizer.lang_code_to_id["ro_RO"]
80
 
81
  # create the pipeline (pass tokenizer and model explicitly)
82
  # export langs=ar_AR,cs_CZ,de_DE,en_XX,es_XX,et_EE,fi_FI,fr_XX,gu_IN,hi_IN,it_IT,ja_XX,kk_KZ,ko_KR,lt_LT,lv_LV,my_MM,ne_NP,nl_XX,ro_RO,ru_RU,si_LK,tr_TR,vi_VN,zh_CN
83
- # pipe = pipeline("translation", model=model_name, src_lang="en_XX", tgt_lang="ro_RO")
84
- pipe = pipeline("translation_en_to_de", model="facebook/mbart-large-cc25")
85
  # "translation" task was used, instead of "translation_XX_to_YY", defaulting to "translation_en_to_ro"
86
 
87
  # call the pipeline; generation kwargs are forwarded to model.generate
88
  # src_lang (str, optional) — The language of the input.
89
  # tgt_lang (str, optional) — The language of the desired output. Might be required for multilingual models. Will not have any effect for single pair translation models
90
- src_text = "Check general exterior condition"
91
  result = pipe(
92
  src_text,
93
  num_beams=4,
 
66
  model_name = "facebook/mbart-large-cc25"
67
 
68
  # load tokenizer and model
69
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
70
+ model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
71
 
72
  # tell tokenizer the source language
73
+ tokenizer.src_lang = "en_XX"
74
+ tokenizer.tgt_lang = "ro_RO"
75
  # set the target language as the model's forced BOS token so pipeline will use it implicitly
76
+ model.config.forced_bos_token_id = tokenizer.lang_code_to_id["ro_RO"]
77
 
78
  # find the id for the target language and force it at generation
79
  # forced_bos_token_id = tokenizer.lang_code_to_id["ro_RO"]
80
 
81
  # create the pipeline (pass tokenizer and model explicitly)
82
  # export langs=ar_AR,cs_CZ,de_DE,en_XX,es_XX,et_EE,fi_FI,fr_XX,gu_IN,hi_IN,it_IT,ja_XX,kk_KZ,ko_KR,lt_LT,lv_LV,my_MM,ne_NP,nl_XX,ro_RO,ru_RU,si_LK,tr_TR,vi_VN,zh_CN
83
+ pipe = pipeline("translation", model=model, tokenizer=tokenizer, src_lang="en_XX", tgt_lang="ro_RO")
84
+ # pipe = pipeline("translation_en_to_de", model="facebook/mbart-large-cc25")
85
  # "translation" task was used, instead of "translation_XX_to_YY", defaulting to "translation_en_to_ro"
86
 
87
  # call the pipeline; generation kwargs are forwarded to model.generate
88
  # src_lang (str, optional) — The language of the input.
89
  # tgt_lang (str, optional) — The language of the desired output. Might be required for multilingual models. Will not have any effect for single pair translation models
90
+ src_text = ["Check general exterior conditions"]
91
  result = pipe(
92
  src_text,
93
  num_beams=4,