Spaces:
Running
Running
Update src/Translate.py
Browse files- src/Translate.py +12 -0
src/Translate.py
CHANGED
|
@@ -36,6 +36,18 @@ class Translators:
|
|
| 36 |
def translationpipe(self):
    """Translate ``self.input_text`` through a ``'translation'`` pipeline.

    The pipeline is built for the checkpoint named by ``self.model_name``
    (``pipeline`` is presumably ``transformers.pipeline`` -- confirm against
    the file's imports).

    Returns:
        tuple: ``(translated_text, self.message)``, where ``translated_text``
        is the ``'translation_text'`` field of the first pipeline result.
    """
    translator = pipeline('translation', model=self.model_name)
    # The pipeline returns a list of result dicts; only the first is used.
    first_result = translator(self.input_text)[0]
    return first_result['translation_text'], self.message
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
|
| 40 |
def paraphraseTranslateMethod(requestValue: str, model: str):
|
| 41 |
nltk.download('punkt')
|
|
|
|
| 36 |
def translationpipe(self):
    """Translate ``self.input_text`` through a ``'translation'`` pipeline.

    The pipeline is built for the checkpoint named by ``self.model_name``
    (``pipeline`` is presumably ``transformers.pipeline`` -- confirm against
    the file's imports).

    Returns:
        tuple: ``(translated_text, self.message)``, where ``translated_text``
        is the ``'translation_text'`` field of the first pipeline result.
    """
    translator = pipeline('translation', model=self.model_name)
    # The pipeline returns a list of result dicts; only the first is used.
    first_result = translator(self.input_text)[0]
    return first_result['translation_text'], self.message
|
| 39 |
+
def mbartlarge(self):
    """Translate ``self.input_text`` with an mBART-50 checkpoint.

    Loads the model and fast tokenizer named by ``self.model_name``, sets the
    tokenizer's source language, and forces generation to begin with the
    target-language token.

    Returns:
        tuple: ``(translation, self.message)`` where ``translation`` is the
        first decoded sequence with special tokens stripped.

    Raises:
        KeyError: if the resolved target code is missing from
            ``tokenizer.lang_code_to_id``.
    """
    from transformers import MBartForConditionalGeneration, MBart50TokenizerFast

    # Load model and tokenizer
    model = MBartForConditionalGeneration.from_pretrained(self.model_name)
    tokenizer = MBart50TokenizerFast.from_pretrained(self.model_name)

    def lang_code(lang, guess):
        # mBART-50 codes have no uniform suffix (en_XX, de_DE, hi_IN, sl_SI),
        # so look the code up by its language prefix in the tokenizer's own
        # table and only fall back to the hard-coded guess when nothing matches.
        prefix = f"{lang}_"
        return next(
            (code for code in tokenizer.lang_code_to_id if code.startswith(prefix)),
            guess,
        )

    # The target code used to be derived from the *source* language.
    # NOTE(review): assumes the target language is stored in ``self.tl`` --
    # confirm against __init__; falls back to ``self.sl`` (the previous
    # behaviour) when that attribute does not exist.
    target = getattr(self, "tl", self.sl)
    src_lang = lang_code(self.sl, f"{self.sl}_XX")
    tgt_lang = lang_code(target, f"{target}_{target.upper()}")

    # Tokenize and translate
    tokenizer.src_lang = src_lang
    inputs = tokenizer(self.input_text, return_tensors="pt")
    translated_tokens = model.generate(
        **inputs,
        forced_bos_token_id=tokenizer.lang_code_to_id[tgt_lang],
    )
    translation = tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]
    return translation, self.message
|
| 51 |
|
| 52 |
def paraphraseTranslateMethod(requestValue: str, model: str):
|
| 53 |
nltk.download('punkt')
|