p committed on
Commit ·
c069c11
1
Parent(s): 4dffb82
max_new_tokens
Browse files
app.py
CHANGED
|
@@ -1,16 +1,17 @@
|
|
| 1 |
-
# Based on example code of https://huggingface.co/facebook/
|
| 2 |
|
| 3 |
|
| 4 |
import gradio as gr
|
| 5 |
from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
|
| 6 |
|
| 7 |
-
model = M2M100ForConditionalGeneration.from_pretrained(
|
|
|
|
| 8 |
|
| 9 |
-
tokenizer = M2M100Tokenizer.from_pretrained("facebook/
|
| 10 |
|
| 11 |
|
| 12 |
this_description = '''
|
| 13 |
-
Using facebook/m2m100-12B-avg-5-ckpt pre-trained model.
|
| 14 |
|
| 15 |
Chinese(zh)
|
| 16 |
English(en)
|
|
@@ -20,6 +21,7 @@ Sinhalese(si)
|
|
| 20 |
Thai(th)
|
| 21 |
Vietnamese(vi)
|
| 22 |
...
|
|
|
|
| 23 |
'''
|
| 24 |
|
| 25 |
# From facebook/m2m100-12B-avg-5-ckpt
|
|
@@ -129,7 +131,6 @@ lang_codes = {
|
|
| 129 |
|
| 130 |
def m2m_translate(Input_Text, from_lang, to_lang):
|
| 131 |
tokenizer.src_lang = lang_codes[from_lang]
|
| 132 |
-
|
| 133 |
encoded_from_lang = tokenizer(Input_Text, return_tensors="pt")
|
| 134 |
|
| 135 |
generated_tokens = model.generate(
|
|
@@ -143,21 +144,6 @@ def m2m_translate(Input_Text, from_lang, to_lang):
|
|
| 143 |
return res[0]
|
| 144 |
|
| 145 |
|
| 146 |
-
def m2m_translate2(Input_Text, from_lang, to_lang):
|
| 147 |
-
tokenizer.src_lang = lang_codes[from_lang]
|
| 148 |
-
|
| 149 |
-
encoded_from_lang = tokenizer(Input_Text, return_tensors="pt")
|
| 150 |
-
|
| 151 |
-
generated_tokens = model.generate(
|
| 152 |
-
**encoded_from_lang, forced_bos_token_id=tokenizer.get_lang_id(lang_codes[to_lang]))
|
| 153 |
-
|
| 154 |
-
res = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
|
| 155 |
-
|
| 156 |
-
return res[0]
|
| 157 |
-
# if res:
|
| 158 |
-
# return '\n'.join(res)
|
| 159 |
-
|
| 160 |
-
|
| 161 |
iface = gr.Interface(
|
| 162 |
fn=m2m_translate,
|
| 163 |
|
|
|
|
| 1 |
+
# Based on example code of https://huggingface.co/facebook/m2m100-12B-avg-5-ckpt
|
| 2 |
|
| 3 |
|
| 4 |
import gradio as gr
|
| 5 |
from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
|
| 6 |
|
| 7 |
+
model = M2M100ForConditionalGeneration.from_pretrained(
|
| 8 |
+
"facebook/m2m100-12B-avg-5-ckpt")
|
| 9 |
|
| 10 |
+
tokenizer = M2M100Tokenizer.from_pretrained("facebook/m2m100-12B-avg-5-ckpt")
|
| 11 |
|
| 12 |
|
| 13 |
this_description = '''
|
| 14 |
+
Using facebook/m2m100-12B-avg-5-ckpt pre-trained model.
|
| 15 |
|
| 16 |
Chinese(zh)
|
| 17 |
English(en)
|
|
|
|
| 21 |
Thai(th)
|
| 22 |
Vietnamese(vi)
|
| 23 |
...
|
| 24 |
+
|
| 25 |
'''
|
| 26 |
|
| 27 |
# From facebook/m2m100-12B-avg-5-ckpt
|
|
|
|
| 131 |
|
| 132 |
def m2m_translate(Input_Text, from_lang, to_lang):
|
| 133 |
tokenizer.src_lang = lang_codes[from_lang]
|
|
|
|
| 134 |
encoded_from_lang = tokenizer(Input_Text, return_tensors="pt")
|
| 135 |
|
| 136 |
generated_tokens = model.generate(
|
|
|
|
| 144 |
return res[0]
|
| 145 |
|
| 146 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 147 |
iface = gr.Interface(
|
| 148 |
fn=m2m_translate,
|
| 149 |
|