Update README.md
Browse files
README.md
CHANGED
|
@@ -110,10 +110,14 @@ tags:
|
|
| 110 |
- tico19
|
| 111 |
- gmnlp/tico19
|
| 112 |
- tatoeba
|
|
|
|
| 113 |
datasets:
|
| 114 |
- tico19
|
| 115 |
- flores101
|
| 116 |
- tatoeba
|
|
|
|
|
|
|
|
|
|
| 117 |
---
|
| 118 |
|
| 119 |
From: https://huggingface.co/alirezamsh/small100
|
|
@@ -302,7 +306,8 @@ encoded_list=[]
|
|
| 302 |
for text in raw_list:
|
| 303 |
encoded_list.append(tokenizer.convert_ids_to_tokens(tokenizer.encode(text)))
|
| 304 |
|
| 305 |
-
#
|
|
|
|
| 306 |
translated_list=translator.translate_batch(encoded_list,target_prefix=[target_language_token]*len(raw_list))
|
| 307 |
|
| 308 |
#decode
|
|
@@ -357,5 +362,4 @@ translated_list=[tokenizer.decode(tokenizer.convert_tokens_to_ids(token.hypothes
|
|
| 357 |
#output
|
| 358 |
for text in translated_list:
|
| 359 |
print(text)
|
| 360 |
-
```
|
| 361 |
-
|
|
|
|
| 110 |
- tico19
|
| 111 |
- gmnlp/tico19
|
| 112 |
- tatoeba
|
| 113 |
+
- nmt
|
| 114 |
datasets:
|
| 115 |
- tico19
|
| 116 |
- flores101
|
| 117 |
- tatoeba
|
| 118 |
+
base_model:
|
| 119 |
+
- alirezamsh/small100
|
| 120 |
+
pipeline_tag: translation
|
| 121 |
---
|
| 122 |
|
| 123 |
From: https://huggingface.co/alirezamsh/small100
|
|
|
|
| 306 |
for text in raw_list:
|
| 307 |
encoded_list.append(tokenizer.convert_ids_to_tokens(tokenizer.encode(text)))
|
| 308 |
|
| 309 |
+
#translate
|
| 310 |
+
#https://opennmt.net/CTranslate2/python/ctranslate2.Translator.html?#ctranslate2.Translator.translate_batch
|
| 311 |
translated_list=translator.translate_batch(encoded_list,target_prefix=[target_language_token]*len(raw_list))
|
| 312 |
|
| 313 |
#decode
|
|
|
|
| 362 |
#output
|
| 363 |
for text in translated_list:
|
| 364 |
print(text)
|
| 365 |
+
```
|
|
|