Upload 20231029-tok.py
Browse files- 20231029-tok.py +9 -0
20231029-tok.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from convert_slow_tokenizer import MarianConverter
|
| 2 |
+
from transformers import AutoTokenizer
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
# Load the tokenizer for the Helsinki-NLP/opus-mt-fr-en checkpoint.
# use_fast=False is passed so the object handed to MarianConverter below is
# not an already-fast tokenizer (presumably the converter needs the slow,
# Python implementation -- confirm against convert_slow_tokenizer).
tokenizer = AutoTokenizer.from_pretrained("Helsinki-NLP/opus-mt-fr-en", use_fast=False)
|
| 6 |
+
# Convert the loaded tokenizer with index=0.
# NOTE(review): index=0 presumably selects the French (source) side, judging
# only by the "-fr" output filename used right after -- confirm against
# MarianConverter in convert_slow_tokenizer.
fast_tokenizer = MarianConverter(tokenizer, index=0).converted()
|
| 7 |
+
# Save the index=0 conversion result under a relative path.
# Plain string literal: the former f-prefix had no placeholders to interpolate.
fast_tokenizer.save("tokenizer-marian-base-fr.json")
|
| 8 |
+
# Convert the same loaded tokenizer again, this time with index=1; this
# rebinds fast_tokenizer, replacing the index=0 result.
# NOTE(review): index=1 presumably selects the English (target) side, judging
# only by the "-en" output filename used right after -- confirm against
# MarianConverter in convert_slow_tokenizer.
fast_tokenizer = MarianConverter(tokenizer, index=1).converted()
|
| 9 |
+
# Save the index=1 conversion result under a relative path.
# Plain string literal: the former f-prefix had no placeholders to interpolate.
fast_tokenizer.save("tokenizer-marian-base-en.json")
|