nioushasadjadi
committed on
Commit
·
eb1e311
1
Parent(s):
f15abb2
Download the tokenizer from Hugging Face Hub
Browse files- tokenizer.py +3 -1
tokenizer.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
from transformers import PreTrainedTokenizer
|
|
|
|
| 2 |
import json
|
| 3 |
import os
|
| 4 |
from itertools import product
|
|
@@ -109,7 +110,8 @@ class KmerTokenizer(PreTrainedTokenizer):
|
|
| 109 |
@classmethod
|
| 110 |
def from_pretrained(cls, pretrained_dir, **kwargs):
|
| 111 |
# Load vocabulary
|
| 112 |
-
vocab_file =
|
|
|
|
| 113 |
with open(vocab_file, "r", encoding="utf-8") as f:
|
| 114 |
vocab_content = json.load(f)
|
| 115 |
vocab = vocab_content["model"]["vocab"]
|
|
|
|
| 1 |
from transformers import PreTrainedTokenizer
|
| 2 |
+
from huggingface_hub import hf_hub_download
|
| 3 |
import json
|
| 4 |
import os
|
| 5 |
from itertools import product
|
|
|
|
| 110 |
@classmethod
|
| 111 |
def from_pretrained(cls, pretrained_dir, **kwargs):
|
| 112 |
# Load vocabulary
|
| 113 |
+
vocab_file = hf_hub_download(repo_id=pretrained_dir, filename="tokenizer.json")
|
| 114 |
+
# vocab_file = os.path.join(pretrained_dir, "tokenizer.json")
|
| 115 |
with open(vocab_file, "r", encoding="utf-8") as f:
|
| 116 |
vocab_content = json.load(f)
|
| 117 |
vocab = vocab_content["model"]["vocab"]
|