Upload modeling_e1.py with huggingface_hub
Browse files- modeling_e1.py +8 -2
modeling_e1.py
CHANGED
|
@@ -404,8 +404,14 @@ PAD_TOKEN_ID = 0
|
|
| 404 |
|
| 405 |
|
| 406 |
def get_tokenizer() -> Tokenizer:
|
| 407 |
-
|
| 408 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 409 |
assert tokenizer.padding["pad_id"] == PAD_TOKEN_ID, (
|
| 410 |
f"Padding token id must be {PAD_TOKEN_ID}, but got {tokenizer.padding['pad_id']}"
|
| 411 |
)
|
|
|
|
| 404 |
|
| 405 |
|
| 406 |
def get_tokenizer() -> Tokenizer:
|
| 407 |
+
try:
|
| 408 |
+
fname = os.path.join(os.path.dirname(__file__), "tokenizer.json")
|
| 409 |
+
tokenizer: Tokenizer = Tokenizer.from_file(fname)
|
| 410 |
+
except:
|
| 411 |
+
print("E1 Tokenizer not found in local directory, downloading from Hugging Face")
|
| 412 |
+
from huggingface_hub import hf_hub_download
|
| 413 |
+
fname = hf_hub_download(repo_id="Synthyra/Profluent-E1-150M", filename="tokenizer.json")
|
| 414 |
+
tokenizer: Tokenizer = Tokenizer.from_file(fname)
|
| 415 |
assert tokenizer.padding["pad_id"] == PAD_TOKEN_ID, (
|
| 416 |
f"Padding token id must be {PAD_TOKEN_ID}, but got {tokenizer.padding['pad_id']}"
|
| 417 |
)
|