lhallee committed on
Commit
f708d7a
·
verified ·
1 Parent(s): 740a45c

Upload modeling_e1.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. modeling_e1.py +8 -2
modeling_e1.py CHANGED
@@ -404,8 +404,14 @@ PAD_TOKEN_ID = 0
404
 
405
 
406
  def get_tokenizer() -> Tokenizer:
407
- fname = os.path.join(os.path.dirname(__file__), "tokenizer.json")
408
- tokenizer: Tokenizer = Tokenizer.from_file(fname)
 
 
 
 
 
 
409
  assert tokenizer.padding["pad_id"] == PAD_TOKEN_ID, (
410
  f"Padding token id must be {PAD_TOKEN_ID}, but got {tokenizer.padding['pad_id']}"
411
  )
 
404
 
405
 
406
  def get_tokenizer() -> Tokenizer:
407
+ try:
408
+ fname = os.path.join(os.path.dirname(__file__), "tokenizer.json")
409
+ tokenizer: Tokenizer = Tokenizer.from_file(fname)
410
+ except:
411
+ print("E1 Tokenizer not found in local directory, downloading from Hugging Face")
412
+ from huggingface_hub import hf_hub_download
413
+ fname = hf_hub_download(repo_id="Synthyra/Profluent-E1-150M", filename="tokenizer.json")
414
+ tokenizer: Tokenizer = Tokenizer.from_file(fname)
415
  assert tokenizer.padding["pad_id"] == PAD_TOKEN_ID, (
416
  f"Padding token id must be {PAD_TOKEN_ID}, but got {tokenizer.padding['pad_id']}"
417
  )