JiRackTokenizer / TestTokenizerSize.py
kgrabko's picture
Rename TestTokenizer.py to TestTokenizerSize.py
c3b0962 verified
raw
history blame contribute delete
194 Bytes
from transformers import AutoTokenizer
# Load the tokenizer from the files in the current working directory.
tok = AutoTokenizer.from_pretrained(".")

# len(tok) is reported rather than tok.vocab_size so that any tokens added
# on top of the base vocabulary are counted as well.
print("Vocab size:", len(tok))

# Report the ids of the special tokens, one per line, labelled by the
# attribute name they were read from.
for attr in ("pad_token_id", "eos_token_id"):
    print(f"{attr}:", getattr(tok, attr))