Mini-LLM / Tokenizer /test_tokenizer.py
Ashx098's picture
Upload folder using huggingface_hub
a433a25 verified
raw
history blame contribute delete
290 Bytes
"""Smoke-test the tokenizer saved in the current directory.

Exercises tokenize / encode / decode round-trips on a sample string that
mixes plain words with special tokens (<user>, </s>).
"""
from transformers import AutoTokenizer

# Load the tokenizer artifacts from the current working directory.
tok = AutoTokenizer.from_pretrained(".")

# Define the sample text once and reuse it (the original duplicated the
# literal for the tokenize and encode calls).
text = "Hello world! <user> write code </s>"

# Subword segmentation: the token strings the tokenizer produces.
print(tok.tokenize(text))

# Round-trip: text -> ids -> text.
ids = tok.encode(text)
print(ids)
print(tok.decode(ids))
# Same decode, but with special tokens (e.g. </s>) stripped from the output.
print(tok.decode(ids, skip_special_tokens=True))