FlowRead / test_tok2.py
jobbler's picture
feat: Add support for switching models and the 27b-4a backend
93b9241
raw
history blame contribute delete
361 Bytes
import os
from transformers import AutoTokenizer
# Tokenizer probe: print the token strings produced for a prompt that ends in
# a blank line, first on its own and then with a continuation appended.
# NOTE(review): presumably used to compare how the trailing "\n\n" is split
# once text beginning with a space follows it -- confirm with the author.

# The Hugging Face access token is read from the HF_TOKEN environment
# variable; os.environ.get yields None when the variable is unset.
tokenizer = AutoTokenizer.from_pretrained(
    "google/gemma-2b",
    token=os.environ.get("HF_TOKEN"),
)

p = "Hello"
t = " world"  # starts with space

# Both samples share the same prefix: the prompt followed by two newlines.
prefix_text = p + "\n\n"
samples = (
    ("Pre:", prefix_text),
    ("Full:", prefix_text + t),
)

for label, text in samples:
    # Encode to ids, then map the ids back to token strings for display.
    ids = tokenizer(text)["input_ids"]
    print(label, tokenizer.convert_ids_to_tokens(ids))