snote / utils.py
xuanbao01's picture
Upload folder using huggingface_hub
44c5827 verified
import tiktoken
# Prefer the cl100k_base encoding and fall back to gpt2 when loading it fails.
# Catch Exception rather than using a bare ``except`` so that KeyboardInterrupt
# and SystemExit raised while loading still propagate instead of silently
# triggering the fallback.
# NOTE: after a fallback, counts come from the gpt2 encoding instead.
try:
    tokenizer = tiktoken.get_encoding("cl100k_base")
except Exception:
    tokenizer = tiktoken.get_encoding("gpt2")
def count_tokens(text: str) -> int:
    """Return the number of tokens in ``text`` using the module-level tokenizer.

    Special-token markers that appear literally in ``text`` (for example
    ``<|endoftext|>``) are encoded as ordinary text rather than raising
    ``ValueError``, so input containing such markers can still be counted.

    Args:
        text: The string to tokenize.

    Returns:
        The length of the token sequence produced by ``tokenizer.encode``.
    """
    # tiktoken rejects special-token text by default
    # (disallowed_special="all"); an empty tuple disables that check.
    return len(tokenizer.encode(text, disallowed_special=()))