bad-gpt / encoder.py
shamashel's picture
Switch from custom tokenizer to tiktoken
e3fc82f
raw
history blame contribute delete
263 Bytes
import tiktoken
_encoding = tiktoken.encoding_for_model('gpt-4')
_tokens = map(lambda x: str(x), _encoding.token_byte_values())
tokens = list(_tokens)
def encode(s: str):
    """Encode the string *s* with the module-level tiktoken encoding.

    Returns whatever ``_encoding.encode`` produces for *s*; any exception it
    raises propagates to the caller unchanged.
    """
    token_ids = _encoding.encode(s)
    return token_ids
def decode(l: list[int]):
    """Decode the token id list *l* with the module-level tiktoken encoding.

    Returns whatever ``_encoding.decode`` produces for *l*; any exception it
    raises propagates to the caller unchanged.
    """
    text = _encoding.decode(l)
    return text