Anshul Prasad
object oriented programming support.
a5e4950
raw
history blame contribute delete
502 Bytes
class Tokenizer:
def __init__(self, model_name: str, encoder):
self.model_name = model_name
self.encoder = encoder
def count_tokens(self, text: str) -> int:
if not text:
return 0
return len(self.encoder.encode(text))
def trim_to_token_limit(self, text: str, max_tokens: int) -> str:
tokens = self.encoder.encode(text)
if len(tokens) <= max_tokens:
return text
return self.encoder.decode(tokens[:max_tokens])