File size: 502 Bytes
class Tokenizer:
    """Thin wrapper around a token encoder for counting and trimming text.

    The *encoder* is expected to expose ``encode(str) -> list[int]`` and
    ``decode(list[int]) -> str`` (tiktoken-style interface) — assumed from
    usage below; confirm against the caller.
    """

    def __init__(self, model_name: str, encoder):
        # model_name is stored for callers' bookkeeping; it is not used
        # by the counting/trimming logic itself.
        self.model_name = model_name
        self.encoder = encoder

    def count_tokens(self, text: str) -> int:
        """Return the number of tokens in *text*; 0 for empty input.

        The empty-string early-out avoids calling the encoder at all.
        """
        if not text:
            return 0
        return len(self.encoder.encode(text))

    def trim_to_token_limit(self, text: str, max_tokens: int) -> str:
        """Return *text* cut down to at most *max_tokens* tokens.

        Fix: a non-positive *max_tokens* now returns "" explicitly.
        Previously a negative value fell through to ``tokens[:max_tokens]``,
        a negative slice that dropped tokens from the END and returned
        nearly all of the text instead of an empty result.
        """
        if max_tokens <= 0:
            return ""
        # Mirror count_tokens: skip the encoder round-trip for empty input.
        if not text:
            return text
        tokens = self.encoder.encode(text)
        if len(tokens) <= max_tokens:
            # Already within budget — return the original string unchanged
            # rather than a decode() round-trip, which could alter it.
            return text
        return self.encoder.decode(tokens[:max_tokens])