File size: 502 Bytes
a5e4950
 
 
 
13f54fc
a5e4950
 
 
 
13f54fc
e59f184
a5e4950
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
class Tokenizer:
    """Thin wrapper around a token encoder for a named model.

    Adds token-counting and truncation helpers on top of an encoder
    object that exposes ``encode(text) -> sequence`` and
    ``decode(tokens) -> str``.
    """

    def __init__(self, model_name: str, encoder):
        # Identifier of the model this encoder targets.
        self.model_name = model_name
        # Object providing encode()/decode(); its concrete type is
        # supplied by the caller — assumed tiktoken-like, TODO confirm.
        self.encoder = encoder

    def count_tokens(self, text: str) -> int:
        """Return how many tokens *text* encodes to (0 for empty input)."""
        if text:
            return len(self.encoder.encode(text))
        return 0

    def trim_to_token_limit(self, text: str, max_tokens: int) -> str:
        """Return *text* reduced so it encodes to at most *max_tokens* tokens.

        Input already within the limit comes back unchanged; otherwise the
        token sequence is truncated and decoded back into a string.
        """
        encoded = self.encoder.encode(text)
        if len(encoded) > max_tokens:
            return self.encoder.decode(encoded[:max_tokens])
        return text