Spaces:
Build error
Build error
| # token_counter.py | |
| import os | |
| import tiktoken | |
# Choose the encoding based on your model, e.g., 'cl100k_base' for OpenAI models.
# Built once at import time and used by count_tokens() in this module.
encoding = tiktoken.get_encoding("cl100k_base")
def count_tokens(text):
    """Return the number of tokens the module-level ``encoding`` yields for ``text``."""
    return len(encoding.encode(text))
class TokenCounter:
    """Track per-document token counts together with their running total.

    Counts are obtained from the module-level ``count_tokens`` function.
    """

    def __init__(self):
        """Start with no tracked documents and a total of zero."""
        self.total_tokens = 0  # kept equal to the sum of the values in doc_tokens
        self.doc_tokens = {}  # doc_id -> token count recorded for that document

    def add_document(self, doc_id, text):
        """Record the token count of ``text`` under ``doc_id``.

        Adding a ``doc_id`` that is already tracked replaces its previous
        count instead of adding on top of it, so the total never counts the
        same document twice.
        """
        num_tokens = count_tokens(text)
        # Whatever was stored before (0 for a new id) must leave the total,
        # otherwise re-adding a document would inflate total_tokens.
        previous = self.doc_tokens.get(doc_id, 0)
        self.doc_tokens[doc_id] = num_tokens
        self.total_tokens += num_tokens - previous

    def remove_document(self, doc_id):
        """Stop tracking ``doc_id`` and subtract its count from the total.

        Unknown ids are ignored.
        """
        self.total_tokens -= self.doc_tokens.pop(doc_id, 0)

    def get_total_tokens(self):
        """Return the combined token count of all tracked documents."""
        return self.total_tokens
class SimpleTokenCounter:
    """Counter that treats each whitespace-separated chunk of text as one token."""

    def count_tokens(self, text: str) -> int:
        """Return the number of whitespace-delimited chunks in ``text``."""
        words = text.split()
        return len(words)
class TikTokenCounter:
    """Token counter that uses the tiktoken encoding associated with a model."""

    def __init__(self, model_name: str = "gpt-4"):
        """Look up the tiktoken encoding for ``model_name`` and keep it on the instance."""
        # Function-scope import: tiktoken is resolved when an instance is created.
        import tiktoken

        model_encoding = tiktoken.encoding_for_model(model_name)
        self.encoding = model_encoding

    def count_tokens(self, text: str) -> int:
        """Return how many tokens ``self.encoding`` produces for ``text``."""
        token_ids = self.encoding.encode(text)
        return len(token_ids)