| """Tokenizer palīgfunkcijas.""" | |
| from __future__ import annotations | |
| import logging | |
logger = logging.getLogger(__name__)

# Tokenizers already loaded, keyed by model name, so repeated calls to
# count_tokens() do not reload the same tokenizer every time.
_TOKENIZER_CACHE: dict = {}

# Rough average number of characters per token used by the fallback estimate.
_CHARS_PER_TOKEN = 4


def _get_tokenizer(model: str):
    """Return the tokenizer for *model*, loading it only on first use."""
    tokenizer = _TOKENIZER_CACHE.get(model)
    if tokenizer is None:
        # Imported lazily so that `transformers` stays an optional dependency.
        from transformers import AutoTokenizer  # type: ignore

        tokenizer = AutoTokenizer.from_pretrained(model)
        _TOKENIZER_CACHE[model] = tokenizer
    return tokenizer


def count_tokens(text: str, model: str = "gpt2") -> int:
    """Count the number of tokens in *text*.

    The count comes from the ``transformers`` tokenizer for *model* when it
    can be imported and loaded. If anything in that path fails, the function
    falls back to an approximation of one token per four characters.

    Args:
        text: The text to measure.
        model: Name passed to ``AutoTokenizer.from_pretrained``.

    Returns:
        The length of the tokenizer's encoding of *text*, or, on fallback,
        ``len(text) // 4`` with a minimum of 1 (so empty text yields 1).
    """
    try:
        return len(_get_tokenizer(model).encode(text))
    except Exception:  # noqa: BLE001
        # Deliberately best-effort: any failure (missing package, unknown
        # model, encode error) degrades to the estimate instead of raising.
        # Record why, so an approximate count is distinguishable in the logs.
        logger.debug(
            "Tokenizer for model %r unavailable; using approximate token count",
            model,
            exc_info=True,
        )
        # Approximate estimate: on average 4 characters per token.
        return max(1, len(text) // _CHARS_PER_TOKEN)