# MarisUK's picture
# Maris AI model sync
# f440f03 verified
"""Tokenizer palīgfunkcijas."""
from __future__ import annotations
import logging
logger = logging.getLogger(__name__)
def count_tokens(text: str, model: str = "gpt2") -> int:
"""Skaitīt tokenu skaitu tekstā."""
try:
from transformers import AutoTokenizer # type: ignore
tokenizer = AutoTokenizer.from_pretrained(model)
return len(tokenizer.encode(text))
except Exception: # noqa: BLE001
# Aptuvena novērtēšana — vidēji 4 rakstzīmes uz tokenu
return max(1, len(text) // 4)