File size: 1,665 Bytes
b8e5043 a7c4301 6d49dc7 a7c4301 b8e5043 a7c4301 b8e5043 a7c4301 b8e5043 a7c4301 6d49dc7 b8e5043 a7c4301 b8e5043 a7c4301 b8e5043 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 | """Token counting utilities using tiktoken."""
from typing import Any
import tiktoken
# Module-level cache for the tiktoken encoding; filled in on first use.
_encoder = None


def _get_encoder():
    """Return the shared ``cl100k_base`` encoding, creating it on first call.

    The encoding object is stored in the module-level ``_encoder`` variable
    so later calls reuse it instead of calling ``tiktoken.get_encoding``
    again.
    """
    global _encoder
    if _encoder is not None:
        return _encoder
    _encoder = tiktoken.get_encoding("cl100k_base")
    return _encoder
def estimate_tokens(text: str) -> int:
    """Estimate the number of tokens in a text string.

    The text is encoded with the encoder returned by ``_get_encoder()`` and
    the number of resulting token ids is returned.

    Args:
        text: The string to measure. Empty (or otherwise falsy) input is
            counted as zero tokens without touching the encoder.

    Returns:
        The number of tokens the encoder produces for ``text``.
    """
    if not text:
        return 0
    # By default tiktoken raises ValueError when the input contains the text
    # of a special token (e.g. "<|endoftext|>"). An estimator must accept
    # arbitrary text, so disable that check and encode such sequences as
    # ordinary text.
    return len(_get_encoder().encode(text, disallowed_special=()))
# Approximate per-message cost of the role and message framing.
_MESSAGE_OVERHEAD_TOKENS = 4


def _estimate_block_tokens(block: Any) -> int:
    """Estimate the tokens of one entry in a list-valued ``content`` field."""
    if not isinstance(block, dict):
        # Non-dict entries (e.g. Anthropic SDK content block objects) are
        # measured by their string form.
        return estimate_tokens(str(block))

    block_type = block.get("type")
    if block_type == "text":
        # Treat a missing or None "text" value as empty text.
        return estimate_tokens(block.get("text") or "")
    if block_type == "tool_use":
        return estimate_tokens(str(block.get("input", {})))
    if block_type == "tool_result":
        return estimate_tokens(str(block.get("content", "")))
    # Unknown block types: fall back to the string form of the whole block.
    return estimate_tokens(str(block))


def estimate_messages_tokens(messages: list[dict[str, Any]]) -> int:
    """Estimate total tokens across a list of messages.

    Each message contributes its content tokens plus a small overhead
    for role and message framing (~4 tokens per message). The overhead is
    applied to every message, including those whose ``content`` is ``None``
    or of an unsupported type (such content adds no content tokens).

    Args:
        messages: Message dicts. ``content`` may be a string or a list of
            content blocks (``text``, ``tool_use``, ``tool_result`` dicts,
            or other objects, which are measured via ``str()``).

    Returns:
        The estimated token count summed over all messages; 0 for an empty
        list.
    """
    total = 0
    for msg in messages:
        # Framing overhead applies regardless of the content's type.
        total += _MESSAGE_OVERHEAD_TOKENS
        content = msg.get("content", "")
        if isinstance(content, str):
            total += estimate_tokens(content)
        elif isinstance(content, list):
            total += sum(_estimate_block_tokens(block) for block in content)
    return total
|