File size: 1,665 Bytes
b8e5043
a7c4301
6d49dc7
 
a7c4301
 
 
b8e5043
a7c4301
 
b8e5043
 
 
 
 
a7c4301
 
b8e5043
 
 
a7c4301
 
6d49dc7
b8e5043
a7c4301
b8e5043
 
a7c4301
b8e5043
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
"""Token counting utilities using tiktoken."""

from typing import Any

import tiktoken


# Module-level slot for the shared tiktoken encoder; filled in by
# _get_encoder() the first time it is called.
_encoder = None


def _get_encoder():
    """Return the shared ``cl100k_base`` encoder, creating it on first use.

    The encoder is stored in the module-level ``_encoder`` variable so that
    later calls reuse the same object instead of calling
    ``tiktoken.get_encoding`` again.
    """
    global _encoder
    if _encoder is not None:
        return _encoder
    # First call: build the encoder and remember it for subsequent callers.
    _encoder = tiktoken.get_encoding("cl100k_base")
    return _encoder


def estimate_tokens(text: str) -> int:
    """Estimate the number of tokens in a text string.

    The text is tokenized with the encoder returned by ``_get_encoder()``
    and the number of resulting tokens is returned.

    Args:
        text: The text to measure. Empty (or ``None``) input counts as 0
            tokens without touching the encoder.

    Returns:
        The number of tokens the encoder produces for ``text``.
    """
    if not text:
        return 0
    # Plain ``encode()`` raises ValueError by default when the text contains
    # the literal spelling of a special token (e.g. "<|endoftext|>").  For
    # counting purposes such text should simply be tokenized as ordinary
    # text, which is what ``encode_ordinary`` does.
    return len(_get_encoder().encode_ordinary(text))


# Approximate per-message cost of role and message framing.
_MESSAGE_OVERHEAD_TOKENS = 4


def _estimate_block_tokens(block: Any) -> int:
    """Estimate the tokens contributed by one entry of a list-valued content."""
    if not isinstance(block, dict):
        # Non-dict entries (e.g. SDK content block objects) are measured via
        # their string representation.
        return estimate_tokens(str(block))

    block_type = block.get("type")
    if block_type == "text":
        # ``or ""`` also covers an explicit ``"text": None`` so the tokenizer
        # is never handed a non-string.
        return estimate_tokens(block.get("text") or "")
    if block_type == "tool_use":
        return estimate_tokens(str(block.get("input", {})))
    if block_type == "tool_result":
        return estimate_tokens(str(block.get("content", "")))
    # Unknown block type: fall back to the string form of the whole block.
    return estimate_tokens(str(block))


def estimate_messages_tokens(messages: list[dict[str, Any]]) -> int:
    """Estimate total tokens across a list of messages.

    Each message contributes its content tokens plus a small overhead
    for role and message framing (~4 tokens per message).

    Content is handled according to its type:

    * ``str`` -- tokenized directly.
    * ``list`` -- treated as content blocks. ``text`` blocks count their
      ``text``; ``tool_use`` blocks count ``str()`` of their ``input``;
      ``tool_result`` blocks count ``str()`` of their ``content``; any other
      block counts ``str()`` of the block itself.
    * anything else (for example ``None``) -- contributes only the
      per-message overhead.

    Args:
        messages: Message dicts; only the ``"content"`` key is read.

    Returns:
        The estimated token total for all messages (0 for an empty list).
    """
    total = 0
    for msg in messages:
        # The framing overhead applies to every message, including ones whose
        # content is missing or ``None``.
        total += _MESSAGE_OVERHEAD_TOKENS
        content = msg.get("content", "")
        if isinstance(content, str):
            total += estimate_tokens(content)
        elif isinstance(content, list):
            total += sum(_estimate_block_tokens(block) for block in content)
    return total