Spaces:

e2dew32
/

Entelechy

Configuration error

Entelechy / utils /token_counter.py

qa296

refactor: standardize type hints and improve null safety across codebase

6d49dc7 2 months ago

1.67 kB

	"""Token counting utilities using tiktoken."""

	from typing import Any

	import tiktoken


	# Module-level cache for the tiktoken encoder; stays None until first use.
	_encoder = None


	def _get_encoder():
	    """Return the shared ``cl100k_base`` encoder, creating it on first call.

	    The encoder is stored in the module-level ``_encoder`` variable so that
	    ``tiktoken.get_encoding`` is invoked at most once per process.
	    """
	    global _encoder
	    if _encoder is not None:
	        return _encoder
	    _encoder = tiktoken.get_encoding("cl100k_base")
	    return _encoder


	def estimate_tokens(text: str) -> int:
	    """Estimate the number of tokens in a text string.

	    The text is tokenized with the encoder returned by ``_get_encoder()``.
	    Special-token markers that happen to appear in *text* (for example
	    ``<|endoftext|>``) are counted as ordinary text instead of raising
	    ``ValueError``, which is what plain ``encode`` does by default.

	    Args:
	        text: The string to measure. Empty (or ``None``) input counts as
	            zero tokens and does not load the encoder.

	    Returns:
	        The number of tokens the encoder produces for *text*.
	    """
	    if not text:
	        return 0
	    # encode_ordinary is documented as equivalent to
	    # encode(text, disallowed_special=()), so counts for text without
	    # special-token markers are unchanged.
	    return len(_get_encoder().encode_ordinary(text))


	def _estimate_block_tokens(block: Any) -> int:
	    """Estimate the tokens contributed by a single content block."""
	    if not isinstance(block, dict):
	        # Non-dict blocks (e.g. Anthropic SDK content block objects) are
	        # measured through their str() representation.
	        return estimate_tokens(str(block))

	    block_type = block.get("type")
	    if block_type == "text":
	        # ``or ""`` guards against an explicit ``"text": None`` entry.
	        return estimate_tokens(block.get("text") or "")
	    if block_type == "tool_use":
	        return estimate_tokens(str(block.get("input", {})))
	    if block_type == "tool_result":
	        return estimate_tokens(str(block.get("content", "")))
	    # Unknown block type: measure the whole dict's str() form.
	    return estimate_tokens(str(block))


	def estimate_messages_tokens(messages: list[dict[str, Any]]) -> int:
	    """Estimate total tokens across a list of messages.

	    Each message contributes its content tokens plus a small overhead
	    for role and message framing (~4 tokens per message).

	    Content may be a plain string or a list of content blocks
	    (``text``, ``tool_use``, ``tool_result`` dicts, or other objects).
	    A message whose content is neither a string nor a list (for example
	    ``None``) contributes only the framing overhead.

	    Args:
	        messages: Message dicts; only the ``"content"`` key is read.

	    Returns:
	        The estimated token count for all messages combined.
	    """
	    per_message_overhead = 4
	    total = 0
	    for msg in messages:
	        content = msg.get("content", "")
	        if isinstance(content, str):
	            total += estimate_tokens(content)
	        elif isinstance(content, list):
	            total += sum(_estimate_block_tokens(block) for block in content)
	        # Framing is charged once per message regardless of content shape.
	        total += per_message_overhead
	    return total