Spaces:

jkbennitt
/

felix-framework

Paused

felix-framework / src /llm /token_budget.py

jkbennitt

Clean hf-space branch and prepare for HuggingFace Spaces deployment

fb867c3 7 months ago

15.2 kB

	"""
	Token Budget Management for Felix Framework.

	This module implements adaptive token allocation based on agent position
	in the helix, enabling efficient resource usage without truncating content.

	Key Features:
	- Agent-type-aware token budgets (fixed for research/critic, growing with depth for analysis/synthesis)
	- Stage-aware allocation to prevent overconsumption
	- Content compression guidance for iterative refinement
	- Adaptive prompting based on helix depth

	Mathematical Foundation:
	- Per-stage allocation depends on agent type and helix depth
	- Analysis and synthesis budgets grow with depth; research and critic budgets stay constant
	- Maintains quality while improving efficiency
	"""

	import math
	from typing import Dict, Any, Optional
	from dataclasses import dataclass


	@dataclass
	class TokenAllocation:
	    """Token allocation details for an agent at a specific position.

	    Plain value object returned by the budget manager for one processing
	    stage. Field order is part of the interface (positional construction).
	    """

	    stage_budget: int  # Tokens granted for the current processing stage
	    remaining_budget: int  # Tokens the agent still has left overall
	    total_budget: int  # Original total budget assigned to the agent
	    compression_ratio: float  # Suggested compression of earlier-stage content
	    style_guidance: str  # Recommended output style (prompt text)
	    depth_ratio: float  # Agent's current depth in the helix


	class TokenBudgetManager:
	    """
	    Manage token budgets for agents based on type, helix position and stage.

	    Every agent is registered with a total budget derived from its type.
	    For each processing stage the manager hands out a slice of what is
	    left: research and critic agents get a constant per-stage base,
	    analysis and synthesis agents get a base that grows with helix depth.
	    The slice is then limited by the agent's own per-stage cap and by its
	    remaining budget.
	    """

	    # Strict-mode total budgets per agent type (lightweight models).
	    _STRICT_TYPE_BUDGETS = {
	        "research": 400,
	        "analysis": 350,
	        "synthesis": 300,
	        "critic": 250,
	    }
	    _STRICT_DEFAULT_BUDGET = 350

	    # Non-strict mode: multiplier applied to ``base_budget`` per agent type.
	    _TYPE_MULTIPLIERS = {
	        "research": 1.2,  # More tokens for exploration
	        "analysis": 1.0,  # Standard allocation
	        "synthesis": 0.8,  # Fewer tokens for focused synthesis
	        "critic": 0.9,  # Slightly fewer for critique
	    }

	    # Strict-mode hard per-stage limits per agent type.
	    _STRICT_STAGE_LIMITS = {
	        "research": 150,
	        "analysis": 120,
	        "synthesis": 100,
	        "critic": 80,
	    }

	    # (style, detail level, format example) used to build prompt guidance.
	    _STYLE_PROFILES = {
	        "research": (
	            "bullet points",
	            "5-10 factual bullet points with sources",
	            "• Fact 1 (Source)\n• Fact 2 (Source)",
	        ),
	        "analysis": (
	            "structured analysis",
	            "numbered insights with connections",
	            "1. Key insight\n2. Connection to other data\n3. Implications",
	        ),
	        "synthesis": (
	            "comprehensive narrative",
	            "complete output with introduction, body, and conclusion",
	            "Introduction paragraph, main content with sections, conclusion",
	        ),
	        "critic": (
	            "focused critique",
	            "specific feedback points with suggestions",
	            "Strengths: ...\nWeaknesses: ...\nSuggestions: ...",
	        ),
	    }
	    _DEFAULT_STYLE_PROFILE = (
	        "structured response",
	        "clear organization with main points",
	        "Main points with supporting details",
	    )

	    # Strict mode reports the performance target as met while the total
	    # number of tokens used across all agents stays below this value.
	    STRICT_MODE_TOKEN_TARGET = 2000

	    def __init__(self, base_budget: int = 1000, min_budget: int = 200,
	                 max_budget: int = 800, strict_mode: bool = False):
	        """
	        Initialize token budget manager.

	        Args:
	            base_budget: Base token allocation per agent
	            min_budget: Minimum tokens for any single stage
	            max_budget: Maximum tokens for any single stage
	            strict_mode: Enable strict token budgets for lightweight models
	        """
	        self.base_budget = base_budget
	        self.min_budget = min_budget
	        self.max_budget = max_budget
	        self.strict_mode = strict_mode

	        # Track agent budgets
	        self._agent_budgets: Dict[str, int] = {}  # remaining budget per agent
	        self._agent_total_used: Dict[str, int] = {}  # total used per agent
	        self._agent_types: Dict[str, str] = {}  # agent type mapping
	        # Original allocation per agent; kept separately because the
	        # remaining budget is clamped at zero and cannot be used to
	        # reconstruct the total after an overspend.
	        self._agent_total_budgets: Dict[str, int] = {}
	        # Per-agent per-stage cap, so one agent's limit never affects another.
	        self._agent_stage_caps: Dict[str, int] = {}

	    def initialize_agent_budget(self, agent_id: str, agent_type: str,
	                                max_tokens_per_stage: Optional[int] = None) -> int:
	        """
	        Initialize total budget for an agent based on type.

	        Re-initializing an existing agent resets its usage and budget.

	        Args:
	            agent_id: Agent identifier
	            agent_type: Type of agent (research, analysis, synthesis, etc.)
	            max_tokens_per_stage: Optional per-stage cap for this agent only;
	                the effective cap is the smallest of this value, the
	                manager's ``max_budget`` and (in strict mode) the strict
	                limit for the agent type

	        Returns:
	            Total allocated budget for the agent
	        """
	        if self.strict_mode:
	            # Strict budgets for lightweight models (much lower)
	            total_budget = self._STRICT_TYPE_BUDGETS.get(
	                agent_type, self._STRICT_DEFAULT_BUDGET
	            )
	        else:
	            # Multiplier-based system; unknown types get the plain base budget
	            multiplier = self._TYPE_MULTIPLIERS.get(agent_type, 1.0)
	            total_budget = int(self.base_budget * multiplier)

	        # Work out this agent's own per-stage cap. The shared
	        # ``self.max_budget`` is deliberately left untouched: lowering it
	        # here would silently tighten the limit for every other agent.
	        stage_cap = self.max_budget
	        if self.strict_mode and agent_type in self._STRICT_STAGE_LIMITS:
	            stage_cap = min(stage_cap, self._STRICT_STAGE_LIMITS[agent_type])
	        if max_tokens_per_stage is not None:
	            stage_cap = min(stage_cap, max_tokens_per_stage)

	        self._agent_budgets[agent_id] = total_budget
	        self._agent_total_used[agent_id] = 0
	        self._agent_types[agent_id] = agent_type
	        self._agent_total_budgets[agent_id] = total_budget
	        self._agent_stage_caps[agent_id] = max(0, stage_cap)

	        return total_budget

	    def calculate_stage_allocation(self, agent_id: str, depth_ratio: float,
	                                   stage: int) -> "TokenAllocation":
	        """
	        Calculate token allocation for a specific processing stage.

	        Args:
	            agent_id: Agent identifier
	            depth_ratio: Current depth in helix (0.0 = top, 1.0 = bottom)
	            stage: Current processing stage number (1-based)

	        Returns:
	            TokenAllocation with budget and guidance information

	        Raises:
	            ValueError: If the agent has not been initialized
	        """
	        if agent_id not in self._agent_budgets:
	            raise ValueError(f"Agent {agent_id} not initialized")

	        remaining_budget = self._agent_budgets[agent_id]
	        agent_type = self._agent_types[agent_id]
	        # Fall back to remaining + used only for agents registered without
	        # a stored total (e.g. state injected directly into the dicts).
	        original_budget = self._agent_total_budgets.get(
	            agent_id, remaining_budget + self._agent_total_used[agent_id]
	        )

	        # Calculate stage budget based on agent type, position and remaining allocation
	        proposed = self._calculate_position_budget(
	            agent_type, depth_ratio, remaining_budget, stage
	        )
	        stage_budget = self._clamp_stage_budget(agent_id, proposed, remaining_budget)

	        # Calculate compression and style guidance based on agent type
	        compression_ratio = self._calculate_compression_ratio(agent_type, depth_ratio, stage)
	        style_guidance = self._generate_style_guidance(agent_type, depth_ratio, stage_budget)

	        return TokenAllocation(
	            stage_budget=stage_budget,
	            remaining_budget=remaining_budget,
	            total_budget=original_budget,
	            compression_ratio=compression_ratio,
	            style_guidance=style_guidance,
	            depth_ratio=depth_ratio
	        )

	    def _clamp_stage_budget(self, agent_id: str, stage_budget: int,
	                            remaining_budget: int) -> int:
	        """
	        Apply floor, per-agent cap and remaining-budget limit to a stage budget.

	        The cap takes precedence over the ``min_budget`` floor: when the
	        floor is larger than the cap (as with the strict-mode limits and the
	        default ``min_budget``) the result is the cap, not the floor.
	        """
	        cap = self._agent_stage_caps.get(agent_id, self.max_budget)
	        floored = max(self.min_budget, stage_budget)
	        return min(floored, cap, remaining_budget)

	    def record_usage(self, agent_id: str, tokens_used: int) -> None:
	        """
	        Record token usage for an agent.

	        Args:
	            agent_id: Agent identifier
	            tokens_used: Number of tokens consumed in last operation

	        Raises:
	            ValueError: If the agent has not been initialized
	        """
	        if agent_id not in self._agent_budgets:
	            raise ValueError(f"Agent {agent_id} not initialized")

	        self._agent_total_used[agent_id] += tokens_used
	        # Remaining budget never goes below zero, even on overspend
	        self._agent_budgets[agent_id] = max(
	            0, self._agent_budgets[agent_id] - tokens_used
	        )

	    def _calculate_position_budget(self, agent_type: str, depth_ratio: float,
	                                   remaining_budget: int, stage: int) -> int:
	        """
	        Calculate token budget based on agent type, helix position and stage.

	        The result is floored at ``min_budget`` as the very last step, so it
	        may exceed the remaining-budget share computed here; the caller is
	        responsible for the final cap.
	        """
	        if agent_type == "research":
	            # Research agents: small fixed budget for bullet points
	            base_budget = 150
	            position_factor = 1.0
	        elif agent_type == "analysis":
	            # Analysis agents: medium budget that grows slightly with depth
	            base_budget = 250
	            position_factor = 0.8 + (depth_ratio * 0.4)  # 0.8 to 1.2
	        elif agent_type == "synthesis":
	            # Synthesis agents: large budget that grows significantly with depth
	            base_budget = 400
	            position_factor = 0.6 + (depth_ratio * 0.8)  # 0.6 to 1.4
	        elif agent_type == "critic":
	            # Critic agents: small fixed budget for focused feedback
	            base_budget = 100
	            position_factor = 1.0
	        else:
	            # Default fallback
	            base_budget = 300
	            position_factor = 1.0

	        stage_budget = int(base_budget * position_factor)

	        # Spread what is left over the stages estimated to remain
	        # (4 at the top of the helix down to 1 at the bottom).
	        estimated_remaining_stages = max(1, int((1.0 - depth_ratio) * 3) + 1)
	        max_from_remaining = remaining_budget // estimated_remaining_stages
	        stage_budget = min(stage_budget, max_from_remaining)

	        # Apply progressive reduction for strict mode
	        if self.strict_mode:
	            stage_budget = self._apply_progressive_reduction(stage_budget, stage)

	        return max(self.min_budget, stage_budget)

	    def _apply_progressive_reduction(self, stage_budget: int, stage: int) -> int:
	        """Apply progressive token reduction based on stage number."""
	        if stage <= 2:
	            # Stages 1-2: 100% of budget
	            reduction_factor = 1.0
	        elif stage <= 4:
	            # Stages 3-4: 75% of budget
	            reduction_factor = 0.75
	        else:
	            # Stages 5+: 50% of budget
	            reduction_factor = 0.50

	        return int(stage_budget * reduction_factor)

	    def _calculate_compression_ratio(self, agent_type: str, depth_ratio: float, stage: int) -> float:
	        """
	        Calculate suggested compression ratio for content refinement.

	        Starts from a per-type base (depth-dependent for analysis and
	        synthesis), adds a small per-stage increment and caps the result
	        at 0.9.
	        """
	        if agent_type == "research":
	            # Research agents always highly compressed (bullet points)
	            base_compression = 0.7
	        elif agent_type == "analysis":
	            # Analysis agents moderately compressed (structured lists)
	            base_compression = 0.5 + (depth_ratio * 0.2)  # 0.5 to 0.7
	        elif agent_type == "synthesis":
	            # Synthesis agents less compressed (need space for integration)
	            base_compression = 0.1 + (depth_ratio * 0.2)  # 0.1 to 0.3
	        elif agent_type == "critic":
	            # Critic agents highly compressed (focused feedback)
	            base_compression = 0.8
	        else:
	            # Default fallback
	            base_compression = 0.5

	        # Slight increase with processing stages (but much less for synthesis)
	        if agent_type == "synthesis":
	            stage_factor = min(stage * 0.02, 0.1)
	        else:
	            stage_factor = min(stage * 0.05, 0.2)

	        return min(base_compression + stage_factor, 0.9)

	    def _generate_style_guidance(self, agent_type: str, depth_ratio: float, token_budget: int) -> str:
	        """
	        Generate style guidance based on agent type and budget.

	        ``depth_ratio`` is accepted for interface compatibility but does not
	        influence the generated text.
	        """
	        # Convert tokens to approximate word count (1 token ≈ 0.75 words)
	        word_limit = int(token_budget * 0.75)

	        style, detail_level, format_example = self._STYLE_PROFILES.get(
	            agent_type, self._DEFAULT_STYLE_PROFILE
	        )

	        if self.strict_mode:
	            return (
	                f"⚠️ HARD LIMIT: {token_budget} tokens ({word_limit} words) MAX - "
	                f"OUTPUT WILL BE CUT OFF IF EXCEEDED! Use {style} format: {detail_level}. "
	                f"Format: {format_example}. COUNT YOUR WORDS AND STAY UNDER {word_limit}!"
	            )
	        return (
	            f"TARGET: ~{token_budget} tokens (~{word_limit} words). "
	            f"Use {style} format: {detail_level}. "
	            f"Example structure: {format_example}. "
	            f"Aim for approximately {word_limit} words."
	        )

	    def get_agent_status(self, agent_id: str) -> Optional[Dict[str, Any]]:
	        """
	        Get current budget status for an agent.

	        Returns:
	            ``None`` for an unknown agent, otherwise a dict with the agent's
	            total budget, tokens used, tokens remaining and usage ratio.
	            The usage ratio exceeds 1.0 when the agent has overspent.
	        """
	        if agent_id not in self._agent_budgets:
	            return None

	        remaining = self._agent_budgets[agent_id]
	        used = self._agent_total_used[agent_id]
	        # Use the stored allocation; remaining + used overstates the total
	        # once an overspend has been clamped to zero remaining.
	        total = self._agent_total_budgets.get(agent_id, remaining + used)

	        return {
	            "agent_id": agent_id,
	            "total_budget": total,
	            "tokens_used": used,
	            "tokens_remaining": remaining,
	            "usage_ratio": used / total if total > 0 else 0.0
	        }

	    def get_system_status(self) -> Dict[str, Any]:
	        """Get overall system token usage status across all registered agents."""
	        agent_statuses = [
	            self.get_agent_status(agent_id) for agent_id in self._agent_budgets
	        ]

	        total_allocated = sum(status["total_budget"] for status in agent_statuses)
	        total_used = sum(status["tokens_used"] for status in agent_statuses)
	        total_remaining = sum(status["tokens_remaining"] for status in agent_statuses)

	        # The performance target only applies in strict mode
	        if self.strict_mode:
	            target_met = total_used < self.STRICT_MODE_TOKEN_TARGET
	        else:
	            target_met = True

	        return {
	            "total_agents": len(self._agent_budgets),
	            "total_allocated": total_allocated,
	            "total_used": total_used,
	            "total_remaining": total_remaining,
	            "system_efficiency": total_used / total_allocated if total_allocated > 0 else 0.0,
	            "agent_statuses": agent_statuses,
	            "strict_mode": self.strict_mode,
	            "performance_target_met": target_met
	        }