Spaces:
Sleeping
Sleeping
| """ | |
| Cost Calculation Service | |
| This service fetches pricing data from LiteLLM's GitHub repository and calculates | |
| costs for token usage based on model names and token counts. | |
| """ | |
| import json | |
| import logging | |
| import requests | |
| from typing import Dict, Any, Optional, Tuple | |
| from functools import lru_cache | |
| import re | |
| logger = logging.getLogger(__name__) | |
class CostCalculationService:
    """Service for calculating LLM costs based on token usage and model pricing.

    Pricing is sourced from LiteLLM's public model-price map on GitHub; if the
    fetch fails, a bundled fallback table of common OpenAI/Anthropic models is
    used instead. Successfully fetched pricing is cached on the instance so
    repeated cost calculations do not re-issue HTTP requests.
    """

    # Raw JSON price map maintained by the LiteLLM project.
    LITELLM_PRICING_URL = "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json"

    def __init__(self):
        # Cache for the fetched pricing table; populated on first successful fetch.
        self._pricing_data: Optional[Dict[str, Any]] = None

    def _fetch_pricing_data(self) -> Dict[str, Any]:
        """Fetch and cache pricing data from LiteLLM GitHub repository.

        Returns:
            Mapping of model name -> pricing/metadata dict. On network or
            parse failure the bundled fallback table is returned instead
            (and NOT cached, so a later call can retry the network fetch).
        """
        # Bug fix: the previous version declared a cache in __init__ but never
        # used it, so every cost calculation triggered a fresh HTTP request.
        if self._pricing_data is not None:
            return self._pricing_data
        try:
            response = requests.get(self.LITELLM_PRICING_URL, timeout=30)
            response.raise_for_status()
            self._pricing_data = response.json()
            logger.info("Successfully fetched LiteLLM pricing data")
            return self._pricing_data
        except Exception as e:
            logger.error(f"Failed to fetch pricing data: {str(e)}")
            # Return fallback pricing data
            return self._get_fallback_pricing_data()

    def _get_fallback_pricing_data(self) -> Dict[str, Any]:
        """Return fallback pricing data if GitHub fetch fails."""
        return {
            "gpt-5-mini": {
                "input_cost_per_token": 0.00000015,
                "output_cost_per_token": 0.0000006,
                "max_tokens": 128000,
                "max_input_tokens": 128000,
                "max_output_tokens": 16384,
                "litellm_provider": "openai",
                "mode": "chat",
                "supports_function_calling": True,
                "supports_vision": True,
                "supports_response_schema": True,
                "supports_prompt_caching": False,
                "supports_system_messages": True,
                "supports_tool_choice": True
            },
            "gpt-4o-mini": {
                "input_cost_per_token": 0.00000015,
                "output_cost_per_token": 0.0000006,
                "max_tokens": 128000,
                "max_input_tokens": 128000,
                "max_output_tokens": 16384,
                "litellm_provider": "openai",
                "mode": "chat",
                "supports_function_calling": True,
                "supports_vision": True,
                "supports_response_schema": True,
                "supports_prompt_caching": False,
                "supports_system_messages": True,
                "supports_tool_choice": True
            },
            "gpt-4o": {
                "input_cost_per_token": 0.0000025,
                "output_cost_per_token": 0.00001,
                "max_tokens": 128000,
                "max_input_tokens": 128000,
                "max_output_tokens": 16384,
                "litellm_provider": "openai",
                "mode": "chat",
                "supports_function_calling": True,
                "supports_vision": True,
                "supports_response_schema": True,
                "supports_prompt_caching": False,
                "supports_system_messages": True,
                "supports_tool_choice": True
            },
            "gpt-4": {
                "input_cost_per_token": 0.00003,
                "output_cost_per_token": 0.00006,
                "max_tokens": 8192,
                "max_input_tokens": 8192,
                "max_output_tokens": 4096,
                "litellm_provider": "openai",
                "mode": "chat",
                "supports_function_calling": True,
                "supports_vision": False,
                "supports_response_schema": False,
                "supports_prompt_caching": False,
                "supports_system_messages": True,
                "supports_tool_choice": True
            },
            "gpt-3.5-turbo": {
                "input_cost_per_token": 0.0000015,
                "output_cost_per_token": 0.000002,
                "max_tokens": 16385,
                "max_input_tokens": 16385,
                "max_output_tokens": 4096,
                "litellm_provider": "openai",
                "mode": "chat",
                "supports_function_calling": True,
                "supports_vision": False,
                "supports_response_schema": False,
                "supports_prompt_caching": False,
                "supports_system_messages": True,
                "supports_tool_choice": True
            },
            "claude-3-5-sonnet-20241022": {
                "input_cost_per_token": 0.000003,
                "output_cost_per_token": 0.000015,
                "max_tokens": 200000,
                "max_input_tokens": 200000,
                "max_output_tokens": 8192,
                "litellm_provider": "anthropic",
                "mode": "chat",
                "supports_function_calling": True,
                "supports_vision": True,
                "supports_response_schema": False,
                "supports_prompt_caching": True,
                "supports_system_messages": True,
                "supports_tool_choice": True
            },
            "claude-3-haiku-20240307": {
                "input_cost_per_token": 0.00000025,
                "output_cost_per_token": 0.00000125,
                "max_tokens": 200000,
                "max_input_tokens": 200000,
                "max_output_tokens": 4096,
                "litellm_provider": "anthropic",
                "mode": "chat",
                "supports_function_calling": True,
                "supports_vision": True,
                "supports_response_schema": False,
                "supports_prompt_caching": True,
                "supports_system_messages": True,
                "supports_tool_choice": True
            }
        }

    def _normalize_model_name(self, model_name: str) -> str:
        """Normalize a raw model name to one of the known pricing keys.

        Args:
            model_name: Model identifier as reported by the caller, possibly
                prefixed with a provider (e.g. "openai/gpt-4o").

        Returns:
            A canonical key usable against the pricing tables; defaults to
            "gpt-5-mini" when the name is empty or unrecognized.
        """
        if not model_name:
            return "gpt-5-mini"  # Default fallback
        model_lower = model_name.lower()
        # Strip provider prefixes only. Bug fix: the previous pattern also
        # stripped "gpt-"/"claude-", turning e.g. "gpt-4o" into "4o" so none
        # of the substring checks below could ever match an unprefixed name.
        model_lower = re.sub(r'^(openai/|anthropic/)', '', model_lower)
        # Handle GPT models (most specific names checked first).
        if "gpt-5-mini" in model_lower:
            return "gpt-5-mini"
        elif "gpt-4o-mini" in model_lower:
            return "gpt-4o-mini"
        elif "gpt-4o" in model_lower:
            return "gpt-4o"
        elif "gpt-4" in model_lower:
            return "gpt-4"
        elif "gpt-3.5" in model_lower:
            return "gpt-3.5-turbo"
        # Handle Claude models
        elif "claude-3-5-sonnet" in model_lower or "claude-3.5-sonnet" in model_lower:
            return "claude-3-5-sonnet-20241022"
        elif "claude-3-haiku" in model_lower:
            return "claude-3-haiku-20240307"
        elif "claude-3-sonnet" in model_lower:
            return "claude-3-sonnet-20240229"
        elif "claude-3-opus" in model_lower:
            return "claude-3-opus-20240229"
        # Default fallback
        return "gpt-5-mini"

    def calculate_cost(
        self,
        model_name: str,
        prompt_tokens: int,
        completion_tokens: int
    ) -> Dict[str, Any]:
        """
        Calculate cost for token usage.

        Args:
            model_name: Name of the model used
            prompt_tokens: Number of input/prompt tokens
            completion_tokens: Number of output/completion tokens

        Returns:
            Dictionary with cost information (per-direction and total USD
            costs, normalized model name, pricing source, per-1k-token rates,
            and model metadata). On unexpected failure a zero-cost dict with
            "pricing_source": "error" and an "error" message is returned.
        """
        try:
            pricing_data = self._fetch_pricing_data()
            normalized_model = self._normalize_model_name(model_name)
            # Find pricing for the model
            model_pricing = None
            # First try exact match
            if normalized_model in pricing_data:
                model_pricing = pricing_data[normalized_model]
            else:
                # Try to find similar model in pricing data (substring match
                # in either direction, e.g. dated vs. undated model names).
                for price_model in pricing_data.keys():
                    if normalized_model in price_model.lower() or price_model.lower() in normalized_model:
                        model_pricing = pricing_data[price_model]
                        break
            # Fallback to default model if not found
            if not model_pricing:
                fallback_data = self._get_fallback_pricing_data()
                model_pricing = fallback_data.get(normalized_model, fallback_data["gpt-5-mini"])
            # Extract pricing information (defaults match gpt-5-mini rates).
            input_cost_per_token = model_pricing.get("input_cost_per_token", 0.00000015)
            output_cost_per_token = model_pricing.get("output_cost_per_token", 0.0000006)
            # Calculate costs
            input_cost = prompt_tokens * input_cost_per_token
            output_cost = completion_tokens * output_cost_per_token
            total_cost = input_cost + output_cost
            # Extract model metadata for enhanced display
            model_metadata = {
                "max_tokens": model_pricing.get("max_tokens"),
                "max_input_tokens": model_pricing.get("max_input_tokens"),
                "max_output_tokens": model_pricing.get("max_output_tokens"),
                "litellm_provider": model_pricing.get("litellm_provider"),
                "mode": model_pricing.get("mode"),
                "supports_function_calling": model_pricing.get("supports_function_calling", False),
                "supports_vision": model_pricing.get("supports_vision", False),
                "supports_response_schema": model_pricing.get("supports_response_schema", False),
                "supports_prompt_caching": model_pricing.get("supports_prompt_caching", False),
                "supports_system_messages": model_pricing.get("supports_system_messages", False),
                "supports_tool_choice": model_pricing.get("supports_tool_choice", False),
            }
            return {
                "input_cost_usd": input_cost,
                "output_cost_usd": output_cost,
                "total_cost_usd": total_cost,
                "model_used": normalized_model,
                "pricing_source": "litellm" if normalized_model in pricing_data else "fallback",
                "cost_per_1k_input_tokens": input_cost_per_token * 1000,
                "cost_per_1k_output_tokens": output_cost_per_token * 1000,
                "model_metadata": model_metadata
            }
        except Exception as e:
            logger.error(f"Error calculating cost: {str(e)}")
            return {
                "input_cost_usd": 0.0,
                "output_cost_usd": 0.0,
                "total_cost_usd": 0.0,
                "model_used": model_name,
                "pricing_source": "error",
                "error": str(e)
            }

    def calculate_trace_costs(self, schema_analytics: Dict[str, Any]) -> Dict[str, Any]:
        """
        Calculate comprehensive cost analysis for a trace.

        Args:
            schema_analytics: The schema analytics data from trace metadata

        Returns:
            Dictionary with comprehensive cost information: everything from
            calculate_cost() plus per-call averages, total call count, and a
            tokens-per-dollar efficiency figure. Returns {"error": ...} when
            input is missing or an unexpected failure occurs.
        """
        try:
            if not schema_analytics:
                return {"error": "No schema analytics data provided"}
            token_analytics = schema_analytics.get("numerical_overview", {}).get("token_analytics", {})
            prompt_analytics = schema_analytics.get("prompt_analytics", {})
            total_prompt_tokens = token_analytics.get("total_prompt_tokens", 0)
            total_completion_tokens = token_analytics.get("total_completion_tokens", 0)
            prompt_calls = prompt_analytics.get("prompt_calls_detected", 0)
            # For now, assume gpt-5-mini as default model since we don't store model info in trace
            # In future versions, this could be enhanced to detect model from trace content
            default_model = "gpt-5-mini"
            cost_info = self.calculate_cost(default_model, total_prompt_tokens, total_completion_tokens)
            # Calculate averages (guard against zero calls to avoid ZeroDivisionError).
            avg_prompt_tokens = total_prompt_tokens / prompt_calls if prompt_calls > 0 else 0
            avg_completion_tokens = total_completion_tokens / prompt_calls if prompt_calls > 0 else 0
            avg_cost_per_call = cost_info["total_cost_usd"] / prompt_calls if prompt_calls > 0 else 0
            return {
                **cost_info,
                "avg_prompt_tokens_per_call": round(avg_prompt_tokens, 1),
                "avg_completion_tokens_per_call": round(avg_completion_tokens, 1),
                "avg_cost_per_call_usd": avg_cost_per_call,
                "total_calls": prompt_calls,
                "cost_efficiency_tokens_per_dollar": (total_prompt_tokens + total_completion_tokens) / cost_info["total_cost_usd"] if cost_info["total_cost_usd"] > 0 else 0
            }
        except Exception as e:
            logger.error(f"Error calculating trace costs: {str(e)}")
            return {"error": str(e)}
# Global instance
# Module-level singleton: importers share this one object so any cached
# pricing data is reused across the process.
cost_service = CostCalculationService()