"""
Cost Calculation Service

This service fetches pricing data from LiteLLM's GitHub repository and calculates
costs for token usage based on model names and token counts.
"""

import json
import logging
import re
import time
from functools import lru_cache
from typing import Any, Dict, Optional, Tuple

import requests

# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)

class CostCalculationService:
    """Service for calculating LLM costs based on token usage and model pricing.

    The LiteLLM price table is downloaded lazily on first use and then kept on
    the instance. If the download fails, a small built-in table is used instead
    and the download is retried only after ``_FETCH_RETRY_SECONDS`` have passed,
    so an offline process does not block on the network for every call.
    """

    LITELLM_PRICING_URL = "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json"

    # Pricing key used when a model name is empty or not recognised.
    _DEFAULT_MODEL = "gpt-5-mini"

    # Per-token prices applied when a pricing entry lacks the corresponding field.
    _DEFAULT_INPUT_COST_PER_TOKEN = 0.00000015
    _DEFAULT_OUTPUT_COST_PER_TOKEN = 0.0000006

    # Seconds to wait after a failed download before contacting GitHub again.
    _FETCH_RETRY_SECONDS = 300.0

    # (substrings, canonical pricing key) pairs, checked in order. More specific
    # names must come before their prefixes ("gpt-4o-mini" before "gpt-4o"
    # before "gpt-4").
    _MODEL_ALIASES = (
        (("gpt-5-mini",), "gpt-5-mini"),
        (("gpt-4o-mini",), "gpt-4o-mini"),
        (("gpt-4o",), "gpt-4o"),
        (("gpt-4",), "gpt-4"),
        (("gpt-3.5",), "gpt-3.5-turbo"),
        (("claude-3-5-sonnet", "claude-3.5-sonnet"), "claude-3-5-sonnet-20241022"),
        (("claude-3-haiku",), "claude-3-haiku-20240307"),
        (("claude-3-sonnet",), "claude-3-sonnet-20240229"),
        (("claude-3-opus",), "claude-3-opus-20240229"),
    )

    def __init__(self):
        # Live LiteLLM table; stays None until a download succeeds.
        self._pricing_data: Optional[Dict[str, Any]] = None
        # time.monotonic() value before which no new download is attempted.
        self._next_fetch_attempt: float = float("-inf")

    def _load_pricing(self) -> Tuple[Dict[str, Any], str]:
        """Return ``(pricing_table, source)`` where source is "litellm" or "fallback".

        A successful download is cached on the instance for its lifetime. A
        failed download is never cached as if it were live data; it only
        starts the retry cool-down.
        """
        if self._pricing_data is not None:
            return self._pricing_data, "litellm"

        if time.monotonic() < self._next_fetch_attempt:
            # Still inside the cool-down after a failed download.
            return self._get_fallback_pricing_data(), "fallback"

        try:
            response = requests.get(self.LITELLM_PRICING_URL, timeout=30)
            response.raise_for_status()
            pricing_data = response.json()
            if not isinstance(pricing_data, dict):
                raise ValueError(
                    f"unexpected pricing payload type: {type(pricing_data).__name__}"
                )
        except Exception as e:
            # Deliberately broad: pricing is best-effort and any failure here
            # must degrade to the built-in table rather than break the caller.
            self._next_fetch_attempt = time.monotonic() + self._FETCH_RETRY_SECONDS
            logger.error(f"Failed to fetch pricing data: {str(e)}")
            return self._get_fallback_pricing_data(), "fallback"

        self._pricing_data = pricing_data
        logger.info("Successfully fetched LiteLLM pricing data")
        return pricing_data, "litellm"

    def _fetch_pricing_data(self) -> Dict[str, Any]:
        """Fetch and cache pricing data from LiteLLM GitHub repository.

        Returns:
            The live LiteLLM table when available, otherwise the built-in
            fallback table.
        """
        return self._load_pricing()[0]

    def _get_fallback_pricing_data(self) -> Dict[str, Any]:
        """Return fallback pricing data if GitHub fetch fails.

        A fresh dict is built on every call, so callers may mutate the result.
        NOTE(review): the "gpt-5-mini" entry carries the same figures as
        "gpt-4o-mini" — confirm against current published pricing.
        """
        return {
            "gpt-5-mini": {
                "input_cost_per_token": 0.00000015,
                "output_cost_per_token": 0.0000006,
                "max_tokens": 128000,
                "max_input_tokens": 128000,
                "max_output_tokens": 16384,
                "litellm_provider": "openai",
                "mode": "chat",
                "supports_function_calling": True,
                "supports_vision": True,
                "supports_response_schema": True,
                "supports_prompt_caching": False,
                "supports_system_messages": True,
                "supports_tool_choice": True
            },
            "gpt-4o-mini": {
                "input_cost_per_token": 0.00000015,
                "output_cost_per_token": 0.0000006,
                "max_tokens": 128000,
                "max_input_tokens": 128000,
                "max_output_tokens": 16384,
                "litellm_provider": "openai",
                "mode": "chat",
                "supports_function_calling": True,
                "supports_vision": True,
                "supports_response_schema": True,
                "supports_prompt_caching": False,
                "supports_system_messages": True,
                "supports_tool_choice": True
            },
            "gpt-4o": {
                "input_cost_per_token": 0.0000025,
                "output_cost_per_token": 0.00001,
                "max_tokens": 128000,
                "max_input_tokens": 128000,
                "max_output_tokens": 16384,
                "litellm_provider": "openai",
                "mode": "chat",
                "supports_function_calling": True,
                "supports_vision": True,
                "supports_response_schema": True,
                "supports_prompt_caching": False,
                "supports_system_messages": True,
                "supports_tool_choice": True
            },
            "gpt-4": {
                "input_cost_per_token": 0.00003,
                "output_cost_per_token": 0.00006,
                "max_tokens": 8192,
                "max_input_tokens": 8192,
                "max_output_tokens": 4096,
                "litellm_provider": "openai",
                "mode": "chat",
                "supports_function_calling": True,
                "supports_vision": False,
                "supports_response_schema": False,
                "supports_prompt_caching": False,
                "supports_system_messages": True,
                "supports_tool_choice": True
            },
            "gpt-3.5-turbo": {
                "input_cost_per_token": 0.0000015,
                "output_cost_per_token": 0.000002,
                "max_tokens": 16385,
                "max_input_tokens": 16385,
                "max_output_tokens": 4096,
                "litellm_provider": "openai",
                "mode": "chat",
                "supports_function_calling": True,
                "supports_vision": False,
                "supports_response_schema": False,
                "supports_prompt_caching": False,
                "supports_system_messages": True,
                "supports_tool_choice": True
            },
            "claude-3-5-sonnet-20241022": {
                "input_cost_per_token": 0.000003,
                "output_cost_per_token": 0.000015,
                "max_tokens": 200000,
                "max_input_tokens": 200000,
                "max_output_tokens": 8192,
                "litellm_provider": "anthropic",
                "mode": "chat",
                "supports_function_calling": True,
                "supports_vision": True,
                "supports_response_schema": False,
                "supports_prompt_caching": True,
                "supports_system_messages": True,
                "supports_tool_choice": True
            },
            "claude-3-haiku-20240307": {
                "input_cost_per_token": 0.00000025,
                "output_cost_per_token": 0.00000125,
                "max_tokens": 200000,
                "max_input_tokens": 200000,
                "max_output_tokens": 4096,
                "litellm_provider": "anthropic",
                "mode": "chat",
                "supports_function_calling": True,
                "supports_vision": True,
                "supports_response_schema": False,
                "supports_prompt_caching": True,
                "supports_system_messages": True,
                "supports_tool_choice": True
            }
        }

    def _normalize_model_name(self, model_name: str) -> str:
        """Normalize model name to match pricing keys.

        Matching is by case-insensitive substring, so provider prefixes
        ("openai/gpt-4o") and version suffixes ("gpt-4o-2024-08-06") need no
        special handling. The name itself must not be stripped of "gpt-" or
        "claude-", because those are part of the substrings being searched for.

        Args:
            model_name: Model identifier as reported by the caller.

        Returns:
            The canonical pricing key, or the default model key when the name
            is empty or unrecognised.
        """
        if not model_name:
            return self._DEFAULT_MODEL

        model_lower = model_name.lower()
        for needles, canonical in self._MODEL_ALIASES:
            if any(needle in model_lower for needle in needles):
                return canonical

        return self._DEFAULT_MODEL

    @staticmethod
    def _find_model_pricing(
        pricing_data: Dict[str, Any],
        normalized_model: str
    ) -> Optional[Dict[str, Any]]:
        """Look up a pricing entry by exact key, then by substring in either direction."""
        if normalized_model in pricing_data:
            entry = pricing_data[normalized_model]
            return entry if isinstance(entry, dict) else None

        for price_model, entry in pricing_data.items():
            # Skip anything that is not a usable pricing record.
            if not isinstance(entry, dict) or not entry:
                continue
            key = str(price_model).lower()
            if normalized_model in key or key in normalized_model:
                return entry

        return None

    def calculate_cost(
        self,
        model_name: str,
        prompt_tokens: int,
        completion_tokens: int
    ) -> Dict[str, Any]:
        """
        Calculate cost for token usage.

        Args:
            model_name: Name of the model used
            prompt_tokens: Number of input/prompt tokens
            completion_tokens: Number of output/completion tokens

        Returns:
            Dictionary with cost information. ``pricing_source`` is "litellm"
            when the price came from the downloaded LiteLLM table, "fallback"
            when it came from the built-in table, and "error" when the
            calculation failed (all costs are then 0.0 and ``error`` holds the
            message). This method does not raise.
        """
        try:
            pricing_data, pricing_source = self._load_pricing()
            normalized_model = self._normalize_model_name(model_name)

            model_pricing = self._find_model_pricing(pricing_data, normalized_model)

            # Fallback to the built-in table (and ultimately the default model)
            if not model_pricing:
                fallback_data = self._get_fallback_pricing_data()
                if normalized_model not in fallback_data:
                    logger.warning(
                        "No pricing entry for model '%s'; using '%s' pricing",
                        normalized_model,
                        self._DEFAULT_MODEL,
                    )
                model_pricing = fallback_data.get(
                    normalized_model, fallback_data[self._DEFAULT_MODEL]
                )
                pricing_source = "fallback"

            # Extract pricing information; a JSON null counts as "missing",
            # while an explicit 0 (free model) is respected.
            input_cost_per_token = model_pricing.get("input_cost_per_token")
            if input_cost_per_token is None:
                input_cost_per_token = self._DEFAULT_INPUT_COST_PER_TOKEN
            output_cost_per_token = model_pricing.get("output_cost_per_token")
            if output_cost_per_token is None:
                output_cost_per_token = self._DEFAULT_OUTPUT_COST_PER_TOKEN

            # Calculate costs
            input_cost = prompt_tokens * input_cost_per_token
            output_cost = completion_tokens * output_cost_per_token
            total_cost = input_cost + output_cost

            # Extract model metadata for enhanced display
            model_metadata = {
                "max_tokens": model_pricing.get("max_tokens"),
                "max_input_tokens": model_pricing.get("max_input_tokens"),
                "max_output_tokens": model_pricing.get("max_output_tokens"),
                "litellm_provider": model_pricing.get("litellm_provider"),
                "mode": model_pricing.get("mode"),
                "supports_function_calling": model_pricing.get("supports_function_calling", False),
                "supports_vision": model_pricing.get("supports_vision", False),
                "supports_response_schema": model_pricing.get("supports_response_schema", False),
                "supports_prompt_caching": model_pricing.get("supports_prompt_caching", False),
                "supports_system_messages": model_pricing.get("supports_system_messages", False),
                "supports_tool_choice": model_pricing.get("supports_tool_choice", False),
            }

            return {
                "input_cost_usd": input_cost,
                "output_cost_usd": output_cost,
                "total_cost_usd": total_cost,
                "model_used": normalized_model,
                "pricing_source": pricing_source,
                "cost_per_1k_input_tokens": input_cost_per_token * 1000,
                "cost_per_1k_output_tokens": output_cost_per_token * 1000,
                "model_metadata": model_metadata
            }

        except Exception as e:
            logger.error(f"Error calculating cost: {str(e)}")
            # Same keys as the success result so consumers can read them
            # unconditionally.
            return {
                "input_cost_usd": 0.0,
                "output_cost_usd": 0.0,
                "total_cost_usd": 0.0,
                "model_used": model_name,
                "pricing_source": "error",
                "cost_per_1k_input_tokens": 0.0,
                "cost_per_1k_output_tokens": 0.0,
                "model_metadata": {},
                "error": str(e)
            }

    def calculate_trace_costs(self, schema_analytics: Dict[str, Any]) -> Dict[str, Any]:
        """
        Calculate comprehensive cost analysis for a trace.

        Args:
            schema_analytics: The schema analytics data from trace metadata

        Returns:
            Dictionary with comprehensive cost information: everything
            ``calculate_cost`` returns plus per-call averages, the call count
            and tokens-per-dollar. Returns ``{"error": ...}`` when no analytics
            are supplied or the calculation fails.
        """
        try:
            if not schema_analytics:
                return {"error": "No schema analytics data provided"}

            # `or {}` / `or 0` also cover keys that are present but null.
            overview = schema_analytics.get("numerical_overview") or {}
            token_analytics = overview.get("token_analytics") or {}
            prompt_analytics = schema_analytics.get("prompt_analytics") or {}

            total_prompt_tokens = token_analytics.get("total_prompt_tokens") or 0
            total_completion_tokens = token_analytics.get("total_completion_tokens") or 0
            prompt_calls = prompt_analytics.get("prompt_calls_detected") or 0

            # For now, assume gpt-5-mini as default model since we don't store model info in trace
            # In future versions, this could be enhanced to detect model from trace content
            cost_info = self.calculate_cost(
                self._DEFAULT_MODEL, total_prompt_tokens, total_completion_tokens
            )
            total_cost = cost_info["total_cost_usd"]
            total_tokens = total_prompt_tokens + total_completion_tokens

            # Averages are reported as 0 when no calls were detected.
            if prompt_calls > 0:
                avg_prompt_tokens = total_prompt_tokens / prompt_calls
                avg_completion_tokens = total_completion_tokens / prompt_calls
                avg_cost_per_call = total_cost / prompt_calls
            else:
                avg_prompt_tokens = avg_completion_tokens = avg_cost_per_call = 0

            return {
                **cost_info,
                "avg_prompt_tokens_per_call": round(avg_prompt_tokens, 1),
                "avg_completion_tokens_per_call": round(avg_completion_tokens, 1),
                "avg_cost_per_call_usd": avg_cost_per_call,
                "total_calls": prompt_calls,
                "cost_efficiency_tokens_per_dollar": total_tokens / total_cost if total_cost > 0 else 0
            }

        except Exception as e:
            logger.error(f"Error calculating trace costs: {str(e)}")
            return {"error": str(e)}

# Global instance, created when this module is imported. Construction does no
# network I/O: __init__ only initialises in-memory state.
cost_service = CostCalculationService()