ColettoG commited on
Commit
92f2b7d
·
1 Parent(s): 21d8407
requirements.txt CHANGED
@@ -1,6 +1,6 @@
1
  python-dotenv==1.0.0
2
- google-generativeai>=0.8.0
3
- langchain-google-genai>=2.0.0
4
  langchain-core>=0.2.43
5
  fastapi==0.109.2
6
  uvicorn==0.27.1
@@ -13,6 +13,9 @@ langgraph-supervisor>=0.0.1
13
  langchain>=0.2.0
14
  langchain-community>=0.2.0
15
 
 
 
 
16
 
17
  # Testing dependencies
18
  pytest==8.0.0
@@ -25,9 +28,13 @@ numpy>=1.24.0
25
  pandas>=2.0.0
26
 
27
  # Monitoring and logging
28
- structlog>=23.0.0
29
- prometheus-client>=0.17.0
30
  langsmith>=0.1.0
 
 
 
 
31
 
32
  # Database (optional for production)
33
  sqlalchemy>=2.0.0
 
1
  python-dotenv==1.0.0
2
+ google-genai>=1.0.0
3
+ langchain-google-genai>=4.1.0
4
  langchain-core>=0.2.43
5
  fastapi==0.109.2
6
  uvicorn==0.27.1
 
13
  langchain>=0.2.0
14
  langchain-community>=0.2.0
15
 
16
+ # Multi-provider LLM support
17
+ langchain-openai>=0.2.0
18
+ langchain-anthropic>=0.2.0
19
 
20
  # Testing dependencies
21
  pytest==8.0.0
 
28
  pandas>=2.0.0
29
 
30
  # Monitoring and logging
31
+ structlog>=24.0.0
32
+ prometheus-client>=0.20.0
33
  langsmith>=0.1.0
34
+ colorlog>=6.8.0
35
+
36
+ # Rate limiting
37
+ slowapi>=0.1.9
38
 
39
  # Database (optional for production)
40
  sqlalchemy>=2.0.0
src/agents/base_agent.py DELETED
@@ -1,238 +0,0 @@
1
- from abc import ABC, abstractmethod
2
- from typing import Dict, Any, List, Optional
3
- import logging
4
- from datetime import datetime
5
-
6
- from langchain_core.messages import HumanMessage, SystemMessage, AIMessage
7
- from langchain_core.language_models import BaseLanguageModel
8
-
9
- from src.models.chatMessage import ChatMessage, AgentResponse, AgentType, MessageRole
10
- from src.agents.config import Config
11
-
12
-
13
- class BaseAgent(ABC):
14
- """Base class for all agents in the multi-agent system"""
15
-
16
- def __init__(self, name: str, agent_type: AgentType, llm: BaseLanguageModel, description: str = ""):
17
- self.name = name
18
- self.agent_type = agent_type
19
- self.llm = llm
20
- self.description = description
21
- self.logger = logging.getLogger(f"{__name__}.{self.__class__.__name__}")
22
-
23
- # Agent state
24
- self.is_active = True
25
- self.created_at = datetime.utcnow()
26
- self.last_used = None
27
- self.usage_count = 0
28
-
29
- # Performance metrics
30
- self.response_times = []
31
- self.success_count = 0
32
- self.error_count = 0
33
-
34
- self.logger.info(f"Initialized agent {name} of type {agent_type}")
35
-
36
- @abstractmethod
37
- async def process_message(self, message: str, context: Dict[str, Any] = None) -> AgentResponse:
38
- """Process a message and return a response"""
39
- pass
40
-
41
- @abstractmethod
42
- def get_capabilities(self) -> List[str]:
43
- """Return list of agent capabilities"""
44
- pass
45
-
46
- def can_handle(self, message: str, context: Dict[str, Any] = None) -> bool:
47
- """Determine if this agent can handle the given message"""
48
- # Default implementation - can be overridden by subclasses
49
- return True
50
-
51
- def get_confidence_score(self, message: str, context: Dict[str, Any] = None) -> float:
52
- """Get confidence score for handling this message (0.0 to 1.0)"""
53
- # Default implementation - can be overridden by subclasses
54
- return 0.5
55
-
56
- def prepare_context_messages(self, context: Dict[str, Any] = None) -> List[SystemMessage]:
57
- """Prepare context messages for the LLM"""
58
- context_messages = []
59
-
60
- if context:
61
- # Add relevant context information
62
- if context.get("crypto_related"):
63
- context_messages.append(SystemMessage(
64
- content="This conversation involves cryptocurrency-related topics."
65
- ))
66
-
67
- if context.get("user_preferences"):
68
- context_messages.append(SystemMessage(
69
- content=f"User preferences: {context['user_preferences']}"
70
- ))
71
-
72
- if context.get("conversation_history"):
73
- context_messages.append(SystemMessage(
74
- content=f"Previous context: {context['conversation_history']}"
75
- ))
76
-
77
- return context_messages
78
-
79
- def create_agent_response(
80
- self,
81
- content: str,
82
- success: bool = True,
83
- error_message: Optional[str] = None,
84
- metadata: Dict[str, Any] = None,
85
- tools_used: List[str] = None,
86
- next_agent: Optional[str] = None,
87
- requires_followup: bool = False
88
- ) -> AgentResponse:
89
- """Create a standardized agent response"""
90
- return AgentResponse(
91
- content=content,
92
- agent_name=self.name,
93
- agent_type=self.agent_type,
94
- success=success,
95
- error_message=error_message,
96
- metadata=metadata or {},
97
- tools_used=tools_used or [],
98
- next_agent=next_agent,
99
- requires_followup=requires_followup,
100
- timestamp=datetime.utcnow()
101
- )
102
-
103
- def update_metrics(self, response_time: float, success: bool):
104
- """Update agent performance metrics"""
105
- self.response_times.append(response_time)
106
- self.last_used = datetime.utcnow()
107
- self.usage_count += 1
108
-
109
- if success:
110
- self.success_count += 1
111
- else:
112
- self.error_count += 1
113
-
114
- # Keep only last 100 response times
115
- if len(self.response_times) > 100:
116
- self.response_times = self.response_times[-100:]
117
-
118
- def get_performance_metrics(self) -> Dict[str, Any]:
119
- """Get agent performance metrics"""
120
- avg_response_time = sum(self.response_times) / len(self.response_times) if self.response_times else 0
121
- success_rate = self.success_count / self.usage_count if self.usage_count > 0 else 0
122
-
123
- return {
124
- "name": self.name,
125
- "agent_type": self.agent_type.value,
126
- "usage_count": self.usage_count,
127
- "success_count": self.success_count,
128
- "error_count": self.error_count,
129
- "success_rate": success_rate,
130
- "average_response_time": avg_response_time,
131
- "last_used": self.last_used.isoformat() if self.last_used else None,
132
- "is_active": self.is_active
133
- }
134
-
135
- def activate(self):
136
- """Activate the agent"""
137
- self.is_active = True
138
- self.logger.info(f"Agent {self.name} activated")
139
-
140
- def deactivate(self):
141
- """Deactivate the agent"""
142
- self.is_active = False
143
- self.logger.info(f"Agent {self.name} deactivated")
144
-
145
- def reset_metrics(self):
146
- """Reset performance metrics"""
147
- self.response_times = []
148
- self.success_count = 0
149
- self.error_count = 0
150
- self.usage_count = 0
151
- self.logger.info(f"Reset metrics for agent {self.name}")
152
-
153
- def get_agent_info(self) -> Dict[str, Any]:
154
- """Get comprehensive agent information"""
155
- return {
156
- "name": self.name,
157
- "type": self.agent_type.value,
158
- "description": self.description,
159
- "capabilities": self.get_capabilities(),
160
- "is_active": self.is_active,
161
- "created_at": self.created_at.isoformat(),
162
- "last_used": self.last_used.isoformat() if self.last_used else None,
163
- "performance_metrics": self.get_performance_metrics()
164
- }
165
-
166
-
167
- class AgentRegistry:
168
- """Registry for managing all agents in the system"""
169
-
170
- def __init__(self):
171
- self.agents: Dict[str, BaseAgent] = {}
172
- self.logger = logging.getLogger(__name__)
173
-
174
- def register_agent(self, agent: BaseAgent) -> None:
175
- """Register an agent"""
176
- if agent.name in self.agents:
177
- self.logger.warning(f"Agent {agent.name} already registered, overwriting")
178
-
179
- self.agents[agent.name] = agent
180
- self.logger.info(f"Registered agent {agent.name}")
181
-
182
- def unregister_agent(self, agent_name: str) -> bool:
183
- """Unregister an agent"""
184
- if agent_name in self.agents:
185
- del self.agents[agent_name]
186
- self.logger.info(f"Unregistered agent {agent_name}")
187
- return True
188
- return False
189
-
190
- def get_agent(self, agent_name: str) -> Optional[BaseAgent]:
191
- """Get agent by name"""
192
- return self.agents.get(agent_name)
193
-
194
- def get_active_agents(self) -> List[BaseAgent]:
195
- """Get all active agents"""
196
- return [agent for agent in self.agents.values() if agent.is_active]
197
-
198
- def get_agents_by_type(self, agent_type: AgentType) -> List[BaseAgent]:
199
- """Get agents by type"""
200
- return [agent for agent in self.agents.values() if agent.agent_type == agent_type]
201
-
202
- def find_best_agent(self, message: str, context: Dict[str, Any] = None) -> Optional[BaseAgent]:
203
- """Find the best agent to handle a message"""
204
- best_agent = None
205
- best_score = 0.0
206
-
207
- for agent in self.get_active_agents():
208
- if agent.can_handle(message, context):
209
- confidence = agent.get_confidence_score(message, context)
210
- if confidence > best_score:
211
- best_score = confidence
212
- best_agent = agent
213
-
214
- return best_agent
215
-
216
- def get_all_agents_info(self) -> List[Dict[str, Any]]:
217
- """Get information about all agents"""
218
- return [agent.get_agent_info() for agent in self.agents.values()]
219
-
220
- def get_agent_performance_summary(self) -> Dict[str, Any]:
221
- """Get performance summary for all agents"""
222
- total_agents = len(self.agents)
223
- active_agents = len(self.get_active_agents())
224
- total_usage = sum(agent.usage_count for agent in self.agents.values())
225
- total_success = sum(agent.success_count for agent in self.agents.values())
226
-
227
- return {
228
- "total_agents": total_agents,
229
- "active_agents": active_agents,
230
- "total_usage": total_usage,
231
- "total_success": total_success,
232
- "overall_success_rate": total_success / total_usage if total_usage > 0 else 0,
233
- "agents": self.get_all_agents_info()
234
- }
235
-
236
-
237
- # Global agent registry
238
- agent_registry = AgentRegistry()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/agents/config.py CHANGED
@@ -1,25 +1,34 @@
1
  import os
 
 
2
  from dotenv import load_dotenv
3
- from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
4
- from typing import Optional
 
 
5
 
6
  load_dotenv()
7
 
8
- gemini_api_key = os.getenv("GEMINI_API_KEY")
9
- if not gemini_api_key:
10
- raise ValueError("GEMINI_API_KEY não encontrada nas variáveis de ambiente")
11
 
12
  class Config:
13
- # Model configuration
14
- GEMINI_MODEL = "gemini-2.5-flash"
15
- GEMINI_EMBEDDING_MODEL = "models/embedding-001"
16
- GEMINI_API_KEY = gemini_api_key
17
-
 
 
 
 
 
18
  # Application configuration
19
  MAX_UPLOAD_LENGTH = 16 * 1024 * 1024
20
- MAX_CONVERSATION_LENGTH = 100 # Maximum messages per conversation
21
- MAX_CONTEXT_MESSAGES = 10 # Maximum messages to include in context
22
-
23
  # Agent configuration
24
  AGENTS_CONFIG = {
25
  "agents": [
@@ -28,84 +37,143 @@ class Config:
28
  "description": "Handles cryptocurrency-related queries",
29
  "type": "specialized",
30
  "enabled": True,
31
- "priority": 1
32
  },
33
  {
34
  "name": "general",
35
  "description": "Handles general conversation and queries",
36
  "type": "general",
37
  "enabled": True,
38
- "priority": 2
39
- }
40
  ]
41
  }
42
-
43
  # LangGraph configuration
44
  LANGGRAPH_CONFIG = {
45
  "max_iterations": 10,
46
  "timeout": 30,
47
  "memory_window": 10,
48
- "enable_memory": True
49
  }
50
-
51
  # Conversation configuration
52
  CONVERSATION_CONFIG = {
53
  "default_user_id": "anonymous",
54
  "max_conversations_per_user": 50,
55
  "conversation_timeout_hours": 24,
56
- "enable_context_extraction": True
57
  }
58
-
59
- # LLM instances (singleton pattern)
60
- _llm_instance: Optional[ChatGoogleGenerativeAI] = None
61
- _embeddings_instance: Optional[GoogleGenerativeAIEmbeddings] = None
62
-
 
63
  @classmethod
64
- def get_llm(cls) -> ChatGoogleGenerativeAI:
65
- """Get or create LLM instance (singleton)"""
66
- if cls._llm_instance is None:
67
- cls._llm_instance = ChatGoogleGenerativeAI(
68
- model=cls.GEMINI_MODEL,
69
- temperature=0.7,
70
- google_api_key=cls.GEMINI_API_KEY
71
- )
72
- return cls._llm_instance
73
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
  @classmethod
75
  def get_embeddings(cls) -> GoogleGenerativeAIEmbeddings:
76
- """Get or create embeddings instance (singleton)"""
77
  if cls._embeddings_instance is None:
78
  cls._embeddings_instance = GoogleGenerativeAIEmbeddings(
79
- model=cls.GEMINI_EMBEDDING_MODEL,
80
- google_api_key=cls.GEMINI_API_KEY
81
  )
82
  return cls._embeddings_instance
83
-
 
 
 
 
 
 
 
84
  @classmethod
85
- def get_agent_config(cls, agent_name: str) -> Optional[dict]:
86
- """Get configuration for a specific agent"""
87
  for agent in cls.AGENTS_CONFIG["agents"]:
88
  if agent["name"] == agent_name:
89
  return agent
90
  return None
91
-
92
  @classmethod
93
- def get_enabled_agents(cls) -> list:
94
- """Get list of enabled agents"""
95
  return [
96
- agent for agent in cls.AGENTS_CONFIG["agents"]
 
97
  if agent.get("enabled", True)
98
  ]
99
-
 
 
 
 
 
 
 
 
 
 
100
  @classmethod
101
  def validate_config(cls) -> bool:
102
- """Validate configuration"""
103
  try:
104
- # Test LLM connection
105
- llm = cls.get_llm()
106
- # Test embeddings connection
107
  embeddings = cls.get_embeddings()
108
  return True
109
  except Exception as e:
110
  print(f"Configuration validation failed: {e}")
111
- return False
 
 
 
 
 
 
 
 
 
 
1
  import os
2
+ from typing import Literal
3
+
4
  from dotenv import load_dotenv
5
+ from langchain_core.language_models import BaseChatModel
6
+ from langchain_google_genai import GoogleGenerativeAIEmbeddings
7
+
8
+ from src.llm import LLMFactory, CostTrackingCallback
9
 
10
  load_dotenv()
11
 
12
+ # Type alias for providers
13
+ Provider = Literal["google", "openai", "anthropic"]
14
+
15
 
16
  class Config:
17
+ """Application configuration with multi-provider LLM support."""
18
+
19
+ # Default model configuration
20
+ DEFAULT_MODEL = os.getenv("DEFAULT_LLM_MODEL", "gemini-3-pro-preview")
21
+ DEFAULT_TEMPERATURE = float(os.getenv("DEFAULT_LLM_TEMPERATURE", "0.7"))
22
+ DEFAULT_PROVIDER: Provider = "google"
23
+
24
+ # Embedding configuration
25
+ EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL", "models/embedding-001")
26
+
27
  # Application configuration
28
  MAX_UPLOAD_LENGTH = 16 * 1024 * 1024
29
+ MAX_CONVERSATION_LENGTH = 100
30
+ MAX_CONTEXT_MESSAGES = 10
31
+
32
  # Agent configuration
33
  AGENTS_CONFIG = {
34
  "agents": [
 
37
  "description": "Handles cryptocurrency-related queries",
38
  "type": "specialized",
39
  "enabled": True,
40
+ "priority": 1,
41
  },
42
  {
43
  "name": "general",
44
  "description": "Handles general conversation and queries",
45
  "type": "general",
46
  "enabled": True,
47
+ "priority": 2,
48
+ },
49
  ]
50
  }
51
+
52
  # LangGraph configuration
53
  LANGGRAPH_CONFIG = {
54
  "max_iterations": 10,
55
  "timeout": 30,
56
  "memory_window": 10,
57
+ "enable_memory": True,
58
  }
59
+
60
  # Conversation configuration
61
  CONVERSATION_CONFIG = {
62
  "default_user_id": "anonymous",
63
  "max_conversations_per_user": 50,
64
  "conversation_timeout_hours": 24,
65
+ "enable_context_extraction": True,
66
  }
67
+
68
+ # Instance caches
69
+ _llm_instance: BaseChatModel | None = None
70
+ _embeddings_instance: GoogleGenerativeAIEmbeddings | None = None
71
+ _cost_tracker: CostTrackingCallback | None = None
72
+
73
  @classmethod
74
+ def get_llm(
75
+ cls,
76
+ model: str | None = None,
77
+ temperature: float | None = None,
78
+ with_cost_tracking: bool = True,
79
+ ) -> BaseChatModel:
80
+ """
81
+ Get or create LLM instance using the factory.
82
+
83
+ Args:
84
+ model: Model name (defaults to DEFAULT_MODEL)
85
+ temperature: Sampling temperature (defaults to DEFAULT_TEMPERATURE)
86
+ with_cost_tracking: Whether to attach cost tracking callback
87
+
88
+ Returns:
89
+ BaseChatModel instance
90
+ """
91
+ model = model or cls.DEFAULT_MODEL
92
+ temperature = temperature if temperature is not None else cls.DEFAULT_TEMPERATURE
93
+
94
+ # Use cache for default config
95
+ use_cache = model == cls.DEFAULT_MODEL and temperature == cls.DEFAULT_TEMPERATURE
96
+
97
+ if use_cache and cls._llm_instance is not None:
98
+ return cls._llm_instance
99
+
100
+ # Build callbacks
101
+ callbacks = []
102
+ if with_cost_tracking:
103
+ callbacks.append(cls.get_cost_tracker())
104
+
105
+ llm = LLMFactory.create(
106
+ model=model,
107
+ temperature=temperature,
108
+ callbacks=callbacks if callbacks else None,
109
+ use_cache=False, # We handle caching ourselves
110
+ )
111
+
112
+ if use_cache:
113
+ cls._llm_instance = llm
114
+
115
+ return llm
116
+
117
  @classmethod
118
  def get_embeddings(cls) -> GoogleGenerativeAIEmbeddings:
119
+ """Get or create embeddings instance (singleton)."""
120
  if cls._embeddings_instance is None:
121
  cls._embeddings_instance = GoogleGenerativeAIEmbeddings(
122
+ model=cls.EMBEDDING_MODEL,
123
+ google_api_key=os.getenv("GEMINI_API_KEY"),
124
  )
125
  return cls._embeddings_instance
126
+
127
+ @classmethod
128
+ def get_cost_tracker(cls) -> CostTrackingCallback:
129
+ """Get or create cost tracker instance (singleton)."""
130
+ if cls._cost_tracker is None:
131
+ cls._cost_tracker = CostTrackingCallback(log_calls=True)
132
+ return cls._cost_tracker
133
+
134
  @classmethod
135
+ def get_agent_config(cls, agent_name: str) -> dict | None:
136
+ """Get configuration for a specific agent."""
137
  for agent in cls.AGENTS_CONFIG["agents"]:
138
  if agent["name"] == agent_name:
139
  return agent
140
  return None
141
+
142
  @classmethod
143
+ def get_enabled_agents(cls) -> list[dict]:
144
+ """Get list of enabled agents."""
145
  return [
146
+ agent
147
+ for agent in cls.AGENTS_CONFIG["agents"]
148
  if agent.get("enabled", True)
149
  ]
150
+
151
+ @classmethod
152
+ def list_available_models(cls) -> list[str]:
153
+ """List all available LLM models."""
154
+ return LLMFactory.list_models()
155
+
156
+ @classmethod
157
+ def list_available_providers(cls) -> list[str]:
158
+ """List all available LLM providers."""
159
+ return LLMFactory.list_providers()
160
+
161
  @classmethod
162
  def validate_config(cls) -> bool:
163
+ """Validate configuration by testing connections."""
164
  try:
165
+ llm = cls.get_llm(with_cost_tracking=False)
 
 
166
  embeddings = cls.get_embeddings()
167
  return True
168
  except Exception as e:
169
  print(f"Configuration validation failed: {e}")
170
+ return False
171
+
172
+ @classmethod
173
+ def reset_instances(cls) -> None:
174
+ """Reset all cached instances."""
175
+ cls._llm_instance = None
176
+ cls._embeddings_instance = None
177
+ if cls._cost_tracker:
178
+ cls._cost_tracker.reset()
179
+ LLMFactory.clear_cache()
src/agents/conversation_manager.py DELETED
@@ -1,275 +0,0 @@
1
- import logging
2
- import uuid
3
- from typing import Dict, List, Optional, Any
4
- from datetime import datetime, timedelta
5
- from dataclasses import dataclass, asdict
6
- import json
7
-
8
- from src.models.chatMessage import ConversationState, ChatMessage, MessageRole, AgentType
9
-
10
- logger = logging.getLogger(__name__)
11
-
12
-
13
- @dataclass
14
- class ConversationMetadata:
15
- """Metadata for conversation tracking"""
16
- conversation_id: str
17
- user_id: str
18
- created_at: datetime
19
- updated_at: datetime
20
- message_count: int
21
- current_agent: Optional[str]
22
- is_active: bool
23
- context_summary: Dict[str, Any]
24
-
25
-
26
- class ConversationManager:
27
- """Manages conversation state and persistence for multi-agent system"""
28
-
29
- def __init__(self):
30
- self.conversations: Dict[str, ConversationState] = {}
31
- self.metadata: Dict[str, ConversationMetadata] = {}
32
- self.user_conversations: Dict[str, List[str]] = {}
33
-
34
- def create_conversation(self, user_id: str, conversation_id: Optional[str] = None) -> str:
35
- """Create a new conversation"""
36
- if not conversation_id:
37
- conversation_id = str(uuid.uuid4())
38
-
39
- key = f"{user_id}:{conversation_id}"
40
-
41
- # Create conversation state
42
- conversation_state = ConversationState(
43
- conversation_id=conversation_id,
44
- user_id=user_id,
45
- messages=[],
46
- context={},
47
- memory={},
48
- agent_history=[],
49
- current_agent=None,
50
- last_message_id=None,
51
- created_at=datetime.utcnow(),
52
- updated_at=datetime.utcnow(),
53
- is_active=True
54
- )
55
-
56
- # Create metadata
57
- metadata = ConversationMetadata(
58
- conversation_id=conversation_id,
59
- user_id=user_id,
60
- created_at=datetime.utcnow(),
61
- updated_at=datetime.utcnow(),
62
- message_count=0,
63
- current_agent=None,
64
- is_active=True,
65
- context_summary={}
66
- )
67
-
68
- # Store conversation
69
- self.conversations[key] = conversation_state
70
- self.metadata[key] = metadata
71
-
72
- # Update user conversations
73
- if user_id not in self.user_conversations:
74
- self.user_conversations[user_id] = []
75
- self.user_conversations[user_id].append(conversation_id)
76
-
77
- logger.info(f"Created conversation {conversation_id} for user {user_id}")
78
- return conversation_id
79
-
80
- def get_conversation(self, conversation_id: str, user_id: str) -> Optional[ConversationState]:
81
- """Get conversation by ID and user"""
82
- key = f"{user_id}:{conversation_id}"
83
- return self.conversations.get(key)
84
-
85
- def get_or_create_conversation(self, conversation_id: str, user_id: str) -> ConversationState:
86
- """Get existing conversation or create new one"""
87
- conversation = self.get_conversation(conversation_id, user_id)
88
- if not conversation:
89
- self.create_conversation(user_id, conversation_id)
90
- conversation = self.get_conversation(conversation_id, user_id)
91
- return conversation
92
-
93
- def add_message(self, conversation_id: str, user_id: str, message: ChatMessage) -> None:
94
- """Add message to conversation"""
95
- conversation = self.get_or_create_conversation(conversation_id, user_id)
96
- key = f"{user_id}:{conversation_id}"
97
-
98
- # Add message
99
- conversation.messages.append(message)
100
- conversation.last_message_id = message.message_id
101
- conversation.updated_at = datetime.utcnow()
102
-
103
- # Update metadata
104
- if key in self.metadata:
105
- self.metadata[key].message_count = len(conversation.messages)
106
- self.metadata[key].updated_at = datetime.utcnow()
107
- self.metadata[key].current_agent = conversation.current_agent
108
-
109
- logger.info(f"Added message to conversation {conversation_id}")
110
-
111
- def update_conversation_context(self, conversation_id: str, user_id: str, context_updates: Dict[str, Any]) -> None:
112
- """Update conversation context"""
113
- conversation = self.get_conversation(conversation_id, user_id)
114
- if conversation:
115
- conversation.context.update(context_updates)
116
- conversation.updated_at = datetime.utcnow()
117
-
118
- # Update metadata
119
- key = f"{user_id}:{conversation_id}"
120
- if key in self.metadata:
121
- self.metadata[key].context_summary.update(context_updates)
122
- self.metadata[key].updated_at = datetime.utcnow()
123
-
124
- def update_agent_history(self, conversation_id: str, user_id: str, agent_info: Dict[str, Any]) -> None:
125
- """Update agent interaction history"""
126
- conversation = self.get_conversation(conversation_id, user_id)
127
- if conversation:
128
- conversation.agent_history.append(agent_info)
129
- conversation.updated_at = datetime.utcnow()
130
-
131
- def get_conversation_messages(self, conversation_id: str, user_id: str, limit: Optional[int] = None) -> List[ChatMessage]:
132
- """Get messages from conversation"""
133
- conversation = self.get_conversation(conversation_id, user_id)
134
- if not conversation:
135
- return []
136
-
137
- messages = conversation.messages
138
- if limit:
139
- messages = messages[-limit:]
140
-
141
- return messages
142
-
143
- def get_user_conversations(self, user_id: str) -> List[Dict[str, Any]]:
144
- """Get all conversations for a user"""
145
- user_conversations = []
146
-
147
- for conversation_id in self.user_conversations.get(user_id, []):
148
- key = f"{user_id}:{conversation_id}"
149
- metadata = self.metadata.get(key)
150
-
151
- if metadata:
152
- user_conversations.append(asdict(metadata))
153
-
154
- return user_conversations
155
-
156
- def delete_conversation(self, conversation_id: str, user_id: str) -> bool:
157
- """Delete a conversation"""
158
- key = f"{user_id}:{conversation_id}"
159
-
160
- if key in self.conversations:
161
- del self.conversations[key]
162
-
163
- if key in self.metadata:
164
- del self.metadata[key]
165
-
166
- # Remove from user conversations
167
- if user_id in self.user_conversations:
168
- if conversation_id in self.user_conversations[user_id]:
169
- self.user_conversations[user_id].remove(conversation_id)
170
-
171
- logger.info(f"Deleted conversation {conversation_id} for user {user_id}")
172
- return True
173
-
174
- def reset_conversation(self, conversation_id: str, user_id: str) -> None:
175
- """Reset conversation (clear messages but keep conversation)"""
176
- conversation = self.get_conversation(conversation_id, user_id)
177
- if conversation:
178
- conversation.messages = []
179
- conversation.context = {}
180
- conversation.agent_history = []
181
- conversation.current_agent = None
182
- conversation.last_message_id = None
183
- conversation.updated_at = datetime.utcnow()
184
-
185
- # Update metadata
186
- key = f"{user_id}:{conversation_id}"
187
- if key in self.metadata:
188
- self.metadata[key].message_count = 0
189
- self.metadata[key].current_agent = None
190
- self.metadata[key].context_summary = {}
191
- self.metadata[key].updated_at = datetime.utcnow()
192
-
193
- def cleanup_old_conversations(self, max_age_hours: int = 24) -> int:
194
- """Clean up old conversations"""
195
- cutoff_time = datetime.utcnow() - timedelta(hours=max_age_hours)
196
- deleted_count = 0
197
-
198
- conversations_to_delete = []
199
-
200
- for key, metadata in self.metadata.items():
201
- if metadata.updated_at < cutoff_time and not metadata.is_active:
202
- conversations_to_delete.append(key)
203
-
204
- for key in conversations_to_delete:
205
- user_id, conversation_id = key.split(":", 1)
206
- if self.delete_conversation(conversation_id, user_id):
207
- deleted_count += 1
208
-
209
- logger.info(f"Cleaned up {deleted_count} old conversations")
210
- return deleted_count
211
-
212
- def get_conversation_stats(self, user_id: str) -> Dict[str, Any]:
213
- """Get conversation statistics for a user"""
214
- user_conversations = self.get_user_conversations(user_id)
215
-
216
- total_conversations = len(user_conversations)
217
- active_conversations = sum(1 for conv in user_conversations if conv["is_active"])
218
- total_messages = sum(conv["message_count"] for conv in user_conversations)
219
-
220
- # Agent usage statistics
221
- agent_usage = {}
222
- for conv in user_conversations:
223
- conversation = self.get_conversation(conv["conversation_id"], user_id)
224
- if conversation:
225
- for agent_info in conversation.agent_history:
226
- agent_name = agent_info.get("agent", "unknown")
227
- agent_usage[agent_name] = agent_usage.get(agent_name, 0) + 1
228
-
229
- return {
230
- "total_conversations": total_conversations,
231
- "active_conversations": active_conversations,
232
- "total_messages": total_messages,
233
- "agent_usage": agent_usage,
234
- "average_messages_per_conversation": total_messages / total_conversations if total_conversations > 0 else 0
235
- }
236
-
237
- def export_conversation(self, conversation_id: str, user_id: str) -> Dict[str, Any]:
238
- """Export conversation data"""
239
- conversation = self.get_conversation(conversation_id, user_id)
240
- if not conversation:
241
- return {}
242
-
243
- return {
244
- "conversation_id": conversation_id,
245
- "user_id": user_id,
246
- "messages": [msg.dict() for msg in conversation.messages],
247
- "context": conversation.context,
248
- "agent_history": conversation.agent_history,
249
- "metadata": asdict(self.metadata.get(f"{user_id}:{conversation_id}", {}))
250
- }
251
-
252
- def import_conversation(self, conversation_data: Dict[str, Any]) -> str:
253
- """Import conversation data"""
254
- conversation_id = conversation_data.get("conversation_id", str(uuid.uuid4()))
255
- user_id = conversation_data.get("user_id", "anonymous")
256
-
257
- # Create conversation
258
- self.create_conversation(user_id, conversation_id)
259
-
260
- # Import messages
261
- for msg_data in conversation_data.get("messages", []):
262
- message = ChatMessage(**msg_data)
263
- self.add_message(conversation_id, user_id, message)
264
-
265
- # Import context and history
266
- conversation = self.get_conversation(conversation_id, user_id)
267
- if conversation:
268
- conversation.context.update(conversation_data.get("context", {}))
269
- conversation.agent_history.extend(conversation_data.get("agent_history", []))
270
-
271
- return conversation_id
272
-
273
-
274
- # Global conversation manager instance
275
- conversation_manager = ConversationManager()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/agents/supervisor/agent.py CHANGED
@@ -1,4 +1,3 @@
1
- from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
2
  from langgraph_supervisor import create_supervisor
3
  from langchain_core.messages import HumanMessage, SystemMessage, AIMessage
4
  from src.agents.config import Config
@@ -29,16 +28,8 @@ from src.agents.staking.prompt import STAKING_AGENT_SYSTEM_PROMPT
29
  from src.agents.search.agent import SearchAgent
30
  from src.agents.database.client import is_database_available
31
 
32
- llm = ChatGoogleGenerativeAI(
33
- model=Config.GEMINI_MODEL,
34
- temperature=0.7,
35
- google_api_key=Config.GEMINI_API_KEY
36
- )
37
-
38
- embeddings = GoogleGenerativeAIEmbeddings(
39
- model=Config.GEMINI_EMBEDDING_MODEL,
40
- google_api_key=Config.GEMINI_API_KEY
41
- )
42
 
43
 
44
  class ChatMessage(TypedDict):
@@ -50,7 +41,7 @@ class Supervisor:
50
  def __init__(self, llm):
51
  self.llm = llm
52
 
53
- cryptoDataAgentClass = CryptoDataAgent(llm)
54
  cryptoDataAgent = cryptoDataAgentClass.agent
55
 
56
  agents = [cryptoDataAgent]
@@ -60,7 +51,7 @@ class Supervisor:
60
 
61
  # Conditionally include database agent
62
  if is_database_available():
63
- databaseAgent = DatabaseAgent(llm)
64
  agents.append(databaseAgent)
65
  available_agents_text += (
66
  "- database_agent: Handles database queries and data analysis. Can search and analyze data from the database.\n"
@@ -68,42 +59,42 @@ class Supervisor:
68
  else:
69
  databaseAgent = None
70
 
71
- swapAgent = SwapAgent(llm)
72
  self.swap_agent = swapAgent.agent
73
  agents.append(self.swap_agent)
74
  available_agents_text += (
75
  "- swap_agent: Handles swap operations on the Avalanche network and any other swap question related.\n"
76
  )
77
 
78
- dcaAgent = DcaAgent(llm)
79
  self.dca_agent = dcaAgent.agent
80
  agents.append(self.dca_agent)
81
  available_agents_text += (
82
  "- dca_agent: Plans DCA swap workflows, consulting strategy docs, validating parameters, and confirming automation metadata.\n"
83
  )
84
 
85
- lendingAgent = LendingAgent(llm)
86
  self.lending_agent = lendingAgent.agent
87
  agents.append(self.lending_agent)
88
  available_agents_text += (
89
  "- lending_agent: Handles lending operations (supply, borrow, repay, withdraw) on DeFi protocols like Aave.\n"
90
  )
91
 
92
- stakingAgent = StakingAgent(llm)
93
  self.staking_agent = stakingAgent.agent
94
  agents.append(self.staking_agent)
95
  available_agents_text += (
96
  "- staking_agent: Handles staking operations (stake ETH, unstake stETH) via Lido on Ethereum.\n"
97
  )
98
 
99
- searchAgent = SearchAgent(llm)
100
  self.search_agent = searchAgent.agent
101
  agents.append(self.search_agent)
102
  available_agents_text += (
103
  "- search_agent: Uses web search tools for current events and factual lookups.\n"
104
  )
105
 
106
- defaultAgent = DefaultAgent(llm)
107
  self.default_agent = defaultAgent.agent
108
  agents.append(self.default_agent)
109
 
@@ -252,7 +243,7 @@ Examples of general queries to handle directly:
252
 
253
  self.supervisor = create_supervisor(
254
  agents,
255
- model=llm,
256
  prompt=system_prompt,
257
  output_mode="last_message"
258
  )
 
 
1
  from langgraph_supervisor import create_supervisor
2
  from langchain_core.messages import HumanMessage, SystemMessage, AIMessage
3
  from src.agents.config import Config
 
28
  from src.agents.search.agent import SearchAgent
29
  from src.agents.database.client import is_database_available
30
 
31
+ # Embeddings singleton
32
+ embeddings = Config.get_embeddings()
 
 
 
 
 
 
 
 
33
 
34
 
35
  class ChatMessage(TypedDict):
 
41
  def __init__(self, llm):
42
  self.llm = llm
43
 
44
+ cryptoDataAgentClass = CryptoDataAgent(self.llm)
45
  cryptoDataAgent = cryptoDataAgentClass.agent
46
 
47
  agents = [cryptoDataAgent]
 
51
 
52
  # Conditionally include database agent
53
  if is_database_available():
54
+ databaseAgent = DatabaseAgent(self.llm)
55
  agents.append(databaseAgent)
56
  available_agents_text += (
57
  "- database_agent: Handles database queries and data analysis. Can search and analyze data from the database.\n"
 
59
  else:
60
  databaseAgent = None
61
 
62
+ swapAgent = SwapAgent(self.llm)
63
  self.swap_agent = swapAgent.agent
64
  agents.append(self.swap_agent)
65
  available_agents_text += (
66
  "- swap_agent: Handles swap operations on the Avalanche network and any other swap question related.\n"
67
  )
68
 
69
+ dcaAgent = DcaAgent(self.llm)
70
  self.dca_agent = dcaAgent.agent
71
  agents.append(self.dca_agent)
72
  available_agents_text += (
73
  "- dca_agent: Plans DCA swap workflows, consulting strategy docs, validating parameters, and confirming automation metadata.\n"
74
  )
75
 
76
+ lendingAgent = LendingAgent(self.llm)
77
  self.lending_agent = lendingAgent.agent
78
  agents.append(self.lending_agent)
79
  available_agents_text += (
80
  "- lending_agent: Handles lending operations (supply, borrow, repay, withdraw) on DeFi protocols like Aave.\n"
81
  )
82
 
83
+ stakingAgent = StakingAgent(self.llm)
84
  self.staking_agent = stakingAgent.agent
85
  agents.append(self.staking_agent)
86
  available_agents_text += (
87
  "- staking_agent: Handles staking operations (stake ETH, unstake stETH) via Lido on Ethereum.\n"
88
  )
89
 
90
+ searchAgent = SearchAgent(self.llm)
91
  self.search_agent = searchAgent.agent
92
  agents.append(self.search_agent)
93
  available_agents_text += (
94
  "- search_agent: Uses web search tools for current events and factual lookups.\n"
95
  )
96
 
97
+ defaultAgent = DefaultAgent(self.llm)
98
  self.default_agent = defaultAgent.agent
99
  agents.append(self.default_agent)
100
 
 
243
 
244
  self.supervisor = create_supervisor(
245
  agents,
246
+ model=self.llm,
247
  prompt=system_prompt,
248
  output_mode="last_message"
249
  )
src/app.py CHANGED
@@ -1,16 +1,15 @@
1
- import logging
2
- logging.basicConfig(
3
- level=logging.DEBUG,
4
- format="%(asctime)s %(levelname)s %(name)s: %(message)s",
5
- handlers=[logging.StreamHandler()]
6
- )
7
- logging.info("Test log from app.py startup")
8
- from fastapi import FastAPI, HTTPException, Request
9
  from fastapi.middleware.cors import CORSMiddleware
 
10
  from pydantic import BaseModel
11
- from typing import List
12
- import re
13
 
 
 
14
  from src.agents.config import Config
15
  from src.agents.supervisor.agent import Supervisor
16
  from src.models.chatMessage import ChatMessage
@@ -19,8 +18,23 @@ from src.service.chat_manager import chat_manager_instance
19
  from src.agents.crypto_data.tools import get_coingecko_id, get_tradingview_symbol
20
  from src.agents.metadata import metadata
21
 
 
 
 
 
 
 
 
 
22
  # Initialize FastAPI app
23
- app = FastAPI(title="Zico Agent API", version="1.0")
 
 
 
 
 
 
 
24
 
25
  # Enable CORS for local/frontend dev
26
  app.add_middleware(
@@ -31,9 +45,8 @@ app.add_middleware(
31
  allow_headers=["*"],
32
  )
33
 
34
- # Instantiate Supervisor agent (singleton LLM)
35
- supervisor = Supervisor(Config.get_llm())
36
- logger = logging.getLogger(__name__)
37
 
38
  class ChatRequest(BaseModel):
39
  message: ChatMessage
@@ -156,6 +169,54 @@ def _resolve_identity(request: ChatRequest) -> tuple[str, str]:
156
  def health_check():
157
  return {"status": "ok"}
158
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
159
  @app.get("/chat/messages")
160
  def get_messages(request: Request):
161
  params = request.query_params
@@ -214,13 +275,27 @@ def chat(request: ChatRequest):
214
  conversation_id=conversation_id,
215
  user_id=user_id
216
  )
217
-
 
 
 
 
218
  # Invoke the supervisor agent with the conversation
219
  result = supervisor.invoke(
220
  conversation_messages,
221
  conversation_id=conversation_id,
222
  user_id=user_id,
223
  )
 
 
 
 
 
 
 
 
 
 
224
  logger.debug(
225
  "Supervisor returned result for user=%s conversation=%s: %s",
226
  user_id,
@@ -353,5 +428,303 @@ def chat(request: ChatRequest):
353
  )
354
  raise HTTPException(status_code=500, detail=str(e))
355
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
356
  # Include chat manager router
357
  app.include_router(chat_manager_router)
 
1
+ import base64
2
+ import json
3
+ import os
4
+ from typing import List, Optional
5
+
6
+ from fastapi import FastAPI, File, Form, HTTPException, Request, UploadFile
 
 
7
  from fastapi.middleware.cors import CORSMiddleware
8
+ from langchain_core.messages import HumanMessage
9
  from pydantic import BaseModel
 
 
10
 
11
+ from src.infrastructure.logging import setup_logging, get_logger
12
+ from src.infrastructure.rate_limiter import setup_rate_limiter, limiter
13
  from src.agents.config import Config
14
  from src.agents.supervisor.agent import Supervisor
15
  from src.models.chatMessage import ChatMessage
 
18
  from src.agents.crypto_data.tools import get_coingecko_id, get_tradingview_symbol
19
  from src.agents.metadata import metadata
20
 
21
+ # Setup structured logging
22
+ log_level = os.getenv("LOG_LEVEL", "INFO")
23
+ log_format = os.getenv("LOG_FORMAT", "color")
24
+ setup_logging(level=log_level, format_type=log_format)
25
+ logger = get_logger(__name__)
26
+
27
+ logger.info("Starting Zico Agent API")
28
+
29
  # Initialize FastAPI app
30
+ app = FastAPI(
31
+ title="Zico Agent API",
32
+ version="2.0",
33
+ description="Multi-agent AI assistant with streaming support",
34
+ )
35
+
36
+ # Setup rate limiting
37
+ setup_rate_limiter(app)
38
 
39
  # Enable CORS for local/frontend dev
40
  app.add_middleware(
 
45
  allow_headers=["*"],
46
  )
47
 
48
+ # Instantiate Supervisor agent (singleton LLM with cost tracking)
49
+ supervisor = Supervisor(Config.get_llm(with_cost_tracking=True))
 
50
 
51
  class ChatRequest(BaseModel):
52
  message: ChatMessage
 
169
  def health_check():
170
  return {"status": "ok"}
171
 
172
+
173
+ @app.get("/costs")
174
+ def get_costs():
175
+ """Get current LLM cost summary."""
176
+ cost_tracker = Config.get_cost_tracker()
177
+ return cost_tracker.get_summary()
178
+
179
+
180
+ @app.get("/costs/detailed")
181
+ def get_detailed_costs():
182
+ """Get detailed LLM cost report."""
183
+ cost_tracker = Config.get_cost_tracker()
184
+ return cost_tracker.get_detailed_report()
185
+
186
+
187
+ @app.get("/costs/conversation")
188
+ def get_conversation_costs(request: Request):
189
+ """Get accumulated LLM costs for a specific conversation."""
190
+ params = request.query_params
191
+ conversation_id = params.get("conversation_id")
192
+ user_id = params.get("user_id")
193
+
194
+ if not conversation_id or not user_id:
195
+ raise HTTPException(
196
+ status_code=400,
197
+ detail="Both 'conversation_id' and 'user_id' query parameters are required.",
198
+ )
199
+
200
+ costs = chat_manager_instance.get_conversation_costs(
201
+ conversation_id=conversation_id,
202
+ user_id=user_id,
203
+ )
204
+ return {
205
+ "conversation_id": conversation_id,
206
+ "user_id": user_id,
207
+ "costs": costs,
208
+ }
209
+
210
+
211
+ @app.get("/models")
212
+ def get_available_models():
213
+ """List available LLM models."""
214
+ return {
215
+ "models": Config.list_available_models(),
216
+ "providers": Config.list_available_providers(),
217
+ "default": Config.DEFAULT_MODEL,
218
+ }
219
+
220
  @app.get("/chat/messages")
221
  def get_messages(request: Request):
222
  params = request.query_params
 
275
  conversation_id=conversation_id,
276
  user_id=user_id
277
  )
278
+
279
+ # Take cost snapshot before invoking
280
+ cost_tracker = Config.get_cost_tracker()
281
+ cost_snapshot = cost_tracker.get_snapshot()
282
+
283
  # Invoke the supervisor agent with the conversation
284
  result = supervisor.invoke(
285
  conversation_messages,
286
  conversation_id=conversation_id,
287
  user_id=user_id,
288
  )
289
+
290
+ # Calculate and save cost delta for this request
291
+ cost_delta = cost_tracker.calculate_delta(cost_snapshot)
292
+ if cost_delta.get("cost", 0) > 0 or cost_delta.get("calls", 0) > 0:
293
+ chat_manager_instance.update_conversation_costs(
294
+ cost_delta,
295
+ conversation_id=conversation_id,
296
+ user_id=user_id,
297
+ )
298
+
299
  logger.debug(
300
  "Supervisor returned result for user=%s conversation=%s: %s",
301
  user_id,
 
428
  )
429
  raise HTTPException(status_code=500, detail=str(e))
430
 
431
+
432
+ # Supported audio MIME types
433
+ AUDIO_MIME_TYPES = {
434
+ ".mp3": "audio/mpeg",
435
+ ".wav": "audio/wav",
436
+ ".flac": "audio/flac",
437
+ ".ogg": "audio/ogg",
438
+ ".webm": "audio/webm",
439
+ ".m4a": "audio/mp4",
440
+ ".aac": "audio/aac",
441
+ }
442
+
443
+ # Max audio file size (20MB)
444
+ MAX_AUDIO_SIZE = 20 * 1024 * 1024
445
+
446
+
447
+ def _get_audio_mime_type(filename: str, content_type: str | None) -> str:
448
+ """Determine the MIME type for an audio file."""
449
+ # Try from filename extension first
450
+ if filename:
451
+ ext = os.path.splitext(filename.lower())[1]
452
+ if ext in AUDIO_MIME_TYPES:
453
+ return AUDIO_MIME_TYPES[ext]
454
+
455
+ # Fall back to content type from upload
456
+ if content_type and content_type.startswith("audio/"):
457
+ return content_type
458
+
459
+ # Default to mpeg
460
+ return "audio/mpeg"
461
+
462
+
463
+ @app.post("/chat/audio")
464
+ async def chat_audio(
465
+ audio: UploadFile = File(..., description="Audio file (mp3, wav, flac, ogg, webm, m4a)"),
466
+ user_id: str = Form(..., description="User ID"),
467
+ conversation_id: str = Form(..., description="Conversation ID"),
468
+ wallet_address: str = Form("default", description="Wallet address"),
469
+ ):
470
+ """
471
+ Process audio input through the agent pipeline.
472
+
473
+ The audio is first transcribed using Gemini, then the transcription
474
+ is passed to the supervisor agent for processing (just like text input).
475
+ """
476
+ request_user_id: str | None = user_id
477
+ request_conversation_id: str | None = conversation_id
478
+
479
+ try:
480
+ # Validate user_id
481
+ if not user_id or user_id.lower() == "anonymous":
482
+ wallet = (wallet_address or "").strip()
483
+ if wallet and wallet.lower() != "default":
484
+ request_user_id = f"wallet::{wallet.lower()}"
485
+ else:
486
+ raise HTTPException(
487
+ status_code=400,
488
+ detail="A stable 'user_id' or wallet_address is required.",
489
+ )
490
+
491
+ logger.debug(
492
+ "Received audio chat request user=%s conversation=%s filename=%s",
493
+ request_user_id,
494
+ request_conversation_id,
495
+ audio.filename,
496
+ )
497
+
498
+ # Validate file size
499
+ audio_content = await audio.read()
500
+ if len(audio_content) > MAX_AUDIO_SIZE:
501
+ raise HTTPException(
502
+ status_code=413,
503
+ detail=f"Audio file too large. Maximum size is {MAX_AUDIO_SIZE // (1024*1024)}MB.",
504
+ )
505
+
506
+ if len(audio_content) == 0:
507
+ raise HTTPException(
508
+ status_code=400,
509
+ detail="Audio file is empty.",
510
+ )
511
+
512
+ # Get MIME type
513
+ mime_type = _get_audio_mime_type(audio.filename or "", audio.content_type)
514
+ logger.debug("Audio MIME type: %s, size: %d bytes", mime_type, len(audio_content))
515
+
516
+ # Encode audio to base64
517
+ encoded_audio = base64.b64encode(audio_content).decode("utf-8")
518
+
519
+ # Ensure session exists
520
+ wallet = wallet_address.strip() if wallet_address else None
521
+ if wallet and wallet.lower() == "default":
522
+ wallet = None
523
+
524
+ chat_manager_instance.ensure_session(
525
+ request_user_id,
526
+ request_conversation_id,
527
+ wallet_address=wallet,
528
+ )
529
+
530
+ # Take cost snapshot before invoking
531
+ cost_tracker = Config.get_cost_tracker()
532
+ cost_snapshot = cost_tracker.get_snapshot()
533
+
534
+ # Step 1: Transcribe the audio using Gemini
535
+ transcription_message = HumanMessage(
536
+ content=[
537
+ {"type": "text", "text": "Transcribe exactly what is being said in this audio. Return ONLY the transcription, nothing else."},
538
+ {"type": "media", "data": encoded_audio, "mime_type": mime_type},
539
+ ]
540
+ )
541
+
542
+ llm = Config.get_llm(with_cost_tracking=True)
543
+ transcription_response = llm.invoke([transcription_message])
544
+
545
+ # Extract transcription text
546
+ transcribed_text = transcription_response.content
547
+ if isinstance(transcribed_text, list):
548
+ text_parts = []
549
+ for part in transcribed_text:
550
+ if isinstance(part, dict) and part.get("text"):
551
+ text_parts.append(part["text"])
552
+ elif isinstance(part, str):
553
+ text_parts.append(part)
554
+ transcribed_text = " ".join(text_parts).strip()
555
+
556
+ if not transcribed_text:
557
+ raise HTTPException(
558
+ status_code=400,
559
+ detail="Could not transcribe the audio. Please try again with a clearer recording.",
560
+ )
561
+
562
+ logger.info("Audio transcribed: %s", transcribed_text[:200])
563
+
564
+ # Step 2: Store the user message with the transcription
565
+ user_message = ChatMessage(
566
+ role="user",
567
+ content=transcribed_text,
568
+ metadata={
569
+ "source": "audio",
570
+ "audio_filename": audio.filename,
571
+ "audio_size": len(audio_content),
572
+ "audio_mime_type": mime_type,
573
+ },
574
+ )
575
+ chat_manager_instance.add_message(
576
+ message=user_message.dict(),
577
+ conversation_id=request_conversation_id,
578
+ user_id=request_user_id,
579
+ )
580
+
581
+ # Step 3: Get conversation history and invoke supervisor
582
+ conversation_messages = chat_manager_instance.get_messages(
583
+ conversation_id=request_conversation_id,
584
+ user_id=request_user_id,
585
+ )
586
+
587
+ result = supervisor.invoke(
588
+ conversation_messages,
589
+ conversation_id=request_conversation_id,
590
+ user_id=request_user_id,
591
+ )
592
+
593
+ # Calculate and save cost delta
594
+ cost_delta = cost_tracker.calculate_delta(cost_snapshot)
595
+ if cost_delta.get("cost", 0) > 0 or cost_delta.get("calls", 0) > 0:
596
+ chat_manager_instance.update_conversation_costs(
597
+ cost_delta,
598
+ conversation_id=request_conversation_id,
599
+ user_id=request_user_id,
600
+ )
601
+
602
+ logger.debug(
603
+ "Supervisor returned result for audio user=%s conversation=%s: %s",
604
+ request_user_id,
605
+ request_conversation_id,
606
+ result,
607
+ )
608
+
609
+ # Step 4: Process and store the agent response (same as /chat endpoint)
610
+ if result and isinstance(result, dict):
611
+ agent_name = result.get("agent", "supervisor")
612
+ agent_name = _map_agent_type(agent_name)
613
+
614
+ response_metadata = {"supervisor_result": result, "source": "audio"}
615
+ swap_meta_snapshot = None
616
+
617
+ if isinstance(result, dict) and result.get("metadata"):
618
+ response_metadata.update(result.get("metadata") or {})
619
+ elif agent_name == "token swap":
620
+ swap_meta = metadata.get_swap_agent(
621
+ user_id=request_user_id,
622
+ conversation_id=request_conversation_id,
623
+ )
624
+ if swap_meta:
625
+ response_metadata.update(swap_meta)
626
+ swap_meta_snapshot = swap_meta
627
+ elif agent_name == "lending":
628
+ lending_meta = metadata.get_lending_agent(
629
+ user_id=request_user_id,
630
+ conversation_id=request_conversation_id,
631
+ )
632
+ if lending_meta:
633
+ response_metadata.update(lending_meta)
634
+ elif agent_name == "staking":
635
+ staking_meta = metadata.get_staking_agent(
636
+ user_id=request_user_id,
637
+ conversation_id=request_conversation_id,
638
+ )
639
+ if staking_meta:
640
+ response_metadata.update(staking_meta)
641
+
642
+ response_message = ChatMessage(
643
+ role="assistant",
644
+ content=result.get("response", "No response available"),
645
+ agent_name=agent_name,
646
+ agent_type=_map_agent_type(agent_name),
647
+ metadata=result.get("metadata", {}),
648
+ conversation_id=request_conversation_id,
649
+ user_id=request_user_id,
650
+ requires_action=True if agent_name in ["token swap", "lending", "staking"] else False,
651
+ action_type="swap" if agent_name == "token swap" else "lending" if agent_name == "lending" else "staking" if agent_name == "staking" else None,
652
+ )
653
+
654
+ chat_manager_instance.add_message(
655
+ message=response_message.dict(),
656
+ conversation_id=request_conversation_id,
657
+ user_id=request_user_id,
658
+ )
659
+
660
+ # Build response payload
661
+ response_payload = {
662
+ "response": result.get("response", "No response available"),
663
+ "agentName": agent_name,
664
+ "transcription": transcribed_text,
665
+ }
666
+
667
+ response_meta = result.get("metadata") or {}
668
+ if agent_name == "token swap" and not response_meta:
669
+ if swap_meta_snapshot:
670
+ response_meta = swap_meta_snapshot
671
+ else:
672
+ swap_meta = metadata.get_swap_agent(
673
+ user_id=request_user_id,
674
+ conversation_id=request_conversation_id,
675
+ )
676
+ if swap_meta:
677
+ response_meta = swap_meta
678
+
679
+ if response_meta:
680
+ response_payload["metadata"] = response_meta
681
+
682
+ # Clear metadata after ready events (same as /chat)
683
+ if agent_name == "token swap":
684
+ should_clear = False
685
+ if response_meta:
686
+ status = response_meta.get("status") if isinstance(response_meta, dict) else None
687
+ event = response_meta.get("event") if isinstance(response_meta, dict) else None
688
+ should_clear = status == "ready" or event == "swap_intent_ready"
689
+ if should_clear:
690
+ metadata.set_swap_agent({}, user_id=request_user_id, conversation_id=request_conversation_id)
691
+
692
+ if agent_name == "lending":
693
+ should_clear = False
694
+ if response_meta:
695
+ status = response_meta.get("status") if isinstance(response_meta, dict) else None
696
+ event = response_meta.get("event") if isinstance(response_meta, dict) else None
697
+ should_clear = status == "ready" or event == "lending_intent_ready"
698
+ if should_clear:
699
+ metadata.set_lending_agent({}, user_id=request_user_id, conversation_id=request_conversation_id)
700
+
701
+ if agent_name == "staking":
702
+ should_clear = False
703
+ if response_meta:
704
+ status = response_meta.get("status") if isinstance(response_meta, dict) else None
705
+ event = response_meta.get("event") if isinstance(response_meta, dict) else None
706
+ should_clear = status == "ready" or event == "staking_intent_ready"
707
+ if should_clear:
708
+ metadata.set_staking_agent({}, user_id=request_user_id, conversation_id=request_conversation_id)
709
+
710
+ return response_payload
711
+
712
+ return {
713
+ "response": "No response available",
714
+ "agentName": "supervisor",
715
+ "transcription": transcribed_text,
716
+ }
717
+
718
+ except HTTPException:
719
+ raise
720
+ except Exception as e:
721
+ logger.exception(
722
+ "Audio chat handler failed for user=%s conversation=%s",
723
+ request_user_id,
724
+ request_conversation_id,
725
+ )
726
+ raise HTTPException(status_code=500, detail=str(e))
727
+
728
+
729
  # Include chat manager router
730
  app.include_router(chat_manager_router)
src/infrastructure/__init__.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Infrastructure Module - Cross-cutting concerns.
3
+
4
+ This module provides:
5
+ - Logging: Structured logging with color support
6
+ - Rate Limiting: API rate limiting with SlowAPI
7
+ - Metrics: Prometheus metrics for observability
8
+ - Retry: Retry utilities with exponential backoff
9
+ """
10
+
11
+ from .logging import setup_logging, get_logger
12
+ from .rate_limiter import limiter, setup_rate_limiter, limit_chat, limit_stream
13
+ from .retry import execute_with_retry, RetryConfig
14
+
15
+ __all__ = [
16
+ # Logging
17
+ "setup_logging",
18
+ "get_logger",
19
+ # Rate limiting
20
+ "limiter",
21
+ "setup_rate_limiter",
22
+ "limit_chat",
23
+ "limit_stream",
24
+ # Retry
25
+ "execute_with_retry",
26
+ "RetryConfig",
27
+ ]
src/infrastructure/logging.py ADDED
@@ -0,0 +1,169 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Structured Logging Configuration.
3
+
4
+ Supports:
5
+ - Color output for development (colorlog)
6
+ - JSON output for production (structlog)
7
+ """
8
+
9
+ import logging
10
+ import os
11
+ import sys
12
+ from typing import Literal
13
+
14
+ LogFormat = Literal["color", "json"]
15
+
16
+
17
+ def setup_logging(
18
+ level: int | str = logging.INFO,
19
+ format_type: LogFormat | None = None,
20
+ json_indent: int | None = None,
21
+ ) -> logging.Logger:
22
+ """
23
+ Configure application logging.
24
+
25
+ Args:
26
+ level: Logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL)
27
+ format_type: Output format ("color" for dev, "json" for prod)
28
+ If None, reads from LOG_FORMAT env var (defaults to "color")
29
+ json_indent: Indentation for JSON output (None for compact)
30
+
31
+ Returns:
32
+ Configured root logger
33
+ """
34
+ # Determine format from env if not specified
35
+ if format_type is None:
36
+ format_type = os.getenv("LOG_FORMAT", "color").lower()
37
+ if format_type not in ("color", "json"):
38
+ format_type = "color"
39
+
40
+ # Parse level if string
41
+ if isinstance(level, str):
42
+ level = getattr(logging, level.upper(), logging.INFO)
43
+
44
+ # Get root logger
45
+ root_logger = logging.getLogger()
46
+ root_logger.setLevel(level)
47
+
48
+ # Remove existing handlers
49
+ for handler in root_logger.handlers[:]:
50
+ root_logger.removeHandler(handler)
51
+
52
+ # Create handler
53
+ handler = logging.StreamHandler(sys.stdout)
54
+ handler.setLevel(level)
55
+
56
+ if format_type == "color":
57
+ formatter = _create_color_formatter()
58
+ else:
59
+ formatter = _create_json_formatter(json_indent)
60
+
61
+ handler.setFormatter(formatter)
62
+ root_logger.addHandler(handler)
63
+
64
+ # Reduce noise from third-party libraries
65
+ logging.getLogger("httpx").setLevel(logging.WARNING)
66
+ logging.getLogger("httpcore").setLevel(logging.WARNING)
67
+ logging.getLogger("urllib3").setLevel(logging.WARNING)
68
+ logging.getLogger("langchain").setLevel(logging.WARNING)
69
+ logging.getLogger("langsmith").setLevel(logging.WARNING)
70
+
71
+ return root_logger
72
+
73
+
74
+ def _create_color_formatter() -> logging.Formatter:
75
+ """Create colorized formatter for development."""
76
+ try:
77
+ import colorlog
78
+
79
+ return colorlog.ColoredFormatter(
80
+ fmt="%(log_color)s%(asctime)s | %(levelname)-8s | %(name)s | %(message)s",
81
+ datefmt="%Y-%m-%d %H:%M:%S",
82
+ log_colors={
83
+ "DEBUG": "cyan",
84
+ "INFO": "green",
85
+ "WARNING": "yellow",
86
+ "ERROR": "red",
87
+ "CRITICAL": "bold_red",
88
+ },
89
+ secondary_log_colors={},
90
+ style="%",
91
+ )
92
+ except ImportError:
93
+ # Fallback if colorlog not installed
94
+ return logging.Formatter(
95
+ fmt="%(asctime)s | %(levelname)-8s | %(name)s | %(message)s",
96
+ datefmt="%Y-%m-%d %H:%M:%S",
97
+ )
98
+
99
+
100
+ def _create_json_formatter(indent: int | None = None) -> logging.Formatter:
101
+ """Create JSON formatter for production."""
102
+ try:
103
+ import structlog
104
+
105
+ # Configure structlog
106
+ structlog.configure(
107
+ processors=[
108
+ structlog.stdlib.filter_by_level,
109
+ structlog.stdlib.add_logger_name,
110
+ structlog.stdlib.add_log_level,
111
+ structlog.stdlib.PositionalArgumentsFormatter(),
112
+ structlog.processors.TimeStamper(fmt="iso"),
113
+ structlog.processors.StackInfoRenderer(),
114
+ structlog.processors.format_exc_info,
115
+ structlog.processors.UnicodeDecoder(),
116
+ structlog.processors.JSONRenderer(indent=indent),
117
+ ],
118
+ wrapper_class=structlog.stdlib.BoundLogger,
119
+ context_class=dict,
120
+ logger_factory=structlog.stdlib.LoggerFactory(),
121
+ cache_logger_on_first_use=True,
122
+ )
123
+
124
+ # Return a simple formatter since structlog handles formatting
125
+ return logging.Formatter("%(message)s")
126
+
127
+ except ImportError:
128
+ # Fallback JSON formatter
129
+ import json
130
+
131
+ class JsonFormatter(logging.Formatter):
132
+ def format(self, record: logging.LogRecord) -> str:
133
+ log_data = {
134
+ "timestamp": self.formatTime(record, "%Y-%m-%dT%H:%M:%S"),
135
+ "level": record.levelname,
136
+ "logger": record.name,
137
+ "message": record.getMessage(),
138
+ }
139
+ if record.exc_info:
140
+ log_data["exception"] = self.formatException(record.exc_info)
141
+ return json.dumps(log_data)
142
+
143
+ return JsonFormatter()
144
+
145
+
146
+ def get_logger(name: str) -> logging.Logger:
147
+ """
148
+ Get a logger with the specified name.
149
+
150
+ Args:
151
+ name: Logger name (typically __name__)
152
+
153
+ Returns:
154
+ Logger instance
155
+ """
156
+ return logging.getLogger(name)
157
+
158
+
159
+ class LoggerMixin:
160
+ """Mixin class to add logging capability to any class."""
161
+
162
+ @property
163
+ def logger(self) -> logging.Logger:
164
+ """Get logger for this class."""
165
+ if not hasattr(self, "_logger"):
166
+ self._logger = logging.getLogger(
167
+ f"{self.__class__.__module__}.{self.__class__.__name__}"
168
+ )
169
+ return self._logger
src/infrastructure/rate_limiter.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Rate Limiting Configuration using SlowAPI.
3
+
4
+ Provides rate limiting for FastAPI endpoints to prevent abuse.
5
+ """
6
+
7
+ import os
8
+ from typing import Callable
9
+
10
+ from fastapi import FastAPI, Request, Response
11
+ from slowapi import Limiter, _rate_limit_exceeded_handler
12
+ from slowapi.errors import RateLimitExceeded
13
+ from slowapi.util import get_remote_address
14
+
15
+
16
+ def _get_identifier(request: Request) -> str:
17
+ """
18
+ Get identifier for rate limiting.
19
+
20
+ Uses X-Forwarded-For header if behind a proxy,
21
+ otherwise falls back to remote address.
22
+ Also considers user_id from query params or body if available.
23
+ """
24
+ # Try to get user_id for more granular limiting
25
+ user_id = request.query_params.get("user_id")
26
+ if user_id and user_id != "anonymous":
27
+ return f"user:{user_id}"
28
+
29
+ # Fall back to IP-based limiting
30
+ forwarded = request.headers.get("X-Forwarded-For")
31
+ if forwarded:
32
+ return forwarded.split(",")[0].strip()
33
+
34
+ return get_remote_address(request)
35
+
36
+
37
+ # Create global limiter instance
38
+ limiter = Limiter(
39
+ key_func=_get_identifier,
40
+ default_limits=[os.getenv("RATE_LIMIT_DEFAULT", "100/minute")],
41
+ )
42
+
43
+
44
+ def setup_rate_limiter(app: FastAPI) -> None:
45
+ """
46
+ Configure rate limiting on a FastAPI application.
47
+
48
+ Args:
49
+ app: FastAPI application instance
50
+ """
51
+ app.state.limiter = limiter
52
+ app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)
53
+
54
+
55
+ def limit_chat(func: Callable) -> Callable:
56
+ """
57
+ Rate limit decorator for chat endpoints.
58
+
59
+ Default: 30 requests per minute.
60
+ """
61
+ limit = os.getenv("RATE_LIMIT_CHAT", "30/minute")
62
+ return limiter.limit(limit)(func)
63
+
64
+
65
+ def limit_stream(func: Callable) -> Callable:
66
+ """
67
+ Rate limit decorator for streaming endpoints.
68
+
69
+ Default: 10 requests per minute (streaming is more resource-intensive).
70
+ """
71
+ limit = os.getenv("RATE_LIMIT_STREAM", "10/minute")
72
+ return limiter.limit(limit)(func)
73
+
74
+
75
+ def limit_health(func: Callable) -> Callable:
76
+ """
77
+ Rate limit decorator for health check endpoints.
78
+
79
+ Default: 100 requests per minute.
80
+ """
81
+ limit = os.getenv("RATE_LIMIT_HEALTH", "100/minute")
82
+ return limiter.limit(limit)(func)
83
+
84
+
85
+ def limit_custom(limit_string: str) -> Callable:
86
+ """
87
+ Create a custom rate limit decorator.
88
+
89
+ Args:
90
+ limit_string: Rate limit string (e.g., "10/minute", "100/hour")
91
+
92
+ Returns:
93
+ Decorator function
94
+ """
95
+ return limiter.limit(limit_string)
src/infrastructure/retry.py ADDED
@@ -0,0 +1,189 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Retry utilities with exponential backoff.
3
+
4
+ Provides retry logic for unreliable operations like LLM calls.
5
+ """
6
+
7
+ import asyncio
8
+ import logging
9
+ from dataclasses import dataclass, field
10
+ from functools import wraps
11
+ from typing import Any, Callable, TypeVar, ParamSpec
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+ T = TypeVar("T")
16
+ P = ParamSpec("P")
17
+
18
+
19
@dataclass
class RetryConfig:
    """Configuration for retry behavior."""

    # Total number of attempts made (the first try counts toward this number).
    max_retries: int = 3
    base_delay: float = 1.0  # Base delay in seconds
    max_delay: float = 30.0  # Maximum delay in seconds
    exponential_base: float = 2.0  # Exponential backoff base
    # NOTE(review): the default tuple includes Exception, which makes the
    # TimeoutError/ConnectionError entries redundant — effectively *every*
    # error is retried. Confirm this catch-all default is intentional.
    retryable_exceptions: tuple[type[BaseException], ...] = field(
        default_factory=lambda: (TimeoutError, ConnectionError, Exception)
    )
30
+
31
+
32
# Default configuration
# Module-level singleton used by execute_with_retry / with_retry whenever the
# caller does not pass an explicit RetryConfig.
DEFAULT_RETRY_CONFIG = RetryConfig()
34
+
35
+
36
async def execute_with_retry(
    func: Callable[P, T],
    *args: P.args,
    config: RetryConfig | None = None,
    fallback_response: T | None = None,
    on_retry: Callable[[int, Exception], None] | None = None,
    **kwargs: P.kwargs,
) -> T:
    """
    Execute a function with retry logic and exponential backoff.

    Args:
        func: Function to execute (can be sync or async)
        *args: Positional arguments for the function
        config: Retry configuration (uses defaults if None)
        fallback_response: Value to return if all retries fail (if None, raises exception)
        on_retry: Optional callback called on each retry (receives attempt number and exception)
        **kwargs: Keyword arguments for the function

    Returns:
        The function result or fallback_response

    Raises:
        The last exception if all retries fail and no fallback is provided
    """
    cfg = config or DEFAULT_RETRY_CONFIG
    caught: Exception | None = None
    # func never changes between attempts, so check its kind once up front.
    is_async = asyncio.iscoroutinefunction(func)

    for attempt_idx in range(cfg.max_retries):
        try:
            if is_async:
                return await func(*args, **kwargs)
            return func(*args, **kwargs)

        except cfg.retryable_exceptions as exc:
            caught = exc
            remaining = cfg.max_retries - attempt_idx - 1

            if remaining == 0:
                # Out of attempts: record the failure and fall through to the
                # fallback / re-raise logic below.
                logger.error(
                    f"All {cfg.max_retries} attempts failed for {func.__name__}. "
                    f"Last error: {exc}"
                )
                continue

            # Exponential backoff, capped at max_delay.
            wait_s = min(
                cfg.base_delay * cfg.exponential_base**attempt_idx,
                cfg.max_delay,
            )

            logger.warning(
                f"Attempt {attempt_idx + 1}/{cfg.max_retries} failed for {func.__name__}: {exc}. "
                f"Retrying in {wait_s:.1f}s..."
            )

            # Give the caller a hook (metrics, circuit breakers, ...).
            if on_retry:
                on_retry(attempt_idx + 1, exc)

            await asyncio.sleep(wait_s)

    # All retries exhausted.
    if fallback_response is not None:
        logger.info(f"Using fallback response for {func.__name__}")
        return fallback_response

    if caught:
        raise caught

    raise RuntimeError(f"Unexpected state in retry logic for {func.__name__}")
107
+
108
+
109
def with_retry(
    config: RetryConfig | None = None,
    fallback_response: Any = None,
) -> Callable[[Callable[P, T]], Callable[P, T]]:
    """
    Decorator to add retry logic to a function.

    NOTE: sync functions are run through asyncio.run, so a decorated sync
    callable must not be invoked from inside an already-running event loop
    (asyncio.run raises RuntimeError there).

    Args:
        config: Retry configuration
        fallback_response: Value to return if all retries fail

    Returns:
        Decorated function with retry logic
    """
    retry_cfg = config or DEFAULT_RETRY_CONFIG

    def decorator(func: Callable[P, T]) -> Callable[P, T]:
        # Build only the wrapper that matches the wrapped function's kind.
        if asyncio.iscoroutinefunction(func):

            @wraps(func)
            async def async_wrapper(*args: P.args, **kwargs: P.kwargs) -> T:
                return await execute_with_retry(
                    func,
                    *args,
                    config=retry_cfg,
                    fallback_response=fallback_response,
                    **kwargs,
                )

            return async_wrapper

        @wraps(func)
        def sync_wrapper(*args: P.args, **kwargs: P.kwargs) -> T:
            return asyncio.run(
                execute_with_retry(
                    func,
                    *args,
                    config=retry_cfg,
                    fallback_response=fallback_response,
                    **kwargs,
                )
            )

        return sync_wrapper

    return decorator
153
+
154
+
155
class RetryableMixin:
    """
    Mixin class that adds retry capability to any class.

    Usage:
        class MyAgent(RetryableMixin):
            async def call_llm(self, prompt):
                return await self.with_retry(
                    self._do_call_llm,
                    prompt,
                    fallback_response="Sorry, I couldn't process that."
                )
    """

    # Class-level default shared by all instances; set_retry_config installs
    # an instance-level override without affecting other instances.
    _retry_config: RetryConfig = DEFAULT_RETRY_CONFIG

    async def with_retry(
        self,
        func: Callable[P, T],
        *args: P.args,
        fallback_response: T | None = None,
        **kwargs: P.kwargs,
    ) -> T:
        """Execute a method with retry logic."""
        # Delegates to the module-level helper using this object's config.
        return await execute_with_retry(
            func,
            *args,
            config=self._retry_config,
            fallback_response=fallback_response,
            **kwargs,
        )

    def set_retry_config(self, config: RetryConfig) -> None:
        """Update retry configuration."""
        self._retry_config = config
src/llm/__init__.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ LLM Module - Multi-provider LLM abstraction layer.
3
+
4
+ This module provides:
5
+ - LLMFactory: Create LLM instances for multiple providers (Google, OpenAI, Anthropic)
6
+ - CostTrackingCallback: Track token usage and costs per LLM call
7
+ """
8
+
9
+ from .factory import LLMFactory, detect_provider, MODEL_PROVIDERS
10
+ from .cost_tracker import CostTrackingCallback
11
+ from .exceptions import (
12
+ LLMError,
13
+ LLMProviderError,
14
+ LLMTimeoutError,
15
+ LLMRateLimitError,
16
+ LLMInvalidModelError,
17
+ )
18
+
19
+ __all__ = [
20
+ # Factory
21
+ "LLMFactory",
22
+ "detect_provider",
23
+ "MODEL_PROVIDERS",
24
+ # Cost tracking
25
+ "CostTrackingCallback",
26
+ # Exceptions
27
+ "LLMError",
28
+ "LLMProviderError",
29
+ "LLMTimeoutError",
30
+ "LLMRateLimitError",
31
+ "LLMInvalidModelError",
32
+ ]
src/llm/cost_tracker.py ADDED
@@ -0,0 +1,295 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Cost Tracking Callback for LLM usage monitoring.
3
+
4
+ Tracks token usage and calculates costs per LLM call.
5
+ """
6
+
7
+ import logging
8
+ from datetime import datetime
9
+ from typing import Any
10
+
11
+ from langchain_core.callbacks import BaseCallbackHandler
12
+ from langchain_core.outputs import LLMResult
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
class CostTrackingCallback(BaseCallbackHandler):
    """
    LangChain callback handler for tracking LLM costs.

    Tracks:
    - Input/output token counts
    - Cost per call and cumulative
    - Model usage statistics

    Usage:
        callback = CostTrackingCallback()
        llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash", callbacks=[callback])
        response = llm.invoke("Hello!")
        print(callback.get_summary())
    """

    # Pricing per 1M tokens (USD) - Update as needed
    PRICING: dict[str, dict[str, float]] = {
        # Google Gemini
        "gemini-3-pro-preview": {"input": 1.50, "output": 6.00, "cache": 0.40},
        "gemini-2.5-flash": {"input": 0.15, "output": 0.60, "cache": 0.02},
        "gemini-2.5-pro": {"input": 1.25, "output": 5.00, "cache": 0.32},
        "gemini-2.0-flash": {"input": 0.10, "output": 0.40, "cache": 0.01},
        "gemini-1.5-flash": {"input": 0.075, "output": 0.30, "cache": 0.02},
        "gemini-1.5-pro": {"input": 1.25, "output": 5.00, "cache": 0.32},
        # OpenAI
        "gpt-4o": {"input": 2.50, "output": 10.00, "cache": 1.25},
        "gpt-4o-mini": {"input": 0.15, "output": 0.60, "cache": 0.08},
        "gpt-4-turbo": {"input": 10.00, "output": 30.00, "cache": 5.00},
        "gpt-4": {"input": 30.00, "output": 60.00, "cache": 15.00},
        "gpt-3.5-turbo": {"input": 0.50, "output": 1.50, "cache": 0.25},
        # Anthropic Claude
        "claude-sonnet-4-20250514": {"input": 3.00, "output": 15.00, "cache": 0.30},
        "claude-3-5-sonnet-20241022": {"input": 3.00, "output": 15.00, "cache": 0.30},
        "claude-3-5-haiku-20241022": {"input": 0.80, "output": 4.00, "cache": 0.08},
        "claude-3-opus-20240229": {"input": 15.00, "output": 75.00, "cache": 1.50},
    }

    # Default pricing for unknown models
    DEFAULT_PRICING = {"input": 1.00, "output": 3.00, "cache": 0.10}

    def __init__(self, log_calls: bool = True):
        """
        Initialize the cost tracker.

        Args:
            log_calls: Whether to log each LLM call
        """
        super().__init__()
        self.log_calls = log_calls
        # Running totals across all calls seen by this handler instance.
        self.total_cost: float = 0.0
        self.total_tokens: dict[str, int] = {"input": 0, "output": 0, "cache": 0}
        # One entry per LLM call (see call_info in on_llm_end).
        self.calls: list[dict[str, Any]] = []
        # NOTE(review): datetime.utcnow() is naive UTC and deprecated on
        # Python 3.12+; consider datetime.now(timezone.utc) in a future change.
        self.start_time: datetime = datetime.utcnow()

    def on_llm_start(self, serialized: dict[str, Any], prompts: list[str], **kwargs) -> None:
        """Called when LLM starts processing."""
        pass  # Could track start time per call if needed

    def on_llm_end(self, response: LLMResult, **kwargs) -> None:
        """
        Called when LLM finishes processing.

        Calculates and records the cost of the call. Token usage is looked up
        in two places because providers report it differently (see below).
        """
        logger.debug(f"[COST DEBUG] on_llm_end called. llm_output: {response.llm_output}")

        # Try to extract usage from multiple sources (different providers put it in different places)
        input_tokens = 0
        output_tokens = 0
        cache_tokens = 0
        model = "unknown"

        # Source 1: llm_output (OpenAI, Anthropic style)
        if response.llm_output:
            model = self._extract_model_name(response.llm_output)
            usage = response.llm_output.get("token_usage", {})
            input_tokens = usage.get("prompt_tokens", 0) or usage.get("input_tokens", 0)
            output_tokens = usage.get("completion_tokens", 0) or usage.get("output_tokens", 0)
            cache_tokens = usage.get("cache_read_input_tokens", 0) or usage.get("cached_tokens", 0)

        # Source 2: generations metadata (Google Gemini style).
        # Only consulted when Source 1 yielded nothing; stops at the first
        # generation that carries usage data.
        if input_tokens == 0 and output_tokens == 0 and response.generations:
            for gen_list in response.generations:
                for gen in gen_list:
                    # Check generation_info
                    gen_info = getattr(gen, "generation_info", {}) or {}
                    usage_meta = gen_info.get("usage_metadata", {})
                    if usage_meta:
                        input_tokens = usage_meta.get("input_tokens", 0)
                        output_tokens = usage_meta.get("output_tokens", 0)
                        cache_details = usage_meta.get("input_token_details", {})
                        cache_tokens = cache_details.get("cache_read", 0)
                        if model == "unknown":
                            model = gen_info.get("model_name", "unknown")
                        break

                    # Check message attribute (for ChatGeneration)
                    msg = getattr(gen, "message", None)
                    if msg:
                        msg_usage = getattr(msg, "usage_metadata", None)
                        if msg_usage:
                            input_tokens = msg_usage.get("input_tokens", 0)
                            output_tokens = msg_usage.get("output_tokens", 0)
                            cache_details = msg_usage.get("input_token_details", {})
                            cache_tokens = cache_details.get("cache_read", 0)
                            resp_meta = getattr(msg, "response_metadata", {}) or {}
                            if model == "unknown":
                                model = resp_meta.get("model_name", "unknown")
                            break
                if input_tokens > 0 or output_tokens > 0:
                    break

        # Skip if no usage data found
        if input_tokens == 0 and output_tokens == 0:
            logger.debug("[COST DEBUG] No token usage found in response, skipping cost tracking")
            return

        logger.debug(f"[COST DEBUG] Extracted: model={model}, input={input_tokens}, output={output_tokens}, cache={cache_tokens}")

        # Calculate cost from the per-1M-token price table; unknown models
        # fall back to DEFAULT_PRICING.
        pricing = self.PRICING.get(model, self.DEFAULT_PRICING)
        input_cost = (input_tokens * pricing["input"]) / 1_000_000
        output_cost = (output_tokens * pricing["output"]) / 1_000_000
        cache_cost = (cache_tokens * pricing["cache"]) / 1_000_000
        total_call_cost = input_cost + output_cost + cache_cost

        # Update totals
        self.total_cost += total_call_cost
        self.total_tokens["input"] += input_tokens
        self.total_tokens["output"] += output_tokens
        self.total_tokens["cache"] += cache_tokens

        # Record call details
        call_info = {
            "timestamp": datetime.utcnow().isoformat(),
            "model": model,
            "tokens": {
                "input": input_tokens,
                "output": output_tokens,
                "cache": cache_tokens,
            },
            "cost": {
                "input": input_cost,
                "output": output_cost,
                "cache": cache_cost,
                "total": total_call_cost,
            },
        }
        self.calls.append(call_info)

        # Log if enabled
        if self.log_calls:
            logger.info(
                f"[COST] {model} | "
                f"Tokens: {input_tokens:,} in / {output_tokens:,} out"
                + (f" / {cache_tokens:,} cache" if cache_tokens else "")
                + f" | Cost: ${total_call_cost:.6f} | Total: ${self.total_cost:.6f}"
            )

    def on_llm_error(self, error: Exception, **kwargs) -> None:
        """Called when LLM encounters an error."""
        logger.error(f"[COST] LLM Error: {error}")

    def _extract_model_name(self, llm_output: dict[str, Any]) -> str:
        """Extract model name from LLM output."""
        # Try common keys
        for key in ["model_name", "model", "model_id"]:
            if key in llm_output:
                return llm_output[key]

        # Check nested structure
        if "model_info" in llm_output:
            return llm_output["model_info"].get("model", "unknown")

        return "unknown"

    def get_summary(self) -> dict[str, Any]:
        """
        Get a summary of all tracked costs.

        Returns:
            Dictionary with cost summary
        """
        duration = (datetime.utcnow() - self.start_time).total_seconds()

        return {
            "total_cost": round(self.total_cost, 6),
            "total_tokens": self.total_tokens.copy(),
            "calls_count": len(self.calls),
            "duration_seconds": round(duration, 2),
            "avg_cost_per_call": round(self.total_cost / len(self.calls), 6) if self.calls else 0,
            "models_used": list(set(call["model"] for call in self.calls)),
            "start_time": self.start_time.isoformat(),
        }

    def get_detailed_report(self) -> dict[str, Any]:
        """
        Get a detailed report including all calls.

        Returns:
            Dictionary with full cost details
        """
        summary = self.get_summary()
        summary["calls"] = self.calls
        return summary

    def get_cost_by_model(self) -> dict[str, dict[str, float]]:
        """
        Get costs aggregated by model.

        Returns:
            Dictionary mapping model names to their costs
        """
        by_model: dict[str, dict[str, float]] = {}

        for call in self.calls:
            model = call["model"]
            if model not in by_model:
                by_model[model] = {"cost": 0.0, "input_tokens": 0, "output_tokens": 0, "calls": 0}

            by_model[model]["cost"] += call["cost"]["total"]
            by_model[model]["input_tokens"] += call["tokens"]["input"]
            by_model[model]["output_tokens"] += call["tokens"]["output"]
            by_model[model]["calls"] += 1

        return by_model

    def reset(self) -> None:
        """Reset all tracked data."""
        self.total_cost = 0.0
        self.total_tokens = {"input": 0, "output": 0, "cache": 0}
        self.calls = []
        self.start_time = datetime.utcnow()

    def get_snapshot(self) -> dict[str, Any]:
        """
        Get a snapshot of current totals for delta calculation.

        Returns:
            Dictionary with current cost and token totals
        """
        return {
            "total_cost": self.total_cost,
            "total_tokens": self.total_tokens.copy(),
            "calls_count": len(self.calls),
        }

    def calculate_delta(self, previous_snapshot: dict[str, Any]) -> dict[str, Any]:
        """
        Calculate the delta between current state and a previous snapshot.

        Args:
            previous_snapshot: Snapshot from get_snapshot()

        Returns:
            Dictionary with cost and token deltas for this period
        """
        prev_cost = previous_snapshot.get("total_cost", 0.0)
        prev_tokens = previous_snapshot.get("total_tokens", {"input": 0, "output": 0, "cache": 0})
        prev_calls = previous_snapshot.get("calls_count", 0)

        return {
            "cost": round(self.total_cost - prev_cost, 6),
            "tokens": {
                "input": self.total_tokens["input"] - prev_tokens.get("input", 0),
                "output": self.total_tokens["output"] - prev_tokens.get("output", 0),
                "cache": self.total_tokens["cache"] - prev_tokens.get("cache", 0),
            },
            "calls": len(self.calls) - prev_calls,
        }

    def __str__(self) -> str:
        """String representation of current costs."""
        return (
            f"CostTracker: ${self.total_cost:.6f} total | "
            f"{self.total_tokens['input']:,} in / {self.total_tokens['output']:,} out | "
            f"{len(self.calls)} calls"
        )
src/llm/exceptions.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Custom exceptions for LLM module.
3
+ """
4
+
5
+
6
+ class LLMError(Exception):
7
+ """Base exception for LLM-related errors."""
8
+
9
+ def __init__(self, message: str, provider: str | None = None, model: str | None = None):
10
+ self.provider = provider
11
+ self.model = model
12
+ super().__init__(message)
13
+
14
+
15
class LLMProviderError(LLMError):
    """Raised when there's an error with the LLM provider."""
19
+
20
+
21
class LLMTimeoutError(LLMError):
    """Raised when an LLM request times out."""
25
+
26
+
27
class LLMRateLimitError(LLMError):
    """Raised when rate limit is exceeded."""

    def __init__(
        self,
        message: str,
        provider: str | None = None,
        model: str | None = None,
        retry_after: int | None = None,
    ):
        super().__init__(message, provider, model)
        # Seconds to wait before retrying, when the provider reported one.
        self.retry_after = retry_after
39
+
40
+
41
class LLMInvalidModelError(LLMError):
    """Raised when an invalid model is specified."""

    def __init__(self, model: str, available_models: list[str] | None = None):
        self.available_models = available_models or []
        # Compose the message from its parts so the "Available models" tail
        # only appears when there is something to list.
        parts = [f"Invalid model: {model}"]
        if available_models:
            parts.append(f"Available models: {', '.join(available_models)}")
        super().__init__(". ".join(parts), model=model)
src/llm/factory.py ADDED
@@ -0,0 +1,285 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ LLM Factory - Multi-provider LLM abstraction.
3
+
4
+ Supports:
5
+ - Google (Gemini)
6
+ - OpenAI (GPT)
7
+ - Anthropic (Claude)
8
+ """
9
+
10
+ import os
11
+ from typing import Literal
12
+
13
+ from langchain_core.language_models import BaseChatModel
14
+
15
+ from .exceptions import LLMInvalidModelError, LLMProviderError
16
+
17
# Provider identifier used throughout this module.
Provider = Literal["google", "openai", "anthropic"]

# Known models per provider. detect_provider() first matches on model-name
# prefix and only falls back to this table for exact-name lookups.
MODEL_PROVIDERS: dict[Provider, list[str]] = {
    "google": [
        "gemini-3-pro-preview",
        "gemini-2.5-flash",
        "gemini-2.5-pro",
        "gemini-2.0-flash",
        "gemini-1.5-flash",
        "gemini-1.5-pro",
    ],
    "openai": [
        "gpt-4o",
        "gpt-4o-mini",
        "gpt-4-turbo",
        "gpt-4",
        "gpt-3.5-turbo",
    ],
    "anthropic": [
        "claude-sonnet-4-20250514",
        "claude-3-5-sonnet-20241022",
        "claude-3-5-haiku-20241022",
        "claude-3-opus-20240229",
    ],
}

# Flatten for quick lookup
ALL_MODELS: set[str] = {model for models in MODEL_PROVIDERS.values() for model in models}
45
+
46
+
47
def detect_provider(model: str) -> Provider:
    """
    Detect the provider based on model name.

    Args:
        model: The model name (e.g., 'gemini-2.5-flash', 'gpt-4o')

    Returns:
        The provider name ('google', 'openai', 'anthropic')

    Raises:
        LLMInvalidModelError: If the model is not recognized
    """
    normalized = model.lower()

    # Prefix conventions cover current and future model versions.
    prefix_to_provider: dict[str, Provider] = {
        "gemini": "google",
        "gpt": "openai",
        "claude": "anthropic",
    }
    for prefix, provider in prefix_to_provider.items():
        if normalized.startswith(prefix):
            return provider

    # Exact-name lookup for anything that doesn't follow the prefix rule.
    for provider, models in MODEL_PROVIDERS.items():
        if model in models:
            return provider

    raise LLMInvalidModelError(model, list(ALL_MODELS))
76
+
77
+
78
+ class LLMFactory:
79
+ """Factory for creating LLM instances across multiple providers."""
80
+
81
+ # Cache for LLM instances (singleton per model+config)
82
+ _instances: dict[str, BaseChatModel] = {}
83
+
84
+ @classmethod
85
+ def create(
86
+ cls,
87
+ model: str,
88
+ temperature: float = 0.7,
89
+ max_retries: int = 3,
90
+ timeout: int = 60,
91
+ api_key: str | None = None,
92
+ use_cache: bool = True,
93
+ **kwargs,
94
+ ) -> BaseChatModel:
95
+ """
96
+ Create an LLM instance for the specified model.
97
+
98
+ Args:
99
+ model: Model name (e.g., 'gemini-2.5-flash', 'gpt-4o', 'claude-sonnet-4-20250514')
100
+ temperature: Sampling temperature (0.0 to 1.0)
101
+ max_retries: Maximum number of retries on failure
102
+ timeout: Request timeout in seconds
103
+ api_key: Optional API key (defaults to environment variable)
104
+ use_cache: Whether to use cached instances
105
+ **kwargs: Additional provider-specific arguments
106
+
107
+ Returns:
108
+ BaseChatModel instance
109
+
110
+ Raises:
111
+ LLMInvalidModelError: If model is not recognized
112
+ LLMProviderError: If provider initialization fails
113
+ """
114
+ # Check cache
115
+ cache_key = f"{model}:{temperature}:{timeout}"
116
+ if use_cache and cache_key in cls._instances:
117
+ return cls._instances[cache_key]
118
+
119
+ provider = detect_provider(model)
120
+
121
+ try:
122
+ llm = cls._create_for_provider(
123
+ provider=provider,
124
+ model=model,
125
+ temperature=temperature,
126
+ max_retries=max_retries,
127
+ timeout=timeout,
128
+ api_key=api_key,
129
+ **kwargs,
130
+ )
131
+
132
+ if use_cache:
133
+ cls._instances[cache_key] = llm
134
+
135
+ return llm
136
+
137
+ except ImportError as e:
138
+ raise LLMProviderError(
139
+ f"Provider '{provider}' dependencies not installed: {e}",
140
+ provider=provider,
141
+ model=model,
142
+ )
143
+ except Exception as e:
144
+ raise LLMProviderError(
145
+ f"Failed to create LLM for '{model}': {e}",
146
+ provider=provider,
147
+ model=model,
148
+ )
149
+
150
+ @classmethod
151
+ def _create_for_provider(
152
+ cls,
153
+ provider: Provider,
154
+ model: str,
155
+ temperature: float,
156
+ max_retries: int,
157
+ timeout: int,
158
+ api_key: str | None,
159
+ **kwargs,
160
+ ) -> BaseChatModel:
161
+ """Create LLM instance for a specific provider."""
162
+ match provider:
163
+ case "google":
164
+ return cls._create_google(
165
+ model, temperature, max_retries, timeout, api_key, **kwargs
166
+ )
167
+ case "openai":
168
+ return cls._create_openai(
169
+ model, temperature, max_retries, timeout, api_key, **kwargs
170
+ )
171
+ case "anthropic":
172
+ return cls._create_anthropic(
173
+ model, temperature, max_retries, timeout, api_key, **kwargs
174
+ )
175
+
176
+ @staticmethod
177
+ def _create_google(
178
+ model: str,
179
+ temperature: float,
180
+ max_retries: int,
181
+ timeout: int,
182
+ api_key: str | None,
183
+ callbacks: list | None = None,
184
+ **kwargs,
185
+ ) -> BaseChatModel:
186
+ """Create Google Gemini LLM instance."""
187
+ from langchain_google_genai import ChatGoogleGenerativeAI
188
+
189
+ return ChatGoogleGenerativeAI(
190
+ model=model,
191
+ temperature=temperature,
192
+ max_retries=max_retries,
193
+ timeout=timeout,
194
+ google_api_key=api_key or os.getenv("GEMINI_API_KEY"),
195
+ callbacks=callbacks,
196
+ **kwargs,
197
+ )
198
+
199
+ @staticmethod
200
+ def _create_openai(
201
+ model: str,
202
+ temperature: float,
203
+ max_retries: int,
204
+ timeout: int,
205
+ api_key: str | None,
206
+ callbacks: list | None = None,
207
+ **kwargs,
208
+ ) -> BaseChatModel:
209
+ """Create OpenAI LLM instance."""
210
+ from langchain_openai import ChatOpenAI
211
+
212
+ return ChatOpenAI(
213
+ model=model,
214
+ temperature=temperature,
215
+ max_retries=max_retries,
216
+ timeout=timeout,
217
+ api_key=api_key or os.getenv("OPENAI_API_KEY"),
218
+ callbacks=callbacks,
219
+ **kwargs,
220
+ )
221
+
222
+ @staticmethod
223
+ def _create_anthropic(
224
+ model: str,
225
+ temperature: float,
226
+ max_retries: int,
227
+ timeout: int,
228
+ api_key: str | None,
229
+ callbacks: list | None = None,
230
+ **kwargs,
231
+ ) -> BaseChatModel:
232
+ """Create Anthropic Claude LLM instance."""
233
+ from langchain_anthropic import ChatAnthropic
234
+
235
+ return ChatAnthropic(
236
+ model=model,
237
+ temperature=temperature,
238
+ max_retries=max_retries,
239
+ timeout=timeout,
240
+ api_key=api_key or os.getenv("ANTHROPIC_API_KEY"),
241
+ callbacks=callbacks,
242
+ **kwargs,
243
+ )
244
+
245
+ @classmethod
246
+ def list_models(cls, provider: Provider | None = None) -> list[str]:
247
+ """
248
+ List available models.
249
+
250
+ Args:
251
+ provider: Optional provider to filter by
252
+
253
+ Returns:
254
+ List of model names
255
+ """
256
+ if provider:
257
+ return MODEL_PROVIDERS.get(provider, [])
258
+ return list(ALL_MODELS)
259
+
260
+ @classmethod
261
+ def list_providers(cls) -> list[Provider]:
262
+ """List available providers."""
263
+ return list(MODEL_PROVIDERS.keys())
264
+
265
+ @classmethod
266
+ def clear_cache(cls) -> None:
267
+ """Clear the LLM instance cache."""
268
+ cls._instances.clear()
269
+
270
+ @classmethod
271
+ def get_default_model(cls, provider: Provider | None = None) -> str:
272
+ """
273
+ Get the default model for a provider.
274
+
275
+ Args:
276
+ provider: Provider name (defaults to 'google')
277
+
278
+ Returns:
279
+ Default model name
280
+ """
281
+ provider = provider or "google"
282
+ models = MODEL_PROVIDERS.get(provider, [])
283
+ if not models:
284
+ raise LLMProviderError(f"No models available for provider: {provider}")
285
+ return models[0]
src/service/chat_manager.py CHANGED
@@ -168,6 +168,62 @@ class ChatManager:
168
  display_name=display_name,
169
  )
170
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
171
 
172
  # Singleton-style accessor for the FastAPI routes
173
  chat_manager_instance = ChatManager()
 
168
  display_name=display_name,
169
  )
170
 
171
    # ---- Cost tracking ----------------------------------------------------
    def update_conversation_costs(
        self,
        cost_delta: Dict,
        conversation_id: Optional[str] = None,
        user_id: Optional[str] = None,
    ) -> Dict:
        """
        Update the conversation with cost data from the latest request.

        Best-effort: store failures are logged and swallowed so that cost
        bookkeeping can never fail the chat request itself.

        Args:
            cost_delta: Cost delta dict with 'cost', 'tokens', 'calls' keys
            conversation_id: Conversation ID (resolved via self._resolve_ids when None)
            user_id: User ID (resolved via self._resolve_ids when None)

        Returns:
            Updated conversation data, or {} if the store update failed
        """
        conversation_id, user_id = self._resolve_ids(conversation_id, user_id)
        try:
            result = self._store.update_conversation_costs(user_id, conversation_id, cost_delta)
            logger.info(
                "Updated costs for user=%s conversation=%s: +$%.6f (%d calls)",
                user_id,
                conversation_id,
                cost_delta.get("cost", 0),
                cost_delta.get("calls", 0),
            )
            return result
        except Exception as exc:
            # Deliberate swallow: cost tracking is observability, not core flow.
            logger.warning(
                "Failed to update costs for user=%s conversation=%s: %s",
                user_id,
                conversation_id,
                exc,
            )
            return {}
208
+
209
+ def get_conversation_costs(
210
+ self,
211
+ conversation_id: Optional[str] = None,
212
+ user_id: Optional[str] = None,
213
+ ) -> Dict:
214
+ """
215
+ Get the accumulated costs for a conversation.
216
+
217
+ Args:
218
+ conversation_id: Conversation ID
219
+ user_id: User ID
220
+
221
+ Returns:
222
+ Cost data dict
223
+ """
224
+ conversation_id, user_id = self._resolve_ids(conversation_id, user_id)
225
+ return self._store.get_conversation_costs(user_id, conversation_id)
226
+
227
 
228
  # Singleton-style accessor for the FastAPI routes
229
  chat_manager_instance = ChatManager()
src/service/panorama_store.py CHANGED
@@ -388,3 +388,99 @@ class PanoramaStore:
388
  )
389
  conversation = self.ensure_conversation(user_id, conversation_id)
390
  return user, conversation
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
388
  )
389
  conversation = self.ensure_conversation(user_id, conversation_id)
390
  return user, conversation
391
+
392
+ # ---- cost tracking -----------------------------------------------------
393
+ def update_conversation_costs(
394
+ self,
395
+ user_id: str,
396
+ conversation_id: str,
397
+ cost_delta: Dict[str, Any],
398
+ ) -> Dict[str, Any]:
399
+ """
400
+ Update the conversation with accumulated cost data.
401
+
402
+ Args:
403
+ user_id: User ID
404
+ conversation_id: Conversation ID
405
+ cost_delta: Cost delta from this request (cost, tokens, calls)
406
+
407
+ Returns:
408
+ Updated conversation data
409
+ """
410
+ conv_key = _conversation_key(user_id, conversation_id)
411
+ try:
412
+ conversation = self._client.get("conversations", conv_key)
413
+ except PanoramaGatewayError as exc:
414
+ if exc.status_code == 404:
415
+ self._logger.warning("Conversation %s not found for cost update", conv_key)
416
+ return {}
417
+ raise
418
+
419
+ # Get existing cost data from contextState
420
+ context_state = conversation.get("contextState", {}) or {}
421
+ existing_costs = context_state.get("costs", {
422
+ "total_cost": 0.0,
423
+ "total_tokens": {"input": 0, "output": 0, "cache": 0},
424
+ "total_calls": 0,
425
+ })
426
+
427
+ # Accumulate costs
428
+ delta_tokens = cost_delta.get("tokens", {})
429
+ existing_tokens = existing_costs.get("total_tokens", {"input": 0, "output": 0, "cache": 0})
430
+
431
+ updated_costs = {
432
+ "total_cost": round(existing_costs.get("total_cost", 0.0) + cost_delta.get("cost", 0.0), 6),
433
+ "total_tokens": {
434
+ "input": existing_tokens.get("input", 0) + delta_tokens.get("input", 0),
435
+ "output": existing_tokens.get("output", 0) + delta_tokens.get("output", 0),
436
+ "cache": existing_tokens.get("cache", 0) + delta_tokens.get("cache", 0),
437
+ },
438
+ "total_calls": existing_costs.get("total_calls", 0) + cost_delta.get("calls", 0),
439
+ "last_updated": _utc_now_iso(),
440
+ }
441
+
442
+ # Update contextState with new costs
443
+ context_state["costs"] = updated_costs
444
+ try:
445
+ return self._client.update(
446
+ "conversations",
447
+ conv_key,
448
+ {"contextState": context_state, "updatedAt": _utc_now_iso()},
449
+ )
450
+ except PanoramaGatewayError as exc:
451
+ self._logger.error(
452
+ "Failed to update costs for conversation %s: status=%s",
453
+ conv_key,
454
+ exc.status_code,
455
+ )
456
+ raise
457
+
458
+ def get_conversation_costs(
459
+ self,
460
+ user_id: str,
461
+ conversation_id: str,
462
+ ) -> Dict[str, Any]:
463
+ """
464
+ Get the accumulated costs for a conversation.
465
+
466
+ Args:
467
+ user_id: User ID
468
+ conversation_id: Conversation ID
469
+
470
+ Returns:
471
+ Cost data or empty dict if not found
472
+ """
473
+ conv_key = _conversation_key(user_id, conversation_id)
474
+ try:
475
+ conversation = self._client.get("conversations", conv_key)
476
+ except PanoramaGatewayError as exc:
477
+ if exc.status_code == 404:
478
+ return {}
479
+ raise
480
+
481
+ context_state = conversation.get("contextState", {}) or {}
482
+ return context_state.get("costs", {
483
+ "total_cost": 0.0,
484
+ "total_tokens": {"input": 0, "output": 0, "cache": 0},
485
+ "total_calls": 0,
486
+ })