Spaces:

Agents-MCP-Hackathon
/

Audio-Agent

Sleeping

App Files Files Community

YigitSekerci committed on Jun 7, 2025

Commit

903ecf8

1 Parent(s): 4a4128c

implement basic agent

Browse files

Files changed (1) hide show

src/agent.py +367 -0

src/agent.py ADDED Viewed

	@@ -0,0 +1,367 @@

import asyncio
import logging
from typing import Any, AsyncIterator, Dict, List, Optional, Tuple, Union

from dotenv import load_dotenv
from langchain_core.exceptions import OutputParserException
from langchain_core.messages import AIMessage, BaseMessage, HumanMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_mcp_adapters.client import MultiServerMCPClient
from langgraph.prebuilt import create_react_agent
# Configure logging: set up the root logger at INFO level when this module is
# imported, so the agent's progress messages are emitted without extra setup.
logging.basicConfig(level=logging.INFO)
# Module-level logger used by everything below.
logger = logging.getLogger(__name__)
# Load variables from a .env file into the environment before any agent is
# created (presumably model-provider credentials -- confirm against deployment).
load_dotenv()
class AudioAgentError(Exception):
    """Base class for every error raised by the AudioAgent module.

    Catch this type to handle initialization and chat failures uniformly.
    """
class AudioAgentInitializationError(AudioAgentError):
    """Signals that the agent could not be initialized.

    Raised when loading the MCP tools or constructing the agent fails.
    """
class AudioAgentChatError(AudioAgentError):
    """Signals that a chat request could not be processed.

    Raised for invalid user input as well as for failures while invoking the
    agent or reading its response.
    """
class AudioAgent:
    """
    A class to manage an audio-focused AI agent with MCP tools integration.

    The agent loads its tools from an MCP server through
    ``MultiServerMCPClient``, wraps them in a ReAct agent built with
    ``create_react_agent``, and exposes async, sync and streaming chat
    entry points. Initialization is lazy: the chat methods call
    ``initialize()`` on first use.
    """

    # Upper bound on the length of a single user message, measured after
    # surrounding whitespace has been stripped.
    MAX_MESSAGE_LENGTH = 10000

    def __init__(self, model_name: str = "gpt-4o", server_url: str = "http://127.0.0.1:7860/gradio_api/mcp/sse"):
        """
        Initialize the AudioAgent.

        No network traffic happens here; tools are fetched in ``initialize()``.

        Args:
            model_name: The language model to use for the agent
            server_url: The URL of the MCP server providing audio tools
        """
        self.model_name = model_name
        self.server_url = server_url
        self._agent = None
        self._tools = None
        self._is_initialized = False
        self._output_parser = StrOutputParser()
        # Initialize MCP client
        self._client = MultiServerMCPClient({
            "audio-tools": {
                "url": server_url,
                "transport": "sse",
            }
        })

    @property
    def is_initialized(self) -> bool:
        """Check if the agent is initialized and ready to use."""
        return self._is_initialized

    async def initialize(self) -> None:
        """
        Initialize the agent with tools from the MCP client.

        Calling this on an already initialized agent is a no-op.

        Raises:
            AudioAgentInitializationError: If the tools cannot be loaded, the
                server offers no tools, or the agent cannot be created
        """
        if self._is_initialized:
            logger.info("Agent already initialized")
            return
        try:
            logger.info("Initializing AudioAgent...")
            # Get tools from MCP client
            self._tools = await self._client.get_tools()
            if not self._tools:
                raise AudioAgentInitializationError("No tools available from MCP client")
            logger.info(f"Loaded {len(self._tools)} tools: {[tool.name for tool in self._tools]}")
            # Create the agent
            self._agent = create_react_agent(
                self.model_name,
                self._tools,
            )
            self._is_initialized = True
            logger.info("AudioAgent initialized successfully")
        except Exception as e:
            error_msg = f"Failed to initialize AudioAgent: {str(e)}"
            logger.error(error_msg)
            raise AudioAgentInitializationError(error_msg) from e

    def _convert_to_langchain_messages(self, history: List[Tuple[str, Optional[str]]]) -> List["BaseMessage"]:
        """
        Convert chat history to LangChain message objects.

        Two entry shapes are accepted:
          * ``(human_message, ai_response)`` pairs (the original format);
          * ``{"role": ..., "content": ...}`` dicts, where role ``"user"``
            maps to a human message and ``"assistant"`` to an AI message.

        Entries whose text is missing, blank, or not a string are skipped
        instead of raising, so non-text history items do not break a chat.

        Args:
            history: Previous turns in either of the shapes above

        Returns:
            List of LangChain BaseMessage objects, in conversation order
        """
        messages: List["BaseMessage"] = []

        def _append(factory, text: Any) -> None:
            # Only non-blank strings become messages; everything else is dropped.
            if isinstance(text, str) and text.strip():
                messages.append(factory(content=text.strip()))
            elif text is not None and not isinstance(text, str):
                logger.debug(f"Skipping non-text history entry of type {type(text).__name__}")

        for entry in history:
            if isinstance(entry, dict):
                role = entry.get("role")
                if role == "user":
                    _append(HumanMessage, entry.get("content"))
                elif role == "assistant":
                    _append(AIMessage, entry.get("content"))
                continue
            human_msg, ai_msg = entry
            _append(HumanMessage, human_msg)
            _append(AIMessage, ai_msg)
        return messages

    def _format_messages_for_agent(self, messages: List["BaseMessage"]) -> List[Dict[str, str]]:
        """
        Convert LangChain messages to the format expected by the agent.

        Args:
            messages: List of LangChain BaseMessage objects

        Returns:
            List of message dictionaries with role and content
        """
        formatted_messages = []
        for message in messages:
            if isinstance(message, HumanMessage):
                formatted_messages.append({"role": "user", "content": message.content})
            elif isinstance(message, AIMessage):
                formatted_messages.append({"role": "assistant", "content": message.content})
            else:
                # Any other message type is forwarded as user text.
                formatted_messages.append({"role": "user", "content": str(message.content)})
        return formatted_messages

    def _build_agent_input(self, validated_message: str, history: Optional[List[Tuple[str, Optional[str]]]]) -> Dict[str, Any]:
        """
        Build the input payload for the agent from history plus the new message.

        Args:
            validated_message: The already validated user message
            history: Previous chat history, or None for a fresh conversation

        Returns:
            A ``{"messages": [...]}`` dict ready for ``ainvoke``/``astream``
        """
        langchain_messages = self._convert_to_langchain_messages(history or [])
        langchain_messages.append(HumanMessage(content=validated_message))
        return {"messages": self._format_messages_for_agent(langchain_messages)}

    @staticmethod
    def _content_to_text(content: Any) -> str:
        """
        Flatten message content into a plain string.

        Message content may be a string or a list of blocks, where each block
        is either a string or a dict. Text is taken from string blocks and
        from the ``"text"`` field of dict blocks; other blocks are ignored.

        Args:
            content: The raw ``content`` value of a message

        Returns:
            The concatenated text, or an empty string when there is none
        """
        if content is None:
            return ""
        if isinstance(content, str):
            return content
        if isinstance(content, list):
            parts = []
            for block in content:
                if isinstance(block, str):
                    parts.append(block)
                elif isinstance(block, dict) and isinstance(block.get("text"), str):
                    parts.append(block["text"])
            return "".join(parts)
        return str(content)

    async def _extract_response_content(self, response: Dict[str, Any]) -> str:
        """
        Extract the content from the agent's response using LangChain output parser.

        Only the last message of ``response["messages"]`` is read.

        Args:
            response: The response from the agent

        Returns:
            The extracted content as a string, or a fallback sentence when the
            last message carries no text

        Raises:
            AudioAgentChatError: If response parsing fails
        """
        try:
            if not response:
                raise OutputParserException("Received empty response from agent")
            if "messages" not in response or not response["messages"]:
                raise OutputParserException("No messages found in agent response")
            last_message = response["messages"][-1]
            # Handle different message formats
            if hasattr(last_message, 'content'):
                content = last_message.content
            elif isinstance(last_message, dict) and 'content' in last_message:
                content = last_message['content']
            else:
                content = str(last_message)
            # Normalize list-style content first so the parser (and the caller)
            # always receives a real string.
            parsed_content = await self._output_parser.aparse(self._content_to_text(content))
            return parsed_content if parsed_content else "I couldn't generate a response."
        except OutputParserException as e:
            logger.warning(f"Output parsing failed: {e}")
            raise AudioAgentChatError(f"Failed to parse agent response: {str(e)}") from e
        except Exception as e:
            logger.error(f"Unexpected error in response extraction: {e}")
            raise AudioAgentChatError(f"Error extracting response content: {str(e)}") from e

    def _validate_message(self, message: str) -> str:
        """
        Validate and sanitize the input message.

        Args:
            message: The user's message

        Returns:
            The message with leading and trailing whitespace removed

        Raises:
            AudioAgentChatError: If the message is None, not a string, blank,
                or longer than ``MAX_MESSAGE_LENGTH`` characters
        """
        if message is None:
            raise AudioAgentChatError("Message cannot be None")
        if not isinstance(message, str):
            # Fail with the domain error instead of an AttributeError on .strip().
            raise AudioAgentChatError(f"Message must be a string, got {type(message).__name__}")
        cleaned_message = message.strip()
        if not cleaned_message:
            raise AudioAgentChatError("Message cannot be empty or only whitespace")
        if len(cleaned_message) > self.MAX_MESSAGE_LENGTH:
            raise AudioAgentChatError(f"Message is too long (max {self.MAX_MESSAGE_LENGTH:,} characters)")
        return cleaned_message

    async def chat(self, message: str, history: Optional[List[Tuple[str, Optional[str]]]] = None) -> str:
        """
        Process a chat message with the agent using LangChain's robust message handling.

        Args:
            message: The user's message
            history: Previous chat history as list of (human, ai) tuples

        Returns:
            The agent's response

        Raises:
            AudioAgentChatError: If chat processing fails
            AudioAgentInitializationError: If agent is not initialized
        """
        # Validate input
        validated_message = self._validate_message(message)
        # Ensure agent is initialized
        if not self._is_initialized:
            await self.initialize()
        try:
            agent_input = self._build_agent_input(validated_message, history)
            logger.info(f"Processing message: {validated_message[:50]}{'...' if len(validated_message) > 50 else ''}")
            # Get response from agent
            response = await self._agent.ainvoke(agent_input)
            # Extract and return content using output parser
            content = await self._extract_response_content(response)
            logger.info("Message processed successfully")
            return content
        except AudioAgentChatError:
            # Re-raise our custom errors
            raise
        except Exception as e:
            error_msg = f"Failed to process chat message: {str(e)}"
            logger.error(error_msg)
            raise AudioAgentChatError(error_msg) from e

    @staticmethod
    def _has_running_loop() -> bool:
        """Return True when called from a thread that is running an event loop."""
        try:
            asyncio.get_running_loop()
        except RuntimeError:
            return False
        return True

    def chat_sync(self, message: str, history: Optional[List[Tuple[str, Optional[str]]]] = None) -> str:
        """
        Synchronous wrapper for the async chat method.

        Must not be called from code that is already running inside an event
        loop; use ``await chat(...)`` there instead.

        Args:
            message: The user's message
            history: Previous chat history as list of (human, ai) tuples

        Returns:
            The agent's response

        Raises:
            RuntimeError: If called from a running event loop
            AudioAgentChatError: If chat processing fails
            AudioAgentInitializationError: If agent initialization fails
        """
        try:
            # Check before creating the coroutine: asyncio.run() would reject a
            # running loop anyway, but only after chat() had been instantiated,
            # leaving a never-awaited coroutine behind.
            if self._has_running_loop():
                raise RuntimeError(
                    "chat_sync() cannot be called from a running event loop; "
                    "await chat() instead"
                )
            return asyncio.run(self.chat(message, history))
        except Exception as e:
            logger.error(f"Error in synchronous chat: {e}")
            raise

    async def get_available_tools(self) -> List[str]:
        """
        Get the list of available tool names.

        Initializes the agent first when that has not happened yet.

        Returns:
            List of tool names

        Raises:
            AudioAgentInitializationError: If initialization fails
        """
        try:
            if not self._is_initialized:
                await self.initialize()
            return [tool.name for tool in self._tools] if self._tools else []
        except AudioAgentInitializationError:
            # Already logged and wrapped by initialize(); don't wrap it twice.
            raise
        except Exception as e:
            error_msg = f"Failed to get available tools: {str(e)}"
            logger.error(error_msg)
            raise AudioAgentInitializationError(error_msg) from e

    async def stream_chat(self, message: str, history: Optional[List[Tuple[str, Optional[str]]]] = None) -> AsyncIterator[Any]:
        """
        Stream a chat response (if supported by the underlying agent).

        This is an async generator, so validation and initialization run when
        iteration starts, not when the method is called.

        Args:
            message: The user's message
            history: Previous chat history as list of (human, ai) tuples

        Yields:
            The chunks produced by the agent's ``astream`` as they become
            available, or the full response as a single string when the agent
            has no ``astream``

        Raises:
            AudioAgentChatError: If streaming fails
            AudioAgentInitializationError: If agent initialization fails
        """
        # Validate input
        validated_message = self._validate_message(message)
        # Ensure agent is initialized
        if not self._is_initialized:
            await self.initialize()
        try:
            agent_input = self._build_agent_input(validated_message, history)
            logger.info(f"Streaming message: {validated_message[:50]}{'...' if len(validated_message) > 50 else ''}")
            # Check if agent supports streaming
            if hasattr(self._agent, 'astream'):
                async for chunk in self._agent.astream(agent_input):
                    yield chunk
            else:
                # Fallback to regular chat if streaming not supported
                response = await self.chat(validated_message, history)
                yield response
        except AudioAgentChatError:
            # Same policy as chat(): domain errors pass through unwrapped.
            raise
        except Exception as e:
            error_msg = f"Failed to stream chat message: {str(e)}"
            logger.error(error_msg)
            raise AudioAgentChatError(error_msg) from e
async def main():
    """Exercise the agent end to end: list tools, chat once, then stream.

    Every failure is logged rather than propagated, so this coroutine never
    raises for ordinary errors.
    """
    try:
        # Build the agent and connect it to its tools.
        audio_agent = AudioAgent()
        await audio_agent.initialize()

        # Report which tools were loaded.
        tool_names = await audio_agent.get_available_tools()
        print(f"Available tools: {tool_names}")

        # Single request/response round trip.
        reply = await audio_agent.chat("What tools do you have?")
        print(f"Agent response: {reply}")

        # Streaming round trip.
        print("\nTesting streaming:")
        stream = audio_agent.stream_chat("Tell me about audio processing")
        async for piece in stream:
            print(f"Chunk: {piece}")
    except AudioAgentError as agent_err:
        logger.error(f"AudioAgent error: {agent_err}")
    except Exception as other_err:
        logger.error(f"Unexpected error: {other_err}")
# Run the example only when this file is executed as a script, not on import.
if __name__ == "__main__":
    asyncio.run(main())