YigitSekerci committed on
Commit
eff95ca
·
1 Parent(s): 026baed

simplify agent

Browse files
Files changed (1) hide show
  1. src/agent.py +59 -339
src/agent.py CHANGED
@@ -1,370 +1,90 @@
1
  import asyncio
2
- import json
3
- import logging
4
- from typing import List, Dict, Any, Optional, Tuple, Union
5
- from langchain_mcp_adapters.client import MultiServerMCPClient
6
- from langgraph.prebuilt import create_react_agent
7
- from langchain_core.messages import HumanMessage, AIMessage, BaseMessage
8
- from langchain_core.output_parsers import StrOutputParser
9
- from langchain_core.exceptions import OutputParserException
10
  from dotenv import load_dotenv
11
- from langchain_openai import ChatOpenAI
12
-
13
- # Configure logging
14
- logging.basicConfig(level=logging.INFO)
15
- logger = logging.getLogger(__name__)
16
-
17
- load_dotenv()
18
-
19
- class AudioAgentError(Exception):
20
- """Custom exception for AudioAgent errors"""
21
- pass
22
-
23
-
24
- class AudioAgentInitializationError(AudioAgentError):
25
- """Raised when agent initialization fails"""
26
- pass
27
-
28
-
29
- class AudioAgentChatError(AudioAgentError):
30
- """Raised when chat processing fails"""
31
- pass
32
 
 
 
 
33
 
34
  class AudioAgent:
35
  """
36
- A class to manage an audio-focused AI agent with MCP tools integration.
37
-
38
- This agent connects to audio tools via MCP and provides a conversational interface
39
- using LangChain's robust message handling and output parsing.
40
  """
41
-
42
- def __init__(self, model_name: str = "gpt-4o", server_url: str = "http://127.0.0.1:7860/gradio_api/mcp/sse"):
43
- """
44
- Initialize the AudioAgent.
45
-
46
- Args:
47
- model_name: The language model to use for the agent
48
- server_url: The URL of the MCP server providing audio tools
49
- """
50
  self.model_name = model_name
51
  self.server_url = server_url
52
- self._agent = None
53
- self._tools = None
54
- self._llm = None
55
- self._is_initialized = False
56
- self._output_parser = StrOutputParser()
57
-
58
- # Initialize MCP client
59
  self._client = MultiServerMCPClient({
60
- "audio-tools": {
61
- "url": server_url,
62
- "transport": "sse",
63
- }
64
  })
65
-
 
 
66
  @property
67
  def is_initialized(self) -> bool:
68
- """Check if the agent is initialized and ready to use."""
69
- return self._is_initialized
70
-
71
  async def initialize(self) -> None:
72
- """
73
- Initialize the agent with tools from the MCP client.
74
-
75
- Raises:
76
- AudioAgentInitializationError: If initialization fails
77
- """
78
- if self._is_initialized:
79
- logger.info("Agent already initialized")
80
  return
81
-
82
- try:
83
- logger.info("Initializing AudioAgent...")
84
-
85
- logger.info(f"Initializing LLM: {self.model_name}")
86
- self._llm = ChatOpenAI(model=self.model_name, temperature=0, streaming=True)
87
 
88
- # Get tools from MCP client
89
- self._tools = await self._client.get_tools()
90
- if not self._tools:
91
- raise AudioAgentInitializationError("No tools available from MCP client")
92
-
93
- logger.info(f"Loaded {len(self._tools)} tools: {[tool.name for tool in self._tools]}")
94
-
95
- # Create the agent
96
- self._agent = create_react_agent(
97
- self._llm,
98
- self._tools,
99
- )
100
-
101
- self._is_initialized = True
102
- logger.info("AudioAgent initialized successfully")
103
-
104
- except Exception as e:
105
- error_msg = f"Failed to initialize AudioAgent: {str(e)}"
106
- logger.error(error_msg)
107
- raise AudioAgentInitializationError(error_msg) from e
108
-
109
- def _convert_to_langchain_messages(self, history: List[Tuple[str, Optional[str]]]) -> List[BaseMessage]:
110
- """
111
- Convert chat history to LangChain message objects.
112
-
113
- Args:
114
- history: List of (human_message, ai_response) tuples
115
-
116
- Returns:
117
- List of LangChain BaseMessage objects
118
- """
119
- messages = []
120
- for human_msg, ai_msg in history:
121
- if human_msg and human_msg.strip():
122
- messages.append(HumanMessage(content=human_msg.strip()))
123
- if ai_msg and ai_msg.strip():
124
- messages.append(AIMessage(content=ai_msg.strip()))
125
- return messages
126
-
127
- async def _extract_response_content(self, response: Dict[str, Any]) -> str:
128
- """
129
- Extract the content from the agent's response using LangChain output parser.
130
-
131
- Args:
132
- response: The response from the agent
133
-
134
- Returns:
135
- The extracted content as a string
136
-
137
- Raises:
138
- AudioAgentChatError: If response parsing fails
139
- """
140
- try:
141
- if not response:
142
- raise OutputParserException("Received empty response from agent")
143
-
144
- if "messages" not in response or not response["messages"]:
145
- raise OutputParserException("No messages found in agent response")
146
-
147
- last_message = response["messages"][-1]
148
-
149
- # Handle different message formats
150
- if hasattr(last_message, 'content'):
151
- content = last_message.content
152
- elif isinstance(last_message, dict) and 'content' in last_message:
153
- content = last_message['content']
154
- else:
155
- content = str(last_message)
156
-
157
- # Use LangChain's output parser for robust string processing
158
- parsed_content = await self._output_parser.aparse(content)
159
- return parsed_content if parsed_content else "I couldn't generate a response."
160
-
161
- except OutputParserException as e:
162
- logger.warning(f"Output parsing failed: {e}")
163
- raise AudioAgentChatError(f"Failed to parse agent response: {str(e)}") from e
164
- except Exception as e:
165
- logger.error(f"Unexpected error in response extraction: {e}")
166
- raise AudioAgentChatError(f"Error extracting response content: {str(e)}") from e
167
-
168
- def _validate_message(self, message: str) -> str:
169
  """
170
- Validate and sanitize the input message.
171
-
172
- Args:
173
- message: The user's message
174
-
175
- Returns:
176
- The validated and sanitized message
177
-
178
- Raises:
179
- AudioAgentChatError: If message is invalid
180
  """
181
- if not message:
182
- raise AudioAgentChatError("Message cannot be None")
183
-
184
- cleaned_message = message.strip()
185
- if not cleaned_message:
186
- raise AudioAgentChatError("Message cannot be empty or only whitespace")
187
-
188
- if len(cleaned_message) > 10000:
189
- raise AudioAgentChatError("Message is too long (max 10,000 characters)")
190
-
191
- return cleaned_message
192
 
193
- async def chat(self, message: str, history: Optional[List[Tuple[str, Optional[str]]]] = None) -> str:
194
  """
195
- Process a chat message with the agent using LangChain's robust message handling.
196
-
197
- Args:
198
- message: The user's message
199
- history: Previous chat history as list of (human, ai) tuples
200
-
201
- Returns:
202
- The agent's response
203
-
204
- Raises:
205
- AudioAgentChatError: If chat processing fails
206
- AudioAgentInitializationError: If agent is not initialized
207
  """
208
- # Validate input
209
- validated_message = self._validate_message(message)
210
-
211
- # Ensure agent is initialized
212
- if not self._is_initialized:
213
  await self.initialize()
214
-
215
- try:
216
- # Convert history to LangChain messages
217
- langchain_messages = self._convert_to_langchain_messages(history or [])
218
-
219
- # Add current message
220
- langchain_messages.append(HumanMessage(content=validated_message))
221
-
222
- # Prepare input for the agent
223
- input_data = {"messages": langchain_messages}
224
-
225
- logger.info(f"Processing message: {validated_message[:50]}{'...' if len(validated_message) > 50 else ''}")
226
-
227
- # Get response from agent
228
- response = await self._agent.ainvoke(input_data)
229
-
230
- # Extract and return content using output parser
231
- content = await self._extract_response_content(response)
232
- logger.info("Message processed successfully")
233
- return content
234
-
235
- except AudioAgentChatError:
236
- # Re-raise our custom errors
237
- raise
238
- except Exception as e:
239
- error_msg = f"Failed to process chat message: {str(e)}"
240
- logger.error(error_msg)
241
- raise AudioAgentChatError(error_msg) from e
242
-
243
- def chat_sync(self, message: str, history: Optional[List[Tuple[str, Optional[str]]]] = None) -> str:
244
- """
245
- Synchronous wrapper for the async chat method.
246
-
247
- Args:
248
- message: The user's message
249
- history: Previous chat history as list of (human, ai) tuples
250
-
251
- Returns:
252
- The agent's response
253
- """
254
- try:
255
- return asyncio.run(self.chat(message, history))
256
- except Exception as e:
257
- logger.error(f"Error in synchronous chat: {e}")
258
- raise
259
-
260
- async def get_available_tools(self) -> List[str]:
261
- """
262
- Get the list of available tool names.
263
-
264
- Returns:
265
- List of tool names
266
-
267
- Raises:
268
- AudioAgentInitializationError: If initialization fails
269
- """
270
- try:
271
- if not self._is_initialized:
272
- await self.initialize()
273
- return [tool.name for tool in self._tools] if self._tools else []
274
- except Exception as e:
275
- error_msg = f"Failed to get available tools: {str(e)}"
276
- logger.error(error_msg)
277
- raise AudioAgentInitializationError(error_msg) from e
278
-
279
- async def stream_chat(self, message: str, history: Optional[List[Tuple[str, Optional[str]]]] = None):
280
  """
281
- Stream a chat response with intermediate steps.
282
-
283
- Args:
284
- message: The user's message
285
- history: Previous chat history as list of (human, ai) tuples
286
-
287
- Yields:
288
- Formatted strings for thought process and final response.
289
- The string is prefixed with 'thought:', 'response_chunk:', or 'error:'.
290
-
291
- Raises:
292
- AudioAgentChatError: If streaming fails
293
  """
294
- # Validate input
295
- validated_message = self._validate_message(message)
296
-
297
- # Ensure agent is initialized
298
- if not self._is_initialized:
299
  await self.initialize()
300
-
301
- try:
302
- # Convert history to LangChain messages
303
- langchain_messages = self._convert_to_langchain_messages(history or [])
304
-
305
- # Add current message
306
- langchain_messages.append(HumanMessage(content=validated_message))
307
-
308
- # Prepare input for the agent
309
- input_data = {"messages": langchain_messages}
310
-
311
- logger.info(f"Streaming message: {validated_message[:50]}{'...' if len(validated_message) > 50 else ''}")
312
-
313
- final_response = ""
314
- # Use astream_events to get intermediate steps
315
- async for event in self._agent.astream_events(input_data, version="v1"):
316
- kind = event["event"]
317
- if kind == "on_chat_model_stream":
318
- content = event["data"]["chunk"].content
319
- if content:
320
- final_response += content
321
- yield f"response_chunk:{content}"
322
-
323
- elif kind == "on_tool_start":
324
- yield f"thought:Calling tool `{event['name']}` with input:\n```json\n{json.dumps(event['data'].get('input'), indent=2)}\n```"
325
- elif kind == "on_tool_end":
326
- yield f"thought:Tool `{event['name']}` finished. Output:\n```\n{event['data'].get('output')}\n```"
327
 
328
- if not final_response:
329
- logger.warning("Streaming finished but no final response was generated.")
330
- yield "response_chunk:I couldn't generate a response."
331
-
332
- except Exception as e:
333
- error_msg = f"Failed to stream chat message: {str(e)}"
334
- logger.error(error_msg, exc_info=True)
335
- yield f"error:{error_msg}"
336
- # Re-raising the exception might be too much if the error is already yielded
337
- # raise AudioAgentChatError(error_msg) from e
338
 
339
  async def main():
340
- """Example usage and testing"""
341
- try:
342
- # Create and initialize agent
343
- agent = AudioAgent()
344
- await agent.initialize()
345
-
346
- # Show available tools
347
- tools = await agent.get_available_tools()
348
- print(f"Available tools: {tools}")
349
-
350
- # Test chat
351
- #response = await agent.chat("What tools do you have?")
352
- #print(f"Agent response: {response}")
353
-
354
- # Test streaming (if supported)
355
- print("\nTesting streaming:")
356
- full_response = ""
357
- async for chunk in agent.stream_chat("Tell me about audio processing"):
358
- if chunk.startswith("response_chunk:"):
359
- full_response += chunk[len("response_chunk:"):]
360
- else:
361
- print(chunk)
362
- print(f"Final response: {full_response}")
363
-
364
- except AudioAgentError as e:
365
- logger.error(f"AudioAgent error: {e}")
366
- except Exception as e:
367
- logger.error(f"Unexpected error: {e}")
368
 
369
  if __name__ == "__main__":
370
  asyncio.run(main())
 
1
  import asyncio
 
 
 
 
 
 
 
 
2
  from dotenv import load_dotenv
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
 
4
+ from langchain_mcp_adapters.client import MultiServerMCPClient
5
+ from langgraph.prebuilt import create_react_agent
6
+ from langgraph.graph.graph import CompiledGraph
7
 
8
class AudioAgent:
    """
    Wraps a LangGraph REACT agent over your MCP audio-tools,
    exposing both one-shot and streaming chat methods.
    """

    def __init__(
        self,
        model_name: str = "gpt-4o",
        server_url: str = "http://127.0.0.1:7860/gradio_api/mcp/sse",
    ):
        """
        Args:
            model_name: Chat model identifier handed to ``create_react_agent``.
            server_url: SSE endpoint of the MCP server exposing the audio tools.
        """
        load_dotenv()  # pick up credentials (e.g. OPENAI_API_KEY) from .env

        self.model_name = model_name
        self.server_url = server_url

        # SSE client for your audio tools
        self._client = MultiServerMCPClient({
            "audio-tools": {"url": self.server_url, "transport": "sse"}
        })

        # Built lazily on first use; None means "not initialized yet".
        self._agent = None

    @property
    def is_initialized(self) -> bool:
        """True once the REACT agent graph has been built."""
        return self._agent is not None

    async def initialize(self) -> None:
        """Fetch tools from MCP and build a streaming-capable LangGraph REACT agent.

        Idempotent: returns immediately if the agent already exists.

        Raises:
            RuntimeError: If the MCP server reports no tools.
        """
        if self.is_initialized:
            return

        tools = await self._client.get_tools()
        if not tools:
            raise RuntimeError("No tools available from MCP server")

        self._agent: CompiledGraph = create_react_agent(
            model=self.model_name,
            tools=tools,
            prompt="""
            You are a helpful assistant that can use the following tools to help the user.
            """
        )

    def process_user_input(self, user_input: str) -> dict:
        """
        Process user input and return a prompt for the agent.
        """
        return {"messages": [{"role": "user", "content": user_input}]}

    async def chat(self, prompt: str) -> str:
        """
        One-shot chat: returns the full LLM + tool-augmented reply.

        Args:
            prompt: The user's message.

        Returns:
            The text content of the agent's final message.
        """
        if not self.is_initialized:
            await self.initialize()
        result = await self._agent.ainvoke(self.process_user_input(prompt))
        # ainvoke returns the whole graph state; the final message in the
        # "messages" list is the agent's reply — return just its text.
        return result["messages"][-1].content

    async def stream_chat(self, prompt: str):
        """
        Streaming chat: yields response text chunks as they are produced.

        Args:
            prompt: The user's message.

        Yields:
            Non-empty content chunks from the agent's message stream.
        """
        if not self.is_initialized:
            await self.initialize()

        async for msg, _metadata in self._agent.astream(
            self.process_user_input(prompt),
            stream_mode="messages"
        ):
            if msg.content:
                yield msg.content
 
 
 
 
78
 
79
  async def main():
80
+ agent = AudioAgent()
81
+ # one-shot example
82
+ reply = await agent.chat("Hi! What audio tools are available?")
83
+ print("→", reply)
84
+
85
+ # streaming example
86
+ async for msg in agent.stream_chat("Explain how audio normalization works."):
87
+ print(msg, end="", flush=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
 
89
# Entry point: run the async demo in a fresh event loop when executed as a script.
if __name__ == "__main__":
    asyncio.run(main())