Spaces:

Agents-MCP-Hackathon
/

Audio-Agent

Sleeping

App Files Community

YigitSekerci committed on Jun 7, 2025

Commit

211e40f

1 Parent(s): 46971d5

fix agent

Browse files

Files changed (4) hide show

src/agent.py +6 -56
src/nodes/chat.py +1 -1
src/nodes/processor.py +26 -18
src/ui.py +23 -73

src/agent.py CHANGED Viewed

@@ -1,10 +1,8 @@
-import asyncio
-import re
 from dotenv import load_dotenv
 from langchain_mcp_adapters.client import MultiServerMCPClient
 from langgraph.graph import StateGraph, END, START
-from langgraph.prebuilt import ToolNode
 from .state import AgentState
 from .nodes.chat import chat_node, chat_node_router
@@ -49,13 +47,10 @@ class AudioAgent:
         _graph.add_node("planner", planner_node)
         _graph.add_edge("planner", "audio_processor")
-        _graph.add_node("audio_processor", processor_node)
         _graph.add_edge("audio_processor", "validator")
-        _graph.add_node("tools", ToolNode(self.tools))
-        _graph.add_edge("audio_processor", "tools")
-        _graph.add_edge("tools", "audio_processor")
         _graph.add_node("validator", validator_node)
         _graph.add_conditional_edges(
             "validator",
@@ -98,7 +93,7 @@ class AudioAgent:
         clean_message = '\n'.join(clean_lines).strip()
         return clean_message, audio_files
-    async def stream_chat(self, user_message: str):
         """Stream chat responses with node information."""
         if not self.is_initialized:
             await self.initialize()
@@ -119,55 +114,10 @@ class AudioAgent:
         }
         # Stream the graph execution
-        current_node = None
-        async for event in self.graph.astream(initial_state):
-            for node_name, node_output in event.items():
-                current_node = node_name
-                # Yield any response content from the node
-                if isinstance(node_output, dict):
-                    # Check for final response
-                    if "final_response" in node_output and node_output["final_response"]:
-                        response_content = node_output["final_response"]
-                        # Stream the response in chunks
-                        for i in range(0, len(response_content), 50):
-                            chunk = response_content[i:i+50]
-                            yield chunk, current_node
-                    # Check for output audio files
-                    if "output_audio_files" in node_output:
-                        for audio_file in node_output["output_audio_files"]:
-                            if audio_file:
-                                yield f"Audio Ready: {audio_file}", current_node
     def draw_graph(self) -> None:
         """Draw the graph to a file."""
         graph_image = self.graph.get_graph().draw_mermaid_png()
         with open("graph.png", "wb") as f:
-            f.write(graph_image)
-async def main():
-    """Test the agent with various scenarios."""
-    agent = AudioAgent()
-    await agent.initialize()
-    # Test with audio files
-    test_message = """Remove filler words from this audio
-Audio file: /path/to/audio1.mp3
-Audio file: /path/to/audio2.wav"""
-    print("Testing agent with audio files...")
-    async for chunk, node_name in agent.stream_chat(test_message):
-        print(f"[{node_name}]: {chunk}")
-    print("\n" + "="*50 + "\n")
-    # Test with just a question
-    test_question = "What audio processing tools are available?"
-    print("Testing agent with question...")
-    async for chunk, node_name in agent.stream_chat(test_question):
-        print(f"[{node_name}]: {chunk}")
-if __name__ == "__main__":
-    asyncio.run(main())

 from dotenv import load_dotenv
+from functools import partial
 from langchain_mcp_adapters.client import MultiServerMCPClient
 from langgraph.graph import StateGraph, END, START
 from .state import AgentState
 from .nodes.chat import chat_node, chat_node_router
         _graph.add_node("planner", planner_node)
         _graph.add_edge("planner", "audio_processor")
+        processor_node_with_tools = partial(processor_node, tools=self.tools)
+        _graph.add_node("audio_processor", processor_node_with_tools)
         _graph.add_edge("audio_processor", "validator")
         _graph.add_node("validator", validator_node)
         _graph.add_conditional_edges(
             "validator",
         clean_message = '\n'.join(clean_lines).strip()
         return clean_message, audio_files
+    async def chat(self, user_message: str):
         """Stream chat responses with node information."""
         if not self.is_initialized:
             await self.initialize()
         }
         # Stream the graph execution
+        return self.graph.invoke(initial_state, stream_mode="values")
     def draw_graph(self) -> None:
         """Draw the graph to a file."""
         graph_image = self.graph.get_graph().draw_mermaid_png()
         with open("graph.png", "wb") as f:
+            f.write(graph_image)

src/nodes/chat.py CHANGED Viewed

@@ -29,6 +29,6 @@ def chat_node(state: AgentState) -> AgentState:
 def chat_node_router(state: AgentState) -> str:
     if state.requires_processing:
-        return "audio_processor"
     else:
         return "end"

 def chat_node_router(state: AgentState) -> str:
     if state.requires_processing:
+        return "planner"
     else:
         return "end"

src/nodes/processor.py CHANGED Viewed

@@ -1,24 +1,32 @@
-from langchain_openai import ChatOpenAI
 from langchain_core.prompts import ChatPromptTemplate
 from src.state import AgentState
-from operator import itemgetter
-from langchain_core.runnables import RunnableParallel
-def processor_node(state: AgentState) -> AgentState:
-    llm = ChatOpenAI(model="gpt-4.1")
-    llm = llm.with_structured_output(AgentState)
-    prompt = ChatPromptTemplate.from_messages([
-        ("system", "You are processor that processes the plan and generates a final response to the user."),
-        ("user", "Current state: {state}")
-    ])
-    chain = (
-        RunnableParallel({
-            "state": itemgetter("state")
-        })
-        | prompt
-        | llm
     )
-    return chain.invoke({"state": state})

 from langchain_core.prompts import ChatPromptTemplate
 from src.state import AgentState
+from langgraph.prebuilt import create_react_agent
+from pydantic import BaseModel, Field
+class ProcessorState(BaseModel):
+    steps_details: list[str] = Field(description="The steps that have been completed.", default=[])
+    final_response: str = Field(description="The final response to the user.", default="")
+    output_audio_files: list[str] = Field(description="The output audio files.", default=[])
+def processor_node(state: AgentState, tools: list) -> AgentState:
+    agent = create_react_agent(
+        model="gpt-4.1",
+        tools=tools,
+        prompt="You are processor that processes the plan and generates a final response to the user.",
+        response_format=ProcessorState,
     )
+    processor_state: ProcessorState = agent.invoke(
+        {"messages": [{"role": "user", "content": f"Current state: {state}"}]}
+    )["structured_response"]
+    return AgentState(
+        steps_details=state.steps_details + processor_state.steps_details,
+        user_input=state.user_input,
+        plan=state.plan,
+        final_response=processor_state.final_response,
+        requires_processing=state.requires_processing,
+        validator_feedback=state.validator_feedback,
+        input_audio_files=state.input_audio_files,
+        output_audio_files=state.output_audio_files + processor_state.output_audio_files,
+    )

src/ui.py CHANGED Viewed

@@ -34,10 +34,10 @@ def user_input(user_message, audio_files, history):
 async def bot_response(history):
     """
-    Generate bot response showing only the final output
     """
     if not history or history[-1]["role"] != "user":
-        return
     user_message = history[-1]["content"]
@@ -46,45 +46,29 @@ async def bot_response(history):
         if not agent.is_initialized:
             await agent.initialize()
-        # Add empty assistant message to start streaming
-        history.append({"role": "assistant", "content": ""})
-        yield history
-        # Track final response and audio files
-        final_response = ""
-        processed_audio_urls = []
-        # Stream the response and collect final output
-        async for chunk, node_name in agent.stream_chat(user_message):
-            # Check if this chunk contains an audio URL
-            if "Audio Ready" in chunk and "http" in chunk:
-                processed_audio_urls.append(chunk)
-                continue
-            # Only show output from chat and response_formatter nodes
-            if node_name in ["chat", "response_formatter"]:
-                final_response += chunk
-                # Build simple display with final response and audio files
-                formatted_content = final_response
-                # Add processed audio files section if any
-                if processed_audio_urls:
-                    formatted_content += "\n\n## 🎵 Generated Audio Files\n\n"
-                    for audio_url in processed_audio_urls:
-                        formatted_content += f"{audio_url}\n"
-                # Update the chat history
-                history[-1]["content"] = formatted_content.rstrip()
-                yield history
     except Exception as e:
-        # Update the last message with error or add error message
-        if history and history[-1]["role"] == "assistant":
-            history[-1]["content"] = f"❌ **Error in Agent Processing**: {str(e)}\n\n*Please try rephrasing your request or check if audio files are properly uploaded.*"
-        else:
-            history.append({"role": "assistant", "content": f"❌ **Error**: {str(e)}"})
-        yield history
 def bot_response_sync(history):
     """
@@ -93,12 +77,7 @@ def bot_response_sync(history):
     loop = asyncio.new_event_loop()
     asyncio.set_event_loop(loop)
     try:
-        async_gen = bot_response(history)
-        while True:
-            try:
-                yield loop.run_until_complete(async_gen.__anext__())
-            except StopAsyncIteration:
-                break
     finally:
         loop.close()
@@ -110,13 +89,6 @@ def create_interface():
         title="Audio Agent - Professional Audio Processing",
         theme=gr.themes.Soft(),
         css="""
-        .audio-upload-area {
-            border: 2px dashed #ccc;
-            border-radius: 10px;
-            padding: 20px;
-            text-align: center;
-            margin: 10px 0;
-        }
         .processed-audio {
             background: #f0f9ff;
             border: 1px solid #0891b2;
@@ -124,28 +96,6 @@ def create_interface():
             padding: 15px;
             margin: 10px 0;
         }
-        .thinking-section h2 {
-            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
-            color: white;
-            padding: 10px 15px;
-            border-radius: 8px;
-            margin: 15px 0 10px 0;
-        }
-        .thinking-section h3 {
-            background: #f8f9fa;
-            border-left: 4px solid #667eea;
-            padding: 8px 12px;
-            margin: 10px 0;
-            border-radius: 0 6px 6px 0;
-        }
-        .cot-container {
-            background: #ffffff;
-            border: 1px solid #e1e5e9;
-            border-radius: 12px;
-            padding: 20px;
-            margin: 10px 0;
-            box-shadow: 0 2px 4px rgba(0,0,0,0.05);
-        }
         """
     ) as demo:

 async def bot_response(history):
     """
+    Generate bot response using the simple chat method
     """
     if not history or history[-1]["role"] != "user":
+        return history
     user_message = history[-1]["content"]
         if not agent.is_initialized:
             await agent.initialize()
+        # Get the response from the agent
+        result = await agent.chat(user_message)
+        # Extract the final response and audio files from the result
+        final_response = result.get("final_response", "")
+        output_audio_files = result.get("output_audio_files", [])
+        # Format the response
+        formatted_content = final_response
+        # Add processed audio files section if any
+        if output_audio_files:
+            formatted_content += "\n\n## 🎵 Generated Audio Files\n\n"
+            for audio_file in output_audio_files:
+                formatted_content += f"Audio Ready: {audio_file}\n"
+        # Add assistant response to history
+        history.append({"role": "assistant", "content": formatted_content.rstrip()})
     except Exception as e:
+        history.append({"role": "assistant", "content": f"❌ **Error**: {e}"})
+    return history
 def bot_response_sync(history):
     """
     loop = asyncio.new_event_loop()
     asyncio.set_event_loop(loop)
     try:
+        return loop.run_until_complete(bot_response(history))
     finally:
         loop.close()
         title="Audio Agent - Professional Audio Processing",
         theme=gr.themes.Soft(),
         css="""
         .processed-audio {
             background: #f0f9ff;
             border: 1px solid #0891b2;
             padding: 15px;
             margin: 10px 0;
         }
         """
     ) as demo: