YigitSekerci committed on
Commit
5ffb587
·
1 Parent(s): 8bc8b81

remove old agent

Browse files
src/nodes/agent.py DELETED
@@ -1,128 +0,0 @@
1
- from dotenv import load_dotenv
2
- from functools import partial
3
-
4
- from langchain_mcp_adapters.client import MultiServerMCPClient
5
- from langgraph.graph import StateGraph, END, START
6
-
7
- from .state import AgentState, InputState, OutputState
8
- from .chat import chat_node, chat_node_router
9
- from .planner import planner_node
10
- from .processor import processor_node
11
- from .validator import validator_node, validator_node_router
12
-
13
- class AudioAgent:
14
- def __init__(
15
- self,
16
- model_name: str = "gpt-4o",
17
- server_url: str = "https://agents-mcp-hackathon-audioeditor.hf.space/gradio_api/mcp/sse",
18
- ):
19
- load_dotenv()
20
- self.model_name = model_name
21
- self.server_url = server_url
22
- self.graph = None
23
-
24
- self._client = MultiServerMCPClient({
25
- "audio-tools": {"url": self.server_url, "transport": "sse"}
26
- })
27
-
28
- @property
29
- def is_initialized(self) -> bool:
30
- return self.graph is not None
31
-
32
- async def _build_graph(self) -> None:
33
- """Build the LangGraph workflow."""
34
-
35
- _graph = StateGraph(
36
- AgentState,
37
- input=InputState,
38
- output=OutputState
39
- )
40
-
41
- _graph.add_node("chat", chat_node)
42
- _graph.add_conditional_edges(
43
- "chat",
44
- chat_node_router,
45
- {
46
- "planner": "planner",
47
- "end": END
48
- }
49
- )
50
-
51
- _graph.add_node("planner", planner_node)
52
- _graph.add_edge("planner", "audio_processor")
53
-
54
- processor_node_with_tools = partial(processor_node, tools=self.tools)
55
- _graph.add_node("audio_processor", processor_node_with_tools)
56
- # TODO: add validator edge to here
57
- _graph.add_edge("audio_processor", "chat")
58
-
59
- _graph.add_node("validator", validator_node)
60
- _graph.add_conditional_edges(
61
- "validator",
62
- validator_node_router,
63
- {
64
- "chat": "chat",
65
- "planner": "planner"
66
- }
67
- )
68
-
69
- _graph.add_edge(START, "chat")
70
- _graph.add_edge("chat", END)
71
- self.graph = _graph.compile()
72
-
73
- async def initialize(self) -> None:
74
- """Initialize the LangGraph workflow with audio tools."""
75
- if self.is_initialized:
76
- return
77
-
78
- self.tools = await self._client.get_tools()
79
- if not self.tools:
80
- raise RuntimeError("No tools available from MCP server")
81
-
82
- await self._build_graph()
83
-
84
- def _extract_audio_paths(self, user_message: str) -> tuple[str, list[str]]:
85
- """Extract audio file paths from user message and return cleaned message."""
86
- audio_files = []
87
- lines = user_message.split('\n')
88
- clean_lines = []
89
-
90
- for line in lines:
91
- if line.strip().startswith('Audio file:'):
92
- # Extract the file path
93
- audio_path = line.replace('Audio file:', '').strip()
94
- audio_files.append(audio_path)
95
- else:
96
- clean_lines.append(line)
97
-
98
- clean_message = '\n'.join(clean_lines).strip()
99
- return clean_message, audio_files
100
-
101
- async def chat(self, user_message: str):
102
- """Stream chat responses with node information."""
103
- if not self.is_initialized:
104
- await self.initialize()
105
-
106
- # Extract audio file paths from the message
107
- clean_message, audio_files = self._extract_audio_paths(user_message)
108
-
109
- # Set up initial state
110
- initial_state = {
111
- "user_input": clean_message,
112
- "input_audio_files": audio_files,
113
- "steps_details": [],
114
- "plan": "",
115
- "final_response": "",
116
- "requires_processing": False,
117
- "validator_feedback": "",
118
- "output_audio_files": []
119
- }
120
-
121
- # Stream the graph execution
122
- return await self.graph.ainvoke(initial_state, stream_mode="values")
123
-
124
- def draw_graph(self) -> None:
125
- """Draw the graph to a file."""
126
- graph_image = self.graph.get_graph().draw_mermaid_png()
127
- with open("graph.png", "wb") as f:
128
- f.write(graph_image)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/nodes/chat.py DELETED
@@ -1,49 +0,0 @@
1
- from langchain_openai import ChatOpenAI
2
- from langchain_core.prompts import ChatPromptTemplate
3
- from langchain_core.runnables import RunnableParallel
4
- from nodes.state import AgentState, ChatInputState, ChatOutputState
5
- from operator import itemgetter
6
-
7
- def chat_node(state: ChatInputState) -> ChatOutputState:
8
- llm = ChatOpenAI(model="gpt-4.1")
9
- llm = llm.with_structured_output(ChatOutputState)
10
-
11
- # Enhanced prompt to better determine processing needs
12
- prompt = ChatPromptTemplate.from_messages([
13
- ("system", """You are a helpful assistant that can answer questions and help with audio processing tasks.
14
-
15
- Analyze the user's input to determine:
16
- 1. If this is a general question about audio processing → Answer directly (requires_processing=False)
17
- 2. If this requires actual audio file processing → Set requires_processing=True
18
-
19
- For audio processing tasks, you should set requires_processing=True.
20
- For general questions, information requests, or explanations, answer directly with requires_processing=False.
21
- """),
22
- ("user", "User input: {user_input}\nInput audio files: {input_files}\nPrevious steps: {steps}\n")
23
- ])
24
-
25
- chain = (
26
- RunnableParallel({
27
- "user_input": itemgetter("user_input"),
28
- "input_files": itemgetter("input_audio_files"),
29
- "steps": itemgetter("steps_details"),
30
- })
31
- | prompt
32
- | llm
33
- )
34
-
35
- result = chain.invoke(state.model_dump())
36
-
37
- # Add this chat step to steps_details
38
- updated_steps = state.steps_details + [f"Chat: Processed user input and determined next action"]
39
- result.steps_details = updated_steps
40
- result.user_input = state.user_input
41
- result.input_audio_files = state.input_audio_files
42
-
43
- return result
44
-
45
- def chat_node_router(state: ChatOutputState) -> str:
46
- if state.requires_processing:
47
- return "planner"
48
- else:
49
- return "end"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/nodes/planner.py DELETED
@@ -1,58 +0,0 @@
1
- from langchain_openai import ChatOpenAI
2
- from langchain_core.prompts import ChatPromptTemplate
3
- from langchain_core.runnables import RunnableParallel
4
- from nodes.state import AgentState, PlannerInputState, PlannerOutputState
5
- from operator import itemgetter
6
-
7
- def planner_node(state: PlannerInputState) -> PlannerOutputState:
8
- llm = ChatOpenAI(model="gpt-4.1")
9
- llm = llm.with_structured_output(PlannerOutputState)
10
-
11
- # Enhanced prompt for better planning
12
- prompt = ChatPromptTemplate.from_messages([
13
- ("system", """You are an expert audio processing planner. Create detailed, step-by-step plans for audio processing tasks.
14
-
15
- Consider:
16
- 1. The user's specific requirements
17
- 2. Available audio files and their characteristics
18
- 3. Any validator feedback that requires plan adjustments
19
- 4. Optimal sequence of audio processing operations
20
-
21
- Generate a comprehensive plan that clearly outlines:
22
- - What audio processing steps are needed
23
- - The order of operations
24
- - Expected outcomes for each step
25
- - How to handle the input audio files
26
-
27
- If there's validator feedback, adjust the plan accordingly to address the issues raised.
28
- """),
29
- ("user", "User request: {user_input}\nInput audio files: {input_files}\nValidator feedback: {feedback}\nPrevious steps: {steps}")
30
- ])
31
-
32
- chain = (
33
- RunnableParallel({
34
- "user_input": itemgetter("user_input"),
35
- "input_files": itemgetter("input_audio_files"),
36
- "feedback": itemgetter("validator_feedback"),
37
- "steps": itemgetter("steps_details")
38
- })
39
- | prompt
40
- | llm
41
- )
42
-
43
- result = chain.invoke(state.model_dump())
44
-
45
- # Ensure planning-specific fields are set
46
- result.requires_processing = True
47
- result.user_input = state.user_input
48
- result.input_audio_files = state.input_audio_files
49
-
50
- # Add planning step to steps_details
51
- planning_step = "Planner: Generated comprehensive audio processing plan"
52
- if state.validator_feedback:
53
- planning_step += f" (addressing validator feedback: {state.validator_feedback[:100]}...)"
54
-
55
- updated_steps = state.steps_details + [planning_step]
56
- result.steps_details = updated_steps
57
-
58
- return result
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/nodes/processor.py DELETED
@@ -1,59 +0,0 @@
1
- from nodes.state import ProcessorInputState, ProcessorOutputState
2
- from langgraph.prebuilt import create_react_agent
3
- from pydantic import BaseModel, Field
4
-
5
- class ProcessorState(BaseModel):
6
- steps_details: list[str] = Field(description="The steps that have been completed.", default=[])
7
- final_response: str = Field(description="The final response to the user.", default="")
8
- output_audio_files: list[str] = Field(description="The output audio files.", default=[])
9
-
10
- async def processor_node(state: ProcessorInputState, tools: list) -> ProcessorOutputState:
11
- system_prompt = """You are an expert audio processor that executes audio processing plans using available tools.
12
-
13
- Your responsibilities:
14
- 1. Follow the provided plan step-by-step
15
- 2. Use appropriate tools to process the audio files
16
- 3. Handle any errors gracefully and adapt the plan if needed
17
- 4. Provide detailed feedback on each processing step
18
- 5. Generate clear descriptions of what was accomplished
19
-
20
- Available tools will help you process audio files according to the plan.
21
- Be thorough in your processing and provide detailed step-by-step feedback.
22
-
23
- Input audio files: {input_files}
24
- Plan to execute: {plan}
25
- User request: {user_input}
26
- """
27
-
28
- agent = create_react_agent(
29
- model="gpt-4.1",
30
- tools=tools,
31
- prompt=system_prompt,
32
- response_format=ProcessorState,
33
- )
34
-
35
- input_context = f"""
36
- User Request: {state.user_input}
37
- Plan to Execute: {state.plan}
38
- Input Audio Files: {', '.join(state.input_audio_files) if state.input_audio_files else 'None'}
39
- Previous Steps: {', '.join(state.steps_details) if state.steps_details else 'None'}
40
-
41
- Please execute this plan step by step using the available tools.
42
- """
43
-
44
- res = await agent.ainvoke(
45
- {"messages": [{"role": "user", "content": input_context}]}
46
- )
47
- processor_state: ProcessorState = res["structured_response"]
48
-
49
- processor_steps = [f"Processor: {step}" for step in processor_state.steps_details]
50
- combined_steps = state.steps_details + processor_steps
51
-
52
- return ProcessorOutputState(
53
- steps_details=combined_steps,
54
- final_response=processor_state.final_response,
55
- output_audio_files=processor_state.output_audio_files,
56
- plan=state.plan,
57
- user_input=state.user_input,
58
- input_audio_files=state.input_audio_files,
59
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/nodes/state.py DELETED
@@ -1,84 +0,0 @@
1
- from pydantic import BaseModel, Field
2
-
3
- # Main AgentState - used for overall workflow coordination
4
- class AgentState(BaseModel):
5
- steps_details: list[str] = Field(description="The steps that have been completed by nodes.", default=[])
6
- user_input: str = Field(description="The user's input.", default="")
7
- plan: str = Field(description="The plan for the user.", default="")
8
- final_response: str = Field(description="The final response to the user.", default="")
9
- requires_processing: bool = Field(description="Whether the response requires detailed audio processing.", default=False)
10
- validator_feedback: str = Field(description="The feedback from the validator. Indicates steps must be taken again.", default="")
11
- input_audio_files: list[str] = Field(description="The input audio files.", default=[])
12
- output_audio_files: list[str] = Field(description="The output audio files.", default=[])
13
-
14
- # Chat Node States
15
- class ChatInputState(BaseModel):
16
- user_input: str = Field(description="The user's input.", default="")
17
- input_audio_files: list[str] = Field(description="The input audio files.", default=[])
18
- output_audio_files: list[str] = Field(description="The output audio files.", default=[])
19
- steps_details: list[str] = Field(description="The steps that have been completed by nodes.", default=[])
20
-
21
- class ChatOutputState(BaseModel):
22
- steps_details: list[str] = Field(description="The steps that have been completed by nodes.", default=[])
23
- final_response: str = Field(description="The final response to the user.", default="")
24
- requires_processing: bool = Field(description="Whether the response requires detailed audio processing.", default=False)
25
- user_input: str = Field(description="The user's input to pass through.", default="")
26
- input_audio_files: list[str] = Field(description="The input audio files to pass through.", default=[])
27
- output_audio_files: list[str] = Field(description="The output audio files.", default=[])
28
-
29
- # Planner Node States
30
- class PlannerInputState(BaseModel):
31
- user_input: str = Field(description="The user's input.", default="")
32
- input_audio_files: list[str] = Field(description="The input audio files.", default=[])
33
- steps_details: list[str] = Field(description="The steps that have been completed by nodes.", default=[])
34
- validator_feedback: str = Field(description="The feedback from the validator requiring replanning.", default="")
35
-
36
- class PlannerOutputState(BaseModel):
37
- plan: str = Field(description="The plan for the user.", default="")
38
- user_input: str = Field(description="The user's input to pass through.", default="")
39
- input_audio_files: list[str] = Field(description="The input audio files to pass through.", default=[])
40
- steps_details: list[str] = Field(description="The steps that have been completed by nodes.", default=[])
41
- requires_processing: bool = Field(description="Whether the response requires detailed audio processing.", default=True)
42
-
43
- # Processor Node States
44
- class ProcessorInputState(BaseModel):
45
- plan: str = Field(description="The plan to execute.", default="")
46
- user_input: str = Field(description="The user's input.", default="")
47
- input_audio_files: list[str] = Field(description="The input audio files.", default=[])
48
- steps_details: list[str] = Field(description="The steps that have been completed by nodes.", default=[])
49
-
50
- class ProcessorOutputState(BaseModel):
51
- steps_details: list[str] = Field(description="The steps that have been completed during processing.", default=[])
52
- final_response: str = Field(description="The final response to the user.", default="")
53
- output_audio_files: list[str] = Field(description="The output audio files generated.", default=[])
54
- plan: str = Field(description="The plan to pass through.", default="")
55
- user_input: str = Field(description="The user's input to pass through.", default="")
56
- input_audio_files: list[str] = Field(description="The input audio files to pass through.", default=[])
57
-
58
- # Validator Node States
59
- class ValidatorInputState(BaseModel):
60
- steps_details: list[str] = Field(description="The steps that have been completed by the processor.", default=[])
61
- final_response: str = Field(description="The final response to validate.", default="")
62
- output_audio_files: list[str] = Field(description="The output audio files to validate.", default=[])
63
- plan: str = Field(description="The original plan.", default="")
64
- user_input: str = Field(description="The user's input.", default="")
65
- input_audio_files: list[str] = Field(description="The input audio files.", default=[])
66
-
67
- class ValidatorOutputState(BaseModel):
68
- validator_feedback: str = Field(description="The feedback from the validator. Empty if validation passed.", default="")
69
- steps_details: list[str] = Field(description="The validated steps.", default=[])
70
- final_response: str = Field(description="The validated final response.", default="")
71
- output_audio_files: list[str] = Field(description="The validated output audio files.", default=[])
72
- plan: str = Field(description="The plan to pass through.", default="")
73
- user_input: str = Field(description="The user's input to pass through.", default="")
74
- input_audio_files: list[str] = Field(description="The input audio files to pass through.", default=[])
75
-
76
- # Flow Entry and Exit States
77
- class InputState(BaseModel):
78
- user_input: str = Field(description="The user's input.", default="")
79
- input_audio_files: list[str] = Field(description="The input audio files.", default=[])
80
-
81
- class OutputState(BaseModel):
82
- final_response: str = Field(description="The final response to the user.", default="")
83
- output_audio_files: list[str] = Field(description="The output audio files.", default=[])
84
- steps_details: list[str] = Field(description="The steps that have been completed.", default=[])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/nodes/validator.py DELETED
@@ -1,47 +0,0 @@
1
- from langchain_openai import ChatOpenAI
2
- from langchain_core.prompts import ChatPromptTemplate
3
- from nodes.state import AgentState, ValidatorInputState, ValidatorOutputState
4
- from operator import itemgetter
5
- from langchain_core.runnables import RunnableParallel
6
-
7
- def validator_node(state: ValidatorInputState) -> ValidatorOutputState:
8
- llm = ChatOpenAI(model="gpt-4.1")
9
- llm = llm.with_structured_output(ValidatorOutputState)
10
-
11
- prompt = ChatPromptTemplate.from_messages([
12
- ("system", "You are validator that checks the steps taken and output if something is wrong. Give feedback to flow. If everything is correct, leave validator_feedback empty."),
13
- ("user", "Steps taken: {steps}\nFinal response: {response}\nOutput files: {output_files}\nOriginal plan: {plan}")
14
- ])
15
-
16
- chain = (
17
- RunnableParallel({
18
- "steps": itemgetter("steps_details"),
19
- "response": itemgetter("final_response"),
20
- "output_files": itemgetter("output_audio_files"),
21
- "plan": itemgetter("plan")
22
- })
23
- | prompt
24
- | llm
25
- )
26
-
27
- result = chain.invoke(state.model_dump())
28
-
29
- validation_step = "Validator: Checked processing results"
30
- if result.validator_feedback:
31
- validation_step += " - Issues found, feedback provided"
32
- else:
33
- validation_step += " - All checks passed"
34
-
35
- updated_steps = state.steps_details + [validation_step]
36
- result.steps_details = updated_steps
37
- result.plan = state.plan
38
- result.user_input = state.user_input
39
- result.input_audio_files = state.input_audio_files
40
-
41
- return result
42
-
43
- def validator_node_router(state: ValidatorOutputState) -> str:
44
- if state.validator_feedback == "":
45
- return "chat"
46
- else:
47
- return "planner"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/ui.py CHANGED
@@ -82,7 +82,7 @@ def bot_response_sync(history, audio_file_urls):
82
  def create_interface():
83
  with gr.Blocks(
84
  title="Audio Agent - Professional Audio Processing",
85
- theme=gr.themes.Soft(),
86
  ) as interface:
87
  gr.Markdown("""
88
  # 🎵 Audio Agent - Professional Audio Processing
@@ -96,7 +96,7 @@ def create_interface():
96
  with gr.Column(scale=2):
97
  chatbot = gr.Chatbot(
98
  type="messages",
99
- height=400,
100
  show_copy_button=True,
101
  show_share_button=False
102
  )
@@ -113,7 +113,6 @@ def create_interface():
113
  file_types=["audio"],
114
  label="Download Generated Audio",
115
  interactive=False,
116
- visible=True,
117
  height=150
118
  )
119
 
 
82
  def create_interface():
83
  with gr.Blocks(
84
  title="Audio Agent - Professional Audio Processing",
85
+ theme=gr.themes.Default(),
86
  ) as interface:
87
  gr.Markdown("""
88
  # 🎵 Audio Agent - Professional Audio Processing
 
96
  with gr.Column(scale=2):
97
  chatbot = gr.Chatbot(
98
  type="messages",
99
+ height=500,
100
  show_copy_button=True,
101
  show_share_button=False
102
  )
 
113
  file_types=["audio"],
114
  label="Download Generated Audio",
115
  interactive=False,
 
116
  height=150
117
  )
118