YigitSekerci committed on
Commit
5ffb587
·
1 Parent(s): 8bc8b81

remove old agent

Browse files
src/nodes/agent.py DELETED
@@ -1,128 +0,0 @@
1
- from dotenv import load_dotenv
2
- from functools import partial
3
-
4
- from langchain_mcp_adapters.client import MultiServerMCPClient
5
- from langgraph.graph import StateGraph, END, START
6
-
7
- from .state import AgentState, InputState, OutputState
8
- from .chat import chat_node, chat_node_router
9
- from .planner import planner_node
10
- from .processor import processor_node
11
- from .validator import validator_node, validator_node_router
12
-
13
- class AudioAgent:
14
- def __init__(
15
- self,
16
- model_name: str = "gpt-4o",
17
- server_url: str = "https://agents-mcp-hackathon-audioeditor.hf.space/gradio_api/mcp/sse",
18
- ):
19
- load_dotenv()
20
- self.model_name = model_name
21
- self.server_url = server_url
22
- self.graph = None
23
-
24
- self._client = MultiServerMCPClient({
25
- "audio-tools": {"url": self.server_url, "transport": "sse"}
26
- })
27
-
28
- @property
29
- def is_initialized(self) -> bool:
30
- return self.graph is not None
31
-
32
- async def _build_graph(self) -> None:
33
- """Build the LangGraph workflow."""
34
-
35
- _graph = StateGraph(
36
- AgentState,
37
- input=InputState,
38
- output=OutputState
39
- )
40
-
41
- _graph.add_node("chat", chat_node)
42
- _graph.add_conditional_edges(
43
- "chat",
44
- chat_node_router,
45
- {
46
- "planner": "planner",
47
- "end": END
48
- }
49
- )
50
-
51
- _graph.add_node("planner", planner_node)
52
- _graph.add_edge("planner", "audio_processor")
53
-
54
- processor_node_with_tools = partial(processor_node, tools=self.tools)
55
- _graph.add_node("audio_processor", processor_node_with_tools)
56
- # TODO: add validator edge to here
57
- _graph.add_edge("audio_processor", "chat")
58
-
59
- _graph.add_node("validator", validator_node)
60
- _graph.add_conditional_edges(
61
- "validator",
62
- validator_node_router,
63
- {
64
- "chat": "chat",
65
- "planner": "planner"
66
- }
67
- )
68
-
69
- _graph.add_edge(START, "chat")
70
- _graph.add_edge("chat", END)
71
- self.graph = _graph.compile()
72
-
73
- async def initialize(self) -> None:
74
- """Initialize the LangGraph workflow with audio tools."""
75
- if self.is_initialized:
76
- return
77
-
78
- self.tools = await self._client.get_tools()
79
- if not self.tools:
80
- raise RuntimeError("No tools available from MCP server")
81
-
82
- await self._build_graph()
83
-
84
- def _extract_audio_paths(self, user_message: str) -> tuple[str, list[str]]:
85
- """Extract audio file paths from user message and return cleaned message."""
86
- audio_files = []
87
- lines = user_message.split('\n')
88
- clean_lines = []
89
-
90
- for line in lines:
91
- if line.strip().startswith('Audio file:'):
92
- # Extract the file path
93
- audio_path = line.replace('Audio file:', '').strip()
94
- audio_files.append(audio_path)
95
- else:
96
- clean_lines.append(line)
97
-
98
- clean_message = '\n'.join(clean_lines).strip()
99
- return clean_message, audio_files
100
-
101
- async def chat(self, user_message: str):
102
- """Stream chat responses with node information."""
103
- if not self.is_initialized:
104
- await self.initialize()
105
-
106
- # Extract audio file paths from the message
107
- clean_message, audio_files = self._extract_audio_paths(user_message)
108
-
109
- # Set up initial state
110
- initial_state = {
111
- "user_input": clean_message,
112
- "input_audio_files": audio_files,
113
- "steps_details": [],
114
- "plan": "",
115
- "final_response": "",
116
- "requires_processing": False,
117
- "validator_feedback": "",
118
- "output_audio_files": []
119
- }
120
-
121
- # Stream the graph execution
122
- return await self.graph.ainvoke(initial_state, stream_mode="values")
123
-
124
- def draw_graph(self) -> None:
125
- """Draw the graph to a file."""
126
- graph_image = self.graph.get_graph().draw_mermaid_png()
127
- with open("graph.png", "wb") as f:
128
- f.write(graph_image)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/nodes/chat.py DELETED
@@ -1,49 +0,0 @@
1
- from langchain_openai import ChatOpenAI
2
- from langchain_core.prompts import ChatPromptTemplate
3
- from langchain_core.runnables import RunnableParallel
4
- from nodes.state import AgentState, ChatInputState, ChatOutputState
5
- from operator import itemgetter
6
-
7
- def chat_node(state: ChatInputState) -> ChatOutputState:
8
- llm = ChatOpenAI(model="gpt-4.1")
9
- llm = llm.with_structured_output(ChatOutputState)
10
-
11
- # Enhanced prompt to better determine processing needs
12
- prompt = ChatPromptTemplate.from_messages([
13
- ("system", """You are a helpful assistant that can answer questions and help with audio processing tasks.
14
-
15
- Analyze the user's input to determine:
16
- 1. If this is a general question about audio processing → Answer directly (requires_processing=False)
17
- 2. If this requires actual audio file processing → Set requires_processing=True
18
-
19
- For audio processing tasks, you should set requires_processing=True.
20
- For general questions, information requests, or explanations, answer directly with requires_processing=False.
21
- """),
22
- ("user", "User input: {user_input}\nInput audio files: {input_files}\nPrevious steps: {steps}\n")
23
- ])
24
-
25
- chain = (
26
- RunnableParallel({
27
- "user_input": itemgetter("user_input"),
28
- "input_files": itemgetter("input_audio_files"),
29
- "steps": itemgetter("steps_details"),
30
- })
31
- | prompt
32
- | llm
33
- )
34
-
35
- result = chain.invoke(state.model_dump())
36
-
37
- # Add this chat step to steps_details
38
- updated_steps = state.steps_details + [f"Chat: Processed user input and determined next action"]
39
- result.steps_details = updated_steps
40
- result.user_input = state.user_input
41
- result.input_audio_files = state.input_audio_files
42
-
43
- return result
44
-
45
- def chat_node_router(state: ChatOutputState) -> str:
46
- if state.requires_processing:
47
- return "planner"
48
- else:
49
- return "end"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/nodes/planner.py DELETED
@@ -1,58 +0,0 @@
1
- from langchain_openai import ChatOpenAI
2
- from langchain_core.prompts import ChatPromptTemplate
3
- from langchain_core.runnables import RunnableParallel
4
- from nodes.state import AgentState, PlannerInputState, PlannerOutputState
5
- from operator import itemgetter
6
-
7
- def planner_node(state: PlannerInputState) -> PlannerOutputState:
8
- llm = ChatOpenAI(model="gpt-4.1")
9
- llm = llm.with_structured_output(PlannerOutputState)
10
-
11
- # Enhanced prompt for better planning
12
- prompt = ChatPromptTemplate.from_messages([
13
- ("system", """You are an expert audio processing planner. Create detailed, step-by-step plans for audio processing tasks.
14
-
15
- Consider:
16
- 1. The user's specific requirements
17
- 2. Available audio files and their characteristics
18
- 3. Any validator feedback that requires plan adjustments
19
- 4. Optimal sequence of audio processing operations
20
-
21
- Generate a comprehensive plan that clearly outlines:
22
- - What audio processing steps are needed
23
- - The order of operations
24
- - Expected outcomes for each step
25
- - How to handle the input audio files
26
-
27
- If there's validator feedback, adjust the plan accordingly to address the issues raised.
28
- """),
29
- ("user", "User request: {user_input}\nInput audio files: {input_files}\nValidator feedback: {feedback}\nPrevious steps: {steps}")
30
- ])
31
-
32
- chain = (
33
- RunnableParallel({
34
- "user_input": itemgetter("user_input"),
35
- "input_files": itemgetter("input_audio_files"),
36
- "feedback": itemgetter("validator_feedback"),
37
- "steps": itemgetter("steps_details")
38
- })
39
- | prompt
40
- | llm
41
- )
42
-
43
- result = chain.invoke(state.model_dump())
44
-
45
- # Ensure planning-specific fields are set
46
- result.requires_processing = True
47
- result.user_input = state.user_input
48
- result.input_audio_files = state.input_audio_files
49
-
50
- # Add planning step to steps_details
51
- planning_step = "Planner: Generated comprehensive audio processing plan"
52
- if state.validator_feedback:
53
- planning_step += f" (addressing validator feedback: {state.validator_feedback[:100]}...)"
54
-
55
- updated_steps = state.steps_details + [planning_step]
56
- result.steps_details = updated_steps
57
-
58
- return result
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/nodes/processor.py DELETED
@@ -1,59 +0,0 @@
1
- from nodes.state import ProcessorInputState, ProcessorOutputState
2
- from langgraph.prebuilt import create_react_agent
3
- from pydantic import BaseModel, Field
4
-
5
- class ProcessorState(BaseModel):
6
- steps_details: list[str] = Field(description="The steps that have been completed.", default=[])
7
- final_response: str = Field(description="The final response to the user.", default="")
8
- output_audio_files: list[str] = Field(description="The output audio files.", default=[])
9
-
10
- async def processor_node(state: ProcessorInputState, tools: list) -> ProcessorOutputState:
11
- system_prompt = """You are an expert audio processor that executes audio processing plans using available tools.
12
-
13
- Your responsibilities:
14
- 1. Follow the provided plan step-by-step
15
- 2. Use appropriate tools to process the audio files
16
- 3. Handle any errors gracefully and adapt the plan if needed
17
- 4. Provide detailed feedback on each processing step
18
- 5. Generate clear descriptions of what was accomplished
19
-
20
- Available tools will help you process audio files according to the plan.
21
- Be thorough in your processing and provide detailed step-by-step feedback.
22
-
23
- Input audio files: {input_files}
24
- Plan to execute: {plan}
25
- User request: {user_input}
26
- """
27
-
28
- agent = create_react_agent(
29
- model="gpt-4.1",
30
- tools=tools,
31
- prompt=system_prompt,
32
- response_format=ProcessorState,
33
- )
34
-
35
- input_context = f"""
36
- User Request: {state.user_input}
37
- Plan to Execute: {state.plan}
38
- Input Audio Files: {', '.join(state.input_audio_files) if state.input_audio_files else 'None'}
39
- Previous Steps: {', '.join(state.steps_details) if state.steps_details else 'None'}
40
-
41
- Please execute this plan step by step using the available tools.
42
- """
43
-
44
- res = await agent.ainvoke(
45
- {"messages": [{"role": "user", "content": input_context}]}
46
- )
47
- processor_state: ProcessorState = res["structured_response"]
48
-
49
- processor_steps = [f"Processor: {step}" for step in processor_state.steps_details]
50
- combined_steps = state.steps_details + processor_steps
51
-
52
- return ProcessorOutputState(
53
- steps_details=combined_steps,
54
- final_response=processor_state.final_response,
55
- output_audio_files=processor_state.output_audio_files,
56
- plan=state.plan,
57
- user_input=state.user_input,
58
- input_audio_files=state.input_audio_files,
59
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/nodes/state.py DELETED
@@ -1,84 +0,0 @@
1
- from pydantic import BaseModel, Field
2
-
3
- # Main AgentState - used for overall workflow coordination
4
- class AgentState(BaseModel):
5
- steps_details: list[str] = Field(description="The steps that have been completed by nodes.", default=[])
6
- user_input: str = Field(description="The user's input.", default="")
7
- plan: str = Field(description="The plan for the user.", default="")
8
- final_response: str = Field(description="The final response to the user.", default="")
9
- requires_processing: bool = Field(description="Whether the response requires detailed audio processing.", default=False)
10
- validator_feedback: str = Field(description="The feedback from the validator. Indicates steps must be taken again.", default="")
11
- input_audio_files: list[str] = Field(description="The input audio files.", default=[])
12
- output_audio_files: list[str] = Field(description="The output audio files.", default=[])
13
-
14
- # Chat Node States
15
- class ChatInputState(BaseModel):
16
- user_input: str = Field(description="The user's input.", default="")
17
- input_audio_files: list[str] = Field(description="The input audio files.", default=[])
18
- output_audio_files: list[str] = Field(description="The output audio files.", default=[])
19
- steps_details: list[str] = Field(description="The steps that have been completed by nodes.", default=[])
20
-
21
- class ChatOutputState(BaseModel):
22
- steps_details: list[str] = Field(description="The steps that have been completed by nodes.", default=[])
23
- final_response: str = Field(description="The final response to the user.", default="")
24
- requires_processing: bool = Field(description="Whether the response requires detailed audio processing.", default=False)
25
- user_input: str = Field(description="The user's input to pass through.", default="")
26
- input_audio_files: list[str] = Field(description="The input audio files to pass through.", default=[])
27
- output_audio_files: list[str] = Field(description="The output audio files.", default=[])
28
-
29
- # Planner Node States
30
- class PlannerInputState(BaseModel):
31
- user_input: str = Field(description="The user's input.", default="")
32
- input_audio_files: list[str] = Field(description="The input audio files.", default=[])
33
- steps_details: list[str] = Field(description="The steps that have been completed by nodes.", default=[])
34
- validator_feedback: str = Field(description="The feedback from the validator requiring replanning.", default="")
35
-
36
- class PlannerOutputState(BaseModel):
37
- plan: str = Field(description="The plan for the user.", default="")
38
- user_input: str = Field(description="The user's input to pass through.", default="")
39
- input_audio_files: list[str] = Field(description="The input audio files to pass through.", default=[])
40
- steps_details: list[str] = Field(description="The steps that have been completed by nodes.", default=[])
41
- requires_processing: bool = Field(description="Whether the response requires detailed audio processing.", default=True)
42
-
43
- # Processor Node States
44
- class ProcessorInputState(BaseModel):
45
- plan: str = Field(description="The plan to execute.", default="")
46
- user_input: str = Field(description="The user's input.", default="")
47
- input_audio_files: list[str] = Field(description="The input audio files.", default=[])
48
- steps_details: list[str] = Field(description="The steps that have been completed by nodes.", default=[])
49
-
50
- class ProcessorOutputState(BaseModel):
51
- steps_details: list[str] = Field(description="The steps that have been completed during processing.", default=[])
52
- final_response: str = Field(description="The final response to the user.", default="")
53
- output_audio_files: list[str] = Field(description="The output audio files generated.", default=[])
54
- plan: str = Field(description="The plan to pass through.", default="")
55
- user_input: str = Field(description="The user's input to pass through.", default="")
56
- input_audio_files: list[str] = Field(description="The input audio files to pass through.", default=[])
57
-
58
- # Validator Node States
59
- class ValidatorInputState(BaseModel):
60
- steps_details: list[str] = Field(description="The steps that have been completed by the processor.", default=[])
61
- final_response: str = Field(description="The final response to validate.", default="")
62
- output_audio_files: list[str] = Field(description="The output audio files to validate.", default=[])
63
- plan: str = Field(description="The original plan.", default="")
64
- user_input: str = Field(description="The user's input.", default="")
65
- input_audio_files: list[str] = Field(description="The input audio files.", default=[])
66
-
67
- class ValidatorOutputState(BaseModel):
68
- validator_feedback: str = Field(description="The feedback from the validator. Empty if validation passed.", default="")
69
- steps_details: list[str] = Field(description="The validated steps.", default=[])
70
- final_response: str = Field(description="The validated final response.", default="")
71
- output_audio_files: list[str] = Field(description="The validated output audio files.", default=[])
72
- plan: str = Field(description="The plan to pass through.", default="")
73
- user_input: str = Field(description="The user's input to pass through.", default="")
74
- input_audio_files: list[str] = Field(description="The input audio files to pass through.", default=[])
75
-
76
- # Flow Entry and Exit States
77
- class InputState(BaseModel):
78
- user_input: str = Field(description="The user's input.", default="")
79
- input_audio_files: list[str] = Field(description="The input audio files.", default=[])
80
-
81
- class OutputState(BaseModel):
82
- final_response: str = Field(description="The final response to the user.", default="")
83
- output_audio_files: list[str] = Field(description="The output audio files.", default=[])
84
- steps_details: list[str] = Field(description="The steps that have been completed.", default=[])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/nodes/validator.py DELETED
@@ -1,47 +0,0 @@
1
- from langchain_openai import ChatOpenAI
2
- from langchain_core.prompts import ChatPromptTemplate
3
- from nodes.state import AgentState, ValidatorInputState, ValidatorOutputState
4
- from operator import itemgetter
5
- from langchain_core.runnables import RunnableParallel
6
-
7
- def validator_node(state: ValidatorInputState) -> ValidatorOutputState:
8
- llm = ChatOpenAI(model="gpt-4.1")
9
- llm = llm.with_structured_output(ValidatorOutputState)
10
-
11
- prompt = ChatPromptTemplate.from_messages([
12
- ("system", "You are validator that checks the steps taken and output if something is wrong. Give feedback to flow. If everything is correct, leave validator_feedback empty."),
13
- ("user", "Steps taken: {steps}\nFinal response: {response}\nOutput files: {output_files}\nOriginal plan: {plan}")
14
- ])
15
-
16
- chain = (
17
- RunnableParallel({
18
- "steps": itemgetter("steps_details"),
19
- "response": itemgetter("final_response"),
20
- "output_files": itemgetter("output_audio_files"),
21
- "plan": itemgetter("plan")
22
- })
23
- | prompt
24
- | llm
25
- )
26
-
27
- result = chain.invoke(state.model_dump())
28
-
29
- validation_step = "Validator: Checked processing results"
30
- if result.validator_feedback:
31
- validation_step += " - Issues found, feedback provided"
32
- else:
33
- validation_step += " - All checks passed"
34
-
35
- updated_steps = state.steps_details + [validation_step]
36
- result.steps_details = updated_steps
37
- result.plan = state.plan
38
- result.user_input = state.user_input
39
- result.input_audio_files = state.input_audio_files
40
-
41
- return result
42
-
43
- def validator_node_router(state: ValidatorOutputState) -> str:
44
- if state.validator_feedback == "":
45
- return "chat"
46
- else:
47
- return "planner"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/ui.py CHANGED
@@ -82,7 +82,7 @@ def bot_response_sync(history, audio_file_urls):
82
  def create_interface():
83
  with gr.Blocks(
84
  title="Audio Agent - Professional Audio Processing",
85
- theme=gr.themes.Soft(),
86
  ) as interface:
87
  gr.Markdown("""
88
  # 🎵 Audio Agent - Professional Audio Processing
@@ -96,7 +96,7 @@ def create_interface():
96
  with gr.Column(scale=2):
97
  chatbot = gr.Chatbot(
98
  type="messages",
99
- height=400,
100
  show_copy_button=True,
101
  show_share_button=False
102
  )
@@ -113,7 +113,6 @@ def create_interface():
113
  file_types=["audio"],
114
  label="Download Generated Audio",
115
  interactive=False,
116
- visible=True,
117
  height=150
118
  )
119
 
 
82
  def create_interface():
83
  with gr.Blocks(
84
  title="Audio Agent - Professional Audio Processing",
85
+ theme=gr.themes.Default(),
86
  ) as interface:
87
  gr.Markdown("""
88
  # 🎵 Audio Agent - Professional Audio Processing
 
96
  with gr.Column(scale=2):
97
  chatbot = gr.Chatbot(
98
  type="messages",
99
+ height=500,
100
  show_copy_button=True,
101
  show_share_button=False
102
  )
 
113
  file_types=["audio"],
114
  label="Download Generated Audio",
115
  interactive=False,
 
116
  height=150
117
  )
118