Spaces:

Agents-MCP-Hackathon
/

Audio-Agent

Sleeping

App Files Files Community

YigitSekerci committed on Jun 9, 2025

Commit

5a5e484

1 Parent(s): 23c0e5d

add llm controls

Browse files

Files changed (2) hide show

src/agent.py +16 -2
src/ui.py +79 -36

src/agent.py CHANGED Viewed

@@ -2,6 +2,7 @@ from langgraph.prebuilt import create_react_agent
 from pydantic import BaseModel, Field
 from dotenv import load_dotenv
 from langchain_mcp_adapters.client import MultiServerMCPClient
 import os
 class AgentOutput(BaseModel):
@@ -73,10 +74,14 @@ Output Audio Files: {output_audio_files}
 class AudioAgent:
     def __init__(
         self,
-        model_name: str = "gpt-4.1-mini",
     ):
         load_dotenv()
         self.model_name = model_name
         self.server_url = os.getenv("MCP_SERVER")
         self.graph = None
@@ -87,10 +92,19 @@ class AudioAgent:
         self.agent = None
     async def build_agent(self):
         tools = await self._client.get_tools()
-        agent = create_react_agent(
             model=self.model_name,
             tools=tools,
             prompt=system_prompt,
             response_format=AgentOutput,

 from pydantic import BaseModel, Field
 from dotenv import load_dotenv
 from langchain_mcp_adapters.client import MultiServerMCPClient
+from langchain_openai import ChatOpenAI
 import os
 class AgentOutput(BaseModel):
 class AudioAgent:
     def __init__(
         self,
+        model_name: str = "gpt-4.1",
+        temperature: float = 0.3,
+        api_key: str = None,
     ):
         load_dotenv()
         self.model_name = model_name
+        self.temperature = temperature
+        self.api_key = api_key # or os.getenv("OPENAI_API_KEY")
         self.server_url = os.getenv("MCP_SERVER")
         self.graph = None
         self.agent = None
     async def build_agent(self):
+        if not self.api_key:
+            raise ValueError("OpenAI API key is required")
         tools = await self._client.get_tools()
+        llm = ChatOpenAI(
             model=self.model_name,
+            temperature=self.temperature,
+            api_key=self.api_key
+        )
+        agent = create_react_agent(
+            model=llm,
             tools=tools,
             prompt=system_prompt,
             response_format=AgentOutput,

src/ui.py CHANGED Viewed

@@ -3,7 +3,7 @@ import gradio as gr
 from .agent import AudioAgent
 # Global agent instance
-agent = AudioAgent()
 # Global demo instance
 demo = None
@@ -14,10 +14,28 @@ def get_share_url(path):
         return path
     return f"{demo.share_url}/gradio_api/file={path}"
-def user_input(user_message, audio_files, history, custom_history):
     """
     Handle user input with text and audio files
     """
     if not user_message.strip() and not audio_files:
         return "", audio_files, history, custom_history
@@ -46,12 +64,15 @@ def user_input(user_message, audio_files, history, custom_history):
         "input_files": audio_file_urls
     })
-    return "", audio_files, history, audio_file_urls, custom_history
 async def bot_response(history, audio_file_urls, custom_history):
     """
     Generate bot response using the agent
     """
     if not history or history[-1]["role"] != "user":
         return history, []
@@ -65,7 +86,7 @@ async def bot_response(history, audio_file_urls, custom_history):
     try:
         # Use the agent's run_agent method with history
-        result = await agent.run_agent(user_message, input_files, custom_history)
         # Extract the final response and audio files from the result
         final_response = result.final_response
@@ -87,16 +108,7 @@ async def bot_response(history, audio_file_urls, custom_history):
         return history, output_audio_files
     except Exception as e:
-        history.append({
-            "role": "assistant",
-            "content": f"❌ **Error**: {e}",
-        })
-        custom_history.append({
-            "role": "assistant",
-            "content": f"❌ **Error**: {e}",
-            "output_files": []
-        })
-        return history, []
 def bot_response_sync(history, audio_file_urls, custom_history):
     """
@@ -122,7 +134,7 @@ def create_interface():
         # Hidden state to store audio file URLs and custom history
         audio_urls_state = gr.State([])
         custom_history_state = gr.State([])
         with gr.Row():
             with gr.Column(scale=2):
                 chatbot = gr.Chatbot(
@@ -133,42 +145,73 @@ def create_interface():
                 )
             with gr.Column(scale=1):
-                audio_files = gr.File(
-                    file_count="multiple",
-                    file_types=["audio"],
-                    label="Upload Audio Files to Process",
-                    height=150
-                )
-                output_audio_files = gr.File(
-                    file_count="multiple",
-                    file_types=["audio"],
-                    label="Download Generated Audio",
-                    interactive=False,
-                    height=150
-                )
         with gr.Row(equal_height=True):
             msg = gr.Textbox(
                 label="Describe what you want to do?",
                 placeholder="e.g., 'Remove filler words and improve audio quality''",
                 lines=3,
-                scale=4
             )
             send_btn = gr.Button("Ask", variant="primary", scale=1, size="lg")
         # Handle user input and bot response
-        def handle_submit(message, files, history, custom_history):
-            new_msg, new_files, updated_history, audio_urls, updated_custom_history = user_input(message, files, history, custom_history)
-            return new_msg, new_files, updated_history, audio_urls, updated_custom_history
         def handle_bot_response(history, audio_urls, custom_history):
             updated_history, output_files = bot_response_sync(history, audio_urls, custom_history)
-            return updated_history, output_files, custom_history
         msg.submit(
             handle_submit,
             [msg, audio_files, chatbot, custom_history_state],
-            [msg, audio_files, chatbot, audio_urls_state, custom_history_state],
             queue=False
         ).then(
             handle_bot_response,
@@ -178,8 +221,8 @@ def create_interface():
         send_btn.click(
             handle_submit,
             [msg, audio_files, chatbot, custom_history_state],
-            [msg, audio_files, chatbot, audio_urls_state, custom_history_state],
             queue=False
         ).then(
             handle_bot_response,

 from .agent import AudioAgent
 # Global agent instance
+agent = None
 # Global demo instance
 demo = None
         return path
     return f"{demo.share_url}/gradio_api/file={path}"
+def update_agent(model_name, temperature, api_key):
+    """Replace the module-level ``agent`` with a newly configured AudioAgent.
+
+    Args:
+        model_name: Model identifier forwarded to ``AudioAgent``.
+        temperature: Sampling temperature; coerced with ``float()`` before
+            being forwarded.
+        api_key: API key forwarded to ``AudioAgent``.
+
+    Returns:
+        ``(True, None)`` when the agent was constructed, otherwise
+        ``(False, message)`` where ``message`` is ``str()`` of the exception
+        that was raised.
+    """
+    global agent
+    try:
+        # A new instance is built on every call; the previous global agent
+        # is simply rebound, not reused.
+        agent = AudioAgent(
+            model_name=model_name,
+            temperature=float(temperature),
+            api_key=api_key
+        )
+        return True, None
+    except Exception as e:
+        # Any failure (including a temperature that float() rejects) is
+        # reported to the caller as a message rather than propagated.
+        return False, str(e)
+def user_input(user_message, audio_files, history, custom_history, model_name, temperature, api_key):
     """
     Handle user input with text and audio files
     """
+    # Try to update agent configuration
+    success, error = update_agent(model_name, temperature, api_key)
+    if not success:
+        raise gr.Error(error)
     if not user_message.strip() and not audio_files:
         return "", audio_files, history, custom_history
         "input_files": audio_file_urls
     })
+    return "", audio_files, history, custom_history
 async def bot_response(history, audio_file_urls, custom_history):
     """
     Generate bot response using the agent
     """
+    if not agent:
+        raise gr.Error("Please configure the agent first")
     if not history or history[-1]["role"] != "user":
         return history, []
     try:
         # Use the agent's run_agent method with history
+        result = await agent.run_agent(user_message, input_files, custom_history[:-1])
         # Extract the final response and audio files from the result
         final_response = result.final_response
         return history, output_audio_files
     except Exception as e:
+        raise gr.Error(str(e))
 def bot_response_sync(history, audio_file_urls, custom_history):
     """
         # Hidden state to store audio file URLs and custom history
         audio_urls_state = gr.State([])
         custom_history_state = gr.State([])
         with gr.Row():
             with gr.Column(scale=2):
                 chatbot = gr.Chatbot(
                 )
             with gr.Column(scale=1):
+                # Model Configuration
+                with gr.Group():
+                    model_name = gr.Dropdown(
+                        choices=["gpt-4.1", "gpt-4.1-mini", "gpt-4o", "o3"],
+                        value="gpt-4.1",
+                        label="Model",
+                        info="Select the model to use"
+                    )
+                    temperature = gr.Slider(
+                        minimum=0.0,
+                        maximum=1.0,
+                        value=0.3,
+                        step=0.1,
+                        label="Temperature",
+                        info="Higher values make output more random"
+                    )
+                    api_key = gr.Textbox(
+                        label="OpenAI API Key",
+                        placeholder="sk-...",
+                        type="password",
+                        info="Your OpenAI API key"
+                    )
+                with gr.Group():
+                    audio_files = gr.File(
+                        file_count="multiple",
+                        file_types=["audio"],
+                        label="Upload Audio Files to Process",
+                        height=150
+                    )
+                    output_audio_files = gr.File(
+                        file_count="multiple",
+                        file_types=["audio"],
+                        label="Download Generated Audio",
+                        height=150,
+                        interactive=False,
+                        visible=False  # Start hidden
+                    )
         with gr.Row(equal_height=True):
             msg = gr.Textbox(
                 label="Describe what you want to do?",
                 placeholder="e.g., 'Remove filler words and improve audio quality''",
                 lines=3,
+                scale=6
             )
             send_btn = gr.Button("Ask", variant="primary", scale=1, size="lg")
+        # Error message component
+        error_msg = gr.Textbox(label="Error", visible=False)
         # Handle user input and bot response
+        def handle_submit(message, files, history, custom_history, model, temp, key):
+            new_msg, new_files, updated_history, updated_custom_history = user_input(
+                message, files, history, custom_history, model, temp, key
+            )
+            return new_msg, new_files, updated_history, updated_custom_history
         def handle_bot_response(history, audio_urls, custom_history):
             updated_history, output_files = bot_response_sync(history, audio_urls, custom_history)
+            output_visible = bool(output_files)  # True if there are files, else False
+            return updated_history, gr.update(value=output_files, visible=output_visible), custom_history
         msg.submit(
             handle_submit,
+            [msg, audio_files, chatbot, custom_history_state, model_name, temperature, api_key],
             [msg, audio_files, chatbot, custom_history_state],
             queue=False
         ).then(
             handle_bot_response,
         send_btn.click(
             handle_submit,
+            [msg, audio_files, chatbot, custom_history_state, model_name, temperature, api_key],
             [msg, audio_files, chatbot, custom_history_state],
             queue=False
         ).then(
             handle_bot_response,