Spaces:

Agents-MCP-Hackathon
/

Audio-Agent

Sleeping

Ahmet Emre Şafak

add os

0ce56db 7 months ago

9.52 kB

	import asyncio
	import gradio as gr
	import os

	from agent import AudioAgent

	# Global agent instance. Starts as None and is rebound by update_agent()
	# each time a configuration is applied successfully; bot_response() refuses
	# to run while it is still unset.
	agent = None

	# Global demo instance. Assigned in the ``__main__`` block below and read by
	# get_share_url() to derive a public file URL.
	demo = None


	def get_share_url(path):
	    """Build a URL through which the file at ``path`` can be fetched.

	    The base address comes from the ``AGENT_URL`` environment variable when
	    it is set, otherwise from the module-level ``demo``'s ``share_url``.
	    When neither provides a usable base, ``path`` is returned unchanged.

	    Args:
	        path: File path as reported by the upload component.

	    Returns:
	        ``"<base>/gradio_api/file=<path>"``, or ``path`` itself as a fallback.
	    """
	    # An explicit AGENT_URL takes precedence over the demo's share link.
	    base_url = os.environ.get('AGENT_URL')

	    if not base_url and demo is not None:
	        # ``share_url`` can be unset on a launched demo (presumably whenever
	        # no public share link was created -- see the Gradio launch docs).
	        # Formatting it blindly would produce "None/gradio_api/file=...".
	        base_url = getattr(demo, "share_url", None)

	    if not base_url:
	        return path

	    # Drop trailing slashes so the result never contains "//gradio_api".
	    return f"{base_url.rstrip('/')}/gradio_api/file={path}"


	def update_agent(model_name, temperature, api_key):
	    """Rebuild the module-level ``agent`` from the supplied settings.

	    Args:
	        model_name: Model identifier forwarded to ``AudioAgent``.
	        temperature: Sampling temperature; converted with ``float()``.
	        api_key: API key forwarded to ``AudioAgent``.

	    Returns:
	        ``(True, None)`` once the new agent has been stored in the global
	        ``agent``; ``(False, message)`` when conversion or construction
	        raised, in which case the previous ``agent`` is left untouched.
	    """
	    global agent
	    try:
	        replacement = AudioAgent(
	            model_name=model_name,
	            temperature=float(temperature),
	            api_key=api_key,
	        )
	    except Exception as exc:
	        # Report the failure to the caller instead of propagating it.
	        return False, str(exc)

	    agent = replacement
	    return True, None


	def user_input(user_message, audio_files, history, custom_history, model_name, temperature, api_key):
	    """Record one user turn (text plus optional audio uploads).

	    The agent is reconfigured first via ``update_agent``; a failure there is
	    surfaced as ``gr.Error``. A turn with neither text nor files is ignored.
	    Otherwise the message is appended to ``history`` and, together with the
	    URLs of the uploaded files, to ``custom_history`` (both mutated in place).

	    Returns:
	        A 4-tuple ``("", audio_files, history, custom_history)``; the empty
	        string is the new textbox value.
	    """
	    # Apply the current model settings before accepting the turn.
	    configured, failure = update_agent(model_name, temperature, api_key)
	    if not configured:
	        raise gr.Error(failure)

	    # Nothing typed and nothing uploaded: leave both histories as they are.
	    if not (user_message.strip() or audio_files):
	        return "", audio_files, history, custom_history

	    # Uploads may be objects exposing ``.name`` or plain path-like values.
	    uploaded_urls = [
	        get_share_url(item.name if hasattr(item, 'name') else str(item))
	        for item in (audio_files or [])
	    ]

	    # Chat display only needs role and content.
	    history.append({"role": "user", "content": user_message})

	    # The agent-facing history additionally carries the file URLs.
	    custom_history.append({
	        "role": "user",
	        "content": user_message,
	        "input_files": uploaded_urls,
	    })

	    return "", audio_files, history, custom_history


	async def bot_response(history, audio_file_urls, custom_history):
	    """Generate the assistant's reply for the latest user turn.

	    Args:
	        history: Chat messages shown in the UI (list of role/content dicts).
	        audio_file_urls: Extra audio URLs supplied by the caller; only used to
	            decide whether an empty message should get the default prompt.
	        custom_history: Agent-facing history whose last entry is the user
	            turn to answer, including its ``input_files``.

	    Returns:
	        ``(history, output_audio_files)``. When the last entry of ``history``
	        is not a user message, ``(history, [])`` is returned and nothing runs.

	    Raises:
	        gr.Error: If no agent is configured, or if the agent call fails. In
	            the latter case the pending user turn is removed from both
	            histories before raising.
	    """
	    if not agent:
	        raise gr.Error("Please configure the agent first")

	    if not history or history[-1]["role"] != "user":
	        return history, []

	    # Get the user message and input files
	    user_message = custom_history[-1]["content"]
	    input_files = custom_history[-1].get("input_files", [])

	    # If the message is empty but audio was attached, provide a default
	    # message. The files recorded on the turn itself are checked as well,
	    # because ``audio_file_urls`` may be empty even though files were sent.
	    if not user_message.strip() and (input_files or audio_file_urls):
	        user_message = "Please process these audio files"

	    try:
	        # Everything before the current turn is passed as prior context.
	        result = await agent.run_agent(user_message, input_files, custom_history[:-1])

	        # Extract the final response and audio files from the result
	        final_response = result["final_response"]
	        output_audio_files = result["output_audio_files"]
	    except Exception as e:
	        # Roll back the unanswered user turn so a retry starts clean.
	        history.pop()
	        custom_history.pop()
	        raise gr.Error(str(e)) from e

	    # Add assistant response to the displayed history
	    history.append({
	        "role": "assistant",
	        "content": final_response,
	    })

	    # Update custom history, keeping track of the generated files
	    custom_history.append({
	        "role": "assistant",
	        "content": final_response,
	        "output_files": output_audio_files,
	    })

	    return history, output_audio_files


	def bot_response_sync(history, audio_file_urls, custom_history):
	    """Run the async ``bot_response`` to completion from synchronous code.

	    Args:
	        history: Chat messages shown in the UI.
	        audio_file_urls: Extra audio URLs forwarded to ``bot_response``.
	        custom_history: Agent-facing history forwarded to ``bot_response``.

	    Returns:
	        Whatever ``bot_response`` returns: ``(history, output_audio_files)``.

	    Raises:
	        RuntimeError: If called from a thread that already has a running
	            event loop.
	    """
	    # asyncio.run() creates a fresh loop, closes it when done and clears it
	    # as the thread's current loop. Managing the loop by hand left a closed
	    # loop installed as "current" after every call.
	    return asyncio.run(bot_response(history, audio_file_urls, custom_history))


	def create_interface():
	    """Build the Gradio Blocks UI for the audio agent and return it.

	    The page consists of a chat column (chatbot plus message box), a side
	    column with model settings and file upload/download widgets, and two
	    help panels. Submitting the message box first records the user turn
	    (``user_input``) and, only if that succeeded, asks the agent for a reply
	    (``bot_response_sync``).

	    Returns:
	        The constructed ``gr.Blocks`` instance (not yet launched).
	    """
	    # NOTE(review): the nesting of the layout containers below was
	    # reconstructed from a copy of this file whose indentation was lost --
	    # confirm against the rendered page.
	    with gr.Blocks(
	        title="Audio Agent - Professional Audio Processing",
	        theme=gr.themes.Default(),
	    ) as interface:
	        gr.Markdown("""
	        # Audio Agent - Your AI Audio Assistant
	        Upload your audio files and tell me what you need. I'll handle the rest!
	        """)

	        # Hidden state to store audio file URLs and custom history
	        # NOTE(review): no event in this function writes to audio_urls_state,
	        # so handlers always receive its initial empty list.
	        audio_urls_state = gr.State([])
	        custom_history_state = gr.State([])

	        with gr.Row():
	            with gr.Column(scale=4):
	                chatbot = gr.Chatbot(
	                    type="messages",
	                    height=500,
	                    show_copy_button=True,
	                    show_share_button=False
	                )

	                msg = gr.Textbox(
	                    label="Describe what you want to do?",
	                    placeholder="e.g., 'Remove filler words and improve audio quality'",
	                    lines=3,
	                    submit_btn=True
	                )

	            with gr.Column(scale=1):
	                # Model Configuration
	                with gr.Group():
	                    model_name = gr.Dropdown(
	                        choices=["gpt-4.1", "gpt-4.1-mini", "gpt-4o", "o3"],
	                        value="gpt-4.1",
	                        label="Model",
	                        info="Select the model to use"
	                    )
	                    temperature = gr.Slider(
	                        minimum=0.0,
	                        maximum=1.0,
	                        value=0.3,
	                        step=0.1,
	                        label="Temperature",
	                        info="Higher values make output more random"
	                    )
	                    api_key = gr.Textbox(
	                        label="OpenAI API Key",
	                        placeholder="sk-...",
	                        type="password",
	                        info="Your OpenAI API key"
	                    )

	                # Set temperature to 1.0 (and lock the slider) when the o3
	                # model is selected; any other model only unlocks it.
	                def update_temperature(model):
	                    if model == "o3":
	                        return gr.update(value=1.0, interactive=False)
	                    return gr.update(interactive=True)

	                model_name.change(
	                    update_temperature,
	                    inputs=[model_name],
	                    outputs=[temperature]
	                )

	                with gr.Group():
	                    audio_files = gr.File(
	                        file_count="multiple",
	                        file_types=["audio"],
	                        label="Upload Audio Files to Process",
	                        height=150
	                    )
	                    output_audio_files = gr.File(
	                        file_count="multiple",
	                        file_types=["audio"],
	                        label="Download Generated Audio",
	                        height=150,
	                        interactive=False,
	                        visible=False  # Start hidden
	                    )

	        # Handle user input and bot response
	        def handle_submit(message, files, history, custom_history, model, temp, key):
	            # Returns (textbox value, files, chat history, custom history).
	            return user_input(message, files, history, custom_history, model, temp, key)

	        def handle_bot_response(history, audio_urls, custom_history):
	            updated_history, output_files = bot_response_sync(history, audio_urls, custom_history)
	            # Only show the download widget when there is something in it.
	            output_visible = bool(output_files)
	            return updated_history, gr.update(value=output_files, visible=output_visible), custom_history

	        # The reply step is chained with .success() rather than .then():
	        # if recording the user turn raised (e.g. the agent could not be
	        # configured) there is no new turn to answer, and the reply step
	        # should not run.
	        msg.submit(
	            handle_submit,
	            [msg, audio_files, chatbot, custom_history_state, model_name, temperature, api_key],
	            [msg, audio_files, chatbot, custom_history_state],
	            queue=False
	        ).success(
	            handle_bot_response,
	            [chatbot, audio_urls_state, custom_history_state],
	            [chatbot, output_audio_files, custom_history_state]
	        )

	        gr.Markdown("""
	        ---
	        """)

	        with gr.Row():
	            gr.Markdown("""

	            ## 🎚️ What I Can Do For You

	            Audio Manipulation:
	            - Merge multiple audio files into one continuous track
	            - Cut or trim specific sections from any file
	            - Adjust volume levels (increase or decrease)
	            - Normalize audio levels for consistency
	            - Apply fade-in or fade-out effects for smooth transitions (Mono channel only)
	            - Change playback speed (faster or slower, with pitch change)
	            - Reverse audio for creative effects
	            - Remove silence from beginning or end of files

	            Analysis & Transcription: (English only)
	            - Transcribe speech in audio to text
	            - Analyze audio properties (duration, sample rate, etc.)
	            """)
	            gr.Markdown("""
	            ## 💡 Example Requests

	            - "Merge these two audio files and add a fade-in effect"
	            - "Remove the silence at the beginning of this recording"
	            - "Transcribe the speech in this audio file"
	            - "Increase the volume of the first track and normalize both files"
	            - "Cut out the middle section from 1:30 to 2:45"
	            - "Make this audio play 1.5x faster"
	            - "Apply a fade-out effect to the end of this track"
	            """)
	    return interface


	# Script entry point. The interface is bound to the module-level ``demo``
	# (rather than a throwaway local) because get_share_url() reads
	# ``demo.share_url``; the binding must exist before launch() starts serving.
	if __name__ == "__main__":
	    demo = create_interface()
	    demo.launch()