develops20 committed on
Commit
f3069a1
·
verified ·
1 Parent(s): 8bacb6d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -11
app.py CHANGED
@@ -10,6 +10,10 @@ import base64
10
  from typing import Optional, Dict, Any
11
  import asyncio
12
  import aiohttp
 
 
 
 
13
 
14
  # Configuration
15
  ELEVENLABS_API_KEY = os.getenv("ELEVENLABS_API_KEY")
@@ -23,15 +27,27 @@ ELEVENLABS_API_URL = "https://api.elevenlabs.io/v1"
23
  class VoiceAgent:
24
  def __init__(self):
25
  self.recognizer = sr.Recognizer()
26
- self.microphone = sr.Microphone()
27
 
28
  async def speech_to_text(self, audio_file) -> str:
29
  """Convert speech to text using speech_recognition"""
30
  try:
31
- with sr.AudioFile(audio_file) as source:
32
- audio = self.recognizer.record(source)
33
- text = self.recognizer.recognize_google(audio)
34
- return text
 
 
 
 
 
 
 
 
 
 
 
 
35
  except Exception as e:
36
  return f"Error in speech recognition: {str(e)}"
37
 
@@ -227,21 +243,24 @@ with gr.Blocks(title="Voice Agent - Gradio MCP Hackathon", theme=gr.themes.Soft(
227
  """)
228
 
229
  with gr.Tab("🎤 Voice Mode"):
 
230
  with gr.Row():
231
  with gr.Column():
232
  audio_input = gr.Audio(
233
  sources=["microphone"],
234
  type="filepath",
235
- label="Record your voice"
 
236
  )
237
- voice_button = gr.Button("Process Voice Input", variant="primary")
238
 
239
  with gr.Column():
240
- audio_output = gr.Audio(label="AI Response (Voice)")
241
  text_output = gr.Textbox(
242
- label="Conversation Log",
243
- lines=6,
244
- interactive=False
 
245
  )
246
 
247
  voice_button.click(
 
10
  from typing import Optional, Dict, Any
11
  import asyncio
12
  import aiohttp
13
+ from dotenv import load_dotenv
14
+
15
+ # Load environment variables from .env file
16
+ load_dotenv()
17
 
18
  # Configuration
19
  ELEVENLABS_API_KEY = os.getenv("ELEVENLABS_API_KEY")
 
27
  class VoiceAgent:
28
  def __init__(self):
29
  self.recognizer = sr.Recognizer()
30
+ # Remove microphone initialization - we'll use Gradio's audio input
31
 
32
  async def speech_to_text(self, audio_file) -> str:
33
  """Convert speech to text using speech_recognition"""
34
  try:
35
+ # Handle different audio file types
36
+ if audio_file.endswith('.webm') or audio_file.endswith('.wav'):
37
+ with sr.AudioFile(audio_file) as source:
38
+ audio = self.recognizer.record(source)
39
+ text = self.recognizer.recognize_google(audio)
40
+ return text
41
+ else:
42
+ # For other formats, try direct processing
43
+ with sr.AudioFile(audio_file) as source:
44
+ audio = self.recognizer.record(source)
45
+ text = self.recognizer.recognize_google(audio)
46
+ return text
47
+ except sr.UnknownValueError:
48
+ return "Sorry, I couldn't understand the audio. Please try speaking more clearly."
49
+ except sr.RequestError as e:
50
+ return f"Could not request results from speech recognition service; {e}"
51
  except Exception as e:
52
  return f"Error in speech recognition: {str(e)}"
53
 
 
243
  """)
244
 
245
  with gr.Tab("🎤 Voice Mode"):
246
+ gr.Markdown("**Record your voice using the microphone button below**")
247
  with gr.Row():
248
  with gr.Column():
249
  audio_input = gr.Audio(
250
  sources=["microphone"],
251
  type="filepath",
252
+ label="🎙️ Click to record your voice",
253
+ format="wav"
254
  )
255
+ voice_button = gr.Button("🚀 Process Voice Input", variant="primary", size="lg")
256
 
257
  with gr.Column():
258
+ audio_output = gr.Audio(label="🔊 AI Voice Response")
259
  text_output = gr.Textbox(
260
+ label="📋 Conversation Log",
261
+ lines=8,
262
+ interactive=False,
263
+ placeholder="Your conversation will appear here..."
264
  )
265
 
266
  voice_button.click(