Spaces:

develops20
/

VoiceSupportAgent

Sleeping

App Files Files Community

develops20 committed on Jun 9, 2025

Commit

f3069a1

verified ·

1 Parent(s): 8bacb6d

Update app.py

Browse files

Files changed (1) hide show

app.py +30 -11

app.py CHANGED Viewed

@@ -10,6 +10,10 @@ import base64
 from typing import Optional, Dict, Any
 import asyncio
 import aiohttp
 # Configuration
 ELEVENLABS_API_KEY = os.getenv("ELEVENLABS_API_KEY")
@@ -23,15 +27,27 @@ ELEVENLABS_API_URL = "https://api.elevenlabs.io/v1"
 class VoiceAgent:
     def __init__(self):
         self.recognizer = sr.Recognizer()
-        self.microphone = sr.Microphone()
     async def speech_to_text(self, audio_file) -> str:
         """Convert speech to text using speech_recognition"""
         try:
-            with sr.AudioFile(audio_file) as source:
-                audio = self.recognizer.record(source)
-            text = self.recognizer.recognize_google(audio)
-            return text
         except Exception as e:
             return f"Error in speech recognition: {str(e)}"
@@ -227,21 +243,24 @@ with gr.Blocks(title="Voice Agent - Gradio MCP Hackathon", theme=gr.themes.Soft(
     """)
     with gr.Tab("🎤 Voice Mode"):
         with gr.Row():
             with gr.Column():
                 audio_input = gr.Audio(
                     sources=["microphone"],
                     type="filepath",
-                    label="Record your voice"
                 )
-                voice_button = gr.Button("Process Voice Input", variant="primary")
             with gr.Column():
-                audio_output = gr.Audio(label="AI Response (Voice)")
                 text_output = gr.Textbox(
-                    label="Conversation Log",
-                    lines=6,
-                    interactive=False
                 )
         voice_button.click(

 from typing import Optional, Dict, Any
 import asyncio
 import aiohttp
+from dotenv import load_dotenv
+# Load environment variables from .env file
+load_dotenv()
 # Configuration
 ELEVENLABS_API_KEY = os.getenv("ELEVENLABS_API_KEY")
 class VoiceAgent:
     def __init__(self):
         self.recognizer = sr.Recognizer()
+        # Remove microphone initialization - we'll use Gradio's audio input
     async def speech_to_text(self, audio_file) -> str:
         """Convert speech to text using speech_recognition"""
         try:
+            # Handle different audio file types
+            if audio_file.endswith('.webm') or audio_file.endswith('.wav'):
+                with sr.AudioFile(audio_file) as source:
+                    audio = self.recognizer.record(source)
+                text = self.recognizer.recognize_google(audio)
+                return text
+            else:
+                # For other formats, try direct processing
+                with sr.AudioFile(audio_file) as source:
+                    audio = self.recognizer.record(source)
+                text = self.recognizer.recognize_google(audio)
+                return text
+        except sr.UnknownValueError:
+            return "Sorry, I couldn't understand the audio. Please try speaking more clearly."
+        except sr.RequestError as e:
+            return f"Could not request results from speech recognition service; {e}"
         except Exception as e:
             return f"Error in speech recognition: {str(e)}"
     """)
     with gr.Tab("🎤 Voice Mode"):
+        gr.Markdown("**Record your voice using the microphone button below**")
         with gr.Row():
             with gr.Column():
                 audio_input = gr.Audio(
                     sources=["microphone"],
                     type="filepath",
+                    label="🎙️ Click to record your voice",
+                    format="wav"
                 )
+                voice_button = gr.Button("🚀 Process Voice Input", variant="primary", size="lg")
             with gr.Column():
+                audio_output = gr.Audio(label="🔊 AI Voice Response")
                 text_output = gr.Textbox(
+                    label="📋 Conversation Log",
+                    lines=8,
+                    interactive=False,
+                    placeholder="Your conversation will appear here..."
                 )
         voice_button.click(