Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -10,6 +10,10 @@ import base64
|
|
| 10 |
from typing import Optional, Dict, Any
|
| 11 |
import asyncio
|
| 12 |
import aiohttp
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
|
| 14 |
# Configuration
|
| 15 |
ELEVENLABS_API_KEY = os.getenv("ELEVENLABS_API_KEY")
|
|
@@ -23,15 +27,27 @@ ELEVENLABS_API_URL = "https://api.elevenlabs.io/v1"
|
|
| 23 |
class VoiceAgent:
|
| 24 |
def __init__(self):
|
| 25 |
self.recognizer = sr.Recognizer()
|
| 26 |
-
|
| 27 |
|
| 28 |
async def speech_to_text(self, audio_file) -> str:
|
| 29 |
"""Convert speech to text using speech_recognition"""
|
| 30 |
try:
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
except Exception as e:
|
| 36 |
return f"Error in speech recognition: {str(e)}"
|
| 37 |
|
|
@@ -227,21 +243,24 @@ with gr.Blocks(title="Voice Agent - Gradio MCP Hackathon", theme=gr.themes.Soft(
|
|
| 227 |
""")
|
| 228 |
|
| 229 |
with gr.Tab("🎤 Voice Mode"):
|
|
|
|
| 230 |
with gr.Row():
|
| 231 |
with gr.Column():
|
| 232 |
audio_input = gr.Audio(
|
| 233 |
sources=["microphone"],
|
| 234 |
type="filepath",
|
| 235 |
-
label="
|
|
|
|
| 236 |
)
|
| 237 |
-
voice_button = gr.Button("Process Voice Input", variant="primary")
|
| 238 |
|
| 239 |
with gr.Column():
|
| 240 |
-
audio_output = gr.Audio(label="AI
|
| 241 |
text_output = gr.Textbox(
|
| 242 |
-
label="Conversation Log",
|
| 243 |
-
lines=
|
| 244 |
-
interactive=False
|
|
|
|
| 245 |
)
|
| 246 |
|
| 247 |
voice_button.click(
|
|
|
|
| 10 |
from typing import Optional, Dict, Any
|
| 11 |
import asyncio
|
| 12 |
import aiohttp
|
| 13 |
+
from dotenv import load_dotenv
|
| 14 |
+
|
| 15 |
+
# Load environment variables from .env file
|
| 16 |
+
load_dotenv()
|
| 17 |
|
| 18 |
# Configuration
|
| 19 |
ELEVENLABS_API_KEY = os.getenv("ELEVENLABS_API_KEY")
|
|
|
|
| 27 |
class VoiceAgent:
|
| 28 |
def __init__(self):
|
| 29 |
self.recognizer = sr.Recognizer()
|
| 30 |
+
# Remove microphone initialization - we'll use Gradio's audio input
|
| 31 |
|
| 32 |
async def speech_to_text(self, audio_file) -> str:
|
| 33 |
"""Convert speech to text using speech_recognition"""
|
| 34 |
try:
|
| 35 |
+
# Handle different audio file types
|
| 36 |
+
if audio_file.endswith('.webm') or audio_file.endswith('.wav'):
|
| 37 |
+
with sr.AudioFile(audio_file) as source:
|
| 38 |
+
audio = self.recognizer.record(source)
|
| 39 |
+
text = self.recognizer.recognize_google(audio)
|
| 40 |
+
return text
|
| 41 |
+
else:
|
| 42 |
+
# For other formats, try direct processing
|
| 43 |
+
with sr.AudioFile(audio_file) as source:
|
| 44 |
+
audio = self.recognizer.record(source)
|
| 45 |
+
text = self.recognizer.recognize_google(audio)
|
| 46 |
+
return text
|
| 47 |
+
except sr.UnknownValueError:
|
| 48 |
+
return "Sorry, I couldn't understand the audio. Please try speaking more clearly."
|
| 49 |
+
except sr.RequestError as e:
|
| 50 |
+
return f"Could not request results from speech recognition service; {e}"
|
| 51 |
except Exception as e:
|
| 52 |
return f"Error in speech recognition: {str(e)}"
|
| 53 |
|
|
|
|
| 243 |
""")
|
| 244 |
|
| 245 |
with gr.Tab("🎤 Voice Mode"):
|
| 246 |
+
gr.Markdown("**Record your voice using the microphone button below**")
|
| 247 |
with gr.Row():
|
| 248 |
with gr.Column():
|
| 249 |
audio_input = gr.Audio(
|
| 250 |
sources=["microphone"],
|
| 251 |
type="filepath",
|
| 252 |
+
label="🎙️ Click to record your voice",
|
| 253 |
+
format="wav"
|
| 254 |
)
|
| 255 |
+
voice_button = gr.Button("π Process Voice Input", variant="primary", size="lg")
|
| 256 |
|
| 257 |
with gr.Column():
|
| 258 |
+
audio_output = gr.Audio(label="π AI Voice Response")
|
| 259 |
text_output = gr.Textbox(
|
| 260 |
+
label="π Conversation Log",
|
| 261 |
+
lines=8,
|
| 262 |
+
interactive=False,
|
| 263 |
+
placeholder="Your conversation will appear here..."
|
| 264 |
)
|
| 265 |
|
| 266 |
voice_button.click(
|