geethareddy committed on
Commit
34314d1
·
verified ·
1 Parent(s): 2160b48

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +59 -64
app.py CHANGED
@@ -1,73 +1,68 @@
 
1
  import gradio as gr
2
- import pyttsx3
3
- import speech_recognition as sr
4
- from gtts import gTTS
5
  import os
6
 
7
- # Initialize Text-to-Speech engine (pyttsx3 for offline or gTTS for better quality)
8
- engine = pyttsx3.init()
 
 
9
 
10
- # Initialize Speech Recognition engine
11
- recognizer = sr.Recognizer()
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
- # Define Menu items for the restaurant
14
- menu = {
15
- "pizza": "Delicious pepperoni pizza with cheese and sauce.",
16
- "burger": "Beef burger with lettuce, tomato, and cheese.",
17
- "pasta": "Creamy pasta with garlic and parmesan.",
18
- "salad": "A healthy vegetable salad with olive oil dressing."
19
- }
20
 
21
- # Function to convert text to speech using gTTS
22
- def speak_response(text):
23
- tts = gTTS(text=text, lang='en')
24
- tts.save("response.mp3")
25
- os.system("start response.mp3") # Windows; on Linux, use 'mpg321 response.mp3'
26
-
27
- # Function to listen to user speech and convert it to text
28
- def listen_input():
29
- with sr.Microphone() as source:
30
- print("Listening for order...")
31
- audio = recognizer.listen(source)
32
- try:
33
- command = recognizer.recognize_google(audio)
34
- print(f"You said: {command}")
35
- return command
36
- except sr.UnknownValueError:
37
- return "Sorry, I didn't understand that. Could you please repeat?"
38
- except sr.RequestError:
39
- return "Sorry, there was an issue with the speech service."
40
-
41
- # Function to handle menu interaction and confirm orders
42
- def handle_order(item):
43
- item = item.lower()
44
- response = menu.get(item, "Sorry, we don't have that item on the menu.")
45
- speak_response(response)
46
- return response
47
-
48
- # Gradio interface setup
49
- def chatbot_interface():
50
- with gr.Blocks() as demo:
51
- # Display menu logo image (optional)
52
- gr.Image("assets/menu_logo.png", label="Click to speak", elem_id="voice_logo", interactive=True)
53
-
54
- # Textbox for displaying responses
55
- output_text = gr.Textbox(label="Assistant Response", interactive=False)
56
-
57
- # Microphone button to listen to user input
58
- audio_input = gr.Audio(source="microphone", type="numpy", label="Speak your order")
59
-
60
- # Handle audio input and provide the appropriate response
61
- def on_audio_input(audio):
62
- user_input = listen_input() # Convert speech to text
63
- response = handle_order(user_input) # Handle order
64
- output_text.update(value=response) # Display assistant's response
65
- return response
66
-
67
- # Attach microphone action to trigger audio input
68
- audio_input.change(on_audio_input, inputs=[audio_input], outputs=[output_text])
69
 
70
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
 
 
72
  if __name__ == "__main__":
73
- chatbot_interface()
 
 
1
+ import spaces
2
  import gradio as gr
3
+ import edge_tts
4
+ import asyncio
5
+ import tempfile
6
  import os
7
 
8
+ # Get all available voices
9
async def get_voices():
    """Fetch the edge-tts voice list and index it by a display label.

    Returns:
        A dict mapping ``"<ShortName> - <Locale> (<Gender>)"`` to the
        voice's ``ShortName``, built from ``edge_tts.list_voices()``.
    """
    catalogue = await edge_tts.list_voices()
    labelled = {}
    for entry in catalogue:
        short_name = entry['ShortName']
        label = f"{short_name} - {entry['Locale']} ({entry['Gender']})"
        labelled[label] = short_name
    return labelled
12
 
13
+ # Text-to-speech function
14
async def text_to_speech(text, voice, rate, pitch):
    """Synthesize *text* to a temporary MP3 file with edge-tts.

    Args:
        text: Text to speak. ``None``, empty, or whitespace-only input is
            rejected with a warning.
        voice: Dropdown label of the form
            ``"<ShortName> - <Locale> (<Gender>)"``; only the part before
            the first ``" - "`` is passed to edge-tts.
        rate: Speech-rate adjustment, formatted as a signed percentage
            (e.g. ``"+10%"``).
        pitch: Pitch adjustment, formatted as a signed Hz value
            (e.g. ``"-5Hz"``).

    Returns:
        A ``(audio_path, warning)`` tuple. ``audio_path`` is the path of the
        generated ``.mp3`` file, or ``None`` when validation fails.
        ``warning`` is the result of ``gr.Warning(...)`` on validation
        failure and ``None`` on success.
    """
    # Check for None before calling .strip() so validation itself never raises.
    if not text or not text.strip():
        return None, gr.Warning("Please enter text to convert.")
    if not voice:
        return None, gr.Warning("Please select a voice.")

    voice_short_name = voice.split(" - ")[0]
    # The slider value may arrive as a float (e.g. 5.0); the "+d" format spec
    # only accepts integers, so coerce before formatting.
    rate_str = f"{int(round(rate)):+d}%"
    pitch_str = f"{int(round(pitch)):+d}Hz"
    communicate = edge_tts.Communicate(text, voice_short_name, rate=rate_str, pitch=pitch_str)

    # delete=False keeps the file after this function returns so the caller
    # can use the path. The handle is only used to reserve a unique name and
    # is closed before edge-tts writes to that path.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        tmp_path = tmp_file.name
    await communicate.save(tmp_path)
    return tmp_path, None
28
 
29
+ # Gradio interface function
30
@spaces.GPU
def tts_interface(text, voice, rate, pitch):
    """Synchronous entry point used by the Gradio interface.

    Runs the ``text_to_speech`` coroutine to completion with
    ``asyncio.run`` and returns its ``(audio, warning)`` pair.
    """
    synthesis = text_to_speech(text, voice, rate, pitch)
    audio_path, notice = asyncio.run(synthesis)
    return audio_path, notice
 
 
34
 
35
+ # Create Gradio application
36
+ import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
 
38
async def create_demo():
    """Build the Gradio text-to-speech interface.

    Awaits ``get_voices()`` to populate the voice dropdown, then wires
    ``tts_interface`` to a text box, the dropdown, and rate/pitch sliders.

    Returns:
        The configured ``gr.Interface`` (not yet launched).
    """
    voices = await get_voices()

    description = """
    Experience the power of Voicecloning.be for text-to-speech conversion.
    """

    demo = gr.Interface(
        fn=tts_interface,
        inputs=[
            gr.Textbox(label="Input Text", lines=5),
            # The leading "" entry is the "nothing selected" default.
            gr.Dropdown(choices=[""] + list(voices.keys()), label="Select Voice", value=""),
            gr.Slider(minimum=-50, maximum=50, value=0, label="Speech Rate Adjustment (%)", step=1),
            gr.Slider(minimum=-20, maximum=20, value=0, label="Pitch Adjustment (Hz)", step=1),
        ],
        outputs=[
            gr.Audio(label="Generated Audio", type="filepath"),
            gr.Markdown(label="Warning", visible=False),
        ],
        title="Voicecloning.be Text-to-Speech",
        description=description,
        article="Experience the power of Voicecloning.be for text-to-speech conversion.",
        analytics_enabled=False,
        # The documented values are the strings "never", "auto" and "manual";
        # a boolean False is not one of them. "never" hides the flag button.
        allow_flagging="never",
    )
    return demo
64
 
65
+ # Run the application
66
if __name__ == "__main__":
    # create_demo() is a coroutine, so run it to completion to obtain the
    # interface before starting the (blocking) Gradio server.
    demo_builder = create_demo()
    demo = asyncio.run(demo_builder)
    demo.launch()