dschandra committed on
Commit
886085a
·
verified ·
1 Parent(s): 9160aca

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -23
app.py CHANGED
@@ -1,48 +1,46 @@
1
  import gradio as gr
2
  from transformers import pipeline
3
- import speech_recognition as sr
4
- from gtts import gTTS
5
  import os
 
 
6
 
7
- # Set up Hugging Face conversational model
8
  conversational_pipeline = pipeline("conversational", model="microsoft/DialoGPT-medium")
9
 
 
 
 
 
10
  def process_audio(audio_file):
11
- # Convert the audio file to text using SpeechRecognition
12
- recognizer = sr.Recognizer()
13
- with sr.AudioFile(audio_file.name) as source:
14
- audio = recognizer.record(source)
15
- try:
16
- print("Recognizing...")
17
- text = recognizer.recognize_google(audio)
18
- print(f"You said: {text}")
19
- except sr.UnknownValueError:
20
- text = "Sorry, I couldn't understand that."
21
- except sr.RequestError:
22
- text = "Could not request results."
23
-
24
- # Get the bot's response using Hugging Face's model
25
  response = conversational_pipeline(text)
26
  bot_response = response[0]['generated_text']
27
- print(f"Bot: {bot_response}")
28
 
29
  # Convert the bot's response to speech using gTTS
30
  tts = gTTS(bot_response)
31
  tts.save("response.mp3")
32
 
33
- # Play the audio
34
- os.system("mpg321 response.mp3")
35
 
36
- return bot_response, "response.mp3" # Return the bot's text response and the audio file
37
 
38
  # Create Gradio interface
39
  iface = gr.Interface(
40
- fn=process_audio,
41
  inputs=gr.inputs.Audio(source="microphone", type="file"),
42
  outputs=[gr.outputs.Textbox(), gr.outputs.Audio(type="file")],
43
  live=True,
44
  title="Voice Bot",
45
- description="Speak to the bot, and it will respond to you!"
46
  )
47
 
48
  # Launch the interface
 
1
  import gradio as gr
2
  from transformers import pipeline
3
+ from vosk import Model, KaldiRecognizer
 
4
  import os
5
+ import wave
6
+ from gtts import gTTS
7
 
8
+ # Load the Hugging Face conversational pipeline
9
  conversational_pipeline = pipeline("conversational", model="microsoft/DialoGPT-medium")
10
 
11
+ # Initialize the Vosk ASR model
12
+ model = Model("model") # Download the Vosk model beforehand
13
+ recognizer = KaldiRecognizer(model, 16000)
14
+
15
  def process_audio(audio_file):
16
+ # Convert the audio file to text using Vosk
17
+ audio = audio_file.getarray() # Get audio data as array
18
+ if recognizer.AcceptWaveform(audio):
19
+ text = recognizer.Result() # Convert to text
20
+ else:
21
+ text = "Sorry, I couldn't understand that."
22
+
23
+ # Use Hugging Face's model to get a response
 
 
 
 
 
 
24
  response = conversational_pipeline(text)
25
  bot_response = response[0]['generated_text']
 
26
 
27
  # Convert the bot's response to speech using gTTS
28
  tts = gTTS(bot_response)
29
  tts.save("response.mp3")
30
 
31
+ # Play the audio file
32
+ os.system("mpg321 response.mp3") # Make sure mpg321 is installed in the Hugging Face space
33
 
34
+ return bot_response, "response.mp3"
35
 
36
  # Create Gradio interface
37
  iface = gr.Interface(
38
+ fn=process_audio,
39
  inputs=gr.inputs.Audio(source="microphone", type="file"),
40
  outputs=[gr.outputs.Textbox(), gr.outputs.Audio(type="file")],
41
  live=True,
42
  title="Voice Bot",
43
+ description="Talk to the bot, and it will respond!"
44
  )
45
 
46
  # Launch the interface