Spaces:

dschandra
/

AIVoice

Sleeping

App Files Files Community

dschandra committed on Dec 28, 2024

Commit

886085a

verified ·

1 Parent(s): 9160aca

Update app.py

Browse files

Files changed (1) hide show

app.py +21 -23

app.py CHANGED Viewed

@@ -1,48 +1,46 @@
 import gradio as gr
 from transformers import pipeline
-import speech_recognition as sr
-from gtts import gTTS
 import os
-# Set up Hugging Face conversational model
 conversational_pipeline = pipeline("conversational", model="microsoft/DialoGPT-medium")
 def process_audio(audio_file):
-    # Convert the audio file to text using SpeechRecognition
-    recognizer = sr.Recognizer()
-    with sr.AudioFile(audio_file.name) as source:
-        audio = recognizer.record(source)
-        try:
-            print("Recognizing...")
-            text = recognizer.recognize_google(audio)
-            print(f"You said: {text}")
-        except sr.UnknownValueError:
-            text = "Sorry, I couldn't understand that."
-        except sr.RequestError:
-            text = "Could not request results."
-    # Get the bot's response using Hugging Face's model
     response = conversational_pipeline(text)
     bot_response = response[0]['generated_text']
-    print(f"Bot: {bot_response}")
     # Convert the bot's response to speech using gTTS
     tts = gTTS(bot_response)
     tts.save("response.mp3")
-    # Play the audio
-    os.system("mpg321 response.mp3")
-    return bot_response, "response.mp3"  # Return the bot's text response and the audio file
 # Create Gradio interface
 iface = gr.Interface(
-    fn=process_audio,
     inputs=gr.inputs.Audio(source="microphone", type="file"),
     outputs=[gr.outputs.Textbox(), gr.outputs.Audio(type="file")],
     live=True,
     title="Voice Bot",
-    description="Speak to the bot, and it will respond to you!"
 )
 # Launch the interface

 import gradio as gr
 from transformers import pipeline
+from vosk import Model, KaldiRecognizer
 import os
+import wave
+from gtts import gTTS
+# Load the Hugging Face conversational pipeline (DialoGPT-medium) once at import time
 conversational_pipeline = pipeline("conversational", model="microsoft/DialoGPT-medium")
+# Initialize the Vosk ASR model from the local "model" directory
+model = Model("model")  # Download the Vosk model beforehand
+# Module-level recognizer shared by every request.
+# NOTE(review): 16000 is presumably the expected sample rate in Hz; confirm the recorded audio matches it.
+recognizer = KaldiRecognizer(model, 16000)
 def process_audio(audio_file):
+    # Convert the audio file to text using Vosk
+    audio = audio_file.getarray()  # Get audio data as array
+    if recognizer.AcceptWaveform(audio):
+        text = recognizer.Result()  # Convert to text
+    else:
+        text = "Sorry, I couldn't understand that."
+    # Use Hugging Face's model to get a response
     response = conversational_pipeline(text)
     bot_response = response[0]['generated_text']
     # Convert the bot's response to speech using gTTS
     tts = gTTS(bot_response)
     tts.save("response.mp3")
+    # Play the audio file
+    os.system("mpg321 response.mp3")  # Make sure mpg321 is installed in the Hugging Face space
+    return bot_response, "response.mp3"
 # Create Gradio interface
+# process_audio takes the microphone recording and returns (reply text, audio file path),
+# which map in order onto the Textbox and Audio outputs declared below.
 iface = gr.Interface(
+    fn=process_audio,
     inputs=gr.inputs.Audio(source="microphone", type="file"),
     outputs=[gr.outputs.Textbox(), gr.outputs.Audio(type="file")],
     live=True,
     title="Voice Bot",
+    description="Talk to the bot, and it will respond!"
 )
 # Launch the interface