dschandra committed on
Commit
83a6bbe
·
verified ·
1 Parent(s): c9030e0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -9
app.py CHANGED
@@ -4,13 +4,17 @@ from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
4
  from gtts import gTTS
5
  import os
6
  import librosa
 
7
 
8
  # Load Wav2Vec2 model and processor for speech-to-text
9
  processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-large-960h")
10
  model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-large-960h")
11
 
 
 
 
12
  def speech_to_text(audio_file):
13
- # Load audio file and process with Wav2Vec 2.0
14
  audio_input, _ = librosa.load(audio_file, sr=16000)
15
  input_values = processor(audio_input, return_tensors="pt").input_values
16
 
@@ -25,12 +29,12 @@ def speech_to_text(audio_file):
25
  return transcription
26
 
27
  def generate_response(text):
28
- # Using Hugging Face to generate a text-based response
29
- conversational_pipeline = pipeline("text-generation", model="microsoft/DialoGPT-medium")
30
  response = conversational_pipeline(text, max_length=50)
31
  return response[0]['generated_text']
32
 
33
  def process_audio(audio_file):
 
34
  # Convert speech to text using Wav2Vec 2.0
35
  text = speech_to_text(audio_file)
36
  print(f"User said: {text}")
@@ -39,20 +43,18 @@ def process_audio(audio_file):
39
  bot_response = generate_response(text)
40
  print(f"Bot response: {bot_response}")
41
 
42
- # Convert the bot's response to speech
43
  tts = gTTS(bot_response)
44
  tts.save("response.mp3")
45
 
46
- # Play the response
47
- os.system("mpg321 response.mp3")
48
-
49
  return bot_response, "response.mp3"
50
 
51
  # Create Gradio interface for audio input/output
52
  iface = gr.Interface(
53
  fn=process_audio,
54
- inputs=gr.Audio(type="filepath"), # Corrected for Gradio v3.x
55
- outputs=[gr.Textbox(), gr.Audio(type="file")], # Corrected for Gradio v3.x
56
  live=True,
57
  title="Voice Bot with Wav2Vec2.0",
58
  description="Speak to the bot and get a response!"
 
4
  from gtts import gTTS
5
  import os
6
  import librosa
7
+ from transformers import pipeline
8
 
9
  # Load Wav2Vec2 model and processor for speech-to-text
10
  processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-large-960h")
11
  model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-large-960h")
12
 
13
+ # Hugging Face conversational model (DialoGPT) for generating responses
14
+ conversational_pipeline = pipeline("text-generation", model="microsoft/DialoGPT-medium")
15
+
16
  def speech_to_text(audio_file):
17
+ """Convert speech in audio file to text using Wav2Vec2"""
18
  audio_input, _ = librosa.load(audio_file, sr=16000)
19
  input_values = processor(audio_input, return_tensors="pt").input_values
20
 
 
29
  return transcription
30
 
31
def generate_response(text):
    """Produce a conversational reply to *text* using the DialoGPT pipeline.

    Delegates to the module-level ``conversational_pipeline`` (a Hugging Face
    text-generation pipeline) capped at 50 tokens, and returns the generated
    text of the first candidate.
    """
    candidates = conversational_pipeline(text, max_length=50)
    first = candidates[0]
    return first['generated_text']
35
 
36
  def process_audio(audio_file):
37
+ """Process the audio input: Convert to text, generate response, and convert response to speech"""
38
  # Convert speech to text using Wav2Vec 2.0
39
  text = speech_to_text(audio_file)
40
  print(f"User said: {text}")
 
43
  bot_response = generate_response(text)
44
  print(f"Bot response: {bot_response}")
45
 
46
+ # Convert the bot's response to speech using gTTS
47
  tts = gTTS(bot_response)
48
  tts.save("response.mp3")
49
 
50
+ # Return the bot's response text and the path to the generated audio file
 
 
51
  return bot_response, "response.mp3"
52
 
53
  # Create Gradio interface for audio input/output
54
  iface = gr.Interface(
55
  fn=process_audio,
56
+ inputs=gr.Audio(type="filepath"), # Updated for Gradio v3.x (type="filepath" for audio input)
57
+ outputs=[gr.Textbox(), gr.Audio(type="filepath")], # Updated for Gradio v3.x (type="filepath" for audio output)
58
  live=True,
59
  title="Voice Bot with Wav2Vec2.0",
60
  description="Speak to the bot and get a response!"