Spaces:

dschandra
/

AIVoice

Sleeping

App Files Community

dschandra committed on Dec 28, 2024

Commit

a298e65

verified ·

1 Parent(s): c6550ef

Update app.py

Browse files

Files changed (1) hide show

app.py +33 -4

app.py CHANGED Viewed

@@ -1,10 +1,11 @@
 import gradio as gr
 import torch
-from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
 from gtts import gTTS
 import os
 import librosa
-from transformers import pipeline
 # Load Wav2Vec2 model and processor for speech-to-text
 processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-large-960h")
@@ -13,6 +14,9 @@ model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-large-960h")
 # Hugging Face conversational model (DialoGPT) for generating responses
 conversational_pipeline = pipeline("text-generation", model="microsoft/DialoGPT-medium")
 def speech_to_text(audio_file):
     """Convert speech in audio file to text using Wav2Vec2"""
     audio_input, _ = librosa.load(audio_file, sr=16000)  # Load the audio
@@ -33,14 +37,39 @@ def generate_response(text):
     response = conversational_pipeline(text, max_length=50)
     return response[0]['generated_text']
 def process_audio(audio_file):
     """Process the audio input: Convert to text, generate response, and convert response to speech"""
     # Convert speech to text using Wav2Vec 2.0
     text = speech_to_text(audio_file)
     print(f"User said: {text}")
-    # Get the bot's response
-    bot_response = generate_response(text)
     print(f"Bot response: {bot_response}")
     # Convert the bot's response to speech using gTTS

 import gradio as gr
 import torch
+from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor, pipeline
 from gtts import gTTS
 import os
 import librosa
+import webbrowser
+import random
 # Load Wav2Vec2 model and processor for speech-to-text
 processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-large-960h")
 # Hugging Face conversational model (DialoGPT) for generating responses
 conversational_pipeline = pipeline("text-generation", model="microsoft/DialoGPT-medium")
+# Load the question answering model for specific commands
+qa_pipeline = pipeline("question-answering", model="deepset/roberta-base-squad2")
 def speech_to_text(audio_file):
     """Convert speech in audio file to text using Wav2Vec2"""
     audio_input, _ = librosa.load(audio_file, sr=16000)  # Load the audio
     response = conversational_pipeline(text, max_length=50)
     return response[0]['generated_text']
def execute_action(command: str):
    """Run a built-in action if the spoken command asks for one.

    The command is matched case-insensitively against a small set of known
    phrases ("youtube", "play music", "play song").

    Args:
        command: Transcribed user utterance. Empty or ``None`` input is
            treated as "no action requested".

    Returns:
        A status message describing the action that was triggered, or
        ``None`` when the command matches no known action. Returning a falsy
        value here is what lets a caller that tests the result for truthiness
        fall back to generating a conversational reply; a non-empty
        "don't understand" string would always be treated as a handled
        action.
    """
    import subprocess  # local import: only the music branch needs it

    if not command:
        return None

    command = command.lower()

    # 'open youtube' always contains 'youtube', so a single check covers both.
    if 'youtube' in command:
        webbrowser.open('https://www.youtube.com')
        return "Opening YouTube..."

    if 'play music' in command or 'play song' in command:
        # Playing a random song (modify to play a specific song if desired).
        songs = ["song1.mp3", "song2.mp3", "song3.mp3"]  # Replace with actual file names
        song = random.choice(songs)
        try:
            # Argument list instead of a shell string: no shell parsing of
            # the file name, and a missing player surfaces as OSError.
            subprocess.run(["mpg321", song], check=False)
        except OSError:
            return f"Sorry, I couldn't play {song}."
        return f"Playing music: {song}"

    # No known action matched: signal "not handled" to the caller.
    return None
 def process_audio(audio_file):
     """Process the audio input: Convert to text, generate response, and convert response to speech"""
     # Convert speech to text using Wav2Vec 2.0
     text = speech_to_text(audio_file)
     print(f"User said: {text}")
+    # Check if the user gave a command for an action (e.g., open YouTube or play music)
+    action_response = execute_action(text)
+    if action_response:
+        # If it's an action, return it directly
+        bot_response = action_response
+    else:
+        # Generate a conversational response using DialoGPT
+        bot_response = generate_response(text)
     print(f"Bot response: {bot_response}")
     # Convert the bot's response to speech using gTTS