dschandra committed on
Commit
a298e65
·
verified ·
1 Parent(s): c6550ef

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -4
app.py CHANGED
@@ -1,10 +1,11 @@
1
  import gradio as gr
2
  import torch
3
- from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
4
  from gtts import gTTS
5
  import os
6
  import librosa
7
- from transformers import pipeline
 
8
 
9
  # Load Wav2Vec2 model and processor for speech-to-text
10
  processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-large-960h")
@@ -13,6 +14,9 @@ model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-large-960h")
13
  # Hugging Face conversational model (DialoGPT) for generating responses
14
  conversational_pipeline = pipeline("text-generation", model="microsoft/DialoGPT-medium")
15
 
 
 
 
16
  def speech_to_text(audio_file):
17
  """Convert speech in audio file to text using Wav2Vec2"""
18
  audio_input, _ = librosa.load(audio_file, sr=16000) # Load the audio
@@ -33,14 +37,39 @@ def generate_response(text):
33
  response = conversational_pipeline(text, max_length=50)
34
  return response[0]['generated_text']
35
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  def process_audio(audio_file):
37
  """Process the audio input: Convert to text, generate response, and convert response to speech"""
38
  # Convert speech to text using Wav2Vec 2.0
39
  text = speech_to_text(audio_file)
40
  print(f"User said: {text}")
41
 
42
- # Get the bot's response
43
- bot_response = generate_response(text)
 
 
 
 
 
 
 
44
  print(f"Bot response: {bot_response}")
45
 
46
  # Convert the bot's response to speech using gTTS
 
1
  import gradio as gr
2
  import torch
3
+ from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor, pipeline
4
  from gtts import gTTS
5
  import os
6
  import librosa
7
+ import webbrowser
8
+ import random
9
 
10
  # Load Wav2Vec2 model and processor for speech-to-text
11
  processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-large-960h")
 
14
  # Hugging Face conversational model (DialoGPT) for generating responses
15
  conversational_pipeline = pipeline("text-generation", model="microsoft/DialoGPT-medium")
16
 
17
+ # Load the question answering model for specific commands
18
+ qa_pipeline = pipeline("question-answering", model="deepset/roberta-base-squad2")
19
+
20
  def speech_to_text(audio_file):
21
  """Convert speech in audio file to text using Wav2Vec2"""
22
  audio_input, _ = librosa.load(audio_file, sr=16000) # Load the audio
 
37
  response = conversational_pipeline(text, max_length=50)
38
  return response[0]['generated_text']
39
 
40
def execute_action(command):
    """Run a local action when the spoken command asks for one.

    The command text is lower-cased and matched against a small set of
    keyword phrases.

    Args:
        command: Transcribed user utterance. ``None`` or an empty string is
            treated as "no command".

    Returns:
        A short status message describing the action that was triggered, or
        an empty string when the command matches no known action. The empty
        (falsy) result lets a caller test the return value for truthiness
        and fall back to a different handler for ordinary conversation.
    """
    # Tolerate a missing/empty transcript instead of failing on .lower().
    command = (command or "").lower()

    # 'open youtube' always contains 'youtube', so one substring test
    # covers both phrasings.
    if 'youtube' in command:
        webbrowser.open('https://www.youtube.com')
        return "Opening YouTube..."

    if 'play music' in command or 'play song' in command:
        # Playing a random song (or you can modify to play a specific song)
        songs = ["song1.mp3", "song2.mp3", "song3.mp3"]  # Replace with actual file names
        song = random.choice(songs)
        # The file name comes from the fixed list above, never from the
        # user's speech, so it is not interpolating untrusted text here.
        os.system(f"mpg321 {song}")  # Use your preferred way to play music
        return f"Playing music: {song}"

    # Not a recognised action. Returning a non-empty apology string here
    # would be truthy and would make every utterance look like a handled
    # action to a caller that checks `if action_response:`.
    return ""
57
+
58
  def process_audio(audio_file):
59
  """Process the audio input: Convert to text, generate response, and convert response to speech"""
60
  # Convert speech to text using Wav2Vec 2.0
61
  text = speech_to_text(audio_file)
62
  print(f"User said: {text}")
63
 
64
+ # Check if the user gave a command for an action (e.g., open YouTube or play music)
65
+ action_response = execute_action(text)
66
+ if action_response:
67
+ # If it's an action, return it directly
68
+ bot_response = action_response
69
+ else:
70
+ # Generate a conversational response using DialoGPT
71
+ bot_response = generate_response(text)
72
+
73
  print(f"Bot response: {bot_response}")
74
 
75
  # Convert the bot's response to speech using gTTS