ObindiG commited on
Commit
09e6373
·
verified ·
1 Parent(s): 68c51b8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +66 -61
app.py CHANGED
@@ -1,67 +1,58 @@
1
  import os
2
  import google.generativeai as genai
3
  import speech_recognition as sr
4
- import pyttsx3
5
  from dotenv import load_dotenv
6
- import tkinter as tk
7
- from tkinter import messagebox
 
8
 
9
  # Load environment variables
10
  load_dotenv()
11
 
12
- # Initialize text-to-speech engine
13
- engine = pyttsx3.init()
 
 
 
 
 
14
 
15
- def speak(text):
16
- """Use text-to-speech to speak the given text."""
17
- engine.say(text)
18
- engine.runAndWait()
19
-
20
- def recognize_speech(timeout=5):
21
- """Capture and recognize speech from the microphone with a timeout."""
22
  recognizer = sr.Recognizer()
23
- with sr.Microphone() as source:
24
- print("Listening for a wake-up command...")
25
- audio = recognizer.listen(source, timeout=timeout)
26
- print("Audio captured.")
27
- try:
28
- command = recognizer.recognize_google(audio)
29
- print(f"You said: {command}")
30
- return command
31
- except sr.UnknownValueError:
32
- print("Could not understand audio.")
33
- return None
34
- except sr.RequestError:
35
- speak("Sorry, there was an error with the speech recognition service.")
36
- print("Request error.")
37
- return None
 
38
 
39
  def process_command(command):
40
  """Generate a response based on the voice command using the AI model."""
41
  if command:
42
  response = model.generate_content([command])
43
  reply = response.text.strip()
44
- speak(reply)
45
  print(f"AI Response: {reply}")
46
  return reply
47
 
48
- # GUI Functions
49
- def start_listening():
50
- command = recognize_speech(timeout=5)
51
-
52
- if command and wake_word in command.lower():
53
- speak("How can I assist you?")
54
- result_label.config(text="Wake word detected! Listening for further commands...")
55
- command = recognize_speech(timeout=5)
56
- if command:
57
- if "stop listening" in command.lower():
58
- speak("Goodbye.")
59
- result_label.config(text="Voice assistant stopped.")
60
- else:
61
- response = process_command(command)
62
- result_label.config(text=f"Response: {response}")
63
- else:
64
- result_label.config(text="Wake word not detected.")
65
 
66
  # Main Code with Generative AI Setup
67
  api_key = os.getenv("MY_API_KEY")
@@ -85,21 +76,35 @@ model = genai.GenerativeModel(
85
  generation_config=generation_config,
86
  )
87
 
88
- wake_word = "sema"
89
-
90
- # Create the GUI
91
- root = tk.Tk()
92
- root.title("Sema Voice Assistant")
93
 
94
- # Create UI elements
95
- title_label = tk.Label(root, text="SEMA AI", font=("Arial", 16))
96
- title_label.pack(pady=10)
97
-
98
- start_button = tk.Button(root, text="Start Listening", command=start_listening, font=("Arial", 14))
99
- start_button.pack(pady=10)
100
-
101
- result_label = tk.Label(root, text="", font=("Arial", 12))
102
- result_label.pack(pady=10)
103
 
104
- # Run the GUI loop
105
- root.mainloop()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
  import google.generativeai as genai
3
  import speech_recognition as sr
4
+ from gtts import gTTS # Replacing pyttsx3 with gTTS for text-to-speech
5
  from dotenv import load_dotenv
6
+ import gradio as gr
7
+ import tempfile
8
+ from pydub import AudioSegment # Importing for audio conversion
9
 
10
  # Load environment variables
11
  load_dotenv()
12
 
13
def speak_and_save(text):
    """Synthesize *text* with gTTS and save it as a temporary MP3 file.

    Returns the path of the saved .mp3 file. The caller is responsible for
    deleting the file once it is no longer needed.
    """
    tts = gTTS(text)
    # Use mkstemp and close the descriptor before writing: the previous
    # NamedTemporaryFile approach kept the handle open while gTTS re-opened
    # the same path, which fails on Windows.
    fd, audio_path = tempfile.mkstemp(suffix=".mp3")
    os.close(fd)
    tts.save(audio_path)
    return audio_path
20
 
21
def recognize_speech_from_audio(audio_file):
    """Transcribe the speech contained in *audio_file*.

    Returns the recognized text, or None when the audio could not be
    understood or the recognition service reported an error.
    """
    recognizer = sr.Recognizer()

    # Debug print to check if audio file exists
    print(f"Processing audio file: {audio_file}")

    with sr.AudioFile(audio_file) as source:
        audio = recognizer.record(source)
    try:
        command = recognizer.recognize_google(audio)
    except sr.UnknownValueError:
        print("Could not understand audio.")
        return None
    except sr.RequestError:
        print("Error with the speech recognition service.")
        return None
    print(f"You said: {command}")
    return command
40
 
41
def process_command(command):
    """Generate a response based on the voice command using the AI model."""
    # Guard clause: an empty/None command yields None, same as the
    # original implicit fall-through.
    if not command:
        return None
    response = model.generate_content([command])
    reply = response.text.strip()
    print(f"AI Response: {reply}")
    return reply
48
 
49
# Convert any audio file to WAV format to ensure compatibility with speech_recognition
def convert_to_wav(audio_path):
    """Convert any audio file to a temporary WAV file.

    Returns the path of the new .wav file; the caller should delete it
    when finished.
    """
    audio = AudioSegment.from_file(audio_path)
    # tempfile.mktemp is deprecated and race-prone (the name can be taken
    # between creation and use); mkstemp creates the file atomically.
    fd, wav_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    audio.export(wav_path, format="wav")
    return wav_path
 
 
 
 
 
 
 
 
 
 
56
 
57
  # Main Code with Generative AI Setup
58
  api_key = os.getenv("MY_API_KEY")
 
76
  generation_config=generation_config,
77
  )
78
 
79
+ wake_word = "hello"
 
 
 
 
80
 
81
def assistant(audio):
    """Gradio callback: transcribe *audio* and answer when the wake word is heard.

    Returns a (text, audio_path_or_None) pair matching the interface's
    Textbox and Audio outputs.
    """
    # Check if the audio file path is provided
    print(f"Audio file received: {audio}")

    # Check if the file exists before processing
    if not audio or not os.path.exists(audio):
        print(f"Audio file does not exist or is not provided: {audio}")
        return "No audio provided.", None

    # Convert to WAV format before processing
    audio_wav = convert_to_wav(audio)
    try:
        # Process the speech from the audio
        command = recognize_speech_from_audio(audio_wav)
    finally:
        # The WAV is a throwaway intermediate -- delete it so repeated
        # requests do not leak files into the temp directory.
        try:
            os.remove(audio_wav)
        except OSError:
            pass

    if command and wake_word in command.lower():
        response_text = process_command(command)
        audio_response = speak_and_save(response_text)
        return response_text, audio_response
    return "Wake word not detected.", None
102
+
103
# Gradio Interface
demo = gr.Interface(
    fn=assistant,  # Function to call when the interface is run
    inputs=gr.Audio(type="filepath"),  # Audio input, expecting a file path from the microphone
    outputs=[gr.Textbox(), gr.Audio(type="filepath", label="Response Audio")],  # Outputs text and the response audio
    title="Sema Voice Assistant",
    live=True,  # Automatically submit the input after recording
)
demo.launch(share=True)