ObindiG commited on
Commit
09e6373
·
verified ·
1 Parent(s): 68c51b8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +66 -61
app.py CHANGED
@@ -1,67 +1,58 @@
1
  import os
2
  import google.generativeai as genai
3
  import speech_recognition as sr
4
- import pyttsx3
5
  from dotenv import load_dotenv
6
- import tkinter as tk
7
- from tkinter import messagebox
 
8
 
9
  # Load environment variables
10
  load_dotenv()
11
 
12
- # Initialize text-to-speech engine
13
- engine = pyttsx3.init()
 
 
 
 
 
14
 
15
- def speak(text):
16
- """Use text-to-speech to speak the given text."""
17
- engine.say(text)
18
- engine.runAndWait()
19
-
20
- def recognize_speech(timeout=5):
21
- """Capture and recognize speech from the microphone with a timeout."""
22
  recognizer = sr.Recognizer()
23
- with sr.Microphone() as source:
24
- print("Listening for a wake-up command...")
25
- audio = recognizer.listen(source, timeout=timeout)
26
- print("Audio captured.")
27
- try:
28
- command = recognizer.recognize_google(audio)
29
- print(f"You said: {command}")
30
- return command
31
- except sr.UnknownValueError:
32
- print("Could not understand audio.")
33
- return None
34
- except sr.RequestError:
35
- speak("Sorry, there was an error with the speech recognition service.")
36
- print("Request error.")
37
- return None
 
38
 
39
  def process_command(command):
40
  """Generate a response based on the voice command using the AI model."""
41
  if command:
42
  response = model.generate_content([command])
43
  reply = response.text.strip()
44
- speak(reply)
45
  print(f"AI Response: {reply}")
46
  return reply
47
 
48
- # GUI Functions
49
- def start_listening():
50
- command = recognize_speech(timeout=5)
51
-
52
- if command and wake_word in command.lower():
53
- speak("How can I assist you?")
54
- result_label.config(text="Wake word detected! Listening for further commands...")
55
- command = recognize_speech(timeout=5)
56
- if command:
57
- if "stop listening" in command.lower():
58
- speak("Goodbye.")
59
- result_label.config(text="Voice assistant stopped.")
60
- else:
61
- response = process_command(command)
62
- result_label.config(text=f"Response: {response}")
63
- else:
64
- result_label.config(text="Wake word not detected.")
65
 
66
  # Main Code with Generative AI Setup
67
  api_key = os.getenv("MY_API_KEY")
@@ -85,21 +76,35 @@ model = genai.GenerativeModel(
85
  generation_config=generation_config,
86
  )
87
 
88
- wake_word = "sema"
89
-
90
- # Create the GUI
91
- root = tk.Tk()
92
- root.title("Sema Voice Assistant")
93
 
94
- # Create UI elements
95
- title_label = tk.Label(root, text="SEMA AI", font=("Arial", 16))
96
- title_label.pack(pady=10)
97
-
98
- start_button = tk.Button(root, text="Start Listening", command=start_listening, font=("Arial", 14))
99
- start_button.pack(pady=10)
100
-
101
- result_label = tk.Label(root, text="", font=("Arial", 12))
102
- result_label.pack(pady=10)
103
 
104
- # Run the GUI loop
105
- root.mainloop()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
  import google.generativeai as genai
3
  import speech_recognition as sr
4
+ from gtts import gTTS # Replacing pyttsx3 with gTTS for text-to-speech
5
  from dotenv import load_dotenv
6
+ import gradio as gr
7
+ import tempfile
8
+ from pydub import AudioSegment # Importing for audio conversion
9
 
10
  # Load environment variables
11
  load_dotenv()
12
 
13
def speak_and_save(text):
    """Synthesize *text* with gTTS and save it as a temporary MP3 file.

    Returns the path of the saved .mp3 file. The caller is responsible for
    deleting the file once it is no longer needed.
    """
    tts = gTTS(text)
    # Use mkstemp and close the descriptor before writing: the previous
    # NamedTemporaryFile approach kept the handle open while gTTS re-opened
    # the same path, which fails on Windows.
    fd, audio_path = tempfile.mkstemp(suffix=".mp3")
    os.close(fd)
    tts.save(audio_path)
    return audio_path
20
 
21
def recognize_speech_from_audio(audio_file):
    """Transcribe the speech contained in *audio_file*.

    Returns the recognized text, or None when the audio could not be
    understood or the recognition service reported an error.
    """
    recognizer = sr.Recognizer()

    # Debug print to check if audio file exists
    print(f"Processing audio file: {audio_file}")

    with sr.AudioFile(audio_file) as source:
        audio = recognizer.record(source)
    try:
        command = recognizer.recognize_google(audio)
    except sr.UnknownValueError:
        print("Could not understand audio.")
        return None
    except sr.RequestError:
        print("Error with the speech recognition service.")
        return None
    print(f"You said: {command}")
    return command
40
 
41
def process_command(command):
    """Generate a response based on the voice command using the AI model."""
    # Guard clause: an empty/None command yields None, same as the
    # original implicit fall-through.
    if not command:
        return None
    response = model.generate_content([command])
    reply = response.text.strip()
    print(f"AI Response: {reply}")
    return reply
48
 
49
# Convert any audio file to WAV format to ensure compatibility with speech_recognition
def convert_to_wav(audio_path):
    """Convert any audio file to a temporary WAV file.

    Returns the path of the new .wav file; the caller should delete it
    when finished.
    """
    audio = AudioSegment.from_file(audio_path)
    # tempfile.mktemp is deprecated and race-prone (the name can be taken
    # between creation and use); mkstemp creates the file atomically.
    fd, wav_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    audio.export(wav_path, format="wav")
    return wav_path
 
 
 
 
 
 
 
 
 
 
56
 
57
  # Main Code with Generative AI Setup
58
  api_key = os.getenv("MY_API_KEY")
 
76
  generation_config=generation_config,
77
  )
78
 
79
+ wake_word = "hello"
 
 
 
 
80
 
81
def assistant(audio):
    """Gradio callback: transcribe *audio* and answer when the wake word is heard.

    Returns a (text, audio_path_or_None) pair matching the interface's
    Textbox and Audio outputs.
    """
    # Check if the audio file path is provided
    print(f"Audio file received: {audio}")

    # Check if the file exists before processing
    if not audio or not os.path.exists(audio):
        print(f"Audio file does not exist or is not provided: {audio}")
        return "No audio provided.", None

    # Convert to WAV format before processing
    audio_wav = convert_to_wav(audio)
    try:
        # Process the speech from the audio
        command = recognize_speech_from_audio(audio_wav)
    finally:
        # The WAV is a throwaway intermediate -- delete it so repeated
        # requests do not leak files into the temp directory.
        try:
            os.remove(audio_wav)
        except OSError:
            pass

    if command and wake_word in command.lower():
        response_text = process_command(command)
        audio_response = speak_and_save(response_text)
        return response_text, audio_response
    return "Wake word not detected.", None
102
+
103
# Gradio Interface
demo = gr.Interface(
    fn=assistant,  # Function to call when the interface is run
    inputs=gr.Audio(type="filepath"),  # Audio input, expecting a file path from the microphone
    outputs=[gr.Textbox(), gr.Audio(type="filepath", label="Response Audio")],  # Outputs text and the response audio
    title="Sema Voice Assistant",
    live=True,  # Automatically submit the input after recording
)
demo.launch(share=True)