Spaces:

dschandra
/

Voicebot

Sleeping

App Files Files Community

dschandra committed on Dec 26, 2024

Commit

3b0cc06

verified ·

1 Parent(s): 3560f6a

Update app.py

Browse files

Files changed (1) hide show

app.py +65 -83

app.py CHANGED Viewed

@@ -1,86 +1,68 @@
-import assemblyai as aai
-from elevenlabs import generate, stream
-from openai import OpenAI
-class AI_Assistant:
-    def __init__(self):
-        aai.settings.api_key = "ASSEMBLYAI-API-KEY"
-        self.openai_client = OpenAI(api_key="OPENAI-API-KEY")
-        self.elevenlabs_api_key = "ELEVENLABS-API-KEY"
-        self.transcriber = None
-        # Context for food ordering in a restaurant
-        self.full_transcript = [
-            {"role": "system", "content": "You are a virtual assistant for a restaurant. Help customers with food ordering, menu inquiries, and table reservations."},
-        ]
-    def start_transcription(self):
-        self.transcriber = aai.RealtimeTranscriber(
-            sample_rate=16000,
-            on_data=self.on_data,
-            on_error=self.on_error,
-            on_open=self.on_open,
-            on_close=self.on_close,
-            end_utterance_silence_threshold=1000,
-        )
-        self.transcriber.connect()
-        microphone_stream = aai.extras.MicrophoneStream(sample_rate=16000)
-        self.transcriber.stream(microphone_stream)
-    def stop_transcription(self):
-        if self.transcriber:
-            self.transcriber.close()
-            self.transcriber = None
-    def on_open(self, session_opened: aai.RealtimeSessionOpened):
-        print("Session ID:", session_opened.session_id)
-    def on_data(self, transcript: aai.RealtimeTranscript):
-        if not transcript.text:
-            return
-        if isinstance(transcript, aai.RealtimeFinalTranscript):
-            self.generate_ai_response(transcript)
-        else:
-            print(transcript.text, end="\r")
-    def on_error(self, error: aai.RealtimeError):
-        print("An error occurred:", error)
-    def on_close(self):
-        print("Session closed.")
-    def generate_ai_response(self, transcript):
-        self.stop_transcription()
-        self.full_transcript.append({"role": "user", "content": transcript.text})
-        print(f"\nCustomer: {transcript.text}\n")
-        response = self.openai_client.chat.completions.create(
-            model="gpt-3.5-turbo",
-            messages=self.full_transcript
         )
-        ai_response = response.choices[0].message.content
-        self.generate_audio(ai_response)
-        self.start_transcription()
-        print("\nListening for the next input...\n")
-    def generate_audio(self, text):
-        self.full_transcript.append({"role": "assistant", "content": text})
-        print(f"\nAI Assistant: {text}")
-        audio_stream = generate(
-            api_key=self.elevenlabs_api_key,
-            text=text,
-            voice="Rachel",
-            stream=True
-        )
-        stream(audio_stream)
 if __name__ == "__main__":
-    greeting = "Welcome to Gourmet Bistro! My name is Sandy. How may I assist you today?"
-    ai_assistant = AI_Assistant()
-    ai_assistant.generate_audio(greeting)
-    ai_assistant.start_transcription()

+import gradio as gr
+from gtts import gTTS
+import openai
+import speech_recognition as sr
+import os
# Set OpenAI API Key
# Prefer the OPENAI_API_KEY environment variable so the secret never has to be
# committed to the repository; the placeholder is kept only as a fallback so
# the module still imports when the variable is not set.
openai.api_key = os.environ.get("OPENAI_API_KEY", "YOUR_OPENAI_API_KEY")
# Text-to-Speech Function
def text_to_speech(response_text):
    """Synthesize ``response_text`` as English speech and return the MP3 path.

    Args:
        response_text: Text to be spoken.

    Returns:
        Path of the MP3 file written by gTTS, or ``None`` when there is
        nothing to speak (``None``, empty, or whitespace-only text).
    """
    if not response_text or not response_text.strip():
        # gTTS refuses empty text, so skip synthesis instead of crashing the
        # request; callers can pass None straight through as "no audio".
        return None

    import tempfile  # stdlib; imported locally so the block is self-contained

    tts = gTTS(response_text, lang="en")
    # A fixed file name would be overwritten when two requests overlap, so
    # every call gets its own file. delete=False keeps it on disk after the
    # handle is closed so the caller can still read it.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
        audio_file = tmp.name
    tts.save(audio_file)
    return audio_file
# Speech Recognition Function
def speech_to_text(audio_file):
    """Transcribe a recorded audio file with Google's speech recognition.

    Args:
        audio_file: Path to the recording, or a falsy value (``None`` / ``""``)
            when no recording is available.

    Returns:
        The recognized text on success. On failure a human-readable message is
        returned in place of the transcript (callers compare against these
        exact messages):
        - nothing recorded or speech not understood -> the "I'm sorry..." text
        - recognition service unreachable / failing -> the "There was an
          error..." text
    """
    if not audio_file:
        # NOTE(review): presumably the UI can invoke this with no recording
        # (e.g. live mode before/after a capture) - treat that like
        # unintelligible audio instead of crashing on AudioFile(None).
        return "I'm sorry, I couldn't understand that. Could you repeat?"

    recognizer = sr.Recognizer()
    with sr.AudioFile(audio_file) as source:
        audio_data = recognizer.record(source)

    # The audio is fully read into memory above, so the file is closed before
    # the (potentially slow) network call below.
    try:
        return recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        return "I'm sorry, I couldn't understand that. Could you repeat?"
    except sr.RequestError:
        return "There was an error with the speech recognition service."
# Chatbot Logic using OpenAI GPT
def chatbot_response(user_input):
    """Return the chatbot's reply to ``user_input``.

    Args:
        user_input: The user's message as plain text.

    Returns:
        The model's reply with surrounding whitespace stripped. This function
        never raises: any failure is reported as a string starting with
        ``"Error generating response:"``.
    """
    if not user_input or not user_input.strip():
        # Nothing to answer; avoid a pointless (billable) API round trip.
        return "Error generating response: empty input"
    try:
        # NOTE: uses the chat completions endpoint of openai>=1.0. The legacy
        # openai.Completion API and the text-davinci-003 model it targeted
        # have both been retired, so the previous call could not succeed.
        response = openai.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": user_input}],
            max_tokens=150,
            temperature=0.7,
        )
        # content may be None (e.g. a filtered reply); normalize to "".
        return (response.choices[0].message.content or "").strip()
    except Exception as e:
        # Deliberately broad: this sits at the UI boundary, where showing the
        # error text is preferable to a crashed request.
        return f"Error generating response: {e}"
# Gradio Interface Logic
# Exact messages speech_to_text() returns in place of a transcript when
# recognition fails. They are matched verbatim: a substring test such as
# `"sorry" in text` would reject genuine speech containing that word, and a
# case-sensitive "Error" test misses the lowercase "error" in the second one.
_SPEECH_FAILURE_MESSAGES = (
    "I'm sorry, I couldn't understand that. Could you repeat?",
    "There was an error with the speech recognition service.",
)


def process_interaction(audio_file):
    """Run one voice turn: transcribe, ask the chatbot, speak the reply.

    Args:
        audio_file: Path to the user's recording, as passed in by the UI.

    Returns:
        A ``(text, audio_path)`` tuple. When transcription fails, ``text`` is
        the failure message and ``audio_path`` is ``None``; otherwise ``text``
        is the chatbot's reply and ``audio_path`` is whatever
        ``text_to_speech`` returned for it.
    """
    # Convert user speech to text
    user_text = speech_to_text(audio_file)
    if user_text in _SPEECH_FAILURE_MESSAGES:
        # Show the failure message as-is; do not forward it to the chatbot.
        return user_text, None
    # Get chatbot response
    chatbot_reply = chatbot_response(user_text)
    # Convert chatbot reply to speech
    chatbot_audio = text_to_speech(chatbot_reply)
    return chatbot_reply, chatbot_audio
# Gradio Interface
# Input: a microphone recording handed to the callback as a file path.
# NOTE(review): `source=` is the keyword this file was written against; some
# Gradio releases appear to expect `sources=[...]` instead - confirm against
# the installed version.
_microphone_input = gr.Audio(source="microphone", type="filepath")

# Outputs, in the order process_interaction returns them: reply text, then
# the spoken reply.
_reply_outputs = [
    gr.Textbox(label="Chatbot Reply"),
    gr.Audio(label="Chatbot Voice Reply"),
]

interface = gr.Interface(
    fn=process_interaction,
    inputs=_microphone_input,
    outputs=_reply_outputs,
    title="Face-to-Face Chatbot",
    description="Talk to this chatbot like you're having a real conversation! Speak into your microphone to start.",
    live=True,
)

if __name__ == "__main__":
    # Start the web UI only when run as a script, not on import.
    interface.launch()