Spaces:

dschandra
/

Voicebot

Sleeping

App Files Community

dschandra committed on Dec 26, 2024

Commit

92f2b60

verified ·

1 Parent(s): 749d0c1

Update app.py

Browse files

Files changed (1) hide show

app.py +41 -51

app.py CHANGED Viewed

@@ -1,68 +1,58 @@
-import gradio as gr
 from gtts import gTTS
-import openai
-import speech_recognition as sr
 import os
-# Set OpenAI API Key
-openai.api_key = "YOUR_OPENAI_API_KEY"  # Replace with your OpenAI API Key
-# Text-to-Speech Function
-def text_to_speech(response_text):
-    tts = gTTS(response_text, lang="en")
     audio_file = "response.mp3"
     tts.save(audio_file)
     return audio_file
-# Speech Recognition Function
-def speech_to_text(audio_file):
-    recognizer = sr.Recognizer()
-    with sr.AudioFile(audio_file) as source:
-        audio_data = recognizer.record(source)
-        try:
-            return recognizer.recognize_google(audio_data)
-        except sr.UnknownValueError:
-            return "I'm sorry, I couldn't understand that. Could you repeat?"
-        except sr.RequestError:
-            return "There was an error with the speech recognition service."
-# Chatbot Logic using OpenAI GPT
-def chatbot_response(user_input):
     try:
-        response = openai.Completion.create(
-            engine="text-davinci-003",  # Use a powerful GPT model
-            prompt=f"User: {user_input}\nChatbot:",
-            max_tokens=150,
-            temperature=0.7,
-        )
-        return response.choices[0].text.strip()
     except Exception as e:
-        return f"Error generating response: {e}"
-# Gradio Interface Logic
-def process_interaction(audio_file):
-    # Convert user speech to text
-    user_text = speech_to_text(audio_file)
-    if "Error" in user_text or "sorry" in user_text:
-        return user_text, None
-    # Get chatbot response
-    chatbot_reply = chatbot_response(user_text)
-    # Convert chatbot reply to speech
-    chatbot_audio = text_to_speech(chatbot_reply)
-    return chatbot_reply, chatbot_audio
 # Gradio Interface
-interface = gr.Interface(
-    fn=process_interaction,
-    inputs=gr.Audio(source="microphone", type="filepath"),
-    outputs=[gr.Textbox(label="Chatbot Reply"), gr.Audio(label="Chatbot Voice Reply")],
-    title="Face-to-Face Chatbot",
-    description="Talk to this chatbot like you're having a real conversation! Speak into your microphone to start.",
-    live=True,
 )
 if __name__ == "__main__":
-    interface.launch()

+from transformers import pipeline
 from gtts import gTTS
+import gradio as gr
 import os
+# Initialize Whisper pipeline for speech-to-text
+pipe = pipeline("automatic-speech-recognition", model="openai/whisper-large-v3-turbo")
+# Menu for the restaurant
+menu = {
+    "Starters": ["Soup", "Spring Rolls"],
+    "Main Course": ["Paneer Butter Masala", "Chicken Curry", "Veg Biryani"],
+    "Breads": ["Roti", "Naan", "Paratha"],
+    "Desserts": ["Gulab Jamun", "Ice Cream"],
+    "Drinks": ["Mango Lassi", "Soda", "Water"]
+}
+# Function to convert text to speech
+def text_to_speech(text):
+    tts = gTTS(text, lang="en")
     audio_file = "response.mp3"
     tts.save(audio_file)
     return audio_file
+# Chatbot logic
+def chatbot_conversation(audio_file):
+    # Speech-to-text using Whisper
     try:
+        transcription = pipe(audio_file)["text"]
     except Exception as e:
+        return f"Error: {e}", None
+    # Generate a response based on transcription
+    if "menu" in transcription.lower():
+        response = "Our menu categories are: " + ", ".join(menu.keys())
+    elif "order" in transcription.lower():
+        response = "What would you like to order? We have " + ", ".join(menu["Main Course"])
+    elif "thank you" in transcription.lower():
+        response = "You're welcome! Enjoy your meal!"
+    else:
+        response = "I'm sorry, I didn't understand that. Could you please repeat?"
+    # Convert response to audio
+    audio_response = text_to_speech(response)
+    return response, audio_response
 # Gradio Interface
+iface = gr.Interface(
+    fn=chatbot_conversation,
+    inputs=gr.Audio(type="filepath"),
+    outputs=[gr.Textbox(label="Transcription"), gr.Audio(label="Response Audio")],
+    title="Restaurant Chatbot with Whisper ASR",
+    description="Speak to the chatbot and get a response!",
 )
 if __name__ == "__main__":
+    iface.launch()