# Restaurant Voice Assistant — Gradio app (Whisper ASR + gTTS speech output).
# Third-party dependencies for the voice-ordering assistant.
import gradio as gr
from transformers import pipeline
from gtts import gTTS
import os
import numpy as np

# Speech-to-text model used to transcribe the customer's voice input.
asr_pipeline = pipeline("automatic-speech-recognition", model="openai/whisper-large")

# Running transcript of the dialogue ("Customer: ..." / "AI: ..." lines).
conversation_history = []
# Dialogue state: the last action taken and the items ordered so far.
context = {"last_action": None, "order": []}

# Restaurant menu, grouped by course.
menu = {
    "Starters": ["Soup", "Spring Rolls"],
    "Main Course": ["Paneer Butter Masala", "Chicken Curry", "Veg Biryani"],
    "Breads": ["Roti", "Naan", "Paratha"],
    "Desserts": ["Gulab Jamun", "Ice Cream"],
    "Drinks": ["Mango Lassi", "Soda", "Water"],
}
# Text-to-Speech helper
def speak_and_save(text, filename="response.mp3", lang="en"):
    """Synthesize *text* with gTTS and save it as an MP3 file.

    Args:
        text: The text to speak.
        filename: Output path for the generated MP3 (default ``"response.mp3"``).
        lang: gTTS language code. Defaults to ``"en"``; exposed as a
            backward-compatible parameter instead of a hard-coded constant.

    Returns:
        The path of the saved audio file.
    """
    tts = gTTS(text=text, lang=lang)
    tts.save(filename)
    return filename
# Process the audio file and generate response
def process_order(audio_file_path):
    """Transcribe a customer's audio clip and advance the ordering dialogue.

    The dialogue state lives in the module-level ``context`` dict; the full
    exchange is appended to ``conversation_history``.

    Args:
        audio_file_path: Filesystem path of the recorded audio from Gradio.

    Returns:
        A ``(response_text, audio_response_path)`` tuple. On a speech-recognition
        failure the audio path is ``None``.

    Raises:
        ValueError: If *audio_file_path* is ``None``.
    """
    if audio_file_path is None:
        raise ValueError("Audio file path is None. Please provide a valid path.")

    # Transcribe; surface recognition failures as a textual response.
    try:
        transcript = asr_pipeline(audio_file_path)["text"]
    except Exception as e:
        return f"Error in speech recognition: {e}", None

    user_input = transcript.lower()
    conversation_history.append(f"Customer: {user_input}")
    words = user_input.split()

    if context["last_action"] is None:
        # First turn: greet regardless of what was said.
        response = "Welcome to our restaurant! How can I assist you today?"
        context["last_action"] = "greet"
    elif "menu" in user_input:
        response = "Here is our menu:\n"
        for category, items in menu.items():
            response += f"{category}: {', '.join(items)}\n"
        response += "What would you like to order?"
        context["last_action"] = "show_menu"
    elif "order" in user_input or any(item.lower() in user_input for item in sum(menu.values(), [])):
        # Collect only the menu items actually mentioned in this turn.
        mentioned = [item for items in menu.values() for item in items if item.lower() in user_input]
        context["order"].extend(mentioned)
        if context["order"]:
            response = f"I have added {', '.join(context['order'])} to your order. Would you like anything else?"
        else:
            # BUGFIX: previously emitted "I have added  to your order" when the
            # word "order" appeared but no menu item matched.
            response = "I didn't catch any menu item. What would you like to order?"
        context["last_action"] = "place_order"
    elif "no" in words or "that's it" in user_input:
        # BUGFIX: match "no" as a whole word; the old substring test ended the
        # order on any input containing "now", "know", etc.
        response = f"Your final order is: {', '.join(context['order'])}. Thank you for your order. Your food will arrive shortly."
        context["last_action"] = "final_order"
        context["order"] = []  # Reset the order for the next customer
    else:
        response = "I'm not sure what you meant. Could you clarify?"

    conversation_history.append(f"AI: {response}")
    audio_response_path = speak_and_save(response)
    return response, audio_response_path
# Save Conversation History
def save_conversation():
    """Write the accumulated conversation history to ``conversation_history.txt``.

    Returns:
        A confirmation message string.
    """
    # BUGFIX: write explicitly as UTF-8 — Whisper transcripts and responses can
    # contain non-ASCII characters, which crash under non-UTF-8 locale defaults.
    with open("conversation_history.txt", "w", encoding="utf-8") as f:
        f.write("\n".join(conversation_history))
    return "Conversation history saved successfully!"
# Gradio Interface
def create_interface():
    """Build the Gradio interface wiring voice input to :func:`process_order`."""
    voice_in = gr.Audio(type="filepath", label="Your Voice Input")
    text_out = gr.Textbox(label="Text Response")
    audio_out = gr.Audio(label="Audio Response")
    return gr.Interface(
        fn=process_order,
        inputs=voice_in,
        outputs=[text_out, audio_out],
        title="Restaurant Voice Assistant",
        description="Talk to our voice assistant to place your order or ask about the menu!",
        live=True,
    )
if __name__ == "__main__":
    # Launch the app; persist the dialogue log even if launch fails
    # or is interrupted.
    try:
        create_interface().launch()
    finally:
        save_conversation()