"""Voice-driven restaurant ordering demo.

Records a short clip from the local microphone, transcribes it with the
Google Web Speech API (via ``speech_recognition``), matches the transcript
against a fixed menu, and replies with both text and gTTS-synthesised speech
through a small Gradio UI.
"""

import json
import os
import tempfile

import gradio as gr
import numpy as np
import sounddevice as sd
import speech_recognition as sr
from gtts import gTTS
from scipy.io.wavfile import write

# In-memory cart.
# NOTE(review): this list is module-level, so it is shared by every browser
# session talking to this process, while "menu_shown"/"current_items" live in
# the per-session Gradio state. Confirm whether a per-session cart is wanted.
cart = []

# Menu item name -> price in dollars. Matching against speech is done on the
# lower-cased item name.
menu_items = {
    "Pizza": 10.99,
    "Burger": 8.49,
    "Pasta": 12.99,
    "Salad": 7.99,
    "Soda": 2.49,
}


def _format_menu():
    """Return the menu as text, one item name per line (newline-terminated)."""
    return "".join(f"{item}\n" for item in menu_items)


def _format_cart_lines(items):
    """Return *items* as a dash-prefixed list, one per line."""
    return "".join(f"- {item}\n" for item in items)


def _mentioned_items(text):
    """Return the menu items whose lower-cased name occurs in *text*."""
    return [item for item in menu_items if item.lower() in text]


def generate_voice_response(text):
    """Synthesise *text* to speech and return the path of the MP3 file.

    The file is created with ``delete=False`` and is not removed here; the
    caller (the Gradio audio component) needs it to remain on disk.
    """
    tts = gTTS(text)
    # Reserve a unique path, then close the handle so gTTS can open it itself.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
        mp3_path = temp_file.name
    tts.save(mp3_path)
    return mp3_path


def calculate_total(cart):
    """Return the summed menu price of every item name in *cart*.

    Raises:
        KeyError: if an entry of *cart* is not a key of ``menu_items``.
    """
    return sum(menu_items[item] for item in cart)


def record_audio(duration=5):
    """Record *duration* seconds of mono 16-bit audio and save it as a WAV.

    Args:
        duration: Recording length in seconds.

    Returns:
        Path of a temporary ``.wav`` file. The caller owns the file and is
        responsible for deleting it.
    """
    samplerate = 44100
    print("Recording...")
    audio = sd.rec(
        int(samplerate * duration),
        samplerate=samplerate,
        channels=1,
        dtype="int16",
    )
    sd.wait()  # Block until the recording buffer is full.

    # Close our handle before scipy reopens the path by name: leaving it open
    # leaks a file descriptor and prevents the reopen on Windows.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_wav:
        wav_path = temp_wav.name
    write(wav_path, samplerate, audio)
    print("Recording finished.")
    return wav_path


def _transcribe(audio_path):
    """Return the lower-cased transcript of *audio_path*.

    Returns an empty string when the recogniser cannot understand the audio
    (``sr.UnknownValueError``). Other recogniser errors propagate.
    """
    recognizer = sr.Recognizer()
    with sr.AudioFile(audio_path) as source:
        try:
            return recognizer.recognize_google(recognizer.record(source)).lower()
        except sr.UnknownValueError:
            return ""


def _build_response(input_text, state):
    """Compute the reply for *input_text*, updating *state* and ``cart``.

    Args:
        input_text: Lower-cased transcript ("" if nothing was understood).
        state: Session dict; reads/writes the keys ``"menu_shown"`` and
            ``"current_items"``.

    Returns:
        The non-empty response text.
    """
    # First interaction of a session: always greet and show the menu,
    # regardless of what was said.
    if not state.get("menu_shown", False):
        state["menu_shown"] = True
        return (
            "Welcome to our restaurant! Here is our menu:\n"
            + _format_menu()
            + "\nPlease tell me the item you would like to add to your cart or ask for the price of an item."
        )

    mentioned = _mentioned_items(input_text)

    if "price of" in input_text:
        if len(mentioned) == 1:
            item = mentioned[0]
            return f"The price of {item} is ${menu_items[item]:.2f}."
        if len(mentioned) > 1:
            return (
                f"I detected multiple items in your input: {', '.join(mentioned)}. "
                "Please ask for the price of one item at a time."
            )
        return "I couldn't find that item on the menu. Please ask for an item available in the menu."

    if mentioned:
        already_ordered = state.get("current_items", [])
        new_items = [item for item in mentioned if item not in already_ordered]
        if len(new_items) == 1:
            item = new_items[0]
            cart.append(item)
            state.setdefault("current_items", []).append(item)
            return (
                f"{item} has been added to your cart. Your current cart includes:\n"
                + _format_cart_lines(cart)
                + "\nWould you like to add anything else?"
            )
        if len(new_items) > 1:
            return (
                f"I detected multiple items in your input: {', '.join(new_items)}. "
                "Please mention one item at a time."
            )
        # Everything mentioned is already part of this order. Previously this
        # path left the response empty, which makes gTTS fail on empty text.
        return (
            f"You already have {', '.join(mentioned)} in your cart. "
            "Would you like to add anything else?"
        )

    if "menu" in input_text:
        return (
            "Here is our menu again:\n"
            + _format_menu()
            + "\nWhat would you like to add to your cart or ask about?"
        )

    if "final order" in input_text or "submit order" in input_text:
        if not cart:
            return "Your cart is empty. Would you like to order something?"
        total = calculate_total(cart)
        summary = (
            "Your final order includes:\n"
            + _format_cart_lines(cart)
            + f"\nTotal amount: ${total:.2f}.\nThank you for ordering!"
        )
        cart.clear()  # Start a fresh order (in place, so no `global` needed).
        state["current_items"] = []
        return summary

    return "I didn’t quite catch that. Please tell me what you’d like to order or ask about."


def process_audio(audio_path, state_json):
    """Transcribe a recording and produce the assistant's reply.

    Args:
        audio_path: Path of an audio file readable by ``sr.AudioFile``.
        state_json: JSON-encoded session state, or a falsy value for a new
            session.

    Returns:
        A ``(response_text, voice_mp3_path, new_state_json)`` tuple.
    """
    state = json.loads(state_json) if state_json else {}
    input_text = _transcribe(audio_path)
    response = _build_response(input_text, state)
    voice_path = generate_voice_response(response)
    return response, voice_path, json.dumps(state)


def record_and_process(state_json):
    """Record from the microphone, then run ``process_audio`` on the clip.

    The temporary recording is deleted once processing has finished (or
    failed), so repeated clicks do not accumulate WAV files.
    """
    audio_path = record_audio()
    try:
        return process_audio(audio_path, state_json)
    finally:
        # Best-effort cleanup: a failure to delete must not mask the result
        # or the original exception.
        try:
            os.remove(audio_path)
        except OSError:
            pass


with gr.Blocks() as demo:
    # Session state travels through the UI as a JSON string.
    state = gr.State(value=json.dumps({}))
    with gr.Row():
        button = gr.Button("Start Recording")
        output_text = gr.Textbox(label="Response Text", interactive=False)
    with gr.Row():
        voice_output = gr.Audio(label="Response Audio", autoplay=True)
    button.click(
        record_and_process,
        inputs=state,
        outputs=[output_text, voice_output, state],
    )

demo.launch()