File size: 4,864 Bytes
af704d1
8924535
 
 
 
63c3e87
2b6046a
716ff70
f7d4d31
cf344c7
f7d4d31
 
 
 
cf344c7
af704d1
 
 
 
 
a941958
cf344c7
f7d4d31
 
 
 
 
 
cf344c7
716ff70
41ce285
716ff70
8924535
 
 
 
 
 
 
 
 
 
 
 
2b6046a
716ff70
2b6046a
716ff70
8924535
 
 
 
 
 
 
716ff70
 
 
 
ac47bc5
 
 
716ff70
ac47bc5
 
 
 
 
 
 
 
 
 
 
bea0744
 
 
 
 
8924535
bea0744
 
ac47bc5
 
bea0744
 
ac47bc5
 
 
 
 
 
 
41ce285
ac47bc5
 
 
41ce285
ac47bc5
 
716ff70
ac47bc5
 
 
716ff70
 
 
f7d4d31
8924535
 
 
 
f7d4d31
716ff70
f7d4d31
 
8924535
41ce285
e0d893e
ff2f5a3
716ff70
da1755e
8924535
e0d893e
f7d4d31
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
import gradio as gr
import sounddevice as sd
import numpy as np
from scipy.io.wavfile import write
import tempfile
from gtts import gTTS
import os
import json
import speech_recognition as sr

# In-memory cart: a module-level list of ordered item names (not persisted).
cart = list()

# Menu offered to the customer, mapping each item name to its price.
menu_items = dict(
    Pizza=10.99,
    Burger=8.49,
    Pasta=12.99,
    Salad=7.99,
    Soda=2.49,
)

def generate_voice_response(text):
    """Convert ``text`` to speech with gTTS and return the saved MP3's path.

    The audio goes into a named temporary file created with ``delete=False``,
    so the file stays on disk after this function returns.
    """
    speech = gTTS(text)
    # Reserve a path; leaving the ``with`` block closes the handle before
    # gTTS saves to that path by name.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as mp3_handle:
        mp3_path = mp3_handle.name
    speech.save(mp3_path)
    return mp3_path

def calculate_total(cart):
    """Return the sum of the menu prices for every item name in ``cart``.

    Prices are looked up in the module-level ``menu_items`` mapping; an item
    name that is not a key there raises ``KeyError``.
    """
    prices = [menu_items[name] for name in cart]
    return sum(prices)

def record_audio(duration=5):
    """Record audio for a fixed duration and save it to a temporary WAV file.

    Records ``duration`` seconds of single-channel 16-bit audio at 44.1 kHz
    with sounddevice, blocking until the recording has finished.

    Args:
        duration: Length of the recording in seconds.

    Returns:
        Path of the WAV file. It is created with ``delete=False``, so it is
        left on disk after this function returns.
    """
    samplerate = 44100  # Sample rate in Hz
    print("Recording...")
    audio = sd.rec(int(samplerate * duration), samplerate=samplerate, channels=1, dtype="int16")
    sd.wait()  # Wait until recording is finished

    # Close the handle before writing by name: this avoids leaking a file
    # descriptor per recording and works on platforms that do not allow a
    # second open of a temporary file that is still open.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_wav:
        wav_path = temp_wav.name
    write(wav_path, samplerate, audio)
    print("Recording finished.")
    return wav_path

def process_audio(audio_path, state_json):
    """Transcribe one recorded utterance and produce the next assistant reply.

    The first call for a conversation (``menu_shown`` not yet set in the
    state) always answers with the welcome message and menu, regardless of
    what was said. Later calls handle, in priority order: price queries
    ("price of"), adding a mentioned menu item to the cart, repeating the
    menu ("menu"), and finalizing the order ("final order"/"submit order").

    NOTE: ``cart`` is a module-level list, while ``current_items`` lives in
    the per-call state; the cart is therefore shared by every caller of this
    function within the process.

    Args:
        audio_path: Path to an audio file readable by ``sr.AudioFile``.
        state_json: JSON-encoded state dict (keys ``menu_shown`` and
            ``current_items``), or a falsy value to start with empty state.

    Returns:
        A ``(response_text, voice_path, state_json)`` tuple: the reply text,
        the path of the synthesized speech file for that reply, and the
        updated state re-encoded as JSON.
    """
    global cart
    state = json.loads(state_json) if state_json else {}

    # Convert audio to text; unintelligible speech is treated as empty input.
    recognizer = sr.Recognizer()
    with sr.AudioFile(audio_path) as source:
        recorded = recognizer.record(source)
    try:
        input_text = recognizer.recognize_google(recorded).lower()
    except sr.UnknownValueError:
        input_text = ""

    # Menu items whose (lower-cased) name occurs anywhere in the utterance.
    mentioned = [item for item in menu_items if item.lower() in input_text]

    if not state.get("menu_shown", False):
        # Show menu dynamically
        response = (
            "Welcome to our restaurant! Here is our menu:\n"
            + "".join(f"{item}\n" for item in menu_items)
            + "\nPlease tell me the item you would like to add to your cart or ask for the price of an item."
        )
        state["menu_shown"] = True
    elif "price of" in input_text:
        # Handle price queries
        if len(mentioned) == 1:
            item = mentioned[0]
            response = f"The price of {item} is ${menu_items[item]:.2f}."
        elif len(mentioned) > 1:
            response = f"I detected multiple items in your input: {', '.join(mentioned)}. Please ask for the price of one item at a time."
        else:
            response = "I couldn't find that item on the menu. Please ask for an item available in the menu."
    elif mentioned:
        # Only items not yet added during the current order cycle are eligible.
        already_added = state.get("current_items", [])
        new_items = [item for item in mentioned if item not in already_added]
        if len(new_items) == 1:
            item = new_items[0]
            cart.append(item)
            state.setdefault("current_items", []).append(item)
            response = (
                f"{item} has been added to your cart. Your current cart includes:\n"
                + "".join(f"- {cart_item}\n" for cart_item in cart)
                + "\nWould you like to add anything else?"
            )
        elif len(new_items) > 1:
            response = f"I detected multiple items in your input: {', '.join(new_items)}. Please mention one item at a time."
        else:
            # Every mentioned item was already added. Without this branch the
            # reply stayed empty and speech synthesis was asked to speak "".
            response = (
                f"You have already added {', '.join(mentioned)} to your cart. "
                "Would you like to add anything else?"
            )
    elif "menu" in input_text:
        response = (
            "Here is our menu again:\n"
            + "".join(f"{item}\n" for item in menu_items)
            + "\nWhat would you like to add to your cart or ask about?"
        )
    elif "final order" in input_text or "submit order" in input_text:
        if cart:
            total = calculate_total(cart)
            response = (
                "Your final order includes:\n"
                + "".join(f"- {item}\n" for item in cart)
                + f"\nTotal amount: ${total:.2f}.\nThank you for ordering!"
            )
            cart = []  # Clear cart after finalizing order
            state["current_items"] = []  # Clear current cycle tracking
        else:
            response = "Your cart is empty. Would you like to order something?"
    else:
        response = "I didn’t quite catch that. Please tell me what you’d like to order or ask about."

    voice_path = generate_voice_response(response)
    return response, voice_path, json.dumps(state)

def record_and_process(state_json):
    """Record a clip with ``record_audio`` and pass it to ``process_audio``.

    ``state_json`` is forwarded unchanged, and the return value is whatever
    ``process_audio`` returns for the new recording.
    """
    return process_audio(record_audio(), state_json)

# Build the Gradio UI: one button that triggers a recording, a read-only text
# box for the reply, and an audio player for the spoken reply.
with gr.Blocks() as demo:
    # Conversation state is kept as a JSON string; it starts as an empty object.
    state = gr.State(value=json.dumps({}))

    with gr.Row():
        button = gr.Button("Start Recording")
        output_text = gr.Textbox(label="Response Text", interactive=False)

    with gr.Row():
        # autoplay=True is set so the reply is presumably played as soon as it arrives.
        voice_output = gr.Audio(label="Response Audio", autoplay=True)

    # A click calls record_and_process with the current state; its three
    # return values fill the text box, the audio player, and the state.
    button.click(record_and_process, inputs=state, outputs=[output_text, voice_output, state])

# NOTE: launch() is called at module top level, so it runs whenever this file
# is executed or imported.
demo.launch()