File size: 2,491 Bytes
21ca779
0ab017b
 
21ca779
0ab017b
 
21ca779
0ab017b
 
 
 
 
21ca779
0ab017b
 
 
 
 
 
 
 
21ca779
0ab017b
 
 
 
21ca779
0ab017b
 
 
 
 
 
21ca779
 
0ab017b
21ca779
0ab017b
21ca779
0ab017b
 
 
 
21ca779
0ab017b
 
 
 
21ca779
0ab017b
 
21ca779
0ab017b
21ca779
0ab017b
 
21ca779
0ab017b
21ca779
0ab017b
 
21ca779
0ab017b
21ca779
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import gradio as gr
import numpy as np
from transformers import pipeline

# Build the speech-to-text pipeline a single time at import so every request
# reuses the same loaded model; whisper-tiny is the lightweight demo choice.
asr = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-tiny",
)

def transcribe(audio):
    """
    Transcribe one Gradio audio clip to text with the module-level ``asr`` pipeline.

    Args:
        audio: ``(sample_rate, samples)`` tuple as delivered by ``gr.Audio``
            with ``type="numpy"``, or ``None`` when nothing was recorded.
            ``samples`` is a 1-D array (mono) or a 2-D array whose second
            axis is averaged away as the channel axis.

    Returns:
        The recognised text with surrounding whitespace removed, or ``""``
        when there is no audio to process.
    """
    if audio is None:
        return ""
    sr, y = audio
    if y is None:
        return ""
    y = np.asarray(y)
    if y.size == 0:
        return ""

    # Integer PCM (Gradio normally hands over int16) must be rescaled, not just
    # cast: a bare astype() keeps amplitudes around +/-32768, while the speech
    # model expects floating-point samples in [-1.0, 1.0].
    if np.issubdtype(y.dtype, np.integer):
        full_scale = float(np.iinfo(y.dtype).max) + 1.0  # 32768.0 for int16
        y = y.astype(np.float32) / full_scale
    else:
        # Floating-point input is taken to be in range already.
        y = y.astype(np.float32)

    if y.ndim == 2:  # stereo -> mono
        y = y.mean(axis=1)

    out = asr({"sampling_rate": sr, "raw": y})
    return out.get("text", "").strip()

def concierge_reply(user_text: str) -> str:
    """
    Pick a canned concierge answer by keyword-matching the guest's question.

    Matching is case-insensitive substring search, and the first rule that
    matches wins: menu/dinner, then restaurant, then bar closing time, then
    towels. Empty or ``None`` input falls through to the generic help prompt.
    """
    query = user_text.lower() if user_text else ""

    def mentions(*keywords):
        # True when at least one keyword occurs anywhere in the question.
        return any(word in query for word in keywords)

    if mentions("menu", "dinner"):
        reply = "Here is the dinner menu (demo). Would you like vegetarian or non-veg options?"
    elif mentions("restaurant"):
        reply = "The restaurant is on Level 2 (demo). Dinner is 7:00 PM – 10:30 PM."
    elif mentions("bar") and mentions("close", "closing"):
        reply = "The bar closes at 11:00 PM (demo). Last order at 10:45 PM."
    elif mentions("towel"):
        reply = "Sure — I can place a request for extra towels. How many towels do you need?"
    else:
        reply = "I can help with menus, timings, facilities, and service requests. What would you like to know?"
    return reply

def handle(mode, typed_text, audio):
    """
    Button callback: answer the guest's question from text or from speech.

    Returns a ``(transcribed_text, agent_response)`` pair. The first element
    is only non-empty when ``mode`` is "Voice" and transcription produced
    text; any other mode is treated as typed input.
    """
    if mode != "Voice":
        # Typed path: the transcript field is always returned empty.
        question = typed_text.strip() if typed_text else ""
        if question:
            return "", concierge_reply(question)
        return "", "❌ Please type a question."

    # Voice path: echo what was heard next to the answer.
    heard = transcribe(audio)
    if heard:
        return heard, concierge_reply(heard)
    return "", "❌ Couldn’t understand audio. Please try again."

# Assemble the single-page UI. Components are created top to bottom in the
# order they should appear, so the statement order below is kept as-is.
with gr.Blocks(title="Guest Experience Agent (Text + Voice)") as demo:
    gr.Markdown(
        value="# 🏨 Guest Experience Agent (Prototype)\nText or Voice concierge for hotel info, menus, and requests."
    )

    # Selects which of the two inputs below `handle` will read.
    mode = gr.Radio(
        choices=["Text", "Voice"],
        value="Text",
        label="Interaction Mode",
    )

    typed = gr.Textbox(
        label="Type your question",
        placeholder="e.g., Can I see the dinner menu?",
    )
    # type="numpy" makes the callback receive a (sample_rate, array) tuple.
    audio = gr.Audio(
        sources=["microphone", "upload"],
        type="numpy",
        label="Speak your question (Voice mode)",
    )

    btn = gr.Button(value="Ask", variant="primary")

    # Read-only output fields filled by the callback.
    transcribed = gr.Textbox(
        label="Transcribed Text (Voice)",
        interactive=False,
    )
    answer = gr.Textbox(
        label="Agent Response",
        lines=6,
        interactive=False,
    )

    btn.click(
        fn=handle,
        inputs=[mode, typed, audio],
        outputs=[transcribed, answer],
    )

# Start the Gradio server for the app defined above.
demo.launch()