# app.py — Guest Experience Agent (Gradio demo)
# Provenance: Hugging Face Space upload by MBG0903 (commit 0ab017b, verified)
import gradio as gr
import numpy as np
from transformers import pipeline
# Load ASR once (lightweight model for demos)
# NOTE(review): whisper-tiny trades accuracy for startup speed; building the
# pipeline at import time means the model download/warm-up happens once,
# before the first request.
asr = pipeline("automatic-speech-recognition", model="openai/whisper-tiny")
def transcribe(audio):
"""
Gradio returns (sample_rate, numpy_array) when type="numpy"
"""
if audio is None:
return ""
sr, y = audio
if y is None or len(y) == 0:
return ""
# Convert to float32 mono
y = y.astype(np.float32)
if y.ndim == 2: # stereo -> mono
y = y.mean(axis=1)
out = asr({"sampling_rate": sr, "raw": y})
return out.get("text", "").strip()
def concierge_reply(user_text: str) -> str:
    """Return a canned concierge answer for a guest question (demo rules).

    Matching is keyword-based and case-insensitive. Rules are evaluated in
    order and the first matching rule wins; when nothing matches, a generic
    help prompt is returned. ``None`` input is treated as an empty question.
    """
    query = (user_text or "").lower()

    # Ordered (predicate, reply) rules — order matters: e.g. "dinner menu"
    # must hit the menu rule before the restaurant rule can be considered.
    rules = (
        (lambda q: "menu" in q or "dinner" in q,
         "Here is the dinner menu (demo). Would you like vegetarian or non-veg options?"),
        (lambda q: "restaurant" in q,
         "The restaurant is on Level 2 (demo). Dinner is 7:00 PM – 10:30 PM."),
        (lambda q: "bar" in q and ("close" in q or "closing" in q),
         "The bar closes at 11:00 PM (demo). Last order at 10:45 PM."),
        (lambda q: "towel" in q,
         "Sure — I can place a request for extra towels. How many towels do you need?"),
    )
    for matches, reply in rules:
        if matches(query):
            return reply
    return "I can help with menus, timings, facilities, and service requests. What would you like to know?"
def handle(mode, typed_text, audio):
    """Route one request through the voice or text path.

    Returns a ``(transcribed_text, agent_response)`` pair matching the two
    output textboxes; the first element is non-empty only on a successful
    voice transcription.
    """
    # Any mode other than "Voice" is treated as the typed-text path.
    if mode != "Voice":
        question = (typed_text or "").strip()
        if not question:
            return "", "❌ Please type a question."
        return "", concierge_reply(question)

    spoken = transcribe(audio)
    if not spoken:
        return "", "❌ Couldn’t understand audio. Please try again."
    return spoken, concierge_reply(spoken)
# --- UI wiring: both input modes share one "Ask" button; outputs are read-only ---
with gr.Blocks(title="Guest Experience Agent (Text + Voice)") as demo:
    gr.Markdown("# 🏨 Guest Experience Agent (Prototype)\nText or Voice concierge for hotel info, menus, and requests.")
    # Selects which input handle() reads: the textbox ("Text") or the audio ("Voice").
    mode = gr.Radio(["Text", "Voice"], value="Text", label="Interaction Mode")
    typed = gr.Textbox(label="Type your question", placeholder="e.g., Can I see the dinner menu?")
    # type="numpy" -> the handler receives (sample_rate, np.ndarray); see transcribe().
    audio = gr.Audio(sources=["microphone", "upload"], type="numpy", label="Speak your question (Voice mode)")
    btn = gr.Button("Ask", variant="primary")
    transcribed = gr.Textbox(label="Transcribed Text (Voice)", interactive=False)
    answer = gr.Textbox(label="Agent Response", lines=6, interactive=False)
    # handle() returns a 2-tuple in the same order as `outputs` below.
    btn.click(handle, inputs=[mode, typed, audio], outputs=[transcribed, answer])

demo.launch()