import gradio as gr
import numpy as np
from transformers import pipeline

# Load the ASR pipeline once at import time (whisper-tiny keeps the demo light).
asr = pipeline("automatic-speech-recognition", model="openai/whisper-tiny")


def transcribe(audio):
    """Transcribe a Gradio audio input to text.

    Args:
        audio: ``(sample_rate, numpy_array)`` tuple as delivered by
            ``gr.Audio(type="numpy")``, or ``None`` when nothing was
            recorded/uploaded.

    Returns:
        The transcribed text, or ``""`` when there is no usable audio.
    """
    if audio is None:
        return ""
    sr, y = audio
    if y is None or len(y) == 0:
        return ""
    if np.issubdtype(y.dtype, np.integer):
        # Gradio delivers integer PCM (typically int16); the HF ASR pipeline
        # expects float samples in [-1, 1], so scale by the dtype's max.
        y = y.astype(np.float32) / np.iinfo(y.dtype).max
    else:
        y = y.astype(np.float32)
    if y.ndim == 2:  # stereo -> mono
        y = y.mean(axis=1)
    out = asr({"sampling_rate": sr, "raw": y})
    return out.get("text", "").strip()


def concierge_reply(user_text: str) -> str:
    """Return a canned concierge answer for *user_text* (demo rules only)."""
    t = (user_text or "").lower()
    if "menu" in t or "dinner" in t:
        return "Here is the dinner menu (demo). Would you like vegetarian or non-veg options?"
    if "restaurant" in t:
        return "The restaurant is on Level 2 (demo). Dinner is 7:00 PM – 10:30 PM."
    if "bar" in t and ("close" in t or "closing" in t):
        return "The bar closes at 11:00 PM (demo). Last order at 10:45 PM."
    if "towel" in t:
        return "Sure — I can place a request for extra towels. How many towels do you need?"
    return "I can help with menus, timings, facilities, and service requests. What would you like to know?"


def handle(mode, typed_text, audio):
    """Route a request to the voice or text pipeline.

    Args:
        mode: ``"Text"`` or ``"Voice"`` (from the Radio component).
        typed_text: text from the Textbox (may be ``None``/empty).
        audio: audio tuple from the Audio component (may be ``None``).

    Returns:
        A ``(transcribed_text, agent_response)`` pair for the two output
        textboxes; ``transcribed_text`` is ``""`` in text mode or on failure.
    """
    if mode == "Voice":
        text = transcribe(audio)
        if not text:
            return "", "❌ Couldn’t understand audio. Please try again."
        return text, concierge_reply(text)
    text = (typed_text or "").strip()
    if not text:
        return "", "❌ Please type a question."
    return "", concierge_reply(text)


with gr.Blocks(title="Guest Experience Agent (Text + Voice)") as demo:
    gr.Markdown(
        "# 🏨 Guest Experience Agent (Prototype)\n"
        "Text or Voice concierge for hotel info, menus, and requests."
    )
    mode = gr.Radio(["Text", "Voice"], value="Text", label="Interaction Mode")
    typed = gr.Textbox(
        label="Type your question",
        placeholder="e.g., Can I see the dinner menu?",
    )
    audio = gr.Audio(
        sources=["microphone", "upload"],
        type="numpy",
        label="Speak your question (Voice mode)",
    )
    btn = gr.Button("Ask", variant="primary")
    transcribed = gr.Textbox(label="Transcribed Text (Voice)", interactive=False)
    answer = gr.Textbox(label="Agent Response", lines=6, interactive=False)
    btn.click(handle, inputs=[mode, typed, audio], outputs=[transcribed, answer])

# Guard the launch so importing this module (e.g. for tests) doesn't start a server.
if __name__ == "__main__":
    demo.launch()