import gradio as gr
import numpy as np
from transformers import pipeline

# Load the ASR pipeline once at import time (whisper-tiny keeps the demo light).
asr = pipeline("automatic-speech-recognition", model="openai/whisper-tiny")


def transcribe(audio):
    """Transcribe a Gradio audio input to text.

    Args:
        audio: ``(sample_rate, numpy_array)`` tuple as delivered by
            ``gr.Audio(type="numpy")``, or ``None`` when nothing was
            recorded/uploaded.

    Returns:
        The transcribed text, or ``""`` when there is no usable audio.
    """
    if audio is None:
        return ""
    sr, y = audio
    if y is None or len(y) == 0:
        return ""
    if np.issubdtype(y.dtype, np.integer):
        # Gradio delivers integer PCM (typically int16); the HF ASR pipeline
        # expects float samples in [-1, 1], so scale by the dtype's max.
        y = y.astype(np.float32) / np.iinfo(y.dtype).max
    else:
        y = y.astype(np.float32)
    if y.ndim == 2:  # stereo -> mono
        y = y.mean(axis=1)
    out = asr({"sampling_rate": sr, "raw": y})
    return out.get("text", "").strip()


def concierge_reply(user_text: str) -> str:
    """Return a canned concierge answer for *user_text* (demo rules only)."""
    t = (user_text or "").lower()
    if "menu" in t or "dinner" in t:
        return "Here is the dinner menu (demo). Would you like vegetarian or non-veg options?"
    if "restaurant" in t:
        return "The restaurant is on Level 2 (demo). Dinner is 7:00 PM – 10:30 PM."
    if "bar" in t and ("close" in t or "closing" in t):
        return "The bar closes at 11:00 PM (demo). Last order at 10:45 PM."
    if "towel" in t:
        return "Sure — I can place a request for extra towels. How many towels do you need?"
    return "I can help with menus, timings, facilities, and service requests. What would you like to know?"


def handle(mode, typed_text, audio):
    """Route a request to the voice or text pipeline.

    Args:
        mode: ``"Text"`` or ``"Voice"`` (from the Radio component).
        typed_text: text from the Textbox (may be ``None``/empty).
        audio: audio tuple from the Audio component (may be ``None``).

    Returns:
        A ``(transcribed_text, agent_response)`` pair for the two output
        textboxes; ``transcribed_text`` is ``""`` in text mode or on failure.
    """
    if mode == "Voice":
        text = transcribe(audio)
        if not text:
            return "", "❌ Couldn’t understand audio. Please try again."
        return text, concierge_reply(text)
    text = (typed_text or "").strip()
    if not text:
        return "", "❌ Please type a question."
    return "", concierge_reply(text)


with gr.Blocks(title="Guest Experience Agent (Text + Voice)") as demo:
    gr.Markdown(
        "# 🏨 Guest Experience Agent (Prototype)\n"
        "Text or Voice concierge for hotel info, menus, and requests."
    )
    mode = gr.Radio(["Text", "Voice"], value="Text", label="Interaction Mode")
    typed = gr.Textbox(
        label="Type your question",
        placeholder="e.g., Can I see the dinner menu?",
    )
    audio = gr.Audio(
        sources=["microphone", "upload"],
        type="numpy",
        label="Speak your question (Voice mode)",
    )
    btn = gr.Button("Ask", variant="primary")
    transcribed = gr.Textbox(label="Transcribed Text (Voice)", interactive=False)
    answer = gr.Textbox(label="Agent Response", lines=6, interactive=False)
    btn.click(handle, inputs=[mode, typed, audio], outputs=[transcribed, answer])

# Guard the launch so importing this module (e.g. for tests) doesn't start a server.
if __name__ == "__main__":
    demo.launch()