MBG0903 committed on
Commit
21ca779
·
verified ·
1 Parent(s): 5b6409a

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +95 -0
app.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr
from faster_whisper import WhisperModel

# ---- Load Whisper once (choose "tiny" for speed; "base" for better accuracy) ----
ASR_MODEL_SIZE = "tiny"
# Loaded once at import time so every request reuses the same model.
# int8 quantization on CPU keeps memory and latency low for a demo deployment.
asr_model = WhisperModel(ASR_MODEL_SIZE, device="cpu", compute_type="int8")
8
def transcribe_audio(audio_path: str) -> str:
    """Transcribe the audio file at *audio_path* with the module-level Whisper model.

    Returns the whitespace-normalized transcript, or "" when no path is given
    (e.g. the user submitted without recording).
    """
    if not audio_path:
        return ""
    # vad_filter drops long silences before decoding.
    segments, _info = asr_model.transcribe(audio_path, vad_filter=True)
    parts = [segment.text.strip() for segment in segments]
    return " ".join(parts).strip()
14
+
15
# ---- Your concierge brain (v1 = rule/KB; later connect OpenAI/RAG) ----
def generate_reply(user_text: str) -> str:
    """Return a canned concierge answer chosen by keyword matching.

    Checks are ordered: restaurant/dinner/menu first, then bar/close,
    then towel, then a generic fallback. Matching is case-insensitive
    substring search; None input falls through to the fallback.
    """
    query = (user_text or "").lower()
    if any(keyword in query for keyword in ("restaurant", "dinner", "menu")):
        return (
            "Restaurant info:\n"
            "• Dinner: 7:00 PM – 10:30 PM\n"
            "• You can view today’s dinner menu here (demo): Veg options, grills, noodles, desserts.\n"
            "Would you like vegetarian items or something spicy?"
        )
    if any(keyword in query for keyword in ("bar", "close")):
        return "The bar closes at 11:00 PM (demo). Would you like the last-order time as well?"
    if "towel" in query:
        return "Sure — I can place a request for extra towels. How many towels do you need?"
    return "I can help with timings, directions, menus, and service requests. What would you like to know?"
30
+
31
def handle_concierge(mode: str, text_input: str, audio_input: str):
    """Route one request through the voice or text pipeline.

    Returns a 3-tuple for the Gradio outputs: (transcribed text, reply,
    reply-for-state). On empty input the reply slot carries an error
    message and the other two slots are empty strings.
    """
    if mode == "Voice":
        user_text = transcribe_audio(audio_input)
        if not user_text:
            return "", "❌ I couldn't hear anything. Please try again.", ""
    else:
        user_text = (text_input or "").strip()
        if not user_text:
            return "", "❌ Please type a question.", ""
    # Common success path: answer and mirror the reply into the state slot.
    reply = generate_reply(user_text)
    return user_text, reply, reply
44
+
45
with gr.Blocks(title="AI Digital Concierge (Text + Voice)") as demo:
    gr.Markdown("# 🏨 AI Digital Concierge (Prototype)\nText or Voice — ask about facilities, menus, timings, and requests.")

    mode = gr.Radio(["Text", "Voice"], value="Text", label="Choose interaction mode")

    # Inputs
    text_in = gr.Textbox(label="Type your question (Text mode)", placeholder="e.g., Can I see the dinner menu?")
    audio_in = gr.Audio(label="Speak your question (Voice mode)", sources=["microphone", "upload"], type="filepath")

    ask_btn = gr.Button("Ask Concierge", variant="primary")

    # Outputs
    transcribed = gr.Textbox(label="Transcribed text (Voice mode)", interactive=False)
    response = gr.Textbox(label="Concierge Response", lines=6, interactive=False, elem_id="agent_response")
    response_state = gr.State("")  # holds latest response for speak button

    # Browser-side TTS (no server TTS).
    # BUG FIX: Gradio puts elem_id on the component's wrapper <div>, not on the
    # <textarea> itself, so getElementById('agent_response').value was always
    # undefined and the Speak button could never find any text. Query the inner
    # textarea instead.
    gr.HTML("""
    <div style="margin-top: 8px;">
      <button
        style="padding:10px 14px; border-radius:10px; border:1px solid #ccc; cursor:pointer;"
        onclick="
          const t = document.querySelector('#agent_response textarea')?.value || '';
          if (!t) { alert('No response to read yet.'); return; }
          window.speechSynthesis.cancel();
          const u = new SpeechSynthesisUtterance(t);
          u.rate = 1.0; u.pitch = 1.0; u.lang = 'en-US';
          window.speechSynthesis.speak(u);
        "
      >
        🔊 Speak Response
      </button>
      <button
        style="padding:10px 14px; border-radius:10px; border:1px solid #ccc; cursor:pointer; margin-left:8px;"
        onclick="window.speechSynthesis.cancel();"
      >
        ⏹ Stop
      </button>
      <div style="font-size:12px; color:#666; margin-top:6px;">
        Voice output uses your browser’s speech engine (fast, no server load).
      </div>
    </div>
    """)

    ask_btn.click(
        handle_concierge,
        inputs=[mode, text_in, audio_in],
        outputs=[transcribed, response, response_state],
    )

# Guard the launch so importing this module (e.g. from tests or another app)
# does not start a server as a side effect.
if __name__ == "__main__":
    demo.launch()