"""
PregoPal - Gradio UI construction (modern UI edition)
======================================
Three sub-pages + Chinese/English switching + full-duplex voice chat
(through the llama-server omni backend).

Full-duplex workflow:
    the user clicks record → the Gradio Audio component records
    → voice_helper.chat_voice() → llama-server omni_init/prefill/decode
    → AI text + TTS audio → shown in the chat box + spoken aloud
"""
import asyncio
import datetime
import html
import time
import traceback

import gradio as gr
import numpy as np

from modules.voiceprint import VoiceprintManager
from modules.meal_recommender import MealRecommender
from modules.diet_logger import DietLogger
from modules.nutrition_analyzer import NutritionAnalyzer
from modules.family_manager import RecipeManager, PreferenceManager, MemoryManager
from modules.diet_extractor import DietExtractor
from modules.nutrition_standards import DRIsParser
from plugins.three_day_summary import ThreeDaySummaryPlugin
from plugins.base import LoopContext
from utils import (
    t,
    ZH,
    EN,
    get_home_cards,
    CUSTOM_CSS,
    render_family_recipes_html,
    render_family_preferences_html,
    render_family_memories_html,
    render_nutrition_report_html,
)
from api.voice_helper import chat_text, chat_voice, omni_status

# ============================================================
# Full-duplex voice: module-level state
# ============================================================
# NOTE(review): these are process-wide globals, so concurrent browser
# sessions would share one buffer and one on/off flag — confirm that the
# app is only ever used by a single client at a time.
_DUPLEX_ACTIVE = False  # True = a full-duplex conversation is running
_AUDIO_BUF = []  # buffered streaming audio chunks
_AUDIO_SR = 16000  # sample rate of the buffered audio
_SPEECH_SECONDS = 0.0  # accumulated duration of voiced audio
_SILENT_SECONDS = 0.0  # duration of the current run of silence
_LAST_CHUNK_LOG = 0.0  # time.time() of the last per-chunk log line
_TARGET_SR = 16000
_VAD_RMS_THRESHOLD = 0.008
_MIN_SPEECH_SECONDS = 0.35
_MAX_UTTERANCE_SECONDS = 12.0
_SILENCE_TO_SEND_SECONDS = 0.85

# ============================================================
# Global instances (singletons)
# ============================================================
voiceprint_mgr = VoiceprintManager()
meal_recommender = MealRecommender()
diet_logger = DietLogger()
nutrition_analyzer = NutritionAnalyzer()


# ============================================================
# Full-duplex voice session (core functions)
# ============================================================
def _rms(chunk) -> float:
    """Return the root-mean-square of *chunk*, or 0.0 for None/empty input."""
    if chunk is None or len(chunk) == 0:
        return 0.0
    return float(np.sqrt(np.mean(np.square(chunk, dtype=np.float32))))


def _is_silent(chunk, thr=_VAD_RMS_THRESHOLD):
    """Return True when the RMS of *chunk* is below *thr*."""
    return _rms(chunk) < thr


def _reset_duplex_buffers():
    """Drop the buffered audio and zero the speech/silence counters."""
    global _AUDIO_BUF, _AUDIO_SR, _SPEECH_SECONDS, _SILENT_SECONDS
    _AUDIO_BUF = []
    _AUDIO_SR = _TARGET_SR
    _SPEECH_SECONDS = 0.0
    _SILENT_SECONDS = 0.0


def _keep_audio():
    """Keep the current Gradio audio value instead of clearing playback."""
    return gr.update()


def toggle_duplex(*args):
    """Flip the full-duplex flag and return its new value.

    Positional arguments are accepted and ignored. The buffers are reset
    when the conversation is switched off.
    """
    global _DUPLEX_ACTIVE
    _DUPLEX_ACTIVE = not _DUPLEX_ACTIVE
    if not _DUPLEX_ACTIVE:
        _reset_duplex_buffers()
    return _DUPLEX_ACTIVE


def handle_stream_chunk(audio_chunk, chat_history):
    """Handle one streamed audio chunk: VAD -> buffer -> call the backend.

    Args:
        audio_chunk: ``(sample_rate, ndarray)`` tuple; anything else is
            treated as "no audio".
        chat_history: current chat history (a falsy value becomes ``[]``).

    Returns:
        ``(chat_history, status_html, audio_update)`` — the three outputs
        bound to the stream event.
    """
    global _DUPLEX_ACTIVE, _AUDIO_BUF, _AUDIO_SR, _SPEECH_SECONDS, _SILENT_SECONDS, _LAST_CHUNK_LOG

    if not _DUPLEX_ACTIVE:
        return chat_history, _thinking_html("✅ 已就绪"), _keep_audio()
    if not chat_history:
        chat_history = []

    audio_np = None
    sr = _TARGET_SR
    if isinstance(audio_chunk, tuple) and len(audio_chunk) == 2:
        sr, arr = audio_chunk
        if arr is None:
            return chat_history, _thinking_html("🎤 聆听中..."), _keep_audio()
        if arr.dtype.kind == 'i':
            # Scale by the dtype's own full-scale value (32768 for int16)
            # so other integer widths are normalised correctly too.
            full_scale = float(np.iinfo(arr.dtype).max) + 1.0
            audio_np = arr.astype(np.float32) / full_scale
        else:
            audio_np = arr.astype(np.float32)
        if audio_np.ndim > 1:
            # assumes a (samples, channels) layout — TODO confirm
            audio_np = audio_np.mean(axis=1)

    if audio_np is None or len(audio_np) == 0:
        return chat_history, _thinking_html("🎤 聆听中..."), _keep_audio()

    _AUDIO_SR = int(sr or _TARGET_SR)
    chunk_seconds = len(audio_np) / max(float(_AUDIO_SR), 1.0)
    chunk_rms = _rms(audio_np)

    # Log at most one chunk every two seconds.
    now = time.time()
    if now - _LAST_CHUNK_LOG > 2.0:
        print(f"[PregoPal Duplex] stream chunk sr={_AUDIO_SR} dur={chunk_seconds:.2f}s rms={chunk_rms:.4f}", flush=True)
        _LAST_CHUNK_LOG = now

    if chunk_rms < _VAD_RMS_THRESHOLD:
        _SILENT_SECONDS += chunk_seconds
        if _SILENT_SECONDS >= _SILENCE_TO_SEND_SECONDS and len(_AUDIO_BUF) > 0:
            if _SPEECH_SECONDS >= _MIN_SPEECH_SECONDS:
                full = np.concatenate(_AUDIO_BUF)
                buffered_sr = _AUDIO_SR
                print(
                    f"[PregoPal Duplex] utterance ready dur={len(full)/max(buffered_sr,1):.2f}s sr={buffered_sr}",
                    flush=True,
                )
                _reset_duplex_buffers()
                return _call_duplex_backend(full, chat_history, sr=buffered_sr)
            # The pause is long enough to end an utterance but too little
            # speech was captured: discard it, otherwise unrelated short
            # blips add up until they pass the minimum and get sent.
            _reset_duplex_buffers()
        return chat_history, _thinking_html("🎤 聆听中..."), _keep_audio()

    _SPEECH_SECONDS += chunk_seconds
    _SILENT_SECONDS = 0.0
    _AUDIO_BUF.append(audio_np)
    total_seconds = sum(len(x) for x in _AUDIO_BUF) / max(float(_AUDIO_SR), 1.0)
    if total_seconds >= _MAX_UTTERANCE_SECONDS:
        # Force a send so one utterance cannot grow without bound.
        full = np.concatenate(_AUDIO_BUF)
        buffered_sr = _AUDIO_SR
        print(f"[PregoPal Duplex] max utterance reached dur={total_seconds:.2f}s", flush=True)
        _reset_duplex_buffers()
        return _call_duplex_backend(full, chat_history, sr=buffered_sr)
    return chat_history, _thinking_html("🔊 正在听..."), _keep_audio()


def _call_duplex_backend(audio_np, chat_history, sr=_TARGET_SR):
    """Send one utterance to the voice backend and build the UI outputs.

    The audio is down-mixed, resampled to ``_TARGET_SR`` when librosa is
    usable, written to a temporary WAV file and passed to ``chat_voice``.

    Returns:
        ``(chat_history, status_html, audio)`` where ``audio`` is the path
        of the reply WAV, or a no-op update when there is no reply audio.
        Any exception is printed and reported in ``status_html``.
    """
    try:
        # Imported here so a missing dependency is reported in the status
        # line by the handler below.
        import base64
        import os
        import tempfile

        import soundfile as sf

        if audio_np.ndim > 1:
            audio_np = audio_np.mean(axis=1)
        if int(sr) != _TARGET_SR:
            try:
                import librosa
                audio_np = librosa.resample(audio_np, orig_sr=int(sr), target_sr=_TARGET_SR)
                sr = _TARGET_SR
            except Exception as e:
                # Best effort: fall back to the original sample rate.
                print(f"[PregoPal Duplex] resample failed, using original sr={sr}: {e}", flush=True)
        audio_np = np.clip(audio_np.astype(np.float32), -1.0, 1.0)

        fd, path = tempfile.mkstemp(suffix=".wav")
        os.close(fd)
        try:
            sf.write(path, audio_np, int(sr), format="WAV", subtype="PCM_16")
            result = chat_voice(path)
        finally:
            try:
                os.remove(path)
            except OSError:
                pass  # best-effort cleanup of the request file

        if not result.get("success"):
            return chat_history, _thinking_html("🎤 聆听中..."), _keep_audio()

        print(
            f"[PregoPal Duplex] backend success round={result.get('round')} "
            f"audio_files={result.get('audio_files')} audio_b64_len={len(result.get('audio_base64') or '')}",
            flush=True,
        )
        # "text" may be present but None; treat that like an empty reply.
        ai_text = (result.get("text") or "").strip()
        if len(ai_text) < 2:
            # No usable text came back: ask the text endpoint instead,
            # passing the last four exchanges as context.
            sp = "你是PregoPal孕期营养健康顾问。请用中文简短回答。"
            msgs = [{"role": "system", "content": sp}]
            for h in chat_history[-4:]:
                msgs.append({"role": "user", "content": h[0] if h[0] else "..."})
                msgs.append({"role": "assistant", "content": h[1] if h[1] else "..."})
            msgs.append({"role": "user", "content": "我刚刚和你说话(语音输入)"})
            ai_text = chat_text(msgs)
        chat_history.append(["🗣️ 您", ai_text])

        if "[EXTRACT_DIET]" in ai_text:
            extractor = DietExtractor()
            extracted = extractor.extract_all(ai_text)
            diet_logger.log_diet(
                member_id="ai",
                member_name="AI识别",
                meals=extracted.get("meals", {}),
                notes=f"语音对话: {datetime.date.today()}",
            )

        if result.get("audio_base64"):
            # NOTE(review): this reply file is not removed anywhere in this
            # function — confirm whether something else cleans it up.
            fd_out, out_path = tempfile.mkstemp(suffix=".wav")
            os.close(fd_out)
            with open(out_path, "wb") as f:
                f.write(base64.b64decode(result["audio_base64"]))
            return (chat_history, _thinking_html("🎤 聆听中..."), out_path)
        return chat_history, _thinking_html("🎤 聆听中..."), _keep_audio()
    except Exception as e:
        traceback.print_exc()
        # The message ends up inside HTML, so escape it.
        return chat_history, _thinking_html(f"错误: {html.escape(str(e)[:50])}"), _keep_audio()


def _thinking_html(text: str) -> str:
    """Return the HTML for the assistant status line.

    NOTE(review): ``color`` and ``animation`` are computed but not used by
    the template visible here; the surrounding markup may be missing from
    this copy of the file — confirm against the original.
    """
    is_thinking = "思考" in text or "Thinking" in text
    color = "#6A1B9A" if is_thinking else "#888"
    animation = ""
    if is_thinking or "聆听" in text or "Listening" in text:
        animation = '.'.replace(".", '.')
    return f"""
{text}
"""


# ============================================================
# Helper: home-page texts per language
# ============================================================
def _home_texts(lang):
    """Return the home-page label dictionary for *lang* ("zh", else English)."""
    if lang == "zh":
        return {
            "title": "# 🌸 欢迎来到 PregoPal",
            "subtitle": "### 你的孕期AI伴侣,时刻陪伴在你身边",
            "tap_speak": "🎙️ 点击说话",
            "tap_hint": "🎤 按住录音,松开发送",
            "trimester_label": "孕期阶段",
            "nutrition_focus": "营养关注",
            "today_diet": "昨日饮食",
            "family_recipe": "家庭菜谱",
            "weight_label": "体重管理",
            "thinking_label": "💬 AI 对话",
            "thinking_waiting": "等待对话中...",
            "recent_title": "最近的饮食记录",
            "meal_count_unit": "餐",
            "no_data": "暂无数据",
            "no_record": "还没有记录,开始对话吧!",
            "recipes_unit": "道家常菜",
            "no_recipe": "还没有菜谱",
        }
    return {
        "title": "# 🌸 Welcome to PregoPal",
        "subtitle": "### Your AI pregnancy companion, always by your side",
        "tap_speak": "🎙️ Tap to Speak",
        "tap_hint": "🎤 Hold to record, release to send",
        "trimester_label": "Trimester",
        "nutrition_focus": "Nutrition Focus",
        "today_diet": "Yesterday's Diet",
        "family_recipe": "Family Recipes",
        "weight_label": "Weight",
        "thinking_label": "💬 AI Chat",
        "thinking_waiting": "Waiting for conversation...",
        "recent_title": "Recent Diet Records",
        "meal_count_unit": " meals",
        "no_data": "No data",
        "no_record": "No records yet. Start a conversation!",
        "recipes_unit": " recipes",
        "no_recipe": "No recipes yet",
    }


# ============================================================
# Helper: render the recent records as an HTML table
# ============================================================
def _render_recent_records_html(records, lang="zh"):
    """Render up to the last six diet records as an HTML fragment.

    Stored values are HTML-escaped because the result is shown through
    ``gr.HTML``. A record whose ``meals`` is missing or None renders "—".
    """
    if not records:
        T = _home_texts(lang)
        return f'\n{T["no_record"]}\n'

    row_chunks = []
    for r in records[-6:]:
        raw_date = r.get("date")
        date = html.escape(str(raw_date[-5:])) if raw_date else ""
        member = html.escape(str(r.get("member_name", "") or ""))
        meals = r.get("meals") or {}
        meal_parts = [
            f"{html.escape(str(meal_time))} {html.escape(str(food))}"
            for meal_time, food in meals.items()
        ]
        meal_str = " | ".join(meal_parts) if meal_parts else "—"
        row_chunks.append(f""" {date} {member} {meal_str} """)
    rows = "".join(row_chunks)

    label_date = "日期" if lang == "zh" else "Date"
    label_member = "成员" if lang == "zh" else "Member"
    label_meals = "餐食" if lang == "zh" else "Meals"
    return f"""
{rows}
{label_date} {label_member} {label_meals}
"""


# ============================================================
# Tab 1: 🏠 Home page
# ============================================================
def _home_content(loop, lang):
    """Build the home page, including the full-duplex voice panel.

    NOTE(review): the nesting of the layout containers below was
    reconstructed from a copy of the file without indentation — verify
    the placement of the status line and the cards against the original.
    """
    T = _home_texts(lang)
    cards = get_home_cards(loop)

    # Nutrition focus
    nutrition_display = "、".join(cards["focus_nutrients"][:5]) if cards["focus_nutrients"] else T["no_data"]
    if cards["recommended_foods"]:
        food_str = "、".join(cards["recommended_foods"][:5])
        nutrition_display += f"\n\n🍽️ 推荐:{food_str}"

    # Yesterday's diet
    diet_display = cards["yesterday_summary"]
    if cards["meal_count"] > 0:
        diet_display = f"{diet_display}\n({cards['meal_count']}{T['meal_count_unit']})"

    # Family recipes
    if cards["recipe_count"] > 0:
        recipe_display = f"{cards['recipe_count']}{T['recipes_unit']}:{'、'.join(cards['recipe_names'])}"
    else:
        recipe_display = T["no_recipe"]

    # Weight
    weight_display = cards["weight_status"]
    if cards["weight_trend"]:
        weight_display += f" | {cards['weight_trend']}"

    records = diet_logger.get_recent_records(days=3)
    recent_html = _render_recent_records_html(records, lang)

    with gr.Column(elem_classes=["home-container"]):
        gr.Markdown(T["title"])
        gr.Markdown(T["subtitle"])

        # Full-duplex voice area — Gradio Microphone streaming
        gr.HTML("""""")
        with gr.Group(elem_classes=["voice-duplex-panel"]):
            with gr.Row():
                # Left: control button + streaming microphone + reply playback
                with gr.Column(scale=1, min_width=320):
                    duplex_btn = gr.Button(
                        "🎙️ 点击开始全双工对话",
                        elem_classes=["duplex-btn"],
                        variant="primary",
                        size="lg",
                    )
                    audio_input = gr.Microphone(
                        type="numpy",
                        streaming=True,
                        label="",
                        show_label=False,
                        container=True,
                        elem_classes=["voice-input"],
                        recording=False,
                        interactive=True,
                    )
                    audio_output = gr.Audio(
                        type="filepath",
                        autoplay=True,
                        visible=True,
                        label="🔊 AI 回复",
                        show_label=False,
                        container=True,
                        elem_classes=["voice-output"],
                    )
                # Right: conversation display
                with gr.Column(scale=2, min_width=400):
                    chat_box = gr.Chatbot(
                        label=T.get("thinking_label", "💬 AI 对话"),
                        height=320,
                        show_label=True,
                        elem_classes=["chat-box"],
                    )
            # Status line
            thinking_display = gr.HTML(
                _thinking_html("🔄 点击「开始对话」进入全双工语音交互" if lang == "zh" else "🔄 Tap 'Start' for duplex voice")
            )

        # Card row 1
        with gr.Row():
            with gr.Column(scale=1):
                with gr.Group(elem_classes=["home-card", "card-trimester"]):
                    gr.Markdown(f"### 📅 {T['trimester_label']}")
                    gr.Markdown(f"**{cards['trimester']}**")
            with gr.Column(scale=1):
                with gr.Group(elem_classes=["home-card", "card-nutrition"]):
                    gr.Markdown(f"### 🥗 {T['nutrition_focus']}")
                    gr.Markdown(nutrition_display)
            with gr.Column(scale=1):
                with gr.Group(elem_classes=["home-card", "card-diet"]):
                    gr.Markdown(f"### 🍽️ {T['today_diet']}")
                    gr.Markdown(diet_display)

        # Card row 2
        with gr.Row():
            with gr.Column(scale=1):
                with gr.Group(elem_classes=["home-card", "card-recipe"]):
                    gr.Markdown(f"### 🍳 {T['family_recipe']}")
                    gr.Markdown(recipe_display)
            with gr.Column(scale=1):
                with gr.Group(elem_classes=["home-card", "card-weight"]):
                    gr.Markdown(f"### ⚖️ {T['weight_label']}")
                    gr.Markdown(weight_display)

        # Recent diet records
        with gr.Group(elem_classes=["home-card"]):
            gr.Markdown(f"### 📋 {T['recent_title']}")
            gr.HTML(recent_html)

    # ── Event bindings (full duplex) ──
    def toggle_duplex_v2(state):
        """Toggle duplex mode; return button, status, microphone and state updates.

        ``state`` is accepted because it is wired as an input, but the
        module-level flag flipped by ``toggle_duplex`` decides the outcome.
        NOTE(review): the button labels are Chinese for both languages.
        """
        if not toggle_duplex():
            return (
                gr.update(value="🎙️ 点击开始全双工对话"),
                _thinking_html("✅ 已退出全双工模式" if lang == "zh" else "✅ Duplex ended"),
                gr.update(recording=False),
                False,
            )
        # Start every conversation with empty buffers.
        _reset_duplex_buffers()
        return (
            gr.update(value="🔴 结束全双工对话"),
            _thinking_html("🎤 正在持续聆听,说完停顿一下我会自动回复" if lang == "zh" else "🎤 Listening continuously"),
            gr.update(recording=True),
            True,
        )

    duplex_state = gr.State(value=False)
    duplex_btn.click(
        fn=toggle_duplex_v2,
        inputs=[duplex_state],
        outputs=[duplex_btn, thinking_display, audio_input, duplex_state],
        queue=False,
    )

    # Streaming VAD
    audio_input.stream(
        fn=handle_stream_chunk,
        inputs=[audio_input, chat_box],
        outputs=[chat_box, thinking_display, audio_output],
        stream_every=0.3,
    )


# ============================================================
# Tab 2: 👨‍👩‍👧‍👦 Family eating habits
# ============================================================
def _refreshable_html_tab(label, render_fn, lang):
    """Add a tab showing ``render_fn(lang)`` plus a button that re-renders it."""
    with gr.Tab(label):
        panel = gr.HTML(render_fn(lang))
        refresh_btn = gr.Button(t("btn_refresh", lang), size="sm")
        refresh_btn.click(
            fn=lambda: render_fn(lang),
            outputs=panel,
        )


def _family_content(lang):
    """Build the family eating-habits page (display only).

    Three tabs — recipes, preferences, memories — each render their HTML
    once and offer a refresh button.
    """
    with gr.Column(elem_classes=["glass-card"]):
        gr.Markdown(f"## {t('family_title', lang)}")
        gr.Markdown(f"*{t('family_subtitle', lang)}*")
        with gr.Tabs(selected=0):
            _refreshable_html_tab(t("tab_recipes", lang), render_family_recipes_html, lang)
            _refreshable_html_tab(t("tab_preferences", lang), render_family_preferences_html, lang)
            _refreshable_html_tab(t("tab_memory", lang), render_family_memories_html, lang)


# ============================================================
# Tab 3: 📈 Nutrition report
# ============================================================
def _report_content(lang):
    """Build the nutrition-report page.

    The report is regenerated when the day-range slider or the trimester
    selector changes; rapid successive changes are debounced.
    """
    # One-element list used as a mutable cell shared by the handlers: it
    # holds the token of the most recent change event.
    _latest_request = [None]
    trimester_choices = (ZH if lang == "zh" else EN)["trimester_choices"]

    with gr.Column():
        with gr.Row():
            with gr.Column(scale=2, min_width=200):
                analysis_days = gr.Slider(
                    label=t("analysis_days", lang),
                    minimum=1,
                    maximum=30,
                    value=7,
                    step=1,
                )
            with gr.Column(scale=1, min_width=160):
                trimester_select = gr.Radio(
                    label=t("trimester_label", lang),
                    choices=trimester_choices,
                    value=trimester_choices[1],
                )
        report_dashboard = gr.HTML(
            f'\n'
            f'调整上方参数自动生成报告\n'
        )

    def generate_full_report(days, trimester):
        """Analyse the last *days* of records and render the report HTML."""
        records = diet_logger.get_recent_records(days=int(days))
        analysis = nutrition_analyzer.analyze_diet(records)
        try:
            ctx = LoopContext()
            ctx.briefing["trimester"] = trimester
            asyncio.run(ThreeDaySummaryPlugin().run(ctx))
            deficit_data = ctx.briefing.get("three_day_summary", {})
        except Exception:
            # Best effort: the report still renders without the summary,
            # but the failure is logged instead of vanishing silently.
            traceback.print_exc()
            deficit_data = {}
        return render_nutrition_report_html(analysis, days=int(days), lang=lang, deficit_data=deficit_data)

    def on_slider_change(days, trimester):
        """Debounced change handler: only the most recent change renders.

        Each call records a unique token, waits, and renders only if no
        later call replaced that token in the meantime. Comparing tokens
        (rather than elapsed time) keeps a slow wake-up from dropping the
        final update and makes superseded calls skip reliably.
        """
        token = object()
        _latest_request[0] = token
        time.sleep(0.8)
        if _latest_request[0] is not token:
            return gr.skip()
        return generate_full_report(days, trimester)

    analysis_days.change(
        fn=on_slider_change,
        inputs=[analysis_days, trimester_select],
        outputs=[report_dashboard],
    )
    trimester_select.change(
        fn=on_slider_change,
        inputs=[analysis_days, trimester_select],
        outputs=[report_dashboard],
    )


# ============================================================
# Main entry point
# ============================================================
def create_app(loop=None):
    """Create the main app (3 tabs + language switch + full-duplex voice).

    Args:
        loop: passed through to the home page, which hands it to
            ``get_home_cards``.

    Returns:
        The ``gr.Blocks`` application.
    """
    with gr.Blocks(
        title="PregoPal - 孕期陪护AI助手",
    ) as demo:
        lang_state = gr.State(value="zh")

        # Top header
        with gr.Row():
            with gr.Column(scale=4):
                gr.HTML("""

🌸 PregoPal

孕期陪护AI助手 — 全双工语音对话
Pregnancy Companion AI — Full Duplex Voice Chat

""")
            with gr.Column(scale=1, min_width=120):
                lang_selector = gr.Radio(
                    label="🌐 Language",
                    choices=["中文", "English"],
                    value="中文",
                    interactive=True,
                )

        def switch_lang(choice):
            """Map the selector label to a language code."""
            return "zh" if choice == "中文" else "en"

        lang_selector.change(fn=switch_lang, inputs=[lang_selector], outputs=[lang_state])

        # The tabs are re-rendered whenever the language state changes.
        @gr.render(inputs=[lang_state])
        def render_all(lang):
            with gr.Tabs():
                with gr.Tab(f"🏠 {t('tab_home_suffix', lang)}", id="tab_home"):
                    _home_content(loop, lang)
                with gr.Tab(f"👨‍👩‍👧‍👦 {t('tab_family_suffix', lang)}", id="tab_family"):
                    _family_content(lang)
                with gr.Tab(f"📈 {t('tab_report_suffix', lang)}", id="tab_report"):
                    _report_content(lang)

    return demo