PregoPal / ui /app_builder.py
J.B-Lin
Fix Gradio duplex audio playback clearing
0598330
Raw
History Blame Contribute Delete
25.9 kB
"""
PregoPal - Gradio 界面构建(现代UI版)
======================================
3 个子页面 + 中英文切换 + 全双工语音对话(通过 llama-server omni 后端)
全双工工作流:
用户点击录音 → Gradio Audio 组件录制 → voice_helper.chat_voice() →
llama-server omni_init/prefill/decode → AI文本+TTS音频 →
显示在聊天框 + 播放语音
"""
import datetime
import gradio as gr
import time
import asyncio
import traceback
import numpy as np
from modules.voiceprint import VoiceprintManager
from modules.meal_recommender import MealRecommender
from modules.diet_logger import DietLogger
from modules.nutrition_analyzer import NutritionAnalyzer
from modules.family_manager import RecipeManager, PreferenceManager, MemoryManager
from modules.diet_extractor import DietExtractor
from modules.nutrition_standards import DRIsParser
from plugins.three_day_summary import ThreeDaySummaryPlugin
from plugins.base import LoopContext
from utils import (
t, ZH, EN, get_home_cards, CUSTOM_CSS,
render_family_recipes_html,
render_family_preferences_html,
render_family_memories_html,
render_nutrition_report_html,
)
from api.voice_helper import chat_text, chat_voice, omni_status
# ============================================================
# Full-duplex voice: module-level state
# ============================================================
# NOTE: these are plain module globals, so every caller in this process
# reads and writes the same values.
_DUPLEX_ACTIVE = False  # True while a full-duplex conversation is running
_AUDIO_BUF = []  # buffered non-silent chunks of the current utterance
_AUDIO_SR = 16000  # sample rate of the most recently received chunk
_SPEECH_SECONDS = 0.0  # accumulated duration of non-silent audio
_SILENT_SECONDS = 0.0  # duration of the current run of silent chunks
_LAST_CHUNK_LOG = 0.0  # time.time() of the last per-chunk log line
_TARGET_SR = 16000  # sample rate utterances are resampled to before sending
_VAD_RMS_THRESHOLD = 0.008  # default RMS level below which a chunk is silent
_MIN_SPEECH_SECONDS = 0.35  # minimum speech needed before silence triggers a send
_MAX_UTTERANCE_SECONDS = 12.0  # buffered speech length that forces a send
_SILENCE_TO_SEND_SECONDS = 0.85  # trailing silence that ends an utterance
# ============================================================
# Global instances (singletons)
# ============================================================
voiceprint_mgr = VoiceprintManager()
meal_recommender = MealRecommender()
diet_logger = DietLogger()
nutrition_analyzer = NutritionAnalyzer()
# ============================================================
# 全双工语音会话(核心函数)
# ============================================================
def _rms(chunk) -> float:
if chunk is None or len(chunk) == 0:
return 0.0
return float(np.sqrt(np.mean(np.square(chunk, dtype=np.float32))))
def _is_silent(chunk, thr=_VAD_RMS_THRESHOLD):
    """Return True when the RMS level of ``chunk`` is strictly below ``thr``."""
    level = _rms(chunk)
    return level < thr
def _reset_duplex_buffers():
    """Discard the buffered utterance and zero the speech/silence counters."""
    global _AUDIO_BUF, _AUDIO_SR, _SPEECH_SECONDS, _SILENT_SECONDS
    _SPEECH_SECONDS = _SILENT_SECONDS = 0.0
    _AUDIO_SR = _TARGET_SR
    _AUDIO_BUF = []
def _keep_audio():
    """Return an empty ``gr.update()`` so the audio output is left as it is."""
    unchanged = gr.update()
    return unchanged
def toggle_duplex(*args):
    """Flip the global duplex flag and return its new value.

    Any positional arguments are accepted and ignored. The audio buffers are
    reset when the flag is switched off.
    """
    global _DUPLEX_ACTIVE
    _DUPLEX_ACTIVE = not _DUPLEX_ACTIVE
    if _DUPLEX_ACTIVE:
        return True
    _reset_duplex_buffers()
    return False
def handle_stream_chunk(audio_chunk, chat_history):
    """Process one streamed microphone chunk: VAD, buffer, then send on pause.

    Non-silent chunks are appended to the module-level buffer. The buffered
    utterance is handed to ``_call_duplex_backend`` either when enough
    trailing silence follows enough speech, or when the buffer reaches the
    maximum utterance length.

    Args:
        audio_chunk: expected to be a ``(sample_rate, ndarray)`` tuple; any
            other value is treated as "no audio".
        chat_history: current chat history, or a falsy value for none.

    Returns:
        A 3-tuple ``(chat_history, status_html, audio_update)``.
    """
    global _DUPLEX_ACTIVE, _AUDIO_BUF, _AUDIO_SR, _SPEECH_SECONDS, _SILENT_SECONDS, _LAST_CHUNK_LOG

    if not _DUPLEX_ACTIVE:
        return chat_history, _thinking_html("✅ 已就绪"), _keep_audio()

    chat_history = chat_history or []

    def _still_listening():
        return chat_history, _thinking_html("🎤 聆听中..."), _keep_audio()

    samples = None
    rate = _TARGET_SR
    if isinstance(audio_chunk, tuple) and len(audio_chunk) == 2:
        rate, raw = audio_chunk
        if raw is None:
            return _still_listening()
        needs_scaling = raw.dtype.kind == 'i'
        samples = raw.astype(np.float32)
        if needs_scaling:
            # Signed integer PCM is mapped into [-1, 1) using the int16 range.
            samples = samples / 32768.0
        if samples.ndim > 1:
            # Down-mix multi-channel audio to mono.
            samples = samples.mean(axis=1)

    if samples is None or len(samples) == 0:
        return _still_listening()

    _AUDIO_SR = int(rate or _TARGET_SR)
    chunk_seconds = len(samples) / max(float(_AUDIO_SR), 1.0)
    chunk_rms = _rms(samples)

    # Log at most one chunk every two seconds.
    now = time.time()
    if now - _LAST_CHUNK_LOG > 2.0:
        print(f"[PregoPal Duplex] stream chunk sr={_AUDIO_SR} dur={chunk_seconds:.2f}s rms={chunk_rms:.4f}", flush=True)
        _LAST_CHUNK_LOG = now

    if _is_silent(samples):
        _SILENT_SECONDS += chunk_seconds
        utterance_complete = (
            _SILENT_SECONDS >= _SILENCE_TO_SEND_SECONDS
            and _SPEECH_SECONDS >= _MIN_SPEECH_SECONDS
            and len(_AUDIO_BUF) > 0
        )
        if not utterance_complete:
            return _still_listening()
        utterance = np.concatenate(_AUDIO_BUF)
        utterance_sr = _AUDIO_SR
        print(
            f"[PregoPal Duplex] utterance ready dur={len(utterance)/max(utterance_sr,1):.2f}s sr={utterance_sr}",
            flush=True,
        )
        _reset_duplex_buffers()
        return _call_duplex_backend(utterance, chat_history, sr=utterance_sr)

    # Speech: extend the buffer and restart the silence timer.
    _SPEECH_SECONDS += chunk_seconds
    _SILENT_SECONDS = 0.0
    _AUDIO_BUF.append(samples)
    buffered_samples = sum(len(part) for part in _AUDIO_BUF)
    total_seconds = buffered_samples / max(float(_AUDIO_SR), 1.0)
    if total_seconds < _MAX_UTTERANCE_SECONDS:
        return chat_history, _thinking_html("🔊 正在听..."), _keep_audio()

    utterance = np.concatenate(_AUDIO_BUF)
    utterance_sr = _AUDIO_SR
    print(f"[PregoPal Duplex] max utterance reached dur={total_seconds:.2f}s", flush=True)
    _reset_duplex_buffers()
    return _call_duplex_backend(utterance, chat_history, sr=utterance_sr)
def _call_duplex_backend(audio_np, chat_history, sr=_TARGET_SR):
    """Send one buffered utterance to the voice backend and build the UI update.

    The audio is down-mixed to mono, resampled to ``_TARGET_SR`` when
    ``librosa`` is available, clipped to [-1, 1], written to a temporary
    16-bit WAV file and passed to ``chat_voice``. On success the reply text is
    appended to ``chat_history`` (falling back to ``chat_text`` when the
    backend returns no usable text) and any returned audio is written to a
    temporary WAV file for playback.

    Args:
        audio_np: utterance samples as a NumPy array.
        chat_history: chat history list; it is appended to in place.
        sr: sample rate of ``audio_np``.

    Returns:
        A 3-tuple ``(chat_history, status_html, audio)`` where ``audio`` is
        the path of the reply WAV file, or a no-op update when there is no
        reply audio. Any exception is caught and reported in ``status_html``.
    """
    try:
        import base64
        import os
        import tempfile

        import soundfile as sf

        if len(audio_np.shape) > 1:
            audio_np = audio_np.mean(axis=1)
        if int(sr) != _TARGET_SR:
            try:
                import librosa
                audio_np = librosa.resample(audio_np, orig_sr=int(sr), target_sr=_TARGET_SR)
                sr = _TARGET_SR
            except Exception as e:
                # Best effort: fall back to sending the audio at its own rate.
                print(f"[PregoPal Duplex] resample failed, using original sr={sr}: {e}", flush=True)
        audio_np = np.clip(audio_np.astype(np.float32), -1.0, 1.0)

        fd, path = tempfile.mkstemp(suffix=".wav")
        os.close(fd)
        try:
            sf.write(path, audio_np, int(sr), format="WAV", subtype="PCM_16")
            result = chat_voice(path)
        finally:
            # Only filesystem errors are ignored here; a bare ``except`` would
            # also swallow KeyboardInterrupt/SystemExit.
            try:
                os.remove(path)
            except OSError:
                pass

        if not result.get("success"):
            return chat_history, _thinking_html("🎤 聆听中..."), _keep_audio()

        print(
            f"[PregoPal Duplex] backend success round={result.get('round')} "
            f"audio_files={result.get('audio_files')} audio_b64_len={len(result.get('audio_base64') or '')}",
            flush=True,
        )
        # ``or ""`` also covers an explicit ``"text": None`` in the result,
        # which ``.get("text", "")`` alone would not.
        ai_text = (result.get("text") or "").strip()
        if not ai_text or len(ai_text) < 2:
            # No usable text came back: ask the text model for a reply using
            # the last few turns as context.
            sp = "你是PregoPal孕期营养健康顾问。请用中文简短回答。"
            msgs = [{"role": "system", "content": sp}]
            for h in chat_history[-4:]:
                msgs.append({"role": "user", "content": h[0] if h[0] else "..."})
                msgs.append({"role": "assistant", "content": h[1] if h[1] else "..."})
            msgs.append({"role": "user", "content": "我刚刚和你说话(语音输入)"})
            ai_text = chat_text(msgs)
        chat_history.append(["🗣️ 您", ai_text])

        if "[EXTRACT_DIET]" in ai_text:
            extractor = DietExtractor()
            extracted = extractor.extract_all(ai_text)
            diet_logger.log_diet(
                member_id="ai",
                member_name="AI识别",
                meals=extracted.get("meals", {}),
                notes=f"语音对话: {datetime.date.today()}",
            )

        if result.get("audio_base64"):
            # The reply file is returned by path and read after this function
            # returns, so it cannot be removed here.
            # NOTE(review): nothing in this file cleans these files up.
            fd_out, out_path = tempfile.mkstemp(suffix=".wav")
            os.close(fd_out)
            with open(out_path, "wb") as f:
                f.write(base64.b64decode(result["audio_base64"]))
            return (chat_history, _thinking_html("🎤 聆听中..."), out_path)
        return chat_history, _thinking_html("🎤 聆听中..."), _keep_audio()
    except Exception as e:
        traceback.print_exc()
        return chat_history, _thinking_html(f"错误: {str(e)[:50]}"), _keep_audio()
def _thinking_html(text: str) -> str:
"""AI 思考状态的 HTML"""
color = "#6A1B9A" if "思考" in text or "Thinking" in text else "#888"
animation = ""
if "思考" in text or "Thinking" in text or "聆听" in text or "Listening" in text:
animation = '<span class="thinking-dot">.</span>'.replace(".",
'<span style="animation:blink 1.4s infinite">.</span>')
return f"""
<div class="thinking-box">
<div style="font-weight:600;color:{color};">{text}</div>
</div>
"""
# ============================================================
# 工具:首页语言文本
# ============================================================
def _home_texts(lang):
"""首页语言文本"""
if lang == "zh":
return {
"title": "# 🌸 欢迎来到 PregoPal",
"subtitle": "### 你的孕期AI伴侣,时刻陪伴在你身边",
"tap_speak": "🎙️ 点击说话",
"tap_hint": "🎤 按住录音,松开发送",
"trimester_label": "孕期阶段",
"nutrition_focus": "营养关注",
"today_diet": "昨日饮食",
"family_recipe": "家庭菜谱",
"weight_label": "体重管理",
"thinking_label": "💬 AI 对话",
"thinking_waiting": "等待对话中...",
"recent_title": "最近的饮食记录",
"meal_count_unit": "餐",
"no_data": "暂无数据",
"no_record": "还没有记录,开始对话吧!",
"recipes_unit": "道家常菜",
"no_recipe": "还没有菜谱",
}
else:
return {
"title": "# 🌸 Welcome to PregoPal",
"subtitle": "### Your AI pregnancy companion, always by your side",
"tap_speak": "🎙️ Tap to Speak",
"tap_hint": "🎤 Hold to record, release to send",
"trimester_label": "Trimester",
"nutrition_focus": "Nutrition Focus",
"today_diet": "Yesterday's Diet",
"family_recipe": "Family Recipes",
"weight_label": "Weight",
"thinking_label": "💬 AI Chat",
"thinking_waiting": "Waiting for conversation...",
"recent_title": "Recent Diet Records",
"meal_count_unit": " meals",
"no_data": "No data",
"no_record": "No records yet. Start a conversation!",
"recipes_unit": " recipes",
"no_recipe": "No recipes yet",
}
# ============================================================
# 工具:最近记录渲染为 HTML 表格
# ============================================================
def _render_recent_records_html(records, lang="zh"):
"""用 HTML 表格展示最近饮食记录"""
if not records:
T = _home_texts(lang)
return f'<div style="color:#999;padding:12px;text-align:center;font-size:15px;">{T["no_record"]}</div>'
rows = ""
for r in records[-6:]:
date = r.get("date", "")[-5:] if r.get("date") else ""
member = r.get("member_name", "") or ""
meals = r.get("meals", {})
meal_parts = []
for meal_time, food in meals.items():
meal_parts.append(f"<b>{meal_time}</b>&nbsp;{food}")
meal_str = "&nbsp;|&nbsp;".join(meal_parts) if meal_parts else "—"
rows += f"""
<tr>
<td style="white-space:nowrap;color:#E91E63;font-weight:500;">{date}</td>
<td style="white-space:nowrap;">{member}</td>
<td>{meal_str}</td>
</tr>"""
label_date = "日期" if lang == "zh" else "Date"
label_member = "成员" if lang == "zh" else "Member"
label_meals = "餐食" if lang == "zh" else "Meals"
return f"""
<div style="overflow-x:auto;">
<table style="width:100%;border-collapse:collapse;font-size:14px;font-family:inherit;">
<thead>
<tr style="background:#FCE4EC;color:#880E4F;">
<th style="padding:10px 12px;text-align:left;border-radius:8px 0 0 0;">{label_date}</th>
<th style="padding:10px 12px;text-align:left;">{label_member}</th>
<th style="padding:10px 12px;text-align:left;border-radius:0 8px 0 0;">{label_meals}</th>
</tr>
</thead>
<tbody>
{rows}
</tbody>
</table>
</div>
"""
# ============================================================
# Tab 1: 🏠 首页内容
# ============================================================
def _home_content(loop, lang):
    """Build the home tab: duplex voice panel, summary cards, recent records.

    Must be called inside an active Gradio Blocks context. Besides creating
    the components it wires two events: the toggle button, which starts and
    stops duplex mode, and the microphone stream, which feeds
    ``handle_stream_chunk``.

    Args:
        loop: passed through to ``get_home_cards``.
        lang: ``"zh"`` for Chinese UI text, anything else for English.
    """
    T = _home_texts(lang)
    cards = get_home_cards(loop)
    is_zh = lang == "zh"

    # Labels that differ by language.
    start_label = "🎙️ 点击开始全双工对话" if is_zh else "🎙️ Start duplex voice chat"
    stop_label = "🔴 结束全双工对话" if is_zh else "🔴 End duplex voice chat"
    recommend_prefix = "🍽️ 推荐:" if is_zh else "🍽️ Recommended: "

    # Nutrition focus
    nutrition_display = "、".join(cards["focus_nutrients"][:5]) if cards["focus_nutrients"] else T["no_data"]
    if cards["recommended_foods"]:
        food_str = "、".join(cards["recommended_foods"][:5])
        nutrition_display += f"\n\n{recommend_prefix}{food_str}"

    # Yesterday's diet
    diet_display = cards["yesterday_summary"]
    if cards["meal_count"] > 0:
        diet_display = f"{diet_display}\n({cards['meal_count']}{T['meal_count_unit']})"

    # Family recipes
    if cards["recipe_count"] > 0:
        recipe_names = "、".join(cards["recipe_names"])
        recipe_display = f"{cards['recipe_count']}{T['recipes_unit']}{recipe_names}"
    else:
        recipe_display = T["no_recipe"]

    # Weight
    weight_display = cards["weight_status"]
    if cards["weight_trend"]:
        weight_display += f" | {cards['weight_trend']}"

    records = diet_logger.get_recent_records(days=3)
    recent_html = _render_recent_records_html(records, lang)

    with gr.Column(elem_classes=["home-container"]):
        gr.Markdown(T["title"])
        gr.Markdown(T["subtitle"])

        # Duplex voice panel, driven by Gradio microphone streaming.
        gr.HTML("""<style>
.voice-duplex-panel {
background: linear-gradient(135deg, #fff7ed 0%, #f0fdfa 55%, #eef2ff 100%);
border: 1px solid rgba(15, 23, 42, 0.08);
border-radius: 18px;
padding: 18px;
margin: 12px 0;
box-shadow: 0 18px 40px rgba(15, 23, 42, 0.08);
}
.duplex-btn {
font-size: 1.05rem !important;
font-weight: 800 !important;
min-height: 52px !important;
}
.voice-input {
border-radius: 16px !important;
background: rgba(255, 255, 255, 0.72) !important;
}
.voice-input [data-testid="audio-upload"],
.voice-input .audio-upload,
.voice-input .file-preview {
display: none !important;
}
.voice-output audio {
width: 100% !important;
}
</style>""")
        with gr.Group(elem_classes=["voice-duplex-panel"]):
            with gr.Row():
                # Left: toggle button, microphone stream, reply playback.
                with gr.Column(scale=1, min_width=320):
                    duplex_btn = gr.Button(
                        start_label,
                        elem_classes=["duplex-btn"],
                        variant="primary",
                        size="lg",
                    )
                    audio_input = gr.Microphone(
                        type="numpy",
                        streaming=True,
                        label="",
                        show_label=False,
                        container=True,
                        elem_classes=["voice-input"],
                        recording=False,
                        interactive=True,
                    )
                    audio_output = gr.Audio(
                        type="filepath",
                        autoplay=True,
                        visible=True,
                        label="🔊 AI 回复",
                        show_label=False,
                        container=True,
                        elem_classes=["voice-output"],
                    )
                # Right: conversation display.
                with gr.Column(scale=2, min_width=400):
                    chat_box = gr.Chatbot(
                        label=T.get("thinking_label", "💬 AI 对话"),
                        height=320,
                        show_label=True,
                        elem_classes=["chat-box"],
                    )
            # Status line.
            # NOTE(review): placed inside the voice panel below the two
            # columns; confirm this is the intended container.
            thinking_display = gr.HTML(
                _thinking_html("🔄 点击「开始对话」进入全双工语音交互" if is_zh else "🔄 Tap 'Start' for duplex voice")
            )

        # Card row 1
        with gr.Row():
            with gr.Column(scale=1):
                with gr.Group(elem_classes=["home-card", "card-trimester"]):
                    gr.Markdown(f"### 📅 {T['trimester_label']}")
                    gr.Markdown(f"**{cards['trimester']}**")
            with gr.Column(scale=1):
                with gr.Group(elem_classes=["home-card", "card-nutrition"]):
                    gr.Markdown(f"### 🥗 {T['nutrition_focus']}")
                    gr.Markdown(nutrition_display)
            with gr.Column(scale=1):
                with gr.Group(elem_classes=["home-card", "card-diet"]):
                    gr.Markdown(f"### 🍽️ {T['today_diet']}")
                    gr.Markdown(diet_display)

        # Card row 2
        with gr.Row():
            with gr.Column(scale=1):
                with gr.Group(elem_classes=["home-card", "card-recipe"]):
                    gr.Markdown(f"### 🍳 {T['family_recipe']}")
                    gr.Markdown(recipe_display)
            with gr.Column(scale=1):
                with gr.Group(elem_classes=["home-card", "card-weight"]):
                    gr.Markdown(f"### ⚖️ {T['weight_label']}")
                    gr.Markdown(weight_display)

        # Recent diet records
        with gr.Group(elem_classes=["home-card"]):
            gr.Markdown(f"### 📋 {T['recent_title']}")
            gr.HTML(recent_html)

    # ── Event wiring (full duplex) ──
    def toggle_duplex_v2(state):
        """Start or stop duplex mode based on this session's current state.

        Returns updates for (button, status HTML, microphone, state).
        """
        global _DUPLEX_ACTIVE
        # Derive the new value from the session state shown in the UI rather
        # than flipping the process-wide flag: this function returns a fresh
        # False/True into ``duplex_state`` each time, while the module flag
        # can still hold True from before the components were rebuilt, which
        # made the first click report "ended".
        _DUPLEX_ACTIVE = not state
        _reset_duplex_buffers()
        if not _DUPLEX_ACTIVE:
            return (
                gr.update(value=start_label),
                _thinking_html("✅ 已退出全双工模式" if is_zh else "✅ Duplex ended"),
                gr.update(recording=False),
                False,
            )
        return (
            gr.update(value=stop_label),
            _thinking_html("🎤 正在持续聆听,说完停顿一下我会自动回复" if is_zh else "🎤 Listening continuously"),
            gr.update(recording=True),
            True,
        )

    duplex_state = gr.State(value=False)
    duplex_btn.click(
        fn=toggle_duplex_v2,
        inputs=[duplex_state],
        outputs=[duplex_btn, thinking_display, audio_input, duplex_state],
        queue=False,
    )
    # Streaming VAD
    audio_input.stream(
        fn=handle_stream_chunk,
        inputs=[audio_input, chat_box],
        outputs=[chat_box, thinking_display, audio_output],
        stream_every=0.3,
    )
# ============================================================
# Tab 2: 👨‍👩‍👧‍👦 家庭饮食习惯
# ============================================================
def _family_content(lang):
    """Build the read-only family eating-habits tab.

    Three sub-tabs (recipes, preferences, memories) each show a rendered HTML
    panel and a refresh button that re-renders that panel.
    """
    def _make_refresher(render_fn):
        # Zero-argument callback that re-renders one panel in this language.
        def _refresh():
            return render_fn(lang)
        return _refresh

    sections = (
        ("tab_recipes", render_family_recipes_html),
        ("tab_preferences", render_family_preferences_html),
        ("tab_memory", render_family_memories_html),
    )

    with gr.Column(elem_classes=["glass-card"]):
        gr.Markdown(f"## {t('family_title', lang)}")
        gr.Markdown(f"*{t('family_subtitle', lang)}*")
        with gr.Tabs(selected=0):
            for tab_key, render_fn in sections:
                with gr.Tab(t(tab_key, lang)):
                    panel = gr.HTML(render_fn(lang))
                    refresh_btn = gr.Button(t("btn_refresh", lang), size="sm")
                    refresh_btn.click(
                        fn=_make_refresher(render_fn),
                        outputs=panel,
                    )
# ============================================================
# Tab 3: 📈 营养报告
# ============================================================
def _report_content(lang):
    """Build the nutrition-report tab.

    A slider (days to analyse) and a radio (trimester) drive an HTML report
    that is regenerated whenever either control changes.

    Args:
        lang: ``"zh"`` for Chinese UI text, anything else for English.
    """
    # Sequence number of the latest change event, shared by both handlers.
    _change_seq = [0]
    is_zh = lang == "zh"
    trimester_choices = ZH["trimester_choices"] if is_zh else EN["trimester_choices"]
    placeholder = "调整上方参数自动生成报告" if is_zh else "Adjust the parameters above to generate a report"

    with gr.Column():
        with gr.Row():
            with gr.Column(scale=2, min_width=200):
                analysis_days = gr.Slider(
                    label=t("analysis_days", lang),
                    minimum=1, maximum=30, value=7, step=1,
                )
            with gr.Column(scale=1, min_width=160):
                trimester_select = gr.Radio(
                    label=t("trimester_label", lang),
                    choices=trimester_choices,
                    value=trimester_choices[1],
                )
        report_dashboard = gr.HTML(
            '<div style="padding:40px;text-align:center;color:#bbb;font-size:16px;">'
            f'{placeholder}</div>'
        )

    def generate_full_report(days, trimester):
        """Analyse the last ``days`` days of records and render the report."""
        records = diet_logger.get_recent_records(days=int(days))
        analysis = nutrition_analyzer.analyze_diet(records)
        try:
            ctx = LoopContext()
            ctx.briefing["trimester"] = trimester
            asyncio.run(ThreeDaySummaryPlugin().run(ctx))
            deficit_data = ctx.briefing.get("three_day_summary", {})
        except Exception:
            # Best effort: the report is still rendered without the deficit
            # section, but the failure is logged instead of silently dropped.
            traceback.print_exc()
            deficit_data = {}
        return render_nutrition_report_html(analysis, days=int(days), lang=lang, deficit_data=deficit_data)

    def on_slider_change(days, trimester):
        """Debounced change handler: only the latest change builds a report."""
        _change_seq[0] += 1
        ticket = _change_seq[0]
        time.sleep(0.8)
        # A newer change arrived while waiting: leave the output untouched.
        # (Comparing elapsed time against the shared timestamp cannot detect
        # this, because a newer event only makes the elapsed time smaller.)
        if _change_seq[0] != ticket:
            return gr.skip()
        return generate_full_report(days, trimester)

    analysis_days.change(
        fn=on_slider_change,
        inputs=[analysis_days, trimester_select],
        outputs=[report_dashboard],
    )
    trimester_select.change(
        fn=on_slider_change,
        inputs=[analysis_days, trimester_select],
        outputs=[report_dashboard],
    )
# ============================================================
# 主入口
# ============================================================
def create_app(loop=None):
    """Create the main Gradio app: header, language switch and three tabs.

    The tabs are rebuilt by a ``gr.render`` function whenever the language
    state changes.

    Args:
        loop: passed through to the home tab builder.

    Returns:
        The ``gr.Blocks`` application.
    """
    header_html = """
<div style="text-align: left; padding: 8px 0;">
<h1 style="margin: 0; font-size: 28px; font-family: 'Nunito', 'Quicksand', system-ui, sans-serif;">🌸 PregoPal</h1>
<p style="color: #999; margin: 4px 0 0 0; font-size: 16px; font-family: 'Nunito', 'Quicksand', system-ui, sans-serif;">
孕期陪护AI助手 — 全双工语音对话
<br><span style="font-size: 14px; color: #bbb;">Pregnancy Companion AI — Full Duplex Voice Chat</span>
</p>
</div>
<link href="https://fonts.googleapis.com/css2?family=Nunito:wght@400;600;700;800&family=Quicksand:wght@400;500;600;700&display=swap" rel="stylesheet">
"""

    with gr.Blocks(title="PregoPal - 孕期陪护AI助手") as demo:
        lang_state = gr.State(value="zh")

        # Top bar: title on the left, language selector on the right.
        with gr.Row():
            with gr.Column(scale=4):
                gr.HTML(header_html)
            with gr.Column(scale=1, min_width=120):
                lang_selector = gr.Radio(
                    label="🌐 Language",
                    choices=["中文", "English"],
                    value="中文",
                    interactive=True,
                )

        def switch_lang(choice):
            # Map the radio label to the language code used by the builders.
            if choice == "中文":
                return "zh"
            return "en"

        lang_selector.change(fn=switch_lang, inputs=[lang_selector], outputs=[lang_state])

        @gr.render(inputs=[lang_state])
        def render_all(lang):
            # (icon, label key, tab id, builder) for each page, in tab order.
            pages = (
                ("🏠", "tab_home_suffix", "tab_home", lambda: _home_content(loop, lang)),
                ("👨‍👩‍👧‍👦", "tab_family_suffix", "tab_family", lambda: _family_content(lang)),
                ("📈", "tab_report_suffix", "tab_report", lambda: _report_content(lang)),
            )
            with gr.Tabs():
                for icon, label_key, tab_id, build_page in pages:
                    with gr.Tab(f"{icon} {t(label_key, lang)}", id=tab_id):
                        build_page()

    return demo