Spaces:
Sleeping
Sleeping
| """Streamlit UI for the Speech AI Agent (WebSocket streaming).""" | |
| from __future__ import annotations | |
| import base64 | |
| import json | |
| import os | |
| import shutil | |
| from datetime import datetime | |
| from pathlib import Path | |
| from typing import Any | |
| import httpx | |
| import streamlit as st | |
| from streamlit import components | |
# Optional endpoint overrides read from the environment. Surrounding
# whitespace is stripped, so an unset or blank variable yields "".
WS_URL = os.environ.get("SPEECH_AGENT_WS_URL", "").strip()
HTTP_URL = os.environ.get("SPEECH_AGENT_HTTP_URL", "").strip()

# On-disk locations, anchored two levels above this file.
BASE_DIR = Path(__file__).resolve().parent.parent
DATA_DIR = BASE_DIR / "data"
VECTOR_DIR = DATA_DIR / "vector_store"
def _clear_session_data() -> None:
    """Empty the session directories and leave them in place.

    ``DATA_DIR`` and ``VECTOR_DIR`` are handled in that order: an existing
    tree is removed (removal errors are ignored) and the directory is then
    created again, including any missing parents.
    """
    for directory in (DATA_DIR, VECTOR_DIR):
        already_there = directory.exists()
        if already_there:
            shutil.rmtree(directory, ignore_errors=True)
        # VECTOR_DIR lives under DATA_DIR, so it is gone after the first
        # pass and must be recreated here.
        directory.mkdir(parents=True, exist_ok=True)
def _http_base(http_url: str | None = None, ws_url: str | None = None) -> str:
    """Return the backend's HTTP base URL without a trailing slash.

    Resolution order:

    1. An explicit HTTP URL, if non-empty.
    2. The WebSocket URL with its scheme mapped ``ws`` -> ``http`` /
       ``wss`` -> ``https`` and a trailing ``/ws/voice`` path removed.
    3. ``http://localhost:8000``.

    Args:
        http_url: HTTP base URL override. ``None`` (the default) means use
            the module-level ``HTTP_URL``.
        ws_url: WebSocket endpoint URL. ``None`` (the default) means use
            the module-level ``WS_URL``.

    Returns:
        The base URL, never ending in ``/``.
    """
    http_url = HTTP_URL if http_url is None else http_url.strip()
    ws_url = WS_URL if ws_url is None else ws_url.strip()

    if http_url:
        return http_url.rstrip("/")

    if ws_url:
        # Drop trailing slashes first so ".../ws/voice/" is still recognised
        # as the voice endpoint below.
        base = ws_url.rstrip("/")
        # Rewrite only the scheme prefix; a "ws://" occurring later in the
        # URL (e.g. inside a query string) must stay untouched.
        if base.startswith("wss://"):
            base = "https://" + base[len("wss://"):]
        elif base.startswith("ws://"):
            base = "http://" + base[len("ws://"):]
        if base.endswith("/ws/voice"):
            base = base[: -len("/ws/voice")]
        return base.rstrip("/")

    return "http://localhost:8000"
def _avatar_svg(kind: str, small: bool = False) -> str:
    """Build an ``<img>`` tag whose source is an inline SVG avatar.

    Args:
        kind: ``"agent"`` selects the robot face; any other value selects
            the person face. The value is also written into the ``alt``
            attribute as-is.
        small: When true the image is 36px square, otherwise 78px.

    Returns:
        The ``<img>`` markup with the SVG embedded as a ``data:`` URI.
        Colours are written with ``%23`` in place of ``#``.
    """
    size = 36 if small else 78

    if kind == "agent":
        shapes = (
            "<rect x='26' y='30' width='48' height='44' rx='10' fill='%2397b3ff'/>",
            "<rect x='34' y='38' width='32' height='18' rx='6' fill='%23ffffff'/>",
            "<circle cx='42' cy='47' r='3' fill='%23333'/>",
            "<circle cx='58' cy='47' r='3' fill='%23333'/>",
            "<rect x='40' y='60' width='20' height='6' rx='3' fill='%238090d6'/>",
            "<rect x='46' y='24' width='8' height='8' rx='2' fill='%238090d6'/>",
        )
    else:
        skin = "%23f1c7a9"
        hair = "%232f3557"
        shapes = (
            f"<circle cx='50' cy='54' r='28' fill='{skin}'/>",
            f"<path d='M22 44 Q50 15 78 44' fill='{hair}'/>",
            "<circle cx='40' cy='55' r='3' fill='%23333'/>",
            "<circle cx='60' cy='55' r='3' fill='%23333'/>",
            "<path d='M42 67 Q50 72 58 67' stroke='%23333' stroke-width='3' fill='none'/>",
        )

    # Both avatars share the same wrapper and pale circular background.
    opening = (
        f"<img alt='{kind}' src=\"data:image/svg+xml;utf8,"
        f"<svg xmlns='http://www.w3.org/2000/svg' width='{size}' height='{size}' viewBox='0 0 100 100'>"
        "<circle cx='50' cy='50' r='50' fill='%23e7ecff'/>"
    )
    closing = "</svg>\" />"
    return opening + "".join(shapes) + closing
# Wide layout with a fixed browser-tab title.
st.set_page_config(page_title="Speech AI Agent", layout="wide")

# Markup injected once per run: KaTeX assets from jsDelivr, plus CSS that
# hides Streamlit's menu, footer, header and sidebar and removes the
# container padding and iframe border.
# NOTE(review): confirm that <script> tags passed through st.markdown are
# actually executed by the Streamlit version in use.
_PAGE_HEAD_HTML = """
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.16.10/dist/katex.min.css">
<script defer src="https://cdn.jsdelivr.net/npm/katex@0.16.10/dist/katex.min.js"></script>
<script defer src="https://cdn.jsdelivr.net/npm/katex@0.16.10/dist/contrib/auto-render.min.js"></script>
<style>
#MainMenu, footer, header {visibility: hidden;}
.block-container {padding: 0;}
iframe {border: none;}
[data-testid="stSidebar"] {display: none;}
</style>
"""
st.markdown(_PAGE_HEAD_HTML, unsafe_allow_html=True)
# Seed each session-state key the first time the script runs for a
# session; values already present are left untouched on reruns.
for _key, _default in (
    ("state", "muted"),
    ("messages", []),
    ("last_audio_b64", None),
    ("show_player", False),
    ("ui_provider", "azure_openai"),
):
    if _key not in st.session_state:
        st.session_state[_key] = _default
def _json_for_script(value: Any) -> str:
    """Serialize *value* as JSON that is safe inside a ``<script>`` element.

    ``json.dumps`` leaves ``<``, ``>`` and ``&`` as-is, so a string such as
    ``"</script>"`` would terminate an inline script early. Those three
    characters are rewritten as ``\\uXXXX`` escapes, which a JavaScript or
    JSON parser decodes back to the original text.

    Args:
        value: Any JSON-serializable object.

    Returns:
        The JSON text with ``<``, ``>`` and ``&`` escaped.
    """
    return (
        json.dumps(value)
        .replace("<", "\\u003c")
        .replace(">", "\\u003e")
        .replace("&", "\\u0026")
    )


# JSON payloads for the embedded page; presumably substituted for the
# __MESSAGES_JSON__ / __STATE_JSON__ / __PROVIDER_JSON__ / __WS_URL_JSON__ /
# __HTTP_URL_JSON__ placeholders in its inline script. Message text comes
# from transcripts and model replies, so it is treated as untrusted.
messages_json = _json_for_script(st.session_state.messages)
state_json = _json_for_script(st.session_state.state)
provider_json = _json_for_script(st.session_state.ui_provider)
# An empty override is emitted as JSON null.
ws_url_json = _json_for_script(WS_URL if WS_URL else None)
http_url_json = _json_for_script(HTTP_URL if HTTP_URL else None)
| html = """ | |
| <!doctype html> | |
| <html> | |
| <head> | |
| <meta charset="utf-8" /> | |
| <meta name="viewport" content="width=device-width, initial-scale=1" /> | |
| <style> | |
| :root { | |
| --navy-1: #0c1540; | |
| --navy-2: #0f1e57; | |
| --card: #1c2555; | |
| --frost: rgba(240, 244, 255, 0.75); | |
| --text-light: #eef2ff; | |
| --text-muted: #c7d2fe; | |
| --blue: #6f8ef6; | |
| --blue-2: #7aa5ff; | |
| --bubble-light: #f8f9ff; | |
| --shadow: 0 18px 50px rgba(8, 13, 40, 0.45); | |
| } | |
| html, body { | |
| margin: 0; | |
| padding: 0; | |
| width: 100%; | |
| height: 100%; | |
| font-family: 'Poppins', 'Segoe UI', sans-serif; | |
| background: radial-gradient(circle at top, #1a275f 0%, #0b1130 55%, #070a20 100%); | |
| } | |
| .page { | |
| min-height: 100vh; | |
| display: flex; | |
| align-items: center; | |
| justify-content: center; | |
| padding: 32px 16px 48px; | |
| box-sizing: border-box; | |
| } | |
| .card { | |
| width: min(980px, 95vw); | |
| background: linear-gradient(135deg, #1a2353 0%, #1b2559 45%, #1f2a64 100%); | |
| border-radius: 28px; | |
| box-shadow: var(--shadow); | |
| padding: 28px; | |
| } | |
| .header { | |
| display: flex; | |
| align-items: center; | |
| justify-content: space-between; | |
| color: var(--text-light); | |
| margin-bottom: 22px; | |
| gap: 16px; | |
| } | |
| .title { | |
| font-size: 46px; | |
| font-weight: 700; | |
| letter-spacing: 0.3px; | |
| } | |
| .powered { | |
| font-size: 12px; | |
| letter-spacing: 2px; | |
| text-transform: uppercase; | |
| color: #cdd7ff; | |
| } | |
| .provider-toggle { | |
| display: inline-flex; | |
| background: rgba(255, 255, 255, 0.08); | |
| border-radius: 999px; | |
| padding: 4px; | |
| gap: 4px; | |
| } | |
| .provider-btn { | |
| border: none; | |
| background: transparent; | |
| color: #cdd7ff; | |
| padding: 8px 14px; | |
| border-radius: 999px; | |
| font-size: 12px; | |
| letter-spacing: 1.5px; | |
| text-transform: uppercase; | |
| cursor: pointer; | |
| } | |
| .provider-btn.active { | |
| background: #6f8ef6; | |
| color: #fff; | |
| box-shadow: 0 6px 16px rgba(40, 60, 140, 0.35); | |
| } | |
| .header-actions { | |
| display: inline-flex; | |
| align-items: center; | |
| gap: 12px; | |
| flex-wrap: wrap; | |
| } | |
| .reset-btn { | |
| border: none; | |
| background: rgba(255, 255, 255, 0.12); | |
| color: #e8edff; | |
| padding: 8px 14px; | |
| border-radius: 999px; | |
| font-size: 11px; | |
| letter-spacing: 1px; | |
| text-transform: uppercase; | |
| cursor: pointer; | |
| transition: transform 0.2s ease, background 0.2s ease; | |
| } | |
| .reset-btn:hover { | |
| transform: translateY(-1px); | |
| background: rgba(255, 255, 255, 0.2); | |
| } | |
| .upload-btn { | |
| border: none; | |
| background: rgba(255, 255, 255, 0.12); | |
| color: #e8edff; | |
| width: 40px; | |
| height: 40px; | |
| border-radius: 50%; | |
| cursor: pointer; | |
| display: inline-flex; | |
| align-items: center; | |
| justify-content: center; | |
| transition: transform 0.2s ease, background 0.2s ease; | |
| } | |
| .upload-btn:hover { | |
| transform: translateY(-1px); | |
| background: rgba(255, 255, 255, 0.2); | |
| } | |
| .upload-btn.hidden { | |
| display: none; | |
| } | |
| .panel { | |
| background: var(--frost); | |
| border-radius: 24px; | |
| padding: 24px 24px 28px; | |
| backdrop-filter: blur(14px); | |
| display: flex; | |
| flex-direction: column; | |
| gap: 18px; | |
| height: min(680px, 72vh); | |
| overflow: hidden; | |
| } | |
| .top-row { | |
| display: grid; | |
| grid-template-columns: 120px minmax(140px, 1fr) 120px; | |
| align-items: center; | |
| gap: 12px; | |
| } | |
| .top-avatar { | |
| display: flex; | |
| flex-direction: column; | |
| align-items: center; | |
| gap: 6px; | |
| } | |
| .avatar { | |
| width: 86px; | |
| height: 86px; | |
| border-radius: 50%; | |
| display: flex; | |
| align-items: center; | |
| justify-content: center; | |
| background: #e6ebff; | |
| box-shadow: inset 0 0 0 6px #f2f5ff; | |
| } | |
| .avatar img { | |
| width: 78px; | |
| height: 78px; | |
| border-radius: 50%; | |
| } | |
| .status { | |
| font-size: 18px; | |
| color: #6c74a8; | |
| margin-top: 10px; | |
| margin-left: 6px; | |
| } | |
| .waveform { | |
| display: flex; | |
| align-items: center; | |
| justify-content: center; | |
| gap: 4px; | |
| height: 120px; | |
| max-width: 100%; | |
| margin: 0 auto; | |
| overflow: hidden; | |
| } | |
| .wave-bar { | |
| width: 2px; | |
| border-radius: 10px; | |
| background: linear-gradient(180deg, #ff6b6b 0%, #ffb4b4 100%); | |
| height: 12px; | |
| opacity: 0.6; | |
| } | |
| .listening .wave-bar { | |
| animation: pulse 0.5s ease-in-out infinite; | |
| } | |
| .waveform.realtime .wave-bar { | |
| animation: none; | |
| } | |
| @keyframes pulse { | |
| 0% { height: 10px; opacity: 0.4; } | |
| 50% { height: 70px; opacity: 1; } | |
| 100% { height: 16px; opacity: 0.5; } | |
| } | |
| .conversation { | |
| display: flex; | |
| flex-direction: column; | |
| gap: 20px; | |
| flex: 1; | |
| overflow-y: auto; | |
| } | |
| .bubble-row { | |
| display: flex; | |
| align-items: flex-start; | |
| gap: 12px; | |
| width: 100%; | |
| box-sizing: border-box; | |
| } | |
| .bubble-row.user { | |
| justify-content: flex-end; | |
| } | |
| .bubble-row.agent { | |
| justify-content: flex-start; | |
| } | |
| .bubble { | |
| max-width: 60%; | |
| padding: 16px 20px; | |
| border-radius: 16px; | |
| font-size: 18px; | |
| line-height: 1.35; | |
| box-shadow: 0 10px 20px rgba(70, 78, 140, 0.15); | |
| } | |
| .bubble.user { | |
| background: linear-gradient(135deg, #7aa1ff 0%, #6b85ea 100%); | |
| color: #ffffff; | |
| max-width: 60%; | |
| } | |
| .bubble.agent { | |
| background: var(--bubble-light); | |
| color: #3f4a7d; | |
| max-width: 55%; | |
| } | |
| .meta { | |
| font-size: 12px; | |
| color: #7f87b8; | |
| margin-top: 6px; | |
| } | |
| .mute-row { | |
| margin-top: 28px; | |
| display: flex; | |
| justify-content: center; | |
| } | |
| .mute-btn { | |
| width: 108px; | |
| height: 108px; | |
| border-radius: 50%; | |
| border: none; | |
| background: #eef1ff; | |
| display: flex; | |
| align-items: center; | |
| justify-content: center; | |
| box-shadow: 0 10px 28px rgba(77, 86, 150, 0.25); | |
| cursor: pointer; | |
| } | |
| .mute-btn svg { | |
| width: 44px; | |
| height: 44px; | |
| fill: #8a92c9; | |
| } | |
| .mute-btn.listening { | |
| background: #dbe6ff; | |
| } | |
| .mute-btn.listening svg { | |
| fill: #5e75dc; | |
| } | |
| .small-avatar { | |
| width: 44px; | |
| height: 44px; | |
| border-radius: 50%; | |
| background: #e6ebff; | |
| display: flex; | |
| align-items: center; | |
| justify-content: center; | |
| box-shadow: inset 0 0 0 3px #f2f5ff; | |
| } | |
| .small-avatar img { | |
| width: 36px; | |
| height: 36px; | |
| border-radius: 50%; | |
| } | |
| @media (max-width: 900px) { | |
| .card { | |
| padding: 20px; | |
| } | |
| .title { | |
| font-size: 34px; | |
| } | |
| .panel { | |
| height: min(640px, 76vh); | |
| padding: 20px; | |
| } | |
| .top-row { | |
| grid-template-columns: 80px minmax(120px, 1fr) 80px; | |
| } | |
| .avatar { | |
| width: 70px; | |
| height: 70px; | |
| } | |
| .avatar img { | |
| width: 62px; | |
| height: 62px; | |
| } | |
| .waveform { | |
| height: 78px; | |
| } | |
| } | |
| @media (max-width: 640px) { | |
| .page { | |
| padding: 20px 12px 32px; | |
| } | |
| .card { | |
| padding: 16px; | |
| border-radius: 22px; | |
| } | |
| .header { | |
| flex-direction: column; | |
| align-items: flex-start; | |
| gap: 10px; | |
| } | |
| .title { | |
| font-size: 28px; | |
| } | |
| .provider-toggle { | |
| align-self: flex-start; | |
| } | |
| .panel { | |
| height: min(620px, 78vh); | |
| padding: 18px; | |
| } | |
| .top-row { | |
| grid-template-columns: 1fr 1fr; | |
| grid-template-areas: | |
| "left right" | |
| "wave wave"; | |
| gap: 10px; | |
| } | |
| .top-avatar.left { | |
| grid-area: left; | |
| justify-self: start; | |
| } | |
| .top-avatar.right { | |
| grid-area: right; | |
| justify-self: end; | |
| } | |
| .waveform { | |
| grid-area: wave; | |
| } | |
| .avatar { | |
| width: 58px; | |
| height: 58px; | |
| } | |
| .avatar img { | |
| width: 50px; | |
| height: 50px; | |
| } | |
| .status { | |
| font-size: 14px; | |
| } | |
| .waveform { | |
| height: 66px; | |
| } | |
| .wave-bar { | |
| width: 2px; | |
| } | |
| .bubble { | |
| font-size: 15px; | |
| padding: 12px 14px; | |
| max-width: 70%; | |
| } | |
| .bubble-row { | |
| gap: 8px; | |
| } | |
| .mute-row { | |
| margin-top: 16px; | |
| } | |
| .mute-btn { | |
| width: 84px; | |
| height: 84px; | |
| } | |
| .mute-btn svg { | |
| width: 34px; | |
| height: 34px; | |
| } | |
| .small-avatar { | |
| width: 36px; | |
| height: 36px; | |
| } | |
| .small-avatar img { | |
| width: 30px; | |
| height: 30px; | |
| } | |
| } | |
| </style> | |
| </head> | |
| <body> | |
| <div class="page"> | |
| <div class="card"> | |
| <div class="header"> | |
| <div class="title">Voice AI Agent</div> | |
| <div class="header-actions"> | |
| <div class="provider-toggle"> | |
| <button id="provider-llm" class="provider-btn active" data-provider="azure_openai">LLM</button> | |
| <button id="provider-agent" class="provider-btn" data-provider="local_agent">Agent</button> | |
| </div> | |
| <button id="upload-btn" class="upload-btn hidden" title="Upload files"> | |
| <svg viewBox="0 0 24 24" width="20" height="20" fill="none" stroke="#e8edff" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"> | |
| <path d="M21 15v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4"/> | |
| <polyline points="17 8 12 3 7 8"/> | |
| <line x1="12" y1="3" x2="12" y2="15"/> | |
| </svg> | |
| </button> | |
| <button id="reset-session" class="reset-btn">Reset Session Data</button> | |
| </div> | |
| </div> | |
| <div class="panel"> | |
| <div class="top-row"> | |
| <div class="top-avatar left"> | |
| <div class="avatar">__AVATAR_AGENT__</div> | |
| <div id="agent-status" class="status"></div> | |
| </div> | |
| <div id="waveform" class="waveform"></div> | |
| <div class="top-avatar right"> | |
| <div class="avatar">__AVATAR_USER__</div> | |
| <div id="status" class="status">Muted</div> | |
| </div> | |
| </div> | |
| <div id="conversation" class="conversation"></div> | |
| <div class="mute-row"> | |
| <button id="mute-toggle" class="mute-btn muted" aria-label="Mute toggle"> | |
| <svg id="mic-icon" viewBox="0 0 24 24"> | |
| <path d="M12 14a3 3 0 0 0 3-3V5a3 3 0 0 0-6 0v6a3 3 0 0 0 3 3zm5-3a5 5 0 0 1-10 0H5a7 7 0 0 0 14 0h-2zm-4 7v3h-2v-3h2z"/> | |
| <path id="mic-slash" d="M4 4l16 16-1.4 1.4L2.6 5.4 4 4z" fill="#8a92c9"/> | |
| </svg> | |
| </button> | |
| </div> | |
| </div> | |
| </div> | |
| </div> | |
| <script> | |
| const INITIAL_MESSAGES = __MESSAGES_JSON__; | |
| const INITIAL_STATE = __STATE_JSON__; | |
| const INITIAL_PROVIDER = __PROVIDER_JSON__; | |
| const ENV_WS_URL = __WS_URL_JSON__; | |
| const ENV_HTTP_URL = __HTTP_URL_JSON__; | |
| const derivedWsUrl = (() => { | |
| let host = ''; | |
| let protocol = 'https:'; | |
| try { | |
| host = window.parent?.location?.host || window.location.host || ''; | |
| protocol = window.parent?.location?.protocol || window.location.protocol || 'https:'; | |
| } catch (err) { | |
| // Cross-origin parent access may fail; fall back to referrer. | |
| } | |
| if (!host && document.referrer) { | |
| try { | |
| const ref = new URL(document.referrer); | |
| host = ref.host; | |
| protocol = ref.protocol; | |
| } catch (err) { | |
| // ignore | |
| } | |
| } | |
| const wsProtocol = protocol === 'https:' ? 'wss' : 'ws'; | |
| if (!host) { | |
| return 'ws://localhost:8000/ws/voice'; | |
| } | |
| if (host.endsWith('.hf.space') || host.endsWith('.hf.co')) { | |
| return `${wsProtocol}://${host}/ws/voice`; | |
| } | |
| const parts = host.split(':'); | |
| const hostname = parts[0]; | |
| const port = parts[1] || ''; | |
| if (hostname === 'localhost' || hostname === '127.0.0.1' || hostname === '0.0.0.0') { | |
| if (port === '8501') { | |
| return `${wsProtocol}://${hostname}:8000/ws/voice`; | |
| } | |
| return `${wsProtocol}://${host}/ws/voice`; | |
| } | |
| return `${wsProtocol}://${host}/ws/voice`; | |
| })(); | |
| const WS_URL = ENV_WS_URL || derivedWsUrl; | |
| const HTTP_BASE = (() => { | |
| if (ENV_HTTP_URL) return ENV_HTTP_URL; | |
| const base = WS_URL.replace('wss://', 'https://').replace('ws://', 'http://'); | |
| if (base.endsWith('/ws/voice')) { | |
| return base.slice(0, -9); | |
| } | |
| try { | |
| const host = window.parent?.location?.host || window.location.host || ''; | |
| const protocol = window.parent?.location?.protocol || window.location.protocol || 'https:'; | |
| if (host) { | |
| const parts = host.split(':'); | |
| const hostname = parts[0]; | |
| const port = parts[1] || ''; | |
| if ((hostname === 'localhost' || hostname === '127.0.0.1') && port === '8501') { | |
| return `${protocol}//${hostname}:8000`; | |
| } | |
| return `${protocol}//${host}`.replace(/\\/$/, ''); | |
| } | |
| } catch (err) { | |
| // ignore | |
| } | |
| return 'http://localhost:8000'; | |
| })(); | |
| const waveform = document.getElementById('waveform'); | |
| const statusEl = document.getElementById('status'); | |
| const agentStatusEl = document.getElementById('agent-status'); | |
| const conversation = document.getElementById('conversation'); | |
| const muteBtn = document.getElementById('mute-toggle'); | |
| const micSlash = document.getElementById('mic-slash'); | |
| const providerAgentBtn = document.getElementById('provider-agent'); | |
| const providerLlmBtn = document.getElementById('provider-llm'); | |
| const resetBtn = document.getElementById('reset-session'); | |
| const uploadBtn = document.getElementById('upload-btn'); | |
| const providerButtons = [providerLlmBtn, providerAgentBtn]; | |
| let state = INITIAL_STATE || 'muted'; | |
| let isMuted = state === 'muted'; | |
| let ws = null; | |
| let audioContext = null; | |
| let processor = null; | |
| let micStream = null; | |
| let analyser = null; | |
| let freqData = null; | |
| let waveAnimId = null; | |
| let listening = false; | |
| let llmProvider = INITIAL_PROVIDER || 'azure_openai'; | |
| let sessionId = null; | |
| let sendEnabled = true; | |
| let segmentInFlight = false; | |
| let lastVoiceAt = 0; | |
| let hadVoice = false; | |
| let framesSent = 0; | |
| let rmsSum = 0; | |
| let rmsCount = 0; | |
| let bargeInMs = 0; | |
| const SILENCE_MS = 1500; | |
| const RMS_THRESHOLD = 0.025; | |
| const BARGEIN_THRESHOLD = 0.03; | |
| const BARGEIN_HOLD_MS = 100; | |
| const MIN_FRAMES_PER_SEGMENT = 3; | |
| const MIN_SPEECH_FRAMES = 4 | |
| let currentReplyAudio = null; | |
| const setProvider = (provider, notify = true) => { | |
| const changed = llmProvider !== provider; | |
| llmProvider = provider; | |
| providerButtons.forEach((btn) => { | |
| if (!btn) return; | |
| const isActive = btn.dataset.provider === provider; | |
| btn.classList.toggle('active', isActive); | |
| }); | |
| if (uploadBtn) { | |
| uploadBtn.classList.toggle('hidden', provider !== 'local_agent'); | |
| } | |
| if (notify) { | |
| sendToStreamlit({ event: 'provider_change', provider }); | |
| } | |
| if (changed && listening) { | |
| stopListening(); | |
| startListening(); | |
| } | |
| }; | |
| providerButtons.forEach((btn) => { | |
| if (!btn) return; | |
| btn.addEventListener('click', () => setProvider(btn.dataset.provider)); | |
| }); | |
| const sendToStreamlit = (value) => { | |
| window.parent.postMessage({ | |
| isStreamlitMessage: true, | |
| type: 'streamlit:setComponentValue', | |
| value: value | |
| }, '*'); | |
| }; | |
| const setFrameHeight = (h) => { | |
| window.parent.postMessage({ | |
| isStreamlitMessage: true, | |
| type: 'streamlit:setFrameHeight', | |
| height: h | |
| }, '*'); | |
| }; | |
| const waveBars = []; | |
| let waveLevel = 0; | |
| const WAVE_BAR_COUNT = 48; | |
| const makeBars = () => { | |
| waveform.innerHTML = ''; | |
| waveBars.length = 0; | |
| for (let i = 0; i < WAVE_BAR_COUNT; i++) { | |
| const bar = document.createElement('div'); | |
| bar.className = 'wave-bar'; | |
| bar.style.animationDelay = `${(i % 8) * 0.12}s`; | |
| bar.style.height = `${12 + (i % 6) * 4}px`; | |
| waveform.appendChild(bar); | |
| waveBars.push(bar); | |
| } | |
| }; | |
| const resetWave = () => { | |
| waveBars.forEach((bar, i) => { | |
| bar.style.height = `${12 + (i % 6) * 4}px`; | |
| }); | |
| }; | |
| const updateWave = (rms) => { | |
| const level = Math.min(1, rms * 3.2); | |
| waveLevel = waveLevel * 0.7 + level * 0.3; | |
| const t = performance.now() / 140; | |
| waveBars.forEach((bar, i) => { | |
| const variance = 0.6 + 0.4 * Math.sin(t + i * 0.6); | |
| const height = 10 + waveLevel * 70 * variance; | |
| bar.style.height = `${height}px`; | |
| }); | |
| }; | |
| const startWaveLoop = () => { | |
| if (!analyser || !freqData) return; | |
| const render = () => { | |
| analyser.getByteFrequencyData(freqData); | |
| const step = Math.max(1, Math.floor(freqData.length / waveBars.length)); | |
| for (let i = 0; i < waveBars.length; i++) { | |
| const v = freqData[i * step] / 255; | |
| const height = 10 + v * 90; | |
| waveBars[i].style.height = `${height}px`; | |
| } | |
| waveAnimId = requestAnimationFrame(render); | |
| }; | |
| if (waveAnimId) cancelAnimationFrame(waveAnimId); | |
| waveAnimId = requestAnimationFrame(render); | |
| }; | |
| const stopWaveLoop = () => { | |
| if (waveAnimId) { | |
| cancelAnimationFrame(waveAnimId); | |
| waveAnimId = null; | |
| } | |
| }; | |
| const renderMessages = (messages) => { | |
| conversation.innerHTML = ''; | |
| messages.forEach((msg) => { | |
| const row = document.createElement('div'); | |
| row.className = `bubble-row ${msg.role}`; | |
| const avatar = document.createElement('div'); | |
| avatar.className = 'small-avatar'; | |
| avatar.innerHTML = msg.role === 'user' ? `__AVATAR_USER_SMALL__` : `__AVATAR_AGENT_SMALL__`; | |
| const bubble = document.createElement('div'); | |
| bubble.className = `bubble ${msg.role}`; | |
| bubble.textContent = msg.text; | |
| const meta = document.createElement('div'); | |
| meta.className = 'meta'; | |
| const ts = msg.ts ? new Date(msg.ts) : new Date(); | |
| meta.textContent = ts.toLocaleTimeString([], { hour: '2-digit', minute: '2-digit' }); | |
| if (msg.role === 'user') { | |
| row.appendChild(meta); | |
| row.appendChild(bubble); | |
| row.appendChild(avatar); | |
| } else { | |
| row.appendChild(avatar); | |
| row.appendChild(bubble); | |
| row.appendChild(meta); | |
| } | |
| conversation.appendChild(row); | |
| renderLatex(bubble); | |
| }); | |
| conversation.scrollTop = conversation.scrollHeight; | |
| }; | |
| const setState = (next) => { | |
| state = next; | |
| if (state === 'listening') { | |
| statusEl.textContent = 'Listening…'; | |
| agentStatusEl.textContent = ''; | |
| waveform.classList.add('listening'); | |
| waveform.classList.add('realtime'); | |
| muteBtn.classList.add('listening'); | |
| micSlash.style.display = 'none'; | |
| startWaveLoop(); | |
| } else if (state === 'thinking') { | |
| statusEl.textContent = ''; | |
| agentStatusEl.textContent = 'Thinking…'; | |
| waveform.classList.remove('listening'); | |
| muteBtn.classList.remove('listening'); | |
| micSlash.style.display = 'none'; | |
| } else { | |
| statusEl.textContent = 'Muted'; | |
| agentStatusEl.textContent = ''; | |
| waveform.classList.remove('listening'); | |
| waveform.classList.remove('realtime'); | |
| muteBtn.classList.remove('listening'); | |
| micSlash.style.display = 'block'; | |
| stopWaveLoop(); | |
| resetWave(); | |
| } | |
| }; | |
| const downsampleBuffer = (buffer, inputRate, outputRate) => { | |
| if (outputRate === inputRate) return buffer; | |
| const ratio = inputRate / outputRate; | |
| const newLength = Math.round(buffer.length / ratio); | |
| const result = new Float32Array(newLength); | |
| let offsetResult = 0; | |
| let offsetBuffer = 0; | |
| while (offsetResult < result.length) { | |
| const nextOffsetBuffer = Math.round((offsetResult + 1) * ratio); | |
| let accum = 0; | |
| let count = 0; | |
| for (let i = offsetBuffer; i < nextOffsetBuffer && i < buffer.length; i++) { | |
| accum += buffer[i]; | |
| count++; | |
| } | |
| result[offsetResult] = accum / count; | |
| offsetResult++; | |
| offsetBuffer = nextOffsetBuffer; | |
| } | |
| return result; | |
| }; | |
| const floatTo16BitPCM = (float32) => { | |
| const output = new Int16Array(float32.length); | |
| for (let i = 0; i < float32.length; i++) { | |
| let s = Math.max(-1, Math.min(1, float32[i])); | |
| output[i] = s < 0 ? s * 0x8000 : s * 0x7fff; | |
| } | |
| return output; | |
| }; | |
| const stopReplyAudio = () => { | |
| if (!currentReplyAudio) return; | |
| try { | |
| currentReplyAudio.pause(); | |
| currentReplyAudio.currentTime = 0; | |
| } catch (err) { | |
| // ignore | |
| } | |
| currentReplyAudio = null; | |
| }; | |
| const arrayBufferToBase64 = (buffer) => { | |
| let binary = ''; | |
| const bytes = new Uint8Array(buffer); | |
| const len = bytes.byteLength; | |
| for (let i = 0; i < len; i++) { | |
| binary += String.fromCharCode(bytes[i]); | |
| } | |
| return btoa(binary); | |
| }; | |
| let katexLoading = false; | |
| const katexQueue = []; | |
| const katexOptions = { | |
| delimiters: [ | |
| { left: '$$', right: '$$', display: true }, | |
| { left: '\\\\[', right: '\\\\]', display: true }, | |
| { left: '\\\\(', right: '\\\\)', display: false }, | |
| { left: '$', right: '$', display: false } | |
| ], | |
| throwOnError: false | |
| }; | |
| const loadScript = (src, onload) => { | |
| const script = document.createElement('script'); | |
| script.src = src; | |
| script.async = true; | |
| script.onload = onload; | |
| document.head.appendChild(script); | |
| }; | |
| const ensureKatex = () => { | |
| if (window.renderMathInElement) return; | |
| if (katexLoading) return; | |
| katexLoading = true; | |
| if (!document.getElementById('katex-css')) { | |
| const link = document.createElement('link'); | |
| link.id = 'katex-css'; | |
| link.rel = 'stylesheet'; | |
| link.href = 'https://cdn.jsdelivr.net/npm/katex@0.16.10/dist/katex.min.css'; | |
| document.head.appendChild(link); | |
| } | |
| loadScript('https://cdn.jsdelivr.net/npm/katex@0.16.10/dist/katex.min.js', () => { | |
| loadScript('https://cdn.jsdelivr.net/npm/katex@0.16.10/dist/contrib/auto-render.min.js', () => { | |
| katexLoading = false; | |
| if (window.renderMathInElement) { | |
| while (katexQueue.length) { | |
| const node = katexQueue.shift(); | |
| try { | |
| window.renderMathInElement(node, katexOptions); | |
| } catch (err) { | |
| // ignore render errors | |
| } | |
| } | |
| } | |
| }); | |
| }); | |
| }; | |
| const renderLatex = (el) => { | |
| if (!el) return; | |
| if (window.renderMathInElement) { | |
| try { | |
| window.renderMathInElement(el, katexOptions); | |
| } catch (err) { | |
| // ignore render errors | |
| } | |
| return; | |
| } | |
| katexQueue.push(el); | |
| ensureKatex(); | |
| }; | |
| let messages = INITIAL_MESSAGES || []; | |
| const startListening = async () => { | |
| if (listening) return; | |
| listening = true; | |
| framesSent = 0; | |
| setState('listening'); | |
| isMuted = false; | |
| sendEnabled = true; | |
| segmentInFlight = false; | |
| lastVoiceAt = performance.now(); | |
| hadVoice = false; | |
| ws = new WebSocket(WS_URL); | |
| ws.binaryType = 'arraybuffer'; | |
| ws.onopen = async () => { | |
| if (!sessionId && crypto?.randomUUID) { | |
| sessionId = crypto.randomUUID(); | |
| } | |
| ws.send(JSON.stringify({ event: 'start', content_type: 'audio/pcm;rate=16000;bits=16;channels=1', return_audio: true, llm_provider: llmProvider, session_id: sessionId })); | |
| micStream = await navigator.mediaDevices.getUserMedia({ | |
| audio: { | |
| noiseSuppression: true, | |
| echoCancellation: true, | |
| autoGainControl: true | |
| } | |
| }); | |
| audioContext = new (window.AudioContext || window.webkitAudioContext)(); | |
| await audioContext.resume(); | |
| const source = audioContext.createMediaStreamSource(micStream); | |
| analyser = audioContext.createAnalyser(); | |
| analyser.fftSize = 256; | |
| analyser.smoothingTimeConstant = 0.8; | |
| freqData = new Uint8Array(analyser.frequencyBinCount); | |
| source.connect(analyser); | |
| startWaveLoop(); | |
| processor = audioContext.createScriptProcessor(4096, 1, 1); | |
| processor.onaudioprocess = (event) => { | |
| if (!listening || !ws || ws.readyState !== WebSocket.OPEN) return; | |
| const input = event.inputBuffer.getChannelData(0); | |
| let rms = 0; | |
| for (let i = 0; i < input.length; i++) { | |
| rms += input[i] * input[i]; | |
| } | |
| rms = Math.sqrt(rms / input.length); | |
| rmsSum += rms; | |
| rmsCount += 1; | |
| if (!analyser) { | |
| updateWave(rms); | |
| } | |
| if (rms > BARGEIN_THRESHOLD) { | |
| bargeInMs += 32; | |
| if (bargeInMs >= BARGEIN_HOLD_MS) { | |
| stopReplyAudio(); | |
| } | |
| } else { | |
| bargeInMs = 0; | |
| } | |
| if (rms > RMS_THRESHOLD) { | |
| lastVoiceAt = performance.now(); | |
| hadVoice = true; | |
| } | |
| if (sendEnabled) { | |
| const downsampled = downsampleBuffer(input, audioContext.sampleRate, 16000); | |
| const pcm = floatTo16BitPCM(downsampled); | |
| ws.send(pcm.buffer); | |
| framesSent += 1; | |
| } | |
| if ( | |
| sendEnabled && | |
| !segmentInFlight && | |
| hadVoice && | |
| framesSent >= MIN_FRAMES_PER_SEGMENT && | |
| performance.now() - lastVoiceAt > SILENCE_MS | |
| ) { | |
| segmentInFlight = true; | |
| setState('thinking'); | |
| } | |
| }; | |
| source.connect(processor); | |
| processor.connect(audioContext.destination); | |
| }; | |
| ws.onmessage = (event) => { | |
| if (event.data instanceof ArrayBuffer) { | |
| const audioB64 = arrayBufferToBase64(event.data); | |
| const blob = new Blob([event.data], { type: 'audio/wav' }); | |
| const url = URL.createObjectURL(blob); | |
| const audio = new Audio(url); | |
| currentReplyAudio = audio; | |
| audio.onended = () => { | |
| if (currentReplyAudio === audio) currentReplyAudio = null; | |
| }; | |
| audio.play().catch(() => { | |
| sendToStreamlit({ event: 'autoplay_failed', audio_b64: audioB64 }); | |
| }); | |
| sendToStreamlit({ event: 'audio', audio_b64: audioB64 }); | |
| return; | |
| } | |
| const data = JSON.parse(event.data); | |
| if (data.event === 'transcript') { | |
| if (data.transcript) { | |
| messages.push({ role: 'user', text: data.transcript, ts: new Date().toISOString() }); | |
| renderMessages(messages); | |
| } | |
| } | |
| if (data.event === 'result') { | |
| if (!isMuted) { | |
| setState('listening'); | |
| } else { | |
| setState('muted'); | |
| } | |
| segmentInFlight = false; | |
| sendEnabled = !isMuted; | |
| hadVoice = false; | |
| lastVoiceAt = performance.now(); | |
| framesSent = 0; | |
| rmsSum = 0; | |
| rmsCount = 0; | |
| if (data.transcript) { | |
| const last = messages[messages.length - 1]; | |
| if (!last || last.role !== 'user' || last.text !== data.transcript) { | |
| messages.push({ role: 'user', text: data.transcript, ts: new Date().toISOString() }); | |
| } | |
| } | |
| if (data.reply_text) { | |
| messages.push({ role: 'agent', text: data.reply_text, ts: new Date().toISOString() }); | |
| } | |
| renderMessages(messages); | |
| sendToStreamlit({ | |
| event: 'result', | |
| transcript: data.transcript, | |
| reply_text: data.reply_text | |
| }); | |
| } | |
| if (data.event === 'error') { | |
| if (!isMuted) { | |
| setState('listening'); | |
| } else { | |
| setState('muted'); | |
| } | |
| segmentInFlight = false; | |
| sendEnabled = !isMuted; | |
| hadVoice = false; | |
| lastVoiceAt = performance.now(); | |
| framesSent = 0; | |
| rmsSum = 0; | |
| rmsCount = 0; | |
| if (isMuted && ws) ws.close(); | |
| } | |
| }; | |
| }; | |
| // Stop microphone capture and tell the server that the current utterance is over. | |
| // Does nothing unless `listening` is set. Puts the UI into the 'muted' state, | |
| // tears down the audio processor, audio context and microphone tracks, and, | |
| // when the WebSocket is open, sends a final 'stop' message. The socket itself | |
| // is not closed here. | |
| const stopListening = () => { | |
| if (!listening) return; | |
| setState('muted'); | |
| isMuted = true; | |
| sendEnabled = false; | |
| // Release the capture pipeline; each part is optional because it may never have been created. | |
| if (processor) processor.disconnect(); | |
| if (audioContext) audioContext.close(); | |
| if (micStream) micStream.getTracks().forEach((t) => t.stop()); | |
| analyser = null; | |
| freqData = null; | |
| if (ws && ws.readyState === WebSocket.OPEN) { | |
| // Nothing was streamed yet: send one zero-filled 320-sample Int16 buffer first. | |
| // NOTE(review): presumably so the server always receives at least one audio frame before 'stop' - confirm. | |
| if (framesSent === 0) { | |
| const silence = new Int16Array(320); | |
| ws.send(silence.buffer); | |
| } | |
| // Mean RMS over the counted frames; 0 when no frames were counted (avoids dividing by zero). | |
| const avgRms = rmsCount ? rmsSum / rmsCount : 0; | |
| ws.send(JSON.stringify({ event: 'stop', prompt: 'Answer briefly.', frames_sent: framesSent, avg_rms: avgRms, llm_provider: llmProvider, session_id: sessionId })); | |
| } | |
| listening = false; | |
| }; | |
| // Mute button toggles capture: from 'muted' it starts listening, from 'listening' | |
| // or 'thinking' it stops. Clicks in any other state are ignored. | |
| muteBtn.addEventListener('click', () => { | |
| if (state === 'muted') { | |
| startListening(); | |
| } else if (state === 'listening' || state === 'thinking') { | |
| stopListening(); | |
| } | |
| }); | |
| // Reset button (optional element): stop any active capture, clear the local | |
| // conversation, and ask both Streamlit and the backend to reset. | |
| if (resetBtn) { | |
| resetBtn.addEventListener('click', () => { | |
| // NOTE(review): stopListening() may still send a 'stop' message on an open socket; | |
| // verify that a late 'result' cannot repopulate the list cleared below. | |
| if (listening) stopListening(); | |
| messages = []; | |
| renderMessages(messages); | |
| setState('muted'); | |
| sendToStreamlit({ event: 'reset_session' }); | |
| // Fire-and-forget backend reset; network errors are deliberately ignored. | |
| fetch(`${HTTP_BASE}/v1/agent/reset`, { method: 'POST' }).catch(() => {}); | |
| }); | |
| } | |
| // Upload button (optional element): backs the button with a hidden multi-file | |
| // input and POSTs the chosen files to the backend as multipart form data. | |
| if (uploadBtn) { | |
| // Hidden file picker, limited by extension, opened when the visible button is clicked. | |
| const input = document.createElement('input'); | |
| input.type = 'file'; | |
| input.multiple = true; | |
| input.accept = '.txt,.md,.pdf,.docx,.csv'; | |
| input.style.display = 'none'; | |
| document.body.appendChild(input); | |
| uploadBtn.addEventListener('click', () => input.click()); | |
| input.addEventListener('change', async () => { | |
| if (!input.files || input.files.length === 0) return; | |
| // Busy state: disable the button and swap its icon for an animated spinner. | |
| uploadBtn.disabled = true; | |
| uploadBtn.classList.add('loading'); | |
| uploadBtn.innerHTML = '<svg viewBox="0 0 50 50" width="20" height="20"><circle cx="25" cy="25" r="20" stroke="#e8edff" stroke-width="4" fill="none" stroke-linecap="round"><animateTransform attributeName="transform" type="rotate" from="0 25 25" to="360 25 25" dur="0.8s" repeatCount="indefinite"/></circle></svg>'; | |
| // Every selected file goes under the same 'files' form field. | |
| const form = new FormData(); | |
| Array.from(input.files).forEach((f) => form.append('files', f)); | |
| // NOTE(review): the response status is never inspected and rejections are swallowed, | |
| // so a failed upload looks the same as a successful one to the user. | |
| try { | |
| await fetch(`${HTTP_BASE}/v1/agent/upload`, { method: 'POST', body: form }); | |
| } catch (err) { | |
| // ignore | |
| } | |
| // Restore the idle button and its upload icon. | |
| uploadBtn.disabled = false; | |
| uploadBtn.classList.remove('loading'); | |
| uploadBtn.innerHTML = '<svg viewBox="0 0 24 24" width="20" height="20" fill="none" stroke="#e8edff" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M21 15v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4"/><polyline points="17 8 12 3 7 8"/><line x1="12" y1="3" x2="12" y2="15"/></svg>'; | |
| // Clear the selection; presumably so picking the same file again still fires 'change'. | |
| input.value = ''; | |
| }); | |
| } | |
| // One-time start-up calls, run when the script loads: draw the initial UI from | |
| // the injected history, state and provider, then request a 980px frame height | |
| // (the same height the Python side passes when embedding this page). | |
| makeBars(); | |
| renderMessages(INITIAL_MESSAGES || []); | |
| setState(state); | |
| setProvider(llmProvider, false); | |
| setFrameHeight(980); | |
| </script> | |
| </body> | |
| </html> | |
| """ | |
# Collapse the template's doubled braces BEFORE injecting any dynamic values.
# Doing this after substitution also rewrote every "{{" / "}}" that occurred
# inside the injected data (for example a chat message quoting template
# syntax), silently altering what the browser displayed.
html = html.replace("{{", "{").replace("}}", "}")

# Fill the placeholders with the prepared *_json strings and avatar markup.
html = (
    html.replace("__MESSAGES_JSON__", messages_json)
    .replace("__STATE_JSON__", state_json)
    .replace("__PROVIDER_JSON__", provider_json)
    .replace("__WS_URL_JSON__", ws_url_json)
    .replace("__HTTP_URL_JSON__", http_url_json)
    .replace("__AVATAR_USER__", _avatar_svg("user"))
    .replace("__AVATAR_AGENT__", _avatar_svg("agent"))
    .replace("__AVATAR_USER_SMALL__", _avatar_svg("user", small=True))
    .replace("__AVATAR_AGENT_SMALL__", _avatar_svg("agent", small=True))
)

# Embed the finished page at a fixed height with scrolling disabled; the
# return value is inspected afterwards for an event payload.
value = components.v1.html(html, height=980, scrolling=False)
# Apply the event payload reported by the embedded widget, if there is one.
# NOTE(review): this only runs when `value` is a dict -- confirm that the
# component call producing `value` can actually return event payloads.
if isinstance(value, dict):
    # Local import: the file header only imports `datetime` itself.
    from datetime import timezone

    event = value.get("event")
    if event == "result":
        # A finished turn: mirror transcript and reply into the session history.
        transcript = value.get("transcript")
        reply_text = value.get("reply_text")
        # Timezone-aware UTC stamp. `datetime.utcnow()` is deprecated and yields
        # a naive value whose ISO string carries no offset, unlike the
        # `toISOString()` stamps the widget writes for its own messages.
        now_ts = datetime.now(timezone.utc).isoformat()
        if transcript:
            st.session_state.messages.append(
                {"role": "user", "text": transcript, "ts": now_ts}
            )
        if reply_text:
            st.session_state.messages.append(
                {"role": "agent", "text": reply_text, "ts": now_ts}
            )
        st.session_state.state = "muted"
    elif event == "autoplay_failed":
        # Browser refused autoplay: keep the clip and show the manual player.
        st.session_state.last_audio_b64 = value.get("audio_b64")
        st.session_state.show_player = True
    elif event == "audio":
        # Remember the latest reply audio without forcing the player open.
        st.session_state.last_audio_b64 = value.get("audio_b64")
    elif event == "reset_session":
        # Wipe on-disk data, ask the backend to reset (best effort), then clear
        # all UI state and rerun the script.
        _clear_session_data()
        try:
            with httpx.Client(timeout=10.0) as client:
                client.post(f"{_http_base()}/v1/agent/reset")
        except Exception:
            # Deliberately best-effort: an unreachable backend must not block
            # the local reset.
            pass
        st.session_state.messages = []
        st.session_state.state = "muted"
        st.session_state.last_audio_b64 = None
        st.session_state.show_player = False
        st.rerun()
    elif event == "provider_change":
        # Accept only known provider identifiers; anything else is ignored.
        provider = value.get("provider")
        if provider in {"azure_openai", "local_agent", "foundry_agent"}:
            st.session_state.ui_provider = provider
            st.rerun()
# Fallback player: when the player has been requested and a clip is stored,
# decode it and show Streamlit's own audio control. If decoding or rendering
# fails, hide the player again.
if st.session_state.show_player:
    encoded_reply = st.session_state.last_audio_b64
    if encoded_reply:
        try:
            st.audio(base64.b64decode(encoded_reply), format="audio/wav")
        except Exception:
            st.session_state.show_player = False