# voice-agent/ui/streamlit_app.py
# Repository page residue kept for provenance:
#   author avatar caption: "Sbboss's picture"
#   last commit: "RAG, language updates" (0b2d478)
"""Streamlit UI for the Speech AI Agent (WebSocket streaming)."""
from __future__ import annotations

import base64
import json
import os
import shutil
from datetime import datetime, timezone
from pathlib import Path
from typing import Any

import httpx
import streamlit as st
from streamlit import components
# Optional backend endpoint overrides read from the environment. An empty
# string means "not configured"; consumers then derive a default themselves.
WS_URL = os.environ.get("SPEECH_AGENT_WS_URL", "").strip()
HTTP_URL = os.environ.get("SPEECH_AGENT_HTTP_URL", "").strip()

# Project root: the parent of the directory that contains this file.
BASE_DIR = Path(__file__).resolve().parents[1]
# On-disk session data, with the vector store kept in a sub-directory of it.
DATA_DIR = BASE_DIR / "data"
VECTOR_DIR = DATA_DIR / "vector_store"
def _clear_session_data() -> None:
    """Wipe the on-disk session data and leave empty directories behind.

    Deletes ``DATA_DIR`` and ``VECTOR_DIR`` together with everything below
    them, then recreates both so that later code can rely on the
    directories existing.
    """
    for path in (DATA_DIR, VECTOR_DIR):
        if path.is_file():
            # A stray file squatting on the directory name would make rmtree
            # a silent no-op and the mkdir below raise FileExistsError.
            path.unlink()
        elif path.is_dir():
            # Best effort: a file that cannot be removed must not abort the
            # whole reset.
            shutil.rmtree(path, ignore_errors=True)
        # Recreate unconditionally: a directory nested inside an earlier one
        # (VECTOR_DIR is defined under DATA_DIR) is already gone by the time
        # the loop reaches it.
        path.mkdir(parents=True, exist_ok=True)
def _http_base() -> str:
    """Return the backend's HTTP base URL without a trailing slash.

    Resolution order:

    1. ``HTTP_URL`` when it is set.
    2. ``WS_URL`` with its scheme mapped ``ws`` -> ``http`` / ``wss`` ->
       ``https`` and a trailing ``/ws/voice`` path removed.
    3. ``http://localhost:8000`` when neither is configured.
    """
    if HTTP_URL:
        return HTTP_URL.rstrip("/")
    if WS_URL:
        base = WS_URL
        # Rewrite only the leading scheme; str.replace would also rewrite a
        # matching substring that happens to appear later in the URL.
        for ws_scheme, http_scheme in (("wss://", "https://"), ("ws://", "http://")):
            if base.startswith(ws_scheme):
                base = http_scheme + base[len(ws_scheme):]
                break
        # Drop trailing slashes first so ".../ws/voice/" is recognised too;
        # otherwise the voice path would leak into every HTTP request URL.
        base = base.rstrip("/")
        if base.endswith("/ws/voice"):
            base = base[: -len("/ws/voice")]
        return base.rstrip("/")
    return "http://localhost:8000"
def _avatar_svg(kind: str, small: bool = False) -> str:
    """Build an ``<img>`` tag whose source is an inline SVG avatar.

    ``kind == "agent"`` draws the robot face; any other value draws the
    person face. ``kind`` is also used verbatim as the ``alt`` text.
    ``small`` selects a 36px image instead of the default 78px one.
    Colours are spelled ``%23rrggbb``, i.e. with a percent-encoded ``#``,
    because the SVG sits inside a data URI.
    """
    px = 36 if small else 78

    if kind == "agent":
        # Robot: head, visor, two eyes, mouth slot and an antenna stub.
        shapes = (
            "<rect x='26' y='30' width='48' height='44' rx='10' fill='%2397b3ff'/>"
            "<rect x='34' y='38' width='32' height='18' rx='6' fill='%23ffffff'/>"
            "<circle cx='42' cy='47' r='3' fill='%23333'/>"
            "<circle cx='58' cy='47' r='3' fill='%23333'/>"
            "<rect x='40' y='60' width='20' height='6' rx='3' fill='%238090d6'/>"
            "<rect x='46' y='24' width='8' height='8' rx='2' fill='%238090d6'/>"
        )
    else:
        # Person: face disc, hair arc, two eyes and a smile.
        skin_tone = "%23f1c7a9"
        hair_tone = "%232f3557"
        shapes = (
            f"<circle cx='50' cy='54' r='28' fill='{skin_tone}'/>"
            f"<path d='M22 44 Q50 15 78 44' fill='{hair_tone}'/>"
            "<circle cx='40' cy='55' r='3' fill='%23333'/>"
            "<circle cx='60' cy='55' r='3' fill='%23333'/>"
            "<path d='M42 67 Q50 72 58 67' stroke='%23333' stroke-width='3' fill='none'/>"
        )

    # Both avatars share the tag opening, the SVG header and the backdrop disc.
    opening = (
        f"<img alt='{kind}' src=\"data:image/svg+xml;utf8,"
        f"<svg xmlns='http://www.w3.org/2000/svg' width='{px}' height='{px}' viewBox='0 0 100 100'>"
        "<circle cx='50' cy='50' r='50' fill='%23e7ecff'/>"
    )
    closing = "</svg>\" />"
    return opening + shapes + closing
st.set_page_config(page_title="Speech AI Agent", layout="wide")

# Markup injected into the host Streamlit page: KaTeX assets plus CSS that
# hides Streamlit's own menu/footer/header/sidebar and removes the container
# padding and the iframe border.
# NOTE(review): the chat iframe loads KaTeX on its own (ensureKatex in the
# embedded script) — confirm the host-page KaTeX tags are still needed.
_HOST_PAGE_MARKUP = """
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.16.10/dist/katex.min.css">
<script defer src="https://cdn.jsdelivr.net/npm/katex@0.16.10/dist/katex.min.js"></script>
<script defer src="https://cdn.jsdelivr.net/npm/katex@0.16.10/dist/contrib/auto-render.min.js"></script>
<style>
#MainMenu, footer, header {visibility: hidden;}
.block-container {padding: 0;}
iframe {border: none;}
[data-testid="stSidebar"] {display: none;}
</style>
"""
st.markdown(_HOST_PAGE_MARKUP, unsafe_allow_html=True)
# Seed every session key the page reads later. Keys that already exist are
# left untouched, so script reruns keep the conversation and UI state.
for _key, _initial in (
    ("state", "muted"),  # UI state handed to the embedded page
    ("messages", []),  # chat history: dicts with role / text / ts
    ("last_audio_b64", None),  # latest reply audio as base64 text
    ("show_player", False),  # whether to show the fallback audio player
    ("ui_provider", "azure_openai"),  # provider selected in the header toggle
):
    if _key not in st.session_state:
        st.session_state[_key] = _initial
def _script_json(value: Any) -> str:
    """Serialise *value* to JSON that is safe to inline in a ``<script>``.

    Chat text comes from transcripts and model replies. A literal
    ``</script>`` or ``<!--`` inside it would end or corrupt the inline
    script element, so ``<``, ``>`` and ``&`` are emitted as ``\\uXXXX``
    escapes. Those characters only ever occur inside JSON strings, so the
    result parses to exactly the same value.
    """
    return (
        json.dumps(value)
        .replace("<", "\\u003c")
        .replace(">", "\\u003e")
        .replace("&", "\\u0026")
    )


# Values substituted into the page template as JavaScript literals.
messages_json = _script_json(st.session_state.messages)
state_json = _script_json(st.session_state.state)
provider_json = _script_json(st.session_state.ui_provider)
# Unset URLs become `null` so the page falls back to deriving them itself.
ws_url_json = _script_json(WS_URL or None)
http_url_json = _script_json(HTTP_URL or None)
html = """
<!doctype html>
<html>
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<style>
:root {
--navy-1: #0c1540;
--navy-2: #0f1e57;
--card: #1c2555;
--frost: rgba(240, 244, 255, 0.75);
--text-light: #eef2ff;
--text-muted: #c7d2fe;
--blue: #6f8ef6;
--blue-2: #7aa5ff;
--bubble-light: #f8f9ff;
--shadow: 0 18px 50px rgba(8, 13, 40, 0.45);
}
html, body {
margin: 0;
padding: 0;
width: 100%;
height: 100%;
font-family: 'Poppins', 'Segoe UI', sans-serif;
background: radial-gradient(circle at top, #1a275f 0%, #0b1130 55%, #070a20 100%);
}
.page {
min-height: 100vh;
display: flex;
align-items: center;
justify-content: center;
padding: 32px 16px 48px;
box-sizing: border-box;
}
.card {
width: min(980px, 95vw);
background: linear-gradient(135deg, #1a2353 0%, #1b2559 45%, #1f2a64 100%);
border-radius: 28px;
box-shadow: var(--shadow);
padding: 28px;
}
.header {
display: flex;
align-items: center;
justify-content: space-between;
color: var(--text-light);
margin-bottom: 22px;
gap: 16px;
}
.title {
font-size: 46px;
font-weight: 700;
letter-spacing: 0.3px;
}
.powered {
font-size: 12px;
letter-spacing: 2px;
text-transform: uppercase;
color: #cdd7ff;
}
.provider-toggle {
display: inline-flex;
background: rgba(255, 255, 255, 0.08);
border-radius: 999px;
padding: 4px;
gap: 4px;
}
.provider-btn {
border: none;
background: transparent;
color: #cdd7ff;
padding: 8px 14px;
border-radius: 999px;
font-size: 12px;
letter-spacing: 1.5px;
text-transform: uppercase;
cursor: pointer;
}
.provider-btn.active {
background: #6f8ef6;
color: #fff;
box-shadow: 0 6px 16px rgba(40, 60, 140, 0.35);
}
.header-actions {
display: inline-flex;
align-items: center;
gap: 12px;
flex-wrap: wrap;
}
.reset-btn {
border: none;
background: rgba(255, 255, 255, 0.12);
color: #e8edff;
padding: 8px 14px;
border-radius: 999px;
font-size: 11px;
letter-spacing: 1px;
text-transform: uppercase;
cursor: pointer;
transition: transform 0.2s ease, background 0.2s ease;
}
.reset-btn:hover {
transform: translateY(-1px);
background: rgba(255, 255, 255, 0.2);
}
.upload-btn {
border: none;
background: rgba(255, 255, 255, 0.12);
color: #e8edff;
width: 40px;
height: 40px;
border-radius: 50%;
cursor: pointer;
display: inline-flex;
align-items: center;
justify-content: center;
transition: transform 0.2s ease, background 0.2s ease;
}
.upload-btn:hover {
transform: translateY(-1px);
background: rgba(255, 255, 255, 0.2);
}
.upload-btn.hidden {
display: none;
}
.panel {
background: var(--frost);
border-radius: 24px;
padding: 24px 24px 28px;
backdrop-filter: blur(14px);
display: flex;
flex-direction: column;
gap: 18px;
height: min(680px, 72vh);
overflow: hidden;
}
.top-row {
display: grid;
grid-template-columns: 120px minmax(140px, 1fr) 120px;
align-items: center;
gap: 12px;
}
.top-avatar {
display: flex;
flex-direction: column;
align-items: center;
gap: 6px;
}
.avatar {
width: 86px;
height: 86px;
border-radius: 50%;
display: flex;
align-items: center;
justify-content: center;
background: #e6ebff;
box-shadow: inset 0 0 0 6px #f2f5ff;
}
.avatar img {
width: 78px;
height: 78px;
border-radius: 50%;
}
.status {
font-size: 18px;
color: #6c74a8;
margin-top: 10px;
margin-left: 6px;
}
.waveform {
display: flex;
align-items: center;
justify-content: center;
gap: 4px;
height: 120px;
max-width: 100%;
margin: 0 auto;
overflow: hidden;
}
.wave-bar {
width: 2px;
border-radius: 10px;
background: linear-gradient(180deg, #ff6b6b 0%, #ffb4b4 100%);
height: 12px;
opacity: 0.6;
}
.listening .wave-bar {
animation: pulse 0.5s ease-in-out infinite;
}
.waveform.realtime .wave-bar {
animation: none;
}
@keyframes pulse {
0% { height: 10px; opacity: 0.4; }
50% { height: 70px; opacity: 1; }
100% { height: 16px; opacity: 0.5; }
}
.conversation {
display: flex;
flex-direction: column;
gap: 20px;
flex: 1;
overflow-y: auto;
}
.bubble-row {
display: flex;
align-items: flex-start;
gap: 12px;
width: 100%;
box-sizing: border-box;
}
.bubble-row.user {
justify-content: flex-end;
}
.bubble-row.agent {
justify-content: flex-start;
}
.bubble {
max-width: 60%;
padding: 16px 20px;
border-radius: 16px;
font-size: 18px;
line-height: 1.35;
box-shadow: 0 10px 20px rgba(70, 78, 140, 0.15);
}
.bubble.user {
background: linear-gradient(135deg, #7aa1ff 0%, #6b85ea 100%);
color: #ffffff;
max-width: 60%;
}
.bubble.agent {
background: var(--bubble-light);
color: #3f4a7d;
max-width: 55%;
}
.meta {
font-size: 12px;
color: #7f87b8;
margin-top: 6px;
}
.mute-row {
margin-top: 28px;
display: flex;
justify-content: center;
}
.mute-btn {
width: 108px;
height: 108px;
border-radius: 50%;
border: none;
background: #eef1ff;
display: flex;
align-items: center;
justify-content: center;
box-shadow: 0 10px 28px rgba(77, 86, 150, 0.25);
cursor: pointer;
}
.mute-btn svg {
width: 44px;
height: 44px;
fill: #8a92c9;
}
.mute-btn.listening {
background: #dbe6ff;
}
.mute-btn.listening svg {
fill: #5e75dc;
}
.small-avatar {
width: 44px;
height: 44px;
border-radius: 50%;
background: #e6ebff;
display: flex;
align-items: center;
justify-content: center;
box-shadow: inset 0 0 0 3px #f2f5ff;
}
.small-avatar img {
width: 36px;
height: 36px;
border-radius: 50%;
}
@media (max-width: 900px) {
.card {
padding: 20px;
}
.title {
font-size: 34px;
}
.panel {
height: min(640px, 76vh);
padding: 20px;
}
.top-row {
grid-template-columns: 80px minmax(120px, 1fr) 80px;
}
.avatar {
width: 70px;
height: 70px;
}
.avatar img {
width: 62px;
height: 62px;
}
.waveform {
height: 78px;
}
}
@media (max-width: 640px) {
.page {
padding: 20px 12px 32px;
}
.card {
padding: 16px;
border-radius: 22px;
}
.header {
flex-direction: column;
align-items: flex-start;
gap: 10px;
}
.title {
font-size: 28px;
}
.provider-toggle {
align-self: flex-start;
}
.panel {
height: min(620px, 78vh);
padding: 18px;
}
.top-row {
grid-template-columns: 1fr 1fr;
grid-template-areas:
"left right"
"wave wave";
gap: 10px;
}
.top-avatar.left {
grid-area: left;
justify-self: start;
}
.top-avatar.right {
grid-area: right;
justify-self: end;
}
.waveform {
grid-area: wave;
}
.avatar {
width: 58px;
height: 58px;
}
.avatar img {
width: 50px;
height: 50px;
}
.status {
font-size: 14px;
}
.waveform {
height: 66px;
}
.wave-bar {
width: 2px;
}
.bubble {
font-size: 15px;
padding: 12px 14px;
max-width: 70%;
}
.bubble-row {
gap: 8px;
}
.mute-row {
margin-top: 16px;
}
.mute-btn {
width: 84px;
height: 84px;
}
.mute-btn svg {
width: 34px;
height: 34px;
}
.small-avatar {
width: 36px;
height: 36px;
}
.small-avatar img {
width: 30px;
height: 30px;
}
}
</style>
</head>
<body>
<div class="page">
<div class="card">
<div class="header">
<div class="title">Voice AI Agent</div>
<div class="header-actions">
<div class="provider-toggle">
<button id="provider-llm" class="provider-btn active" data-provider="azure_openai">LLM</button>
<button id="provider-agent" class="provider-btn" data-provider="local_agent">Agent</button>
</div>
<button id="upload-btn" class="upload-btn hidden" title="Upload files">
<svg viewBox="0 0 24 24" width="20" height="20" fill="none" stroke="#e8edff" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
<path d="M21 15v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4"/>
<polyline points="17 8 12 3 7 8"/>
<line x1="12" y1="3" x2="12" y2="15"/>
</svg>
</button>
<button id="reset-session" class="reset-btn">Reset Session Data</button>
</div>
</div>
<div class="panel">
<div class="top-row">
<div class="top-avatar left">
<div class="avatar">__AVATAR_AGENT__</div>
<div id="agent-status" class="status"></div>
</div>
<div id="waveform" class="waveform"></div>
<div class="top-avatar right">
<div class="avatar">__AVATAR_USER__</div>
<div id="status" class="status">Muted</div>
</div>
</div>
<div id="conversation" class="conversation"></div>
<div class="mute-row">
<button id="mute-toggle" class="mute-btn muted" aria-label="Mute toggle">
<svg id="mic-icon" viewBox="0 0 24 24">
<path d="M12 14a3 3 0 0 0 3-3V5a3 3 0 0 0-6 0v6a3 3 0 0 0 3 3zm5-3a5 5 0 0 1-10 0H5a7 7 0 0 0 14 0h-2zm-4 7v3h-2v-3h2z"/>
<path id="mic-slash" d="M4 4l16 16-1.4 1.4L2.6 5.4 4 4z" fill="#8a92c9"/>
</svg>
</button>
</div>
</div>
</div>
</div>
<script>
const INITIAL_MESSAGES = __MESSAGES_JSON__;
const INITIAL_STATE = __STATE_JSON__;
const INITIAL_PROVIDER = __PROVIDER_JSON__;
const ENV_WS_URL = __WS_URL_JSON__;
const ENV_HTTP_URL = __HTTP_URL_JSON__;
const derivedWsUrl = (() => {
let host = '';
let protocol = 'https:';
try {
host = window.parent?.location?.host || window.location.host || '';
protocol = window.parent?.location?.protocol || window.location.protocol || 'https:';
} catch (err) {
// Cross-origin parent access may fail; fall back to referrer.
}
if (!host && document.referrer) {
try {
const ref = new URL(document.referrer);
host = ref.host;
protocol = ref.protocol;
} catch (err) {
// ignore
}
}
const wsProtocol = protocol === 'https:' ? 'wss' : 'ws';
if (!host) {
return 'ws://localhost:8000/ws/voice';
}
if (host.endsWith('.hf.space') || host.endsWith('.hf.co')) {
return `${wsProtocol}://${host}/ws/voice`;
}
const parts = host.split(':');
const hostname = parts[0];
const port = parts[1] || '';
if (hostname === 'localhost' || hostname === '127.0.0.1' || hostname === '0.0.0.0') {
if (port === '8501') {
return `${wsProtocol}://${hostname}:8000/ws/voice`;
}
return `${wsProtocol}://${host}/ws/voice`;
}
return `${wsProtocol}://${host}/ws/voice`;
})();
const WS_URL = ENV_WS_URL || derivedWsUrl;
const HTTP_BASE = (() => {
if (ENV_HTTP_URL) return ENV_HTTP_URL;
const base = WS_URL.replace('wss://', 'https://').replace('ws://', 'http://');
if (base.endsWith('/ws/voice')) {
return base.slice(0, -9);
}
try {
const host = window.parent?.location?.host || window.location.host || '';
const protocol = window.parent?.location?.protocol || window.location.protocol || 'https:';
if (host) {
const parts = host.split(':');
const hostname = parts[0];
const port = parts[1] || '';
if ((hostname === 'localhost' || hostname === '127.0.0.1') && port === '8501') {
return `${protocol}//${hostname}:8000`;
}
return `${protocol}//${host}`.replace(/\\/$/, '');
}
} catch (err) {
// ignore
}
return 'http://localhost:8000';
})();
const waveform = document.getElementById('waveform');
const statusEl = document.getElementById('status');
const agentStatusEl = document.getElementById('agent-status');
const conversation = document.getElementById('conversation');
const muteBtn = document.getElementById('mute-toggle');
const micSlash = document.getElementById('mic-slash');
const providerAgentBtn = document.getElementById('provider-agent');
const providerLlmBtn = document.getElementById('provider-llm');
const resetBtn = document.getElementById('reset-session');
const uploadBtn = document.getElementById('upload-btn');
const providerButtons = [providerLlmBtn, providerAgentBtn];
let state = INITIAL_STATE || 'muted';
let isMuted = state === 'muted';
let ws = null;
let audioContext = null;
let processor = null;
let micStream = null;
let analyser = null;
let freqData = null;
let waveAnimId = null;
let listening = false;
let llmProvider = INITIAL_PROVIDER || 'azure_openai';
let sessionId = null;
let sendEnabled = true;
let segmentInFlight = false;
let lastVoiceAt = 0;
let hadVoice = false;
let framesSent = 0;
let rmsSum = 0;
let rmsCount = 0;
let bargeInMs = 0;
const SILENCE_MS = 1500;
const RMS_THRESHOLD = 0.025;
const BARGEIN_THRESHOLD = 0.03;
const BARGEIN_HOLD_MS = 100;
const MIN_FRAMES_PER_SEGMENT = 3;
const MIN_SPEECH_FRAMES = 4
let currentReplyAudio = null;
const setProvider = (provider, notify = true) => {
const changed = llmProvider !== provider;
llmProvider = provider;
providerButtons.forEach((btn) => {
if (!btn) return;
const isActive = btn.dataset.provider === provider;
btn.classList.toggle('active', isActive);
});
if (uploadBtn) {
uploadBtn.classList.toggle('hidden', provider !== 'local_agent');
}
if (notify) {
sendToStreamlit({ event: 'provider_change', provider });
}
if (changed && listening) {
stopListening();
startListening();
}
};
providerButtons.forEach((btn) => {
if (!btn) return;
btn.addEventListener('click', () => setProvider(btn.dataset.provider));
});
const sendToStreamlit = (value) => {
window.parent.postMessage({
isStreamlitMessage: true,
type: 'streamlit:setComponentValue',
value: value
}, '*');
};
const setFrameHeight = (h) => {
window.parent.postMessage({
isStreamlitMessage: true,
type: 'streamlit:setFrameHeight',
height: h
}, '*');
};
const waveBars = [];
let waveLevel = 0;
const WAVE_BAR_COUNT = 48;
const makeBars = () => {
waveform.innerHTML = '';
waveBars.length = 0;
for (let i = 0; i < WAVE_BAR_COUNT; i++) {
const bar = document.createElement('div');
bar.className = 'wave-bar';
bar.style.animationDelay = `${(i % 8) * 0.12}s`;
bar.style.height = `${12 + (i % 6) * 4}px`;
waveform.appendChild(bar);
waveBars.push(bar);
}
};
const resetWave = () => {
waveBars.forEach((bar, i) => {
bar.style.height = `${12 + (i % 6) * 4}px`;
});
};
const updateWave = (rms) => {
const level = Math.min(1, rms * 3.2);
waveLevel = waveLevel * 0.7 + level * 0.3;
const t = performance.now() / 140;
waveBars.forEach((bar, i) => {
const variance = 0.6 + 0.4 * Math.sin(t + i * 0.6);
const height = 10 + waveLevel * 70 * variance;
bar.style.height = `${height}px`;
});
};
const startWaveLoop = () => {
if (!analyser || !freqData) return;
const render = () => {
analyser.getByteFrequencyData(freqData);
const step = Math.max(1, Math.floor(freqData.length / waveBars.length));
for (let i = 0; i < waveBars.length; i++) {
const v = freqData[i * step] / 255;
const height = 10 + v * 90;
waveBars[i].style.height = `${height}px`;
}
waveAnimId = requestAnimationFrame(render);
};
if (waveAnimId) cancelAnimationFrame(waveAnimId);
waveAnimId = requestAnimationFrame(render);
};
const stopWaveLoop = () => {
if (waveAnimId) {
cancelAnimationFrame(waveAnimId);
waveAnimId = null;
}
};
const renderMessages = (messages) => {
conversation.innerHTML = '';
messages.forEach((msg) => {
const row = document.createElement('div');
row.className = `bubble-row ${msg.role}`;
const avatar = document.createElement('div');
avatar.className = 'small-avatar';
avatar.innerHTML = msg.role === 'user' ? `__AVATAR_USER_SMALL__` : `__AVATAR_AGENT_SMALL__`;
const bubble = document.createElement('div');
bubble.className = `bubble ${msg.role}`;
bubble.textContent = msg.text;
const meta = document.createElement('div');
meta.className = 'meta';
const ts = msg.ts ? new Date(msg.ts) : new Date();
meta.textContent = ts.toLocaleTimeString([], { hour: '2-digit', minute: '2-digit' });
if (msg.role === 'user') {
row.appendChild(meta);
row.appendChild(bubble);
row.appendChild(avatar);
} else {
row.appendChild(avatar);
row.appendChild(bubble);
row.appendChild(meta);
}
conversation.appendChild(row);
renderLatex(bubble);
});
conversation.scrollTop = conversation.scrollHeight;
};
const setState = (next) => {
state = next;
if (state === 'listening') {
statusEl.textContent = 'Listening…';
agentStatusEl.textContent = '';
waveform.classList.add('listening');
waveform.classList.add('realtime');
muteBtn.classList.add('listening');
micSlash.style.display = 'none';
startWaveLoop();
} else if (state === 'thinking') {
statusEl.textContent = '';
agentStatusEl.textContent = 'Thinking…';
waveform.classList.remove('listening');
muteBtn.classList.remove('listening');
micSlash.style.display = 'none';
} else {
statusEl.textContent = 'Muted';
agentStatusEl.textContent = '';
waveform.classList.remove('listening');
waveform.classList.remove('realtime');
muteBtn.classList.remove('listening');
micSlash.style.display = 'block';
stopWaveLoop();
resetWave();
}
};
// Resample mono float PCM from inputRate to outputRate. Each output sample is
// the mean of the input samples that fall into its window. When the rates
// match, the input buffer itself is returned (no copy).
const downsampleBuffer = (buffer, inputRate, outputRate) => {
  if (outputRate === inputRate) return buffer;
  const ratio = inputRate / outputRate;
  const newLength = Math.round(buffer.length / ratio);
  const result = new Float32Array(newLength);
  let offsetResult = 0;
  let offsetBuffer = 0;
  while (offsetResult < result.length) {
    const nextOffsetBuffer = Math.round((offsetResult + 1) * ratio);
    let accum = 0;
    let count = 0;
    for (let i = offsetBuffer; i < nextOffsetBuffer && i < buffer.length; i++) {
      accum += buffer[i];
      count++;
    }
    // A window can be empty when inputRate is below outputRate (or at the
    // very end of the buffer). Dividing by zero would yield NaN, so reuse the
    // nearest available input sample instead.
    result[offsetResult] = count > 0
      ? accum / count
      : buffer[Math.min(offsetBuffer, buffer.length - 1)];
    offsetResult++;
    offsetBuffer = nextOffsetBuffer;
  }
  return result;
};
const floatTo16BitPCM = (float32) => {
const output = new Int16Array(float32.length);
for (let i = 0; i < float32.length; i++) {
let s = Math.max(-1, Math.min(1, float32[i]));
output[i] = s < 0 ? s * 0x8000 : s * 0x7fff;
}
return output;
};
const stopReplyAudio = () => {
if (!currentReplyAudio) return;
try {
currentReplyAudio.pause();
currentReplyAudio.currentTime = 0;
} catch (err) {
// ignore
}
currentReplyAudio = null;
};
const arrayBufferToBase64 = (buffer) => {
let binary = '';
const bytes = new Uint8Array(buffer);
const len = bytes.byteLength;
for (let i = 0; i < len; i++) {
binary += String.fromCharCode(bytes[i]);
}
return btoa(binary);
};
let katexLoading = false;
const katexQueue = [];
const katexOptions = {
delimiters: [
{ left: '$$', right: '$$', display: true },
{ left: '\\\\[', right: '\\\\]', display: true },
{ left: '\\\\(', right: '\\\\)', display: false },
{ left: '$', right: '$', display: false }
],
throwOnError: false
};
const loadScript = (src, onload) => {
const script = document.createElement('script');
script.src = src;
script.async = true;
script.onload = onload;
document.head.appendChild(script);
};
const ensureKatex = () => {
if (window.renderMathInElement) return;
if (katexLoading) return;
katexLoading = true;
if (!document.getElementById('katex-css')) {
const link = document.createElement('link');
link.id = 'katex-css';
link.rel = 'stylesheet';
link.href = 'https://cdn.jsdelivr.net/npm/katex@0.16.10/dist/katex.min.css';
document.head.appendChild(link);
}
loadScript('https://cdn.jsdelivr.net/npm/katex@0.16.10/dist/katex.min.js', () => {
loadScript('https://cdn.jsdelivr.net/npm/katex@0.16.10/dist/contrib/auto-render.min.js', () => {
katexLoading = false;
if (window.renderMathInElement) {
while (katexQueue.length) {
const node = katexQueue.shift();
try {
window.renderMathInElement(node, katexOptions);
} catch (err) {
// ignore render errors
}
}
}
});
});
};
const renderLatex = (el) => {
if (!el) return;
if (window.renderMathInElement) {
try {
window.renderMathInElement(el, katexOptions);
} catch (err) {
// ignore render errors
}
return;
}
katexQueue.push(el);
ensureKatex();
};
let messages = INITIAL_MESSAGES || [];
// Begin a voice session: open the WebSocket, send the 'start' handshake, then
// capture the microphone and stream 16 kHz 16-bit mono PCM frames to the
// server. Server messages update the transcript and play the reply audio.
const startListening = async () => {
  if (listening) return;
  listening = true;
  framesSent = 0;
  // Start the loudness totals from zero; otherwise a session that was muted
  // before any 'result' arrived leaks into this session's avg_rms.
  rmsSum = 0;
  rmsCount = 0;
  setState('listening');
  isMuted = false;
  sendEnabled = true;
  segmentInFlight = false;
  lastVoiceAt = performance.now();
  hadVoice = false;

  // Every handler below closes over this local socket. The shared `ws` is
  // reassigned by the next session, so reading it from a late callback would
  // act on the wrong connection.
  const socket = new WebSocket(WS_URL);
  socket.binaryType = 'arraybuffer';
  ws = socket;
  const isCurrent = () => ws === socket;

  // Put the turn-tracking state back to idle once the server has finished,
  // or failed, a turn.
  const rearmSegment = () => {
    setState(isMuted ? 'muted' : 'listening');
    segmentInFlight = false;
    sendEnabled = !isMuted;
    hadVoice = false;
    lastVoiceAt = performance.now();
    framesSent = 0;
    rmsSum = 0;
    rmsCount = 0;
  };

  socket.onopen = async () => {
    if (!sessionId && crypto?.randomUUID) {
      sessionId = crypto.randomUUID();
    }
    socket.send(JSON.stringify({ event: 'start', content_type: 'audio/pcm;rate=16000;bits=16;channels=1', return_audio: true, llm_provider: llmProvider, session_id: sessionId }));

    let stream;
    try {
      stream = await navigator.mediaDevices.getUserMedia({
        audio: {
          noiseSuppression: true,
          echoCancellation: true,
          autoGainControl: true
        }
      });
    } catch (err) {
      // Permission denied or no input device. Closing the socket lets the
      // onclose handler below return the UI to the muted state.
      socket.close();
      return;
    }
    if (!isCurrent() || !listening) {
      // The user muted (or switched session) while the permission prompt was
      // open: release the microphone instead of wiring it up.
      stream.getTracks().forEach((t) => t.stop());
      return;
    }
    micStream = stream;

    const ctx = new (window.AudioContext || window.webkitAudioContext)();
    audioContext = ctx;
    await ctx.resume();
    // stopListening() may have closed the context during the await.
    if (!isCurrent() || !listening) return;

    const source = ctx.createMediaStreamSource(stream);
    analyser = ctx.createAnalyser();
    analyser.fftSize = 256;
    analyser.smoothingTimeConstant = 0.8;
    freqData = new Uint8Array(analyser.frequencyBinCount);
    source.connect(analyser);
    startWaveLoop();

    processor = ctx.createScriptProcessor(4096, 1, 1);
    processor.onaudioprocess = (event) => {
      if (!listening || socket.readyState !== WebSocket.OPEN) return;
      const input = event.inputBuffer.getChannelData(0);

      // Root-mean-square loudness of this frame.
      let rms = 0;
      for (let i = 0; i < input.length; i++) {
        rms += input[i] * input[i];
      }
      rms = Math.sqrt(rms / input.length);
      rmsSum += rms;
      rmsCount += 1;
      if (!analyser) {
        updateWave(rms);
      }

      // Barge-in: sustained speech while a reply is playing stops the reply.
      // NOTE(review): 32 ms per callback does not match a 4096-sample buffer
      // at typical rates (about 85 ms at 48 kHz) — confirm the intended hold.
      if (rms > BARGEIN_THRESHOLD) {
        bargeInMs += 32;
        if (bargeInMs >= BARGEIN_HOLD_MS) {
          stopReplyAudio();
        }
      } else {
        bargeInMs = 0;
      }

      if (rms > RMS_THRESHOLD) {
        lastVoiceAt = performance.now();
        hadVoice = true;
      }

      if (sendEnabled) {
        const downsampled = downsampleBuffer(input, ctx.sampleRate, 16000);
        const pcm = floatTo16BitPCM(downsampled);
        socket.send(pcm.buffer);
        framesSent += 1;
      }

      // Enough speech followed by enough silence: show the thinking state
      // until the server answers.
      if (
        sendEnabled &&
        !segmentInFlight &&
        hadVoice &&
        framesSent >= MIN_FRAMES_PER_SEGMENT &&
        performance.now() - lastVoiceAt > SILENCE_MS
      ) {
        segmentInFlight = true;
        setState('thinking');
      }
    };
    source.connect(processor);
    processor.connect(ctx.destination);
  };

  socket.onmessage = (event) => {
    if (event.data instanceof ArrayBuffer) {
      // Binary frame: reply audio. Play it and mirror it to Streamlit.
      const audioB64 = arrayBufferToBase64(event.data);
      const blob = new Blob([event.data], { type: 'audio/wav' });
      const url = URL.createObjectURL(blob);
      const audio = new Audio(url);
      currentReplyAudio = audio;
      audio.onended = () => {
        // Release the blob once playback is over.
        URL.revokeObjectURL(url);
        if (currentReplyAudio === audio) currentReplyAudio = null;
      };
      audio.play().catch(() => {
        URL.revokeObjectURL(url);
        sendToStreamlit({ event: 'autoplay_failed', audio_b64: audioB64 });
      });
      sendToStreamlit({ event: 'audio', audio_b64: audioB64 });
      return;
    }

    let data;
    try {
      data = JSON.parse(event.data);
    } catch (err) {
      // A malformed text frame must not break the message handler.
      return;
    }

    if (data.event === 'transcript') {
      if (data.transcript) {
        messages.push({ role: 'user', text: data.transcript, ts: new Date().toISOString() });
        renderMessages(messages);
      }
    }

    if (data.event === 'result') {
      // A reply for a superseded session is still shown, but it must not
      // reset the counters of the session that replaced it.
      if (isCurrent()) rearmSegment();
      if (data.transcript) {
        // Skip the user bubble if a 'transcript' event already added it.
        const last = messages[messages.length - 1];
        if (!last || last.role !== 'user' || last.text !== data.transcript) {
          messages.push({ role: 'user', text: data.transcript, ts: new Date().toISOString() });
        }
      }
      if (data.reply_text) {
        messages.push({ role: 'agent', text: data.reply_text, ts: new Date().toISOString() });
      }
      renderMessages(messages);
      sendToStreamlit({
        event: 'result',
        transcript: data.transcript,
        reply_text: data.reply_text
      });
    }

    if (data.event === 'error') {
      if (isCurrent()) rearmSegment();
      // Close the socket that reported the error, never a newer one.
      if (isMuted || !isCurrent()) socket.close();
    }
  };

  socket.onclose = () => {
    // Connection refused or dropped while this session is live: release the
    // microphone and show the muted state. stopListening() only sends on an
    // open socket, so nothing is written to the closed connection.
    if (isCurrent() && listening) stopListening();
  };
};
const stopListening = () => {
if (!listening) return;
setState('muted');
isMuted = true;
sendEnabled = false;
if (processor) processor.disconnect();
if (audioContext) audioContext.close();
if (micStream) micStream.getTracks().forEach((t) => t.stop());
analyser = null;
freqData = null;
if (ws && ws.readyState === WebSocket.OPEN) {
if (framesSent === 0) {
const silence = new Int16Array(320);
ws.send(silence.buffer);
}
const avgRms = rmsCount ? rmsSum / rmsCount : 0;
ws.send(JSON.stringify({ event: 'stop', prompt: 'Answer briefly.', frames_sent: framesSent, avg_rms: avgRms, llm_provider: llmProvider, session_id: sessionId }));
}
listening = false;
};
muteBtn.addEventListener('click', () => {
if (state === 'muted') {
startListening();
} else if (state === 'listening' || state === 'thinking') {
stopListening();
}
});
if (resetBtn) {
resetBtn.addEventListener('click', () => {
if (listening) stopListening();
messages = [];
renderMessages(messages);
setState('muted');
sendToStreamlit({ event: 'reset_session' });
fetch(`${HTTP_BASE}/v1/agent/reset`, { method: 'POST' }).catch(() => {});
});
}
if (uploadBtn) {
const input = document.createElement('input');
input.type = 'file';
input.multiple = true;
input.accept = '.txt,.md,.pdf,.docx,.csv';
input.style.display = 'none';
document.body.appendChild(input);
uploadBtn.addEventListener('click', () => input.click());
input.addEventListener('change', async () => {
if (!input.files || input.files.length === 0) return;
uploadBtn.disabled = true;
uploadBtn.classList.add('loading');
uploadBtn.innerHTML = '<svg viewBox="0 0 50 50" width="20" height="20"><circle cx="25" cy="25" r="20" stroke="#e8edff" stroke-width="4" fill="none" stroke-linecap="round"><animateTransform attributeName="transform" type="rotate" from="0 25 25" to="360 25 25" dur="0.8s" repeatCount="indefinite"/></circle></svg>';
const form = new FormData();
Array.from(input.files).forEach((f) => form.append('files', f));
try {
await fetch(`${HTTP_BASE}/v1/agent/upload`, { method: 'POST', body: form });
} catch (err) {
// ignore
}
uploadBtn.disabled = false;
uploadBtn.classList.remove('loading');
uploadBtn.innerHTML = '<svg viewBox="0 0 24 24" width="20" height="20" fill="none" stroke="#e8edff" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M21 15v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4"/><polyline points="17 8 12 3 7 8"/><line x1="12" y1="3" x2="12" y2="15"/></svg>';
input.value = '';
});
}
makeBars();
renderMessages(INITIAL_MESSAGES || []);
setState(state);
setProvider(llmProvider, false);
setFrameHeight(980);
</script>
</body>
</html>
"""
# Collapse doubled braces in the static template only (presumably a leftover
# from a str.format-style template — confirm). Running this after the data
# has been substituted would also rewrite "{{" / "}}" inside chat messages.
html = html.replace("{{", "{").replace("}}", "}")

# Fill the placeholders. Static avatar markup goes first and the chat history
# last, so a placeholder token that happens to appear inside a message is
# never picked up by a later replace call.
html = (
    html.replace("__AVATAR_USER__", _avatar_svg("user"))
    .replace("__AVATAR_AGENT__", _avatar_svg("agent"))
    .replace("__AVATAR_USER_SMALL__", _avatar_svg("user", small=True))
    .replace("__AVATAR_AGENT_SMALL__", _avatar_svg("agent", small=True))
    .replace("__WS_URL_JSON__", ws_url_json)
    .replace("__HTTP_URL_JSON__", http_url_json)
    .replace("__STATE_JSON__", state_json)
    .replace("__PROVIDER_JSON__", provider_json)
    .replace("__MESSAGES_JSON__", messages_json)
)

# NOTE(review): st.components.v1.html is documented as rendering static HTML.
# Confirm that it actually delivers the page's "streamlit:setComponentValue"
# payload; if it does not, `value` is never a dict and the event handling
# after this call never runs.
value = components.v1.html(html, height=980, scrolling=False)
# React to the event dict posted by the embedded page, if there is one.
if isinstance(value, dict):
    event = value.get("event")
    if event == "result":
        # A finished turn: persist both sides of the exchange.
        transcript = value.get("transcript")
        reply_text = value.get("reply_text")
        # Timezone-aware stamp. The page feeds `ts` to `new Date(...)`, and
        # an ISO string without an offset is parsed as local time there, so
        # a naive UTC value is shown shifted by the viewer's UTC offset.
        now_ts = datetime.now(timezone.utc).isoformat(timespec="milliseconds")
        if transcript:
            st.session_state.messages.append(
                {"role": "user", "text": transcript, "ts": now_ts}
            )
        if reply_text:
            st.session_state.messages.append(
                {"role": "agent", "text": reply_text, "ts": now_ts}
            )
        st.session_state.state = "muted"
    elif event == "autoplay_failed":
        # The browser refused to autoplay: keep the audio and show a player.
        st.session_state.last_audio_b64 = value.get("audio_b64")
        st.session_state.show_player = True
    elif event == "audio":
        st.session_state.last_audio_b64 = value.get("audio_b64")
    elif event == "reset_session":
        _clear_session_data()
        try:
            with httpx.Client(timeout=10.0) as client:
                client.post(f"{_http_base()}/v1/agent/reset")
        except (httpx.HTTPError, httpx.InvalidURL):
            # Best effort: the local reset must still complete when the
            # backend is unreachable or the configured URL is unusable.
            pass
        st.session_state.messages = []
        st.session_state.state = "muted"
        st.session_state.last_audio_b64 = None
        st.session_state.show_player = False
        st.rerun()
    elif event == "provider_change":
        provider = value.get("provider")
        # Only accept known providers, and skip the rerun when nothing
        # changed — a rerun with identical state rebuilds the same page.
        if (
            provider in {"azure_openai", "local_agent", "foundry_agent"}
            and provider != st.session_state.ui_provider
        ):
            st.session_state.ui_provider = provider
            st.rerun()
# Fallback player: shown once the page has reported that autoplay failed and
# there is reply audio to offer.
if st.session_state.show_player:
    encoded_reply = st.session_state.last_audio_b64
    if encoded_reply:
        try:
            st.audio(base64.b64decode(encoded_reply), format="audio/wav")
        except Exception:
            # Undecodable or unplayable payload: hide the player again.
            st.session_state.show_player = False