# sitsope's picture
# add changes
# 76ccddb
"""
app.py — Interface Gradio pour le Voice Agent LiveKit
Démarre :
1. Le worker LiveKit agent (subprocess)
2. L'interface Gradio avec UI WebRTC
"""
import os
import sys
import json
import subprocess
import threading
import gradio as gr
import uvicorn
from fastapi import FastAPI
from fastapi.responses import HTMLResponse
from pathlib import Path
from dotenv import load_dotenv
# Ajouter le dossier agent au path
sys.path.insert(0, str(Path(__file__).parent / "agent"))
load_dotenv()
# ──────────────────────────────────────────────
# Démarrage de l'agent en arrière-plan
# ──────────────────────────────────────────────
_agent_process: subprocess.Popen | None = None
def start_agent_worker():
"""Lance le worker LiveKit dans un sous-processus."""
global _agent_process
agent_path = Path(__file__).parent / "agent" / "voice_agent.py"
_agent_process = subprocess.Popen(
[sys.executable, str(agent_path), "start"],
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
text=True,
)
print(f"[Agent] Worker démarré — PID {_agent_process.pid}")
for line in _agent_process.stdout:
print(f"[Agent] {line}", end="")
def launch_agent_thread():
    """Start ``start_agent_worker`` on a background thread and return at once."""
    # daemon=True: the thread must not keep the interpreter alive at shutdown.
    threading.Thread(daemon=True, target=start_agent_worker).start()
# ──────────────────────────────────────────────
# Génération de token LiveKit
# ──────────────────────────────────────────────
def get_token(room_name: str = "") -> str:
    """Return LiveKit connection details for *room_name* as a JSON string.

    An empty *room_name* is forwarded as ``None`` to
    ``token_server.get_connection_details``.  On success the details are
    serialized with a two-space indent.  Any exception (including a failed
    import of ``token_server``) is reported as
    ``{"success": false, "error": "<message>"}`` instead of being raised.
    """
    try:
        # Imported lazily so a missing or broken token_server surfaces as an
        # error payload rather than preventing this module from loading.
        from token_server import get_connection_details

        requested_room = room_name if room_name else None
        payload = get_connection_details(room_name=requested_room)
        rendered = json.dumps(payload, indent=2)
    except Exception as exc:
        failure = {"success": False, "error": str(exc)}
        rendered = json.dumps(failure)
    return rendered
# ──────────────────────────────────────────────
# HTML de l'interface LiveKit WebRTC
# ──────────────────────────────────────────────
def build_livekit_html() -> str:
    """Render the voice UI page with the LiveKit URL and token injected.

    Connection details come from ``token_server.get_connection_details()``
    (its ``"url"`` and ``"token"`` entries).  If that fails for any reason the
    page is still rendered, using the ``LIVEKIT_URL`` environment variable
    (empty string if unset) and an empty token; the reason is printed.

    Returns:
        ``LIVEKIT_CLIENT_HTML_TEMPLATE`` with ``__LIVEKIT_URL__``,
        ``__LIVEKIT_TOKEN__`` and ``__AUTO_CONNECT__`` substituted.
    """
    # Function-scope import keeps this block self-contained.
    from html import escape

    try:
        from token_server import get_connection_details

        details = get_connection_details()
        injected_url = details["url"]
        injected_token = details["token"]
        auto_connect = "true"
    except Exception as exc:
        # Best effort: serve the page anyway, but say why no token is present
        # instead of swallowing the error silently.
        print(f"[UI] Détails de connexion indisponibles — {exc}")
        injected_url = os.getenv("LIVEKIT_URL", "")
        injected_token = ""
        auto_connect = "false"

    # Both values end up inside double-quoted HTML attributes, so they must be
    # escaped; the browser decodes the entities again when JS reads `.value`.
    substitutions = {
        "__LIVEKIT_URL__": escape(str(injected_url), quote=True),
        "__LIVEKIT_TOKEN__": escape(str(injected_token), quote=True),
        # NOTE(review): the template in this file has no __AUTO_CONNECT__
        # marker, so this substitution currently changes nothing.
        "__AUTO_CONNECT__": auto_connect,
    }
    page = LIVEKIT_CLIENT_HTML_TEMPLATE
    for placeholder, value in substitutions.items():
        page = page.replace(placeholder, value)
    return page
# Standalone HTML page for the LiveKit WebRTC client.
# It loads livekit-client v2 from the jsDelivr CDN and reads its connection
# settings from two hidden inputs whose values are the placeholders
# __LIVEKIT_URL__ and __LIVEKIT_TOKEN__; build_livekit_html() substitutes them.
LIVEKIT_CLIENT_HTML_TEMPLATE = """
<!DOCTYPE html>
<html lang="fr">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Voice Agent</title>
<script src="https://cdn.jsdelivr.net/npm/livekit-client@2/dist/livekit-client.umd.min.js"></script>
<style>
* { box-sizing: border-box; margin: 0; padding: 0; }
body {
font-family: 'Segoe UI', system-ui, sans-serif;
background: #0f0f13;
color: #e8e8f0;
min-height: 100vh;
display: flex;
align-items: center;
justify-content: center;
}
.container {
width: 100%;
max-width: 480px;
padding: 2rem;
}
.card {
background: #1a1a24;
border: 1px solid #2a2a3a;
border-radius: 20px;
padding: 2rem;
text-align: center;
}
h2 {
font-size: 1.4rem;
font-weight: 600;
margin-bottom: 0.4rem;
color: #fff;
}
.subtitle { color: #7070a0; font-size: 0.85rem; margin-bottom: 2rem; }
/* Visualiseur audio */
.audio-viz {
display: flex;
align-items: center;
justify-content: center;
gap: 5px;
height: 80px;
margin: 1.5rem 0;
}
.bar {
width: 5px;
border-radius: 3px;
background: #4c4cff;
transition: height 0.1s ease;
height: 8px;
}
.bar.active { animation: pulse 0.8s ease-in-out infinite alternate; }
@keyframes pulse {
from { height: 8px; background: #4c4cff; }
to { height: 48px; background: #7c7cff; }
}
.bar:nth-child(2) { animation-delay: 0.1s; }
.bar:nth-child(3) { animation-delay: 0.2s; }
.bar:nth-child(4) { animation-delay: 0.05s; }
.bar:nth-child(5) { animation-delay: 0.15s; }
.bar:nth-child(6) { animation-delay: 0.25s; }
.bar:nth-child(7) { animation-delay: 0.08s; }
.bar:nth-child(8) { animation-delay: 0.18s; }
/* Status */
.status-badge {
display: inline-flex;
align-items: center;
gap: 6px;
padding: 6px 14px;
border-radius: 999px;
font-size: 0.8rem;
font-weight: 500;
margin-bottom: 1.5rem;
background: #12121c;
border: 1px solid #2a2a3a;
}
.dot {
width: 8px; height: 8px;
border-radius: 50%;
background: #444;
}
.dot.connected { background: #22c55e; box-shadow: 0 0 6px #22c55e; }
.dot.connecting { background: #f59e0b; animation: blink 1s infinite; }
.dot.error { background: #ef4444; }
@keyframes blink { 0%,100%{opacity:1} 50%{opacity:0.3} }
/* Bouton principal */
.btn-main {
width: 100%;
padding: 14px;
border-radius: 12px;
border: none;
font-size: 1rem;
font-weight: 600;
cursor: pointer;
transition: all 0.2s;
margin-bottom: 0.8rem;
}
.btn-connect {
background: linear-gradient(135deg, #4c4cff, #7c4cff);
color: #fff;
}
.btn-connect:hover { filter: brightness(1.1); transform: translateY(-1px); }
.btn-disconnect {
background: #2a1a1a;
color: #ff7070;
border: 1px solid #5a2a2a;
}
.btn-disconnect:hover { background: #3a2020; }
.btn-main:disabled { opacity: 0.4; cursor: not-allowed; transform: none; }
/* Mute toggle */
.btn-mute {
width: 100%;
padding: 10px;
border-radius: 10px;
border: 1px solid #2a2a3a;
background: #12121c;
color: #a0a0c0;
font-size: 0.9rem;
cursor: pointer;
transition: all 0.2s;
}
.btn-mute:hover { border-color: #4c4cff; color: #fff; }
.btn-mute.muted { border-color: #ef4444; color: #ef4444; }
/* Transcript */
.transcript {
margin-top: 1.5rem;
background: #12121c;
border: 1px solid #2a2a3a;
border-radius: 12px;
padding: 1rem;
max-height: 180px;
overflow-y: auto;
text-align: left;
font-size: 0.82rem;
color: #8888aa;
line-height: 1.5;
}
.transcript:empty::before { content: "La transcription apparaîtra ici…"; }
.transcript .msg { margin-bottom: 0.5rem; }
.transcript .msg.user { color: #7c7cff; }
.transcript .msg.agent { color: #e8e8f0; }
/* Config panel */
.config { margin-bottom: 1.5rem; }
.config input {
width: 100%;
padding: 10px 14px;
background: #12121c;
border: 1px solid #2a2a3a;
border-radius: 10px;
color: #e8e8f0;
font-size: 0.85rem;
outline: none;
transition: border-color 0.2s;
}
.config input:focus { border-color: #4c4cff; }
.config label { display: block; font-size: 0.78rem; color: #7070a0; margin-bottom: 6px; }
</style>
</head>
<body>
<div class="container">
<div class="card">
<h2>🎙️ Voice Agent</h2>
<p class="subtitle"> LiveKit </p>
<div id="statusBadge" class="status-badge">
<span class="dot" id="dot"></span>
<span id="statusText">Déconnecté</span>
</div>
<div class="audio-viz">
<div class="bar" id="b1"></div>
<div class="bar" id="b2"></div>
<div class="bar" id="b3"></div>
<div class="bar" id="b4"></div>
<div class="bar" id="b5"></div>
<div class="bar" id="b6"></div>
<div class="bar" id="b7"></div>
<div class="bar" id="b8"></div>
</div>
<input type="hidden" id="livekitUrl" value="__LIVEKIT_URL__" />
<input type="hidden" id="livekitToken" value="__LIVEKIT_TOKEN__" />
<button id="btnConnect" class="btn-main btn-connect">
Se connecter
</button>
<button id="btnDisconnect" class="btn-main btn-disconnect" style="display:none">
⏹ Déconnecter
</button>
<button id="btnMute" class="btn-mute" style="display:none">
🎤 Micro actif
</button>
<div class="transcript" id="transcript"></div>
</div>
</div>
<script>
(function() {
const { Room, RoomEvent, Track, TrackEvent, createLocalAudioTrack } = LivekitClient;
let room = null;
let localAudio = null;
let isMuted = false;
function setStatus(state, text) {
document.getElementById('dot').className = 'dot ' + state;
document.getElementById('statusText').textContent = text;
}
function addTranscript(role, text) {
const t = document.getElementById('transcript');
const div = document.createElement('div');
div.className = 'msg ' + role;
div.textContent = (role === 'user' ? '👤 ' : '🤖 ') + text;
t.appendChild(div);
t.scrollTop = t.scrollHeight;
}
function startAudioAnimation() {
document.querySelectorAll('.bar').forEach(b => b.classList.add('active'));
}
function stopAudioAnimation() {
document.querySelectorAll('.bar').forEach(b => b.classList.remove('active'));
}
async function connectAgent() {
// Check for a secure context (HTTPS or localhost)
if (!navigator.mediaDevices || !navigator.mediaDevices.getUserMedia) {
setStatus('error', 'Micro indisponible');
addTranscript('agent', '⚠️ Micro inaccessible : ouvrez la page en HTTPS (pas en HTTP).');
return;
}
const url = document.getElementById('livekitUrl').value.trim();
const token = document.getElementById('livekitToken').value.trim();
if (!url || !token) {
setStatus('error', 'Config manquante');
return;
}
setStatus('connecting', 'Connexion…');
document.getElementById('btnConnect').disabled = true;
try {
room = new Room({ audioCaptureDefaults: { echoCancellation: true, noiseSuppression: true } });
room.on(RoomEvent.Connected, () => {
setStatus('connected', 'Connecté');
document.getElementById('btnConnect').style.display = 'none';
document.getElementById('btnDisconnect').style.display = 'block';
document.getElementById('btnMute').style.display = 'block';
addTranscript('agent', 'Connexion établie — parlez !');
});
room.on(RoomEvent.Disconnected, () => {
setStatus('', 'Déconnecté');
document.getElementById('btnConnect').style.display = 'block';
document.getElementById('btnConnect').disabled = false;
document.getElementById('btnDisconnect').style.display = 'none';
document.getElementById('btnMute').style.display = 'none';
// Reset the mute state so the next session starts with a live microphone label
isMuted = false;
const muteBtn = document.getElementById('btnMute');
muteBtn.textContent = '🎤 Micro actif';
muteBtn.classList.remove('muted');
stopAudioAnimation();
});
room.on(RoomEvent.TrackSubscribed, (track) => {
if (track.kind === Track.Kind.Audio) {
document.body.appendChild(track.attach());
startAudioAnimation();
}
});
room.on(RoomEvent.TrackUnsubscribed, (track) => {
if (track.kind === Track.Kind.Audio) {
track.detach().forEach(el => el.remove());
stopAudioAnimation();
}
});
room.on(RoomEvent.DataReceived, (data) => {
try {
const msg = JSON.parse(new TextDecoder().decode(data));
if (msg.type === 'transcript') addTranscript(msg.role || 'agent', msg.text);
} catch (_) {}
});
await room.connect(url, token);
localAudio = await createLocalAudioTrack({ echoCancellation: true, noiseSuppression: true, autoGainControl: true });
await room.localParticipant.publishTrack(localAudio);
} catch (err) {
setStatus('error', 'Erreur : ' + err.message);
document.getElementById('btnConnect').disabled = false;
}
}
async function disconnectAgent() {
if (room) { await room.disconnect(); room = null; localAudio = null; }
}
function toggleMute() {
if (!localAudio) return;
isMuted = !isMuted;
// LocalTrack.mute() takes no argument; unmute() is the separate inverse call
if (isMuted) { localAudio.mute(); } else { localAudio.unmute(); }
const btn = document.getElementById('btnMute');
btn.textContent = isMuted ? '🔇 Micro coupé' : '🎤 Micro actif';
btn.classList.toggle('muted', isMuted);
}
// Wire up the buttons here (no inline onclick attributes)
document.getElementById('btnConnect').addEventListener('click', connectAgent);
document.getElementById('btnDisconnect').addEventListener('click', disconnectAgent);
document.getElementById('btnMute').addEventListener('click', toggleMute);
// No auto-connect: the user has to click "Se connecter"
})();
</script>
</body>
</html>
"""
# ──────────────────────────────────────────────
# Interface Gradio
# ──────────────────────────────────────────────
def build_gradio_app() -> gr.Blocks:
    """Assemble and return the Gradio ``Blocks`` UI.

    The layout has a heading, two tabs and a footer:

    * "Agent Vocal" embeds the ``/voice-ui`` page in an iframe that is allowed
      to use the microphone.
    * "Générer un Token" passes an optional room name to ``get_token`` and
      shows the returned JSON.
    """
    heading_md = (
        "\n"
        "# 🎙️ Voice Agent — LiveKit + Gradio\n"
        "Assistant vocal propulsé par **LiveKit** et **Whisper**.\n"
    )
    voice_iframe = (
        "<iframe src='/voice-ui' "
        "style='width:100%;height:700px;border:none;border-radius:12px;' "
        "allow='microphone; autoplay; clipboard-write' "
        "allowtransparency='true'></iframe>"
    )
    token_help_md = (
        "Générez un token LiveKit depuis le serveur. "
        "Copiez `url` et `token` dans l'interface ci-dessus."
    )
    footer_md = (
        "<center style='color:#555;font-size:0.8rem;margin-top:1rem'>"
        "LiveKit Agents · OpenAI GPT-4o-mini · Silero VAD"
        "</center>"
    )

    with gr.Blocks(title="Voice Agent LiveKit") as demo:
        gr.Markdown(heading_md)

        with gr.Tabs():
            # Agent tab: the WebRTC client is served by the /voice-ui route.
            with gr.TabItem("🔊 Agent Vocal"):
                gr.HTML(voice_iframe)

            # Token tab: manual generation of connection details.
            with gr.TabItem("🔑 Générer un Token"):
                gr.Markdown(token_help_md)
                room_box = gr.Textbox(
                    label="Nom de la room (optionnel)",
                    placeholder="voice-room-demo",
                    max_lines=1,
                )
                generate_button = gr.Button("Générer le Token", variant="primary")
                details_view = gr.Code(language="json", label="Détails de connexion")
                generate_button.click(
                    fn=get_token,
                    inputs=[room_box],
                    outputs=[details_view],
                )

        # Footer
        gr.Markdown(footer_md)

    return demo
# ──────────────────────────────────────────────
# Main
# ──────────────────────────────────────────────
# Script entry point: optionally start the agent worker, then serve the
# FastAPI app (voice UI route + mounted Gradio interface) with uvicorn.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--no-agent", action="store_true", help="Ne pas lancer l'agent")
    parser.add_argument("--port", type=int, default=7860)
    args = parser.parse_args()

    # Start the agent worker in the background unless --no-agent was given.
    if not args.no_agent:
        print("[Main] Démarrage du worker LiveKit Agent…")
        launch_agent_thread()

    # Build the FastAPI app; Gradio is mounted on it below.
    fastapi_app = FastAPI()

    # Registered before Gradio is mounted at "/" so this route is matched first.
    @fastapi_app.get("/voice-ui")
    async def voice_ui():
        """Serve the LiveKit client page rendered by build_livekit_html()."""
        return HTMLResponse(
            content=build_livekit_html(),
            headers={
                "Permissions-Policy": "microphone=*, camera=*",
                "Cross-Origin-Opener-Policy": "same-origin-allow-popups",
                # The page embeds a freshly generated token on every request,
                # so it should never be served from a cache.
                "Cache-Control": "no-store",
            },
        )

    gradio_demo = build_gradio_app()
    app = gr.mount_gradio_app(fastapi_app, gradio_demo, path="/")
    print(f"[Main] App lancée sur http://localhost:{args.port}")
    try:
        uvicorn.run(app, host="0.0.0.0", port=args.port)
    finally:
        # The worker runs in its own process; stop it with the server so it is
        # not left orphaned when only this process is shut down.
        worker = _agent_process
        if worker is not None and worker.poll() is None:
            print("[Main] Arrêt du worker LiveKit Agent…")
            worker.terminate()
            try:
                worker.wait(timeout=5)
            except subprocess.TimeoutExpired:
                # Did not exit after SIGTERM: force it and reap the process.
                worker.kill()
                worker.wait()