Update index.html
index.html  +314 −111
CHANGED
@@ -1,13 +1,73 @@
  <!DOCTYPE html>
- <html>
  <head>
  <meta charset="UTF-8" />
  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
- <title>
  <style>
- @import url("https://fonts.googleapis.com/css2?family=Source+Sans+3:wght@300;400;600;700&display=swap");
-
  </style>
  <script src="css/tailwind-3.4.17.js"></script>
  <script type="module">
  const MODEL_ID = "moshi_1b_en_fr_q4k";
@@ -26,56 +86,93 @@
  let source = null;
  let modelInitialized = false;
  let pendingStart = false;
-
- // Performance tracking
  let audioChunksProcessed = 0;
  let sessionStartTime = 0;

  function updateStatusDiv(message) {
- document.querySelector("#status-div")
  }

  function updateDiagnostics() {
  const diagnostics = document.querySelector("#diagnostics");
  if (!diagnostics) return;
-
- const cpuCount = navigator.hardwareConcurrency ||
-
- // Only update metrics when recording, otherwise show final values
  if (isRecording && sessionStartTime) {
- // Calculate real-time factor (audio processed / wall clock time)
- // >1 = faster than real-time, <1 = slower than real-time
  const audioProcessed = audioChunksProcessed * (1024 / 24000);
  const audioSessionDuration = (Date.now() - sessionStartTime) / 1000;
  const realTimeFactor = audioSessionDuration > 0 ? (audioProcessed / audioSessionDuration) : 0;
-
-
- let factorColor = '';
  if (realTimeFactor >= 0.95) {
- factorColor =
  } else if (realTimeFactor >= 0.8) {
- factorColor =
  }
-
-
-
-
-
  } else if (!sessionStartTime) {
- diagnostics.innerHTML = `
-
  }

- window.addEventListener(
  setInterval(updateDiagnostics, 200);

  function initializeModel() {
  if (modelInitialized) return;
-
  const button = document.querySelector("#speech-button");
  button.disabled = true;
- button.className =
-
  moshiWorker.postMessage({
  command: "initialize",
  weightsURL: WEIGHTS_URL,
@@ -86,28 +183,27 @@
  });
  }

- // Handle messages from worker
  moshiWorker.addEventListener("message", async (event) => {
  const data = event.data;
  if (data.status === "model_ready") {
  modelInitialized = true;
- updateStatusDiv("Model loaded
-
  const button = document.querySelector("#speech-button");
  button.disabled = false;
- button.className =
-
  if (pendingStart) {
  pendingStart = false;
  await startRecording();
  }
  } else if (data.status === "streaming") {
- // Add new word to transcription in real-time
  const outputDiv = document.querySelector("#output-generation");
  const placeholder = document.querySelector("#output-placeholder");
-
  if (placeholder) placeholder.hidden = true;
-
  if (outputDiv.textContent) {
  outputDiv.textContent += " " + data.word;
  } else {
@@ -127,11 +223,10 @@
  function updateStatus(data) {
  const { status, message, word } = data;
  const outputDiv = document.querySelector("#output-generation");
-
  if (status === "loading" || status === "decoding") {
  updateStatusDiv(message || (status === "loading" ? "Loading..." : "Decoding..."));
  } else if (status === "streaming") {
- // Add new word to the transcription in real-time
  if (outputDiv.textContent) {
  outputDiv.textContent += " " + word;
  } else {
@@ -147,37 +242,37 @@
  try {
  audioStream = await navigator.mediaDevices.getUserMedia({ audio: true });
  updateStatusDiv("Microphone access granted");
-
  audioContext = new AudioContext({ sampleRate: 24000 });
  source = audioContext.createMediaStreamSource(audioStream);
-
  processor = audioContext.createScriptProcessor(1024, 1, 1);
-
- processor.onaudioprocess = function(event) {
  if (!isRecording || !modelInitialized) return;
-
  const inputBuffer = event.inputBuffer;
  const inputData = inputBuffer.getChannelData(0);
-
- // Send audio chunk to worker
  const audioChunk = new Float32Array(inputData);
- moshiWorker.postMessage(
-
-
-
  };
-
  source.connect(processor);
  processor.connect(audioContext.destination);
-
  } catch (error) {
  updateStatusDiv("Microphone access denied: " + error.message);
  throw error;
  }
  }
-
  function stopMicrophone() {
- // Disconnect audio nodes
  if (processor) {
  processor.disconnect();
  processor = null;
@@ -190,42 +285,38 @@
  audioContext.close();
  audioContext = null;
  }
-
- // Stop media stream
  if (audioStream) {
- audioStream.getTracks().forEach(track => track.stop());
  audioStream = null;
  }
-
  updateStatusDiv("Microphone stopped");
  }

  async function startRecording() {
  const button = document.querySelector("#speech-button");
-
  try {
  updateStatusDiv("Requesting microphone access...");
  await startMicrophone();
-
- // Reset performance counters
  audioChunksProcessed = 0;
  sessionStartTime = Date.now();
-
- // Start streaming session
  moshiWorker.postMessage({ command: "start_stream" });
-
  isRecording = true;
- button.textContent = "Stop
- button.className =
  updateStatusDiv("Listening...");
-
- // Clear previous transcription
  document.querySelector("#output-generation").textContent = "";
  document.querySelector("#output-generation").hidden = true;
  document.querySelector("#output-placeholder").hidden = true;
-
  } catch (error) {
- console.error(
  updateStatusDiv("Error: " + error.message);
  pendingStart = false;
  }
@@ -233,67 +324,179 @@

  document.querySelector("#speech-button").addEventListener("click", async () => {
  const button = document.querySelector("#speech-button");
-
  if (!isRecording) {
- // Check if model is ready
  if (!modelInitialized) {
  pendingStart = true;
  initializeModel();
  return;
  }
-
  await startRecording();
  } else {
  stopMicrophone();
-
- // End streaming session
  moshiWorker.postMessage({ command: "stop_stream" });
-
  isRecording = false;
- button.textContent = "Start
- button.className =
  updateStatusDiv("Ready to start");
  }
  });
  </script>
  </head>
-
-
-
-
- <
-
-
- It understands English and French, and uses the
- <a href="https://huggingface.co/kyutai/stt-1b-en_fr" target="_blank" class="underline hover:text-blue-600">Kyutai STT model</a>
- together with a WASM runtime built in
- <a href="https://github.com/huggingface/candle/" target="_blank" class="underline hover:text-blue-600">Candle</a>.
- </p>
- </div>
-
- <div>
- <button id="speech-button" class="bg-gray-700 hover:bg-gray-800 text-white font-normal py-2 px-4 rounded">
- Start Speech
- </button>
- <div class="mt-2 text-gray-600 text-sm space-y-1">
- <div>Status: <span id="status-div">Click "Start Speech" to begin</span></div>
- <div id="diagnostics">CPUs: -, Real-time factor: 0.00x, Duration: 0.0s</div>
  </div>
- </div>

-
-
-
-
-
  </div>
- </

-
-
-
-
-
  </main>
  </body>
  </html>
  <!DOCTYPE html>
+ <html lang="en">
  <head>
  <meta charset="UTF-8" />
  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+ <title>Eburon ASR • Streaming Speech Recognition</title>
  <style>
+ @import url("https://fonts.googleapis.com/css2?family=Source+Sans+3:wght@300;400;600;700;800&display=swap");
+
+ html,
+ body {
+ font-family: "Source Sans 3", system-ui, -apple-system, Segoe UI, Roboto, sans-serif;
+ min-height: 100%;
+ background:
+ radial-gradient(circle at top left, rgba(56, 189, 248, 0.12), transparent 28%),
+ radial-gradient(circle at top right, rgba(139, 92, 246, 0.14), transparent 24%),
+ linear-gradient(180deg, #07111f 0%, #0a1628 42%, #08111e 100%);
+ color: #e5eefb;
+ }
+
+ .glass {
+ background: rgba(10, 18, 34, 0.72);
+ border: 1px solid rgba(148, 163, 184, 0.16);
+ box-shadow:
+ 0 10px 30px rgba(0, 0, 0, 0.28),
+ inset 0 1px 0 rgba(255, 255, 255, 0.03);
+ backdrop-filter: blur(14px);
+ -webkit-backdrop-filter: blur(14px);
+ }
+
+ .brand-glow {
+ box-shadow:
+ 0 0 0 1px rgba(56, 189, 248, 0.15),
+ 0 0 40px rgba(56, 189, 248, 0.08),
+ 0 0 80px rgba(139, 92, 246, 0.06);
+ }
+
+ .brand-gradient {
+ background: linear-gradient(135deg, #38bdf8 0%, #818cf8 50%, #c084fc 100%);
+ }
+
+ .text-brand {
+ background: linear-gradient(135deg, #e0f2fe 0%, #bfdbfe 35%, #c4b5fd 100%);
+ -webkit-background-clip: text;
+ -webkit-text-fill-color: transparent;
+ background-clip: text;
+ }
+
+ .status-dot {
+ width: 0.65rem;
+ height: 0.65rem;
+ border-radius: 9999px;
+ display: inline-block;
+ }
+
+ .scroll-soft::-webkit-scrollbar {
+ width: 10px;
+ }
+
+ .scroll-soft::-webkit-scrollbar-track {
+ background: rgba(255, 255, 255, 0.03);
+ border-radius: 9999px;
+ }
+
+ .scroll-soft::-webkit-scrollbar-thumb {
+ background: rgba(148, 163, 184, 0.26);
+ border-radius: 9999px;
+ }
  </style>
+
  <script src="css/tailwind-3.4.17.js"></script>
  <script type="module">
  const MODEL_ID = "moshi_1b_en_fr_q4k";

  let source = null;
  let modelInitialized = false;
  let pendingStart = false;
+
  let audioChunksProcessed = 0;
  let sessionStartTime = 0;

  function updateStatusDiv(message) {
+ const statusEl = document.querySelector("#status-div");
+ statusEl.textContent = message;
+
+ const liveBadge = document.querySelector("#live-indicator");
+ if (!liveBadge) return;
+
+ if (message.toLowerCase().includes("listening")) {
+ liveBadge.className = "status-dot bg-emerald-400 animate-pulse";
+ } else if (message.toLowerCase().includes("loading") || message.toLowerCase().includes("requesting")) {
+ liveBadge.className = "status-dot bg-amber-400 animate-pulse";
+ } else if (message.toLowerCase().includes("error") || message.toLowerCase().includes("denied")) {
+ liveBadge.className = "status-dot bg-rose-400";
+ } else {
+ liveBadge.className = "status-dot bg-sky-400";
+ }
  }

  function updateDiagnostics() {
  const diagnostics = document.querySelector("#diagnostics");
  if (!diagnostics) return;
+
+ const cpuCount = navigator.hardwareConcurrency || "unknown";
+
  if (isRecording && sessionStartTime) {
  const audioProcessed = audioChunksProcessed * (1024 / 24000);
  const audioSessionDuration = (Date.now() - sessionStartTime) / 1000;
  const realTimeFactor = audioSessionDuration > 0 ? (audioProcessed / audioSessionDuration) : 0;
+
+ let factorColor = "text-rose-300";
  if (realTimeFactor >= 0.95) {
+ factorColor = "text-emerald-300";
  } else if (realTimeFactor >= 0.8) {
+ factorColor = "text-amber-300";
  }
+
+ diagnostics.innerHTML = `
+ <div class="grid grid-cols-1 sm:grid-cols-3 gap-3">
+ <div class="rounded-xl bg-white/5 border border-white/10 px-4 py-3">
+ <div class="text-[11px] uppercase tracking-[0.18em] text-slate-400">CPU Threads</div>
+ <div class="text-lg font-semibold text-slate-100">${cpuCount}</div>
+ </div>
+ <div class="rounded-xl bg-white/5 border border-white/10 px-4 py-3">
+ <div class="text-[11px] uppercase tracking-[0.18em] text-slate-400">Real-time Factor</div>
+ <div class="text-lg font-semibold ${factorColor}">${realTimeFactor.toFixed(2)}x</div>
+ </div>
+ <div class="rounded-xl bg-white/5 border border-white/10 px-4 py-3">
+ <div class="text-[11px] uppercase tracking-[0.18em] text-slate-400">Session Duration</div>
+ <div class="text-lg font-semibold text-slate-100">${audioSessionDuration.toFixed(1)}s</div>
+ </div>
+ </div>
+ `;
  } else if (!sessionStartTime) {
+ diagnostics.innerHTML = `
+ <div class="grid grid-cols-1 sm:grid-cols-3 gap-3">
+ <div class="rounded-xl bg-white/5 border border-white/10 px-4 py-3">
+ <div class="text-[11px] uppercase tracking-[0.18em] text-slate-400">CPU Threads</div>
+ <div class="text-lg font-semibold text-slate-100">${cpuCount}</div>
+ </div>
+ <div class="rounded-xl bg-white/5 border border-white/10 px-4 py-3">
+ <div class="text-[11px] uppercase tracking-[0.18em] text-slate-400">Real-time Factor</div>
+ <div class="text-lg font-semibold text-slate-400">0.00x</div>
+ </div>
+ <div class="rounded-xl bg-white/5 border border-white/10 px-4 py-3">
+ <div class="text-[11px] uppercase tracking-[0.18em] text-slate-400">Session Duration</div>
+ <div class="text-lg font-semibold text-slate-400">0.0s</div>
+ </div>
+ </div>
+ `;
+ }
  }

+ window.addEventListener("load", updateDiagnostics);
  setInterval(updateDiagnostics, 200);

  function initializeModel() {
  if (modelInitialized) return;
+
  const button = document.querySelector("#speech-button");
  button.disabled = true;
+ button.className =
+ "inline-flex items-center justify-center gap-2 rounded-2xl bg-slate-700/70 border border-slate-500/20 px-5 py-3 text-slate-400 font-semibold cursor-not-allowed shadow-lg";
+
  moshiWorker.postMessage({
  command: "initialize",
  weightsURL: WEIGHTS_URL,

  });
  }

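Note on the real-time factor computed in updateDiagnostics above: each ScriptProcessor callback delivers 1024 samples at 24 kHz, so the factor is simply audio seconds processed divided by wall-clock seconds. A small sanity check (the chunk size and sample rate come from the diff; the counts below are made up for illustration):

// 1024 samples at 24 kHz ≈ 0.0427 s of audio per chunk
const chunkSeconds = 1024 / 24000;
// e.g. 700 chunks processed over 30 s of wall-clock time
const audioSeconds = 700 * chunkSeconds;   // ≈ 29.9 s of audio
const realTimeFactor = audioSeconds / 30;  // ≈ 1.00, i.e. keeping up with the microphone
// The diagnostics grid renders this green at >= 0.95, amber at >= 0.8, and rose below that.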
  moshiWorker.addEventListener("message", async (event) => {
  const data = event.data;
  if (data.status === "model_ready") {
  modelInitialized = true;
+ updateStatusDiv("Model loaded • Eburon ASR is ready");
+
  const button = document.querySelector("#speech-button");
  button.disabled = false;
+ button.className =
+ "inline-flex items-center justify-center gap-2 rounded-2xl bg-gradient-to-r from-sky-500 via-indigo-500 to-violet-500 hover:opacity-95 px-5 py-3 text-white font-semibold shadow-[0_10px_30px_rgba(56,189,248,0.22)] transition";
+
  if (pendingStart) {
  pendingStart = false;
  await startRecording();
  }
  } else if (data.status === "streaming") {
  const outputDiv = document.querySelector("#output-generation");
  const placeholder = document.querySelector("#output-placeholder");
+
  if (placeholder) placeholder.hidden = true;
+
  if (outputDiv.textContent) {
  outputDiv.textContent += " " + data.word;
  } else {

  function updateStatus(data) {
  const { status, message, word } = data;
  const outputDiv = document.querySelector("#output-generation");
+
  if (status === "loading" || status === "decoding") {
  updateStatusDiv(message || (status === "loading" ? "Loading..." : "Decoding..."));
  } else if (status === "streaming") {
  if (outputDiv.textContent) {
  outputDiv.textContent += " " + word;
  } else {

  try {
  audioStream = await navigator.mediaDevices.getUserMedia({ audio: true });
  updateStatusDiv("Microphone access granted");
+
  audioContext = new AudioContext({ sampleRate: 24000 });
  source = audioContext.createMediaStreamSource(audioStream);
+
  processor = audioContext.createScriptProcessor(1024, 1, 1);
+
+ processor.onaudioprocess = function (event) {
  if (!isRecording || !modelInitialized) return;
+
  const inputBuffer = event.inputBuffer;
  const inputData = inputBuffer.getChannelData(0);
+
  const audioChunk = new Float32Array(inputData);
+ moshiWorker.postMessage(
+ {
+ command: "process_audio",
+ audioData: audioChunk,
+ },
+ [audioChunk.buffer]
+ );
  };
+
  source.connect(processor);
  processor.connect(audioContext.destination);
  } catch (error) {
  updateStatusDiv("Microphone access denied: " + error.message);
  throw error;
  }
  }
+
  function stopMicrophone() {
  if (processor) {
  processor.disconnect();
  processor = null;

  audioContext.close();
  audioContext = null;
  }
+
  if (audioStream) {
+ audioStream.getTracks().forEach((track) => track.stop());
  audioStream = null;
  }
+
  updateStatusDiv("Microphone stopped");
  }

  async function startRecording() {
  const button = document.querySelector("#speech-button");
+
  try {
  updateStatusDiv("Requesting microphone access...");
  await startMicrophone();
+
  audioChunksProcessed = 0;
  sessionStartTime = Date.now();
+
  moshiWorker.postMessage({ command: "start_stream" });
+
  isRecording = true;
+ button.textContent = "Stop Capture";
+ button.className =
+ "inline-flex items-center justify-center gap-2 rounded-2xl bg-gradient-to-r from-rose-500 to-red-600 hover:opacity-95 px-5 py-3 text-white font-semibold shadow-[0_10px_30px_rgba(244,63,94,0.22)] transition";
  updateStatusDiv("Listening...");
+
  document.querySelector("#output-generation").textContent = "";
  document.querySelector("#output-generation").hidden = true;
  document.querySelector("#output-placeholder").hidden = true;
  } catch (error) {
+ console.error("Error starting microphone:", error);
  updateStatusDiv("Error: " + error.message);
  pendingStart = false;
  }

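Note on the chunk hand-off in startMicrophone above: the handler copies each ScriptProcessor buffer into a fresh Float32Array and lists its underlying ArrayBuffer as a transferable, so the chunk moves to the worker without a structured-clone copy. A minimal standalone sketch of the same pattern (the "process_audio" command matches the diff; the helper name is illustrative):

// `worker` stands in for moshiWorker.
function sendChunk(worker, inputData) {
  // Copy first: getChannelData() returns a view the audio thread will reuse.
  const chunk = new Float32Array(inputData);
  // Transferring chunk.buffer detaches it on this side, which is why a new
  // copy is made on every onaudioprocess callback.
  worker.postMessage({ command: "process_audio", audioData: chunk }, [chunk.buffer]);
}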
document.querySelector("#speech-button").addEventListener("click", async () => {
|
| 326 |
const button = document.querySelector("#speech-button");
|
| 327 |
+
|
| 328 |
if (!isRecording) {
|
|
|
|
| 329 |
if (!modelInitialized) {
|
| 330 |
pendingStart = true;
|
| 331 |
initializeModel();
|
| 332 |
return;
|
| 333 |
}
|
| 334 |
+
|
| 335 |
await startRecording();
|
| 336 |
} else {
|
| 337 |
stopMicrophone();
|
| 338 |
+
|
|
|
|
| 339 |
moshiWorker.postMessage({ command: "stop_stream" });
|
| 340 |
+
|
| 341 |
isRecording = false;
|
| 342 |
+
button.textContent = "Start Capture";
|
| 343 |
+
button.className =
|
| 344 |
+
"inline-flex items-center justify-center gap-2 rounded-2xl bg-gradient-to-r from-sky-500 via-indigo-500 to-violet-500 hover:opacity-95 px-5 py-3 text-white font-semibold shadow-[0_10px_30px_rgba(56,189,248,0.22)] transition";
|
| 345 |
updateStatusDiv("Ready to start");
|
| 346 |
}
|
| 347 |
});
|
| 348 |
</script>
|
| 349 |
</head>
|
| 350 |
+
|
| 351 |
+
<body class="px-4 py-6 sm:px-6 lg:px-8">
|
| 352 |
+
<main class="mx-auto max-w-6xl">
|
| 353 |
+
<section class="glass brand-glow rounded-3xl p-6 sm:p-8 lg:p-10 overflow-hidden relative">
|
| 354 |
+
<div class="absolute inset-0 pointer-events-none">
|
| 355 |
+
<div class="absolute -top-24 -right-16 h-56 w-56 rounded-full bg-sky-400/10 blur-3xl"></div>
|
| 356 |
+
<div class="absolute -bottom-24 -left-12 h-64 w-64 rounded-full bg-violet-400/10 blur-3xl"></div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 357 |
</div>
|
|
|
|
| 358 |
|
| 359 |
+
<div class="relative z-10 grid grid-cols-1 lg:grid-cols-[1.25fr_0.75fr] gap-8 items-start">
|
| 360 |
+
<div>
|
| 361 |
+
<div class="inline-flex items-center gap-3 rounded-full border border-white/10 bg-white/5 px-4 py-2 text-sm text-slate-200">
|
| 362 |
+
<span class="h-2.5 w-2.5 rounded-full brand-gradient"></span>
|
| 363 |
+
<span class="font-semibold tracking-wide">EBURON AI</span>
|
| 364 |
+
<span class="text-slate-400">•</span>
|
| 365 |
+
<span class="text-slate-300">ASR Interface</span>
|
| 366 |
+
</div>
|
| 367 |
+
|
| 368 |
+
<h1 class="mt-6 text-4xl sm:text-5xl font-extrabold leading-tight text-brand">
|
| 369 |
+
Eburon ASR
|
| 370 |
+
</h1>
|
| 371 |
+
|
| 372 |
+
<p class="mt-4 max-w-3xl text-base sm:text-lg text-slate-300 leading-relaxed">
|
| 373 |
+
Real-time browser-based speech recognition with an Eburon-branded interface.
|
| 374 |
+
This experience runs locally in your browser after the model download and keeps
|
| 375 |
+
the underlying recognition pipeline unchanged.
|
| 376 |
+
</p>
|
| 377 |
+
|
| 378 |
+
<div class="mt-6 flex flex-wrap gap-3 text-sm">
|
| 379 |
+
<span class="rounded-full border border-sky-400/20 bg-sky-400/10 px-4 py-2 text-sky-200">
|
| 380 |
+
Streaming Transcription
|
| 381 |
+
</span>
|
| 382 |
+
<span class="rounded-full border border-violet-400/20 bg-violet-400/10 px-4 py-2 text-violet-200">
|
| 383 |
+
Browser Runtime
|
| 384 |
+
</span>
|
| 385 |
+
<span class="rounded-full border border-emerald-400/20 bg-emerald-400/10 px-4 py-2 text-emerald-200">
|
| 386 |
+
Offline After Download
|
| 387 |
+
</span>
|
| 388 |
+
</div>
|
| 389 |
+
</div>
|
| 390 |
+
|
| 391 |
+
<div class="glass rounded-3xl p-5 border border-white/10">
|
| 392 |
+
<div class="text-xs uppercase tracking-[0.22em] text-slate-400">System Overview</div>
|
| 393 |
+
<div class="mt-4 space-y-4">
|
| 394 |
+
<div class="rounded-2xl bg-white/5 border border-white/10 p-4">
|
| 395 |
+
<div class="text-sm text-slate-400">Engine</div>
|
| 396 |
+
<div class="mt-1 text-lg font-semibold text-slate-100">via WASM Runtime</div>
|
| 397 |
+
</div>
|
| 398 |
+
<div class="rounded-2xl bg-white/5 border border-white/10 p-4">
|
| 399 |
+
<div class="text-sm text-slate-400">Execution Mode</div>
|
| 400 |
+
<div class="mt-1 text-lg font-semibold text-slate-100">On-device CPU Processing</div>
|
| 401 |
+
</div>
|
| 402 |
+
<div class="rounded-2xl bg-white/5 border border-white/10 p-4">
|
| 403 |
+
<div class="text-sm text-slate-400">Brand Layer</div>
|
| 404 |
+
<div class="mt-1 text-lg font-semibold text-slate-100">Eburon ASR UI</div>
|
| 405 |
+
</div>
|
| 406 |
+
</div>
|
| 407 |
+
</div>
|
| 408 |
</div>
|
| 409 |
+
</section>
|
| 410 |
+
|
| 411 |
+
<section class="mt-8 grid grid-cols-1 xl:grid-cols-[0.9fr_1.1fr] gap-8">
|
| 412 |
+
<div class="space-y-8">
|
| 413 |
+
<div class="glass rounded-3xl p-6">
|
| 414 |
+
<div class="flex items-center justify-between gap-4 flex-wrap">
|
| 415 |
+
<div>
|
| 416 |
+
<div class="text-xs uppercase tracking-[0.22em] text-slate-400">Capture Control</div>
|
| 417 |
+
<h2 class="mt-2 text-2xl font-bold text-slate-100">Live Microphone Session</h2>
|
| 418 |
+
<p class="mt-2 text-slate-400">
|
| 419 |
+
Start or stop live speech capture without changing the recognition model.
|
| 420 |
+
</p>
|
| 421 |
+
</div>
|
| 422 |
+
|
| 423 |
+
<button
|
| 424 |
+
id="speech-button"
|
| 425 |
+
class="inline-flex items-center justify-center gap-2 rounded-2xl bg-gradient-to-r from-sky-500 via-indigo-500 to-violet-500 hover:opacity-95 px-5 py-3 text-white font-semibold shadow-[0_10px_30px_rgba(56,189,248,0.22)] transition"
|
| 426 |
+
>
|
| 427 |
+
Start Capture
|
| 428 |
+
</button>
|
| 429 |
+
</div>
|
| 430 |
|
| 431 |
+
<div class="mt-6 rounded-2xl border border-white/10 bg-white/5 p-4">
|
| 432 |
+
<div class="flex items-center gap-3 text-sm text-slate-300">
|
| 433 |
+
<span id="live-indicator" class="status-dot bg-sky-400"></span>
|
| 434 |
+
<span class="font-semibold">System Status</span>
|
| 435 |
+
</div>
|
| 436 |
+
<div class="mt-2 text-slate-200">
|
| 437 |
+
<span id="status-div">Click "Start Capture" to begin</span>
|
| 438 |
+
</div>
|
| 439 |
+
</div>
|
| 440 |
+
</div>
|
| 441 |
+
|
| 442 |
+
<div class="glass rounded-3xl p-6">
|
| 443 |
+
<div class="text-xs uppercase tracking-[0.22em] text-slate-400">Performance Telemetry</div>
|
| 444 |
+
<h3 class="mt-2 text-2xl font-bold text-slate-100">Runtime Diagnostics</h3>
|
| 445 |
+
<p class="mt-2 text-slate-400">
|
| 446 |
+
Monitoring interface performance and live throughput during recognition.
|
| 447 |
+
</p>
|
| 448 |
+
<div id="diagnostics" class="mt-5"></div>
|
| 449 |
+
</div>
|
| 450 |
+
</div>
|
| 451 |
+
|
| 452 |
+
<div class="glass rounded-3xl p-6">
|
| 453 |
+
<div class="flex items-center justify-between gap-4 flex-wrap">
|
| 454 |
+
<div>
|
| 455 |
+
<div class="text-xs uppercase tracking-[0.22em] text-slate-400">Recognition Output</div>
|
| 456 |
+
<h3 class="mt-2 text-2xl font-bold text-slate-100">Live Transcription</h3>
|
| 457 |
+
</div>
|
| 458 |
+
<div class="rounded-full border border-white/10 bg-white/5 px-4 py-2 text-sm text-slate-300">
|
| 459 |
+
Eburon ASR Stream
|
| 460 |
+
</div>
|
| 461 |
+
</div>
|
| 462 |
+
|
| 463 |
+
<div class="mt-5 min-h-[320px] rounded-3xl border border-white/10 bg-[#09101c] p-5 sm:p-6 text-slate-200 shadow-inner overflow-auto scroll-soft">
|
| 464 |
+
<p id="output-generation" class="whitespace-pre-wrap leading-8 text-lg" hidden></p>
|
| 465 |
+
<span id="output-placeholder" class="text-slate-500">
|
| 466 |
+
Your live transcript will appear here once capture begins.
|
| 467 |
+
</span>
|
| 468 |
+
</div>
|
| 469 |
+
|
| 470 |
+
<div class="mt-5 grid grid-cols-1 md:grid-cols-2 gap-4">
|
| 471 |
+
<div class="rounded-2xl bg-white/5 border border-white/10 p-4">
|
| 472 |
+
<div class="text-sm font-semibold text-slate-200">Scope</div>
|
| 473 |
+
<p class="mt-2 text-sm text-slate-400">
|
| 474 |
+
Interface only. Recognition model, URLs, worker, and audio pipeline remain unchanged.
|
| 475 |
+
</p>
|
| 476 |
+
</div>
|
| 477 |
+
<div class="rounded-2xl bg-white/5 border border-white/10 p-4">
|
| 478 |
+
<div class="text-sm font-semibold text-slate-200">Deployment Feel</div>
|
| 479 |
+
<p class="mt-2 text-sm text-slate-400">
|
| 480 |
+
Polished dark dashboard suitable for Eburon demos, client previews, or product shells.
|
| 481 |
+
</p>
|
| 482 |
+
</div>
|
| 483 |
+
</div>
|
| 484 |
+
</div>
|
| 485 |
+
</section>
|
| 486 |
+
|
| 487 |
+
<footer class="mt-8">
|
| 488 |
+
<div class="glass rounded-3xl p-5 flex flex-col md:flex-row md:items-center md:justify-between gap-4">
|
| 489 |
+
<div>
|
| 490 |
+
<div class="text-sm font-semibold text-slate-200">Eburon ASR Interface</div>
|
| 491 |
+
<p class="mt-1 text-sm text-slate-400">
|
| 492 |
+
Premium branding layer for browser-based streaming speech recognition.
|
| 493 |
+
</p>
|
| 494 |
+
</div>
|
| 495 |
+
<div class="text-xs uppercase tracking-[0.18em] text-slate-500">
|
| 496 |
+
Eburon AI • Speech Systems UI
|
| 497 |
+
</div>
|
| 498 |
+
</div>
|
| 499 |
+
</footer>
|
| 500 |
</main>
|
| 501 |
</body>
|
| 502 |
</html>
|
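For reference, the page above exchanges a small set of messages with moshiWorker: it sends initialize (with weightsURL), start_stream, process_audio (with audioData), and stop_stream, and it listens for model_ready, streaming (with word), and loading/decoding progress. A hypothetical worker-side skeleton matching those shapes might look like this (loadModel and decodeChunk are placeholders, not part of this change):

// Hypothetical worker sketch mirroring the commands and statuses used by the page.
let model = null;
self.onmessage = async ({ data }) => {
  if (data.command === "initialize") {
    model = await loadModel(data.weightsURL);        // placeholder for the real loader
    self.postMessage({ status: "model_ready" });
  } else if (data.command === "start_stream") {
    // reset streaming state here
  } else if (data.command === "process_audio") {
    const word = decodeChunk(model, data.audioData); // placeholder for the real decoder
    if (word) self.postMessage({ status: "streaming", word });
  } else if (data.command === "stop_stream") {
    // flush and finalize the stream here
  }
};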