Manjunath Kudlur
committed on
Commit
·
23932e3
1
Parent(s):
bb3b9a0
Timeline prettify
Browse files- decoder_worker.js +79 -4
- encoder_worker.js +22 -1
- index.html +2 -2
- streaming_asr.js +121 -9
decoder_worker.js
CHANGED
|
@@ -170,6 +170,12 @@ let tokenizer = null;
|
|
| 170 |
let accumulatedFeatures = null;
|
| 171 |
let currentSegmentId = null;
|
| 172 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 173 |
class MoonshineTokenizer {
|
| 174 |
constructor() {
|
| 175 |
this.decoder = null;
|
|
@@ -320,7 +326,11 @@ async function decodeAccumulated() {
|
|
| 320 |
}
|
| 321 |
}
|
| 322 |
|
| 323 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 324 |
const { type, data } = e.data;
|
| 325 |
|
| 326 |
switch (type) {
|
|
@@ -376,6 +386,9 @@ self.onmessage = async function(e) {
|
|
| 376 |
case 'segment_start': {
|
| 377 |
accumulatedFeatures = null;
|
| 378 |
currentSegmentId = data.segmentId;
|
|
|
|
|
|
|
|
|
|
| 379 |
self.postMessage({ type: 'live_caption', text: '' });
|
| 380 |
break;
|
| 381 |
}
|
|
@@ -383,7 +396,15 @@ self.onmessage = async function(e) {
|
|
| 383 |
case 'segment_end': {
|
| 384 |
if (data.segmentId !== currentSegmentId) break;
|
| 385 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 386 |
const text = await decodeAccumulated();
|
|
|
|
|
|
|
| 387 |
self.postMessage({
|
| 388 |
type: 'transcript',
|
| 389 |
segmentId: data.segmentId,
|
|
@@ -435,10 +456,64 @@ self.onmessage = async function(e) {
|
|
| 435 |
}
|
| 436 |
}
|
| 437 |
|
| 438 |
-
// Live caption
|
| 439 |
-
const
|
| 440 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 441 |
break;
|
| 442 |
}
|
| 443 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 444 |
};
|
|
|
|
| 170 |
let accumulatedFeatures = null;
|
| 171 |
let currentSegmentId = null;
|
| 172 |
|
| 173 |
+
// Live caption throttling to prevent pipeline backup
|
| 174 |
+
let isDecoding = false;
|
| 175 |
+
let lastDecodeTime = 0;
|
| 176 |
+
let pendingDecode = false;
|
| 177 |
+
const MIN_DECODE_INTERVAL_MS = 500; // Don't decode more often than every 500ms for live captions
|
| 178 |
+
|
| 179 |
class MoonshineTokenizer {
|
| 180 |
constructor() {
|
| 181 |
this.decoder = null;
|
|
|
|
| 326 |
}
|
| 327 |
}
|
| 328 |
|
| 329 |
+
// Message queue for sequential processing
|
| 330 |
+
const messageQueue = [];
|
| 331 |
+
let isProcessingQueue = false;
|
| 332 |
+
|
| 333 |
+
async function processMessage(e) {
|
| 334 |
const { type, data } = e.data;
|
| 335 |
|
| 336 |
switch (type) {
|
|
|
|
| 386 |
case 'segment_start': {
|
| 387 |
accumulatedFeatures = null;
|
| 388 |
currentSegmentId = data.segmentId;
|
| 389 |
+
isDecoding = false;
|
| 390 |
+
lastDecodeTime = 0;
|
| 391 |
+
pendingDecode = false;
|
| 392 |
self.postMessage({ type: 'live_caption', text: '' });
|
| 393 |
break;
|
| 394 |
}
|
|
|
|
| 396 |
case 'segment_end': {
|
| 397 |
if (data.segmentId !== currentSegmentId) break;
|
| 398 |
|
| 399 |
+
// Wait for any in-progress decode to finish before final decode
|
| 400 |
+
while (isDecoding) {
|
| 401 |
+
await new Promise(resolve => setTimeout(resolve, 50));
|
| 402 |
+
}
|
| 403 |
+
|
| 404 |
+
isDecoding = true;
|
| 405 |
const text = await decodeAccumulated();
|
| 406 |
+
isDecoding = false;
|
| 407 |
+
|
| 408 |
self.postMessage({
|
| 409 |
type: 'transcript',
|
| 410 |
segmentId: data.segmentId,
|
|
|
|
| 456 |
}
|
| 457 |
}
|
| 458 |
|
| 459 |
+
// Live caption with throttling to prevent pipeline backup
|
| 460 |
+
const now = Date.now();
|
| 461 |
+
const timeSinceLastDecode = now - lastDecodeTime;
|
| 462 |
+
|
| 463 |
+
if (isDecoding) {
|
| 464 |
+
// Already decoding, mark that we need another decode when done
|
| 465 |
+
pendingDecode = true;
|
| 466 |
+
} else if (timeSinceLastDecode >= MIN_DECODE_INTERVAL_MS) {
|
| 467 |
+
// Enough time has passed, decode now
|
| 468 |
+
isDecoding = true;
|
| 469 |
+
lastDecodeTime = now;
|
| 470 |
+
|
| 471 |
+
try {
|
| 472 |
+
const partialText = await decodeAccumulated();
|
| 473 |
+
self.postMessage({ type: 'live_caption', text: partialText });
|
| 474 |
+
} finally {
|
| 475 |
+
isDecoding = false;
|
| 476 |
+
|
| 477 |
+
// If there was a pending decode request, schedule it
|
| 478 |
+
if (pendingDecode) {
|
| 479 |
+
pendingDecode = false;
|
| 480 |
+
// Use setTimeout to avoid blocking - decode will happen on next message or timeout
|
| 481 |
+
setTimeout(async () => {
|
| 482 |
+
if (!isDecoding && currentSegmentId !== null) {
|
| 483 |
+
isDecoding = true;
|
| 484 |
+
lastDecodeTime = Date.now();
|
| 485 |
+
try {
|
| 486 |
+
const text = await decodeAccumulated();
|
| 487 |
+
self.postMessage({ type: 'live_caption', text: text });
|
| 488 |
+
} finally {
|
| 489 |
+
isDecoding = false;
|
| 490 |
+
}
|
| 491 |
+
}
|
| 492 |
+
}, MIN_DECODE_INTERVAL_MS);
|
| 493 |
+
}
|
| 494 |
+
}
|
| 495 |
+
} else {
|
| 496 |
+
// Too soon since last decode, mark pending
|
| 497 |
+
pendingDecode = true;
|
| 498 |
+
}
|
| 499 |
break;
|
| 500 |
}
|
| 501 |
}
|
| 502 |
+
}
|
| 503 |
+
|
| 504 |
+
/**
 * Drains `messageQueue` one message at a time so worker messages are handled
 * strictly in arrival order, even though `processMessage` is asynchronous.
 *
 * Calls made while a drain is already running return immediately; the
 * running drain picks up anything pushed to the queue in the meantime.
 *
 * A failure in one message is logged and does not stop the drain, and
 * `isProcessingQueue` is always cleared on exit. Without this, a rejected
 * `processMessage` would leave the flag stuck at `true`, every later call
 * would return early, and the worker would silently stop handling messages.
 *
 * @returns {Promise<void>} Resolves once the queue is empty.
 */
async function processQueue() {
    if (isProcessingQueue) return;
    isProcessingQueue = true;

    try {
        while (messageQueue.length > 0) {
            const msg = messageQueue.shift();
            try {
                await processMessage(msg);
            } catch (err) {
                // Keep draining: later messages (e.g. the next segment_start)
                // must still be handled after one message fails.
                console.error('Failed to process worker message:', err);
            }
        }
    } finally {
        // Always release the guard so the next incoming message can start a drain.
        isProcessingQueue = false;
    }
}
|
| 515 |
+
|
| 516 |
+
/**
 * Worker message entry point: enqueues the incoming event and kicks off the
 * sequential drain. Messages are queued rather than handled inline so that
 * overlapping async handlers cannot interleave.
 *
 * @param {MessageEvent} e - Message event posted to this worker.
 */
self.onmessage = function(e) {
    messageQueue.push(e);
    // processQueue() is async; handle its rejection explicitly so a failure
    // is logged instead of surfacing as an unhandled promise rejection.
    processQueue().catch((err) => {
        console.error('Message queue processing failed:', err);
    });
};
|
encoder_worker.js
CHANGED
|
@@ -228,7 +228,11 @@ async function processEncoder(melData, melDims, flush = true) {
|
|
| 228 |
return { data: resultData, dims: [1, newOutputCount, encDim] };
|
| 229 |
}
|
| 230 |
|
| 231 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 232 |
const { type, data } = e.data;
|
| 233 |
|
| 234 |
switch (type) {
|
|
@@ -320,4 +324,21 @@ self.onmessage = async function(e) {
|
|
| 320 |
break;
|
| 321 |
}
|
| 322 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 323 |
};
|
|
|
|
| 228 |
return { data: resultData, dims: [1, newOutputCount, encDim] };
|
| 229 |
}
|
| 230 |
|
| 231 |
+
// Message queue for sequential processing
|
| 232 |
+
const messageQueue = [];
|
| 233 |
+
let isProcessing = false;
|
| 234 |
+
|
| 235 |
+
async function processMessage(e) {
|
| 236 |
const { type, data } = e.data;
|
| 237 |
|
| 238 |
switch (type) {
|
|
|
|
| 324 |
break;
|
| 325 |
}
|
| 326 |
}
|
| 327 |
+
}
|
| 328 |
+
|
| 329 |
+
/**
 * Drains `messageQueue` one message at a time so worker messages are handled
 * strictly in arrival order, even though `processMessage` is asynchronous.
 *
 * Calls made while a drain is already running return immediately; the
 * running drain picks up anything pushed to the queue in the meantime.
 *
 * A failure in one message is logged and does not stop the drain, and
 * `isProcessing` is always cleared on exit. Without this, a rejected
 * `processMessage` would leave the flag stuck at `true`, every later call
 * would return early, and the worker would silently stop handling messages.
 *
 * @returns {Promise<void>} Resolves once the queue is empty.
 */
async function processQueue() {
    if (isProcessing) return;
    isProcessing = true;

    try {
        while (messageQueue.length > 0) {
            const msg = messageQueue.shift();
            try {
                await processMessage(msg);
            } catch (err) {
                // Keep draining: one bad message must not block the ones behind it.
                console.error('Failed to process worker message:', err);
            }
        }
    } finally {
        // Always release the guard so the next incoming message can start a drain.
        isProcessing = false;
    }
}
|
| 340 |
+
|
| 341 |
+
/**
 * Worker message entry point: enqueues the incoming event and kicks off the
 * sequential drain. Messages are queued rather than handled inline so that
 * overlapping async handlers cannot interleave.
 *
 * @param {MessageEvent} e - Message event posted to this worker.
 */
self.onmessage = function(e) {
    messageQueue.push(e);
    // processQueue() is async; handle its rejection explicitly so a failure
    // is logged instead of surfacing as an unhandled promise rejection.
    processQueue().catch((err) => {
        console.error('Message queue processing failed:', err);
    });
};
|
index.html
CHANGED
|
@@ -115,7 +115,7 @@
|
|
| 115 |
background: #0f0f23;
|
| 116 |
border-radius: 5px;
|
| 117 |
padding: 10px;
|
| 118 |
-
height:
|
| 119 |
position: relative;
|
| 120 |
overflow: hidden;
|
| 121 |
}
|
|
@@ -531,7 +531,7 @@
|
|
| 531 |
}
|
| 532 |
|
| 533 |
.vad-graph {
|
| 534 |
-
height:
|
| 535 |
}
|
| 536 |
|
| 537 |
.pipeline-status {
|
|
|
|
| 115 |
background: #0f0f23;
|
| 116 |
border-radius: 5px;
|
| 117 |
padding: 10px;
|
| 118 |
+
height: 140px;
|
| 119 |
position: relative;
|
| 120 |
overflow: hidden;
|
| 121 |
}
|
|
|
|
| 531 |
}
|
| 532 |
|
| 533 |
.vad-graph {
|
| 534 |
+
height: 100px;
|
| 535 |
}
|
| 536 |
|
| 537 |
.pipeline-status {
|
streaming_asr.js
CHANGED
|
@@ -19,6 +19,8 @@ const ENCODER_BATCH_SAMPLES = 5120; // 320ms - batch size for encoder
|
|
| 19 |
const PRE_BUFFER_CHUNKS = 25; // ~500ms at 20ms chunks - capture more audio before onset
|
| 20 |
const POST_BUFFER_CHUNKS = 5; // ~100ms at 20ms chunks
|
| 21 |
const MIN_SEGMENT_DURATION_MS = 2000; // Minimum 2 seconds before allowing segment end
|
|
|
|
|
|
|
| 22 |
const OFFSET_CHUNKS_REQUIRED = 10; // ~100ms of silence needed to end segment
|
| 23 |
|
| 24 |
const MODEL_CONFIGS = {
|
|
@@ -225,6 +227,8 @@ class PipelinedStreamingASR {
|
|
| 225 |
this.vadHistory = [];
|
| 226 |
this.vadUpdateCounter = 0;
|
| 227 |
this.vadUpdateInterval = 5; // Update display every 5 VAD chunks (50ms)
|
|
|
|
|
|
|
| 228 |
|
| 229 |
// Callbacks
|
| 230 |
this.onVadUpdate = null;
|
|
@@ -504,9 +508,26 @@ class PipelinedStreamingASR {
|
|
| 504 |
this.vadUpdateCounter++;
|
| 505 |
if (this.vadUpdateCounter >= this.vadUpdateInterval) {
|
| 506 |
this.vadUpdateCounter = 0;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 507 |
this.vadHistory.push(this.emaProb);
|
| 508 |
-
if (this.vadHistory.length > 100)
|
| 509 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 510 |
}
|
| 511 |
|
| 512 |
this.updateSegmentState();
|
|
@@ -572,11 +593,25 @@ class PipelinedStreamingASR {
|
|
| 572 |
this.onsetCounter = 0;
|
| 573 |
}
|
| 574 |
} else if (this.state === 'speech') {
|
| 575 |
-
// Check if minimum segment duration has passed
|
| 576 |
const segmentDuration = Date.now() - this.segmentStartTime;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 577 |
const minDurationMet = segmentDuration >= MIN_SEGMENT_DURATION_MS;
|
| 578 |
|
| 579 |
-
if (this.emaProb <
|
| 580 |
this.offsetCounter++;
|
| 581 |
// Only end segment if minimum duration met AND enough silence chunks
|
| 582 |
if (minDurationMet && this.offsetCounter >= OFFSET_CHUNKS_REQUIRED) {
|
|
@@ -589,6 +624,12 @@ class PipelinedStreamingASR {
|
|
| 589 |
}
|
| 590 |
|
| 591 |
startSegment() {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 592 |
this.currentSegmentId++;
|
| 593 |
this.state = 'speech';
|
| 594 |
this.onsetCounter = 0;
|
|
@@ -596,6 +637,9 @@ class PipelinedStreamingASR {
|
|
| 596 |
this.segmentStartTime = Date.now();
|
| 597 |
this.encoderBatchBuffer = []; // Reset batch buffer for new segment
|
| 598 |
|
|
|
|
|
|
|
|
|
|
| 599 |
// Tell encoder to start new segment
|
| 600 |
this.encoderWorker?.postMessage({
|
| 601 |
type: 'segment_start',
|
|
@@ -616,6 +660,9 @@ class PipelinedStreamingASR {
|
|
| 616 |
this.offsetCounter = 0;
|
| 617 |
this.postBufferRemaining = POST_BUFFER_CHUNKS;
|
| 618 |
|
|
|
|
|
|
|
|
|
|
| 619 |
if (this.postBufferRemaining === 0) {
|
| 620 |
this.finalizeSegmentEnd();
|
| 621 |
}
|
|
@@ -741,7 +788,7 @@ class ASRDemoUI {
|
|
| 741 |
|
| 742 |
this.asr = new PipelinedStreamingASR(config);
|
| 743 |
|
| 744 |
-
this.asr.onVadUpdate = (prob, history) => this.updateVadDisplay(prob, history);
|
| 745 |
this.asr.onTranscript = (text, segmentId) => this.addTranscript(text, segmentId);
|
| 746 |
this.asr.onLiveCaption = (text) => this.updateLiveCaption(text);
|
| 747 |
this.asr.onStatusUpdate = (status, text) => this.updateStatus(status, text);
|
|
@@ -781,7 +828,7 @@ class ASRDemoUI {
|
|
| 781 |
this.updateStatus('idle', 'Ready');
|
| 782 |
}
|
| 783 |
|
| 784 |
-
updateVadDisplay(prob, history) {
|
| 785 |
this.vadBarFill.style.width = `${prob * 100}%`;
|
| 786 |
this.vadValue.textContent = `${Math.round(prob * 100)}%`;
|
| 787 |
|
|
@@ -790,15 +837,79 @@ class ASRDemoUI {
|
|
| 790 |
const width = rect.width;
|
| 791 |
const height = rect.height;
|
| 792 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 793 |
ctx.fillStyle = '#0f0f23';
|
| 794 |
ctx.fillRect(0, 0, width, height);
|
| 795 |
|
| 796 |
if (history.length < 2) return;
|
| 797 |
|
| 798 |
-
const
|
| 799 |
-
const
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 800 |
|
| 801 |
ctx.strokeStyle = '#ff444466';
|
|
|
|
| 802 |
ctx.beginPath();
|
| 803 |
ctx.moveTo(0, onsetY);
|
| 804 |
ctx.lineTo(width, onsetY);
|
|
@@ -810,13 +921,14 @@ class ASRDemoUI {
|
|
| 810 |
ctx.lineTo(width, offsetY);
|
| 811 |
ctx.stroke();
|
| 812 |
|
|
|
|
| 813 |
ctx.strokeStyle = '#00d4ff';
|
| 814 |
ctx.lineWidth = 2;
|
| 815 |
ctx.beginPath();
|
| 816 |
|
| 817 |
for (let i = 0; i < history.length; i++) {
|
| 818 |
const x = (i / (history.length - 1)) * width;
|
| 819 |
-
const y =
|
| 820 |
if (i === 0) {
|
| 821 |
ctx.moveTo(x, y);
|
| 822 |
} else {
|
|
|
|
| 19 |
const PRE_BUFFER_CHUNKS = 25; // ~500ms at 20ms chunks - capture more audio before onset
|
| 20 |
const POST_BUFFER_CHUNKS = 5; // ~100ms at 20ms chunks
|
| 21 |
const MIN_SEGMENT_DURATION_MS = 2000; // Minimum 2 seconds before allowing segment end
|
| 22 |
+
const OFFSET_RAMP_START_MS = 6000; // Start ramping offset threshold at 6 seconds
|
| 23 |
+
const OFFSET_RAMP_END_MS = 8000; // Reach max offset threshold (1.0) at 8 seconds
|
| 24 |
const OFFSET_CHUNKS_REQUIRED = 10; // ~100ms of silence needed to end segment
|
| 25 |
|
| 26 |
const MODEL_CONFIGS = {
|
|
|
|
| 227 |
this.vadHistory = [];
|
| 228 |
this.vadUpdateCounter = 0;
|
| 229 |
this.vadUpdateInterval = 5; // Update display every 5 VAD chunks (50ms)
|
| 230 |
+
this.segmentEvents = []; // Track segment start/end for visualization
|
| 231 |
+
this.vadHistoryStartTime = 0; // When the current history window started
|
| 232 |
|
| 233 |
// Callbacks
|
| 234 |
this.onVadUpdate = null;
|
|
|
|
| 508 |
this.vadUpdateCounter++;
|
| 509 |
if (this.vadUpdateCounter >= this.vadUpdateInterval) {
|
| 510 |
this.vadUpdateCounter = 0;
|
| 511 |
+
const now = Date.now();
|
| 512 |
+
|
| 513 |
+
// Initialize history start time
|
| 514 |
+
if (this.vadHistory.length === 0) {
|
| 515 |
+
this.vadHistoryStartTime = now;
|
| 516 |
+
}
|
| 517 |
+
|
| 518 |
this.vadHistory.push(this.emaProb);
|
| 519 |
+
if (this.vadHistory.length > 100) {
|
| 520 |
+
this.vadHistory.shift();
|
| 521 |
+
// Shift the start time by 50ms (one history entry)
|
| 522 |
+
this.vadHistoryStartTime += 50;
|
| 523 |
+
}
|
| 524 |
+
|
| 525 |
+
// Remove old segment events that are outside the history window
|
| 526 |
+
const historyDuration = this.vadHistory.length * 50; // ms
|
| 527 |
+
const historyStart = now - historyDuration;
|
| 528 |
+
this.segmentEvents = this.segmentEvents.filter(e => e.time >= historyStart);
|
| 529 |
+
|
| 530 |
+
this.onVadUpdate?.(this.emaProb, this.vadHistory, this.segmentEvents, this.vadHistoryStartTime);
|
| 531 |
}
|
| 532 |
|
| 533 |
this.updateSegmentState();
|
|
|
|
| 593 |
this.onsetCounter = 0;
|
| 594 |
}
|
| 595 |
} else if (this.state === 'speech') {
|
|
|
|
| 596 |
const segmentDuration = Date.now() - this.segmentStartTime;
|
| 597 |
+
|
| 598 |
+
// Calculate effective offset threshold with gradual ramp
|
| 599 |
+
// Before OFFSET_RAMP_START_MS: use normal offsetThreshold
|
| 600 |
+
// Between OFFSET_RAMP_START_MS and OFFSET_RAMP_END_MS: linearly ramp to 1.0
|
| 601 |
+
// After OFFSET_RAMP_END_MS: use 1.0 (any VAD level will trigger offset)
|
| 602 |
+
let effectiveOffsetThreshold = this.offsetThreshold;
|
| 603 |
+
if (segmentDuration >= OFFSET_RAMP_START_MS) {
|
| 604 |
+
const rampProgress = Math.min(1.0,
|
| 605 |
+
(segmentDuration - OFFSET_RAMP_START_MS) / (OFFSET_RAMP_END_MS - OFFSET_RAMP_START_MS)
|
| 606 |
+
);
|
| 607 |
+
// Lerp from offsetThreshold to 1.0
|
| 608 |
+
effectiveOffsetThreshold = this.offsetThreshold + rampProgress * (1.0 - this.offsetThreshold);
|
| 609 |
+
}
|
| 610 |
+
|
| 611 |
+
// Check if minimum segment duration has passed
|
| 612 |
const minDurationMet = segmentDuration >= MIN_SEGMENT_DURATION_MS;
|
| 613 |
|
| 614 |
+
if (this.emaProb < effectiveOffsetThreshold) {
|
| 615 |
this.offsetCounter++;
|
| 616 |
// Only end segment if minimum duration met AND enough silence chunks
|
| 617 |
if (minDurationMet && this.offsetCounter >= OFFSET_CHUNKS_REQUIRED) {
|
|
|
|
| 624 |
}
|
| 625 |
|
| 626 |
startSegment() {
|
| 627 |
+
// If previous segment wasn't fully finalized, finalize it now
|
| 628 |
+
if (this.postBufferRemaining > 0) {
|
| 629 |
+
this.finalizeSegmentEnd();
|
| 630 |
+
this.postBufferRemaining = 0;
|
| 631 |
+
}
|
| 632 |
+
|
| 633 |
this.currentSegmentId++;
|
| 634 |
this.state = 'speech';
|
| 635 |
this.onsetCounter = 0;
|
|
|
|
| 637 |
this.segmentStartTime = Date.now();
|
| 638 |
this.encoderBatchBuffer = []; // Reset batch buffer for new segment
|
| 639 |
|
| 640 |
+
// Record segment start for visualization
|
| 641 |
+
this.segmentEvents.push({ type: 'start', time: this.segmentStartTime });
|
| 642 |
+
|
| 643 |
// Tell encoder to start new segment
|
| 644 |
this.encoderWorker?.postMessage({
|
| 645 |
type: 'segment_start',
|
|
|
|
| 660 |
this.offsetCounter = 0;
|
| 661 |
this.postBufferRemaining = POST_BUFFER_CHUNKS;
|
| 662 |
|
| 663 |
+
// Record segment end for visualization
|
| 664 |
+
this.segmentEvents.push({ type: 'end', time: Date.now() });
|
| 665 |
+
|
| 666 |
if (this.postBufferRemaining === 0) {
|
| 667 |
this.finalizeSegmentEnd();
|
| 668 |
}
|
|
|
|
| 788 |
|
| 789 |
this.asr = new PipelinedStreamingASR(config);
|
| 790 |
|
| 791 |
+
this.asr.onVadUpdate = (prob, history, segmentEvents, historyStartTime) => this.updateVadDisplay(prob, history, segmentEvents, historyStartTime);
|
| 792 |
this.asr.onTranscript = (text, segmentId) => this.addTranscript(text, segmentId);
|
| 793 |
this.asr.onLiveCaption = (text) => this.updateLiveCaption(text);
|
| 794 |
this.asr.onStatusUpdate = (status, text) => this.updateStatus(status, text);
|
|
|
|
| 828 |
this.updateStatus('idle', 'Ready');
|
| 829 |
}
|
| 830 |
|
| 831 |
+
updateVadDisplay(prob, history, segmentEvents = [], historyStartTime = 0) {
|
| 832 |
this.vadBarFill.style.width = `${prob * 100}%`;
|
| 833 |
this.vadValue.textContent = `${Math.round(prob * 100)}%`;
|
| 834 |
|
|
|
|
| 837 |
const width = rect.width;
|
| 838 |
const height = rect.height;
|
| 839 |
|
| 840 |
+
// Leave space for x-axis labels
|
| 841 |
+
const graphHeight = height - 20;
|
| 842 |
+
const graphTop = 0;
|
| 843 |
+
|
| 844 |
ctx.fillStyle = '#0f0f23';
|
| 845 |
ctx.fillRect(0, 0, width, height);
|
| 846 |
|
| 847 |
if (history.length < 2) return;
|
| 848 |
|
| 849 |
+
const historyDuration = history.length * 50; // ms (each entry is 50ms)
|
| 850 |
+
const now = Date.now();
|
| 851 |
+
|
| 852 |
+
// Draw x-axis ticks (every 0.1 seconds = 100ms)
|
| 853 |
+
ctx.strokeStyle = '#333';
|
| 854 |
+
ctx.fillStyle = '#666';
|
| 855 |
+
ctx.font = '10px monospace';
|
| 856 |
+
ctx.textAlign = 'center';
|
| 857 |
+
ctx.lineWidth = 1;
|
| 858 |
+
|
| 859 |
+
for (let t = 0; t <= historyDuration; t += 100) {
|
| 860 |
+
const x = (t / historyDuration) * width;
|
| 861 |
+
|
| 862 |
+
// Draw tick mark
|
| 863 |
+
ctx.beginPath();
|
| 864 |
+
ctx.moveTo(x, graphHeight);
|
| 865 |
+
ctx.lineTo(x, graphHeight + 5);
|
| 866 |
+
ctx.stroke();
|
| 867 |
+
|
| 868 |
+
// Draw vertical grid line (lighter for minor ticks)
|
| 869 |
+
if (t % 500 === 0) {
|
| 870 |
+
ctx.strokeStyle = '#444';
|
| 871 |
+
} else {
|
| 872 |
+
ctx.strokeStyle = '#222';
|
| 873 |
+
}
|
| 874 |
+
ctx.beginPath();
|
| 875 |
+
ctx.moveTo(x, graphTop);
|
| 876 |
+
ctx.lineTo(x, graphHeight);
|
| 877 |
+
ctx.stroke();
|
| 878 |
+
ctx.strokeStyle = '#333';
|
| 879 |
+
|
| 880 |
+
// Draw label every 0.5 seconds
|
| 881 |
+
if (t % 500 === 0) {
|
| 882 |
+
const seconds = (t / 1000).toFixed(1);
|
| 883 |
+
ctx.fillText(seconds + 's', x, height - 2);
|
| 884 |
+
}
|
| 885 |
+
}
|
| 886 |
+
|
| 887 |
+
// Draw segment events (start = green line, end = red line)
|
| 888 |
+
for (const event of segmentEvents) {
|
| 889 |
+
const eventAge = now - event.time; // ms ago
|
| 890 |
+
const eventPos = historyDuration - eventAge; // position in history
|
| 891 |
+
if (eventPos < 0 || eventPos > historyDuration) continue;
|
| 892 |
+
|
| 893 |
+
const x = (eventPos / historyDuration) * width;
|
| 894 |
+
|
| 895 |
+
ctx.lineWidth = 2;
|
| 896 |
+
if (event.type === 'start') {
|
| 897 |
+
ctx.strokeStyle = '#00ff88'; // Green for start
|
| 898 |
+
} else {
|
| 899 |
+
ctx.strokeStyle = '#ff4444'; // Red for end
|
| 900 |
+
}
|
| 901 |
+
ctx.beginPath();
|
| 902 |
+
ctx.moveTo(x, graphTop);
|
| 903 |
+
ctx.lineTo(x, graphHeight);
|
| 904 |
+
ctx.stroke();
|
| 905 |
+
}
|
| 906 |
+
|
| 907 |
+
// Draw threshold lines
|
| 908 |
+
const onsetY = graphHeight * (1 - parseFloat(this.onsetThreshold.value));
|
| 909 |
+
const offsetY = graphHeight * (1 - parseFloat(this.offsetThreshold.value));
|
| 910 |
|
| 911 |
ctx.strokeStyle = '#ff444466';
|
| 912 |
+
ctx.lineWidth = 1;
|
| 913 |
ctx.beginPath();
|
| 914 |
ctx.moveTo(0, onsetY);
|
| 915 |
ctx.lineTo(width, onsetY);
|
|
|
|
| 921 |
ctx.lineTo(width, offsetY);
|
| 922 |
ctx.stroke();
|
| 923 |
|
| 924 |
+
// Draw VAD probability line
|
| 925 |
ctx.strokeStyle = '#00d4ff';
|
| 926 |
ctx.lineWidth = 2;
|
| 927 |
ctx.beginPath();
|
| 928 |
|
| 929 |
for (let i = 0; i < history.length; i++) {
|
| 930 |
const x = (i / (history.length - 1)) * width;
|
| 931 |
+
const y = graphHeight * (1 - history[i]);
|
| 932 |
if (i === 0) {
|
| 933 |
ctx.moveTo(x, y);
|
| 934 |
} else {
|