KevinAHM commited on
Commit
9b19787
·
0 Parent(s):

Soprano 1.1

Browse files
.gitattributes ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ *.onnx filter=lfs diff=lfs merge=lfs -text
2
+ *.onnx.data filter=lfs diff=lfs merge=lfs -text
EventEmitter.js ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
export class EventEmitter {
    /**
     * Minimal DOM-style event emitter.
     * Listeners are stored per event name as { callback, once } records;
     * `once` listeners are dropped after their first invocation.
     */
    constructor() {
        // Map of event name -> array of wrapped listeners.
        this.events = {};
    }

    /**
     * Register a listener for `event`.
     * @param {string} event - Event name.
     * @param {Function} listener - Callback invoked with the event object.
     * @param {{once?: boolean}} [options] - `once: true` removes the listener
     *     automatically after its first invocation.
     */
    addEventListener(event, listener, options = {}) {
        if (!this.events[event]) {
            this.events[event] = [];
        }

        this.events[event].push({
            callback: listener,
            once: options.once || false
        });
    }

    /**
     * Remove a previously registered listener, matched by callback identity.
     * No-op if the event or listener is unknown.
     */
    removeEventListener(event, listener) {
        if (!this.events[event]) return;

        this.events[event] = this.events[event].filter(
            (wrapped) => wrapped.callback !== listener
        );
    }

    /**
     * Dispatch an event object (must carry a `type` property) to every
     * listener registered for that type. `once` listeners are removed after
     * being invoked.
     *
     * Fix: `target`/`currentTarget` are now assigned to the dispatching
     * emitter. The companion CustomEvent class declares both fields but they
     * were previously left `null`, so handlers could not identify the source.
     */
    dispatchEvent(event) {
        const eventName = event.type;
        if (!this.events[eventName]) return;

        event.target = this;
        event.currentTarget = this;

        this.events[eventName] = this.events[eventName].filter((wrapped) => {
            wrapped.callback.call(this, event);
            return !wrapped.once; // keep only persistent listeners
        });
    }

    /**
     * Convenience wrapper: wraps `data` in a CustomEvent (as `detail`) and
     * dispatches it.
     */
    emit(eventName, data) {
        const event = new CustomEvent(eventName, { detail: data });
        this.dispatchEvent(event);
    }
}
42
+
43
export class CustomEvent {
    /**
     * Minimal event object modeled after the DOM CustomEvent interface,
     * for use with the in-house EventEmitter (e.g. in worker/worklet
     * contexts without the DOM event machinery).
     *
     * @param {string} type - Event name.
     * @param {{detail?: *, bubbles?: boolean, cancelable?: boolean}} [options]
     */
    constructor(type, options = {}) {
        this.type = type;
        this.detail = options.detail;
        // Dispatch bookkeeping; a dispatcher may fill these in later.
        this.target = null;
        this.currentTarget = null;
        this.defaultPrevented = false;
        this.bubbles = options.bubbles || false;
        this.cancelable = options.cancelable || false;
    }

    /**
     * Mark the event's default action as prevented.
     * Honored only when the event was created with `cancelable: true`;
     * otherwise this call is a no-op, matching DOM semantics.
     */
    preventDefault() {
        if (!this.cancelable) return;
        this.defaultPrevented = true;
    }
}
JitterBuffer.js ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { EventEmitter, CustomEvent } from './EventEmitter.js';
2
+
3
export class JitterBuffer extends EventEmitter {
    /**
     * Accumulates incoming PCM16 chunks and emits them in batches via a
     * 'flush' event: first once a pre-roll threshold is reached, then
     * whenever the queue fills to `maxByteLength`.
     *
     * @param {number} maxByteLength - Byte threshold that triggers a flush
     *     once playback has started.
     * @param {number} [sampleRate=24000] - Sample rate used for duration math.
     * @param {number} [minBufferBeforePlayback=0] - Bytes to accumulate
     *     before the very first flush (pre-roll).
     */
    constructor(maxByteLength, sampleRate = 24000, minBufferBeforePlayback = 0) {
        super();
        this.maxByteLength = maxByteLength;
        this.sampleRate = sampleRate;
        this.minBufferBeforePlayback = minBufferBeforePlayback;
        this.buffer = [];
        this.bytesPerSample = 2; // PCM16 = 2 bytes per sample
        this.hasStartedPlayback = false;
    }

    /** Total queued size in bytes across all chunks. */
    get byteLength() {
        let total = 0;
        for (const chunk of this.buffer) {
            total += chunk.byteLength;
        }
        return total;
    }

    /** Queued audio duration in milliseconds at the configured sample rate. */
    get durationMs() {
        const totalSamples = this.byteLength / this.bytesPerSample;
        return (totalSamples / this.sampleRate) * 1000;
    }

    /**
     * Append a PCM16 chunk and flush if a threshold is crossed.
     * @param {Int16Array} data - Raw PCM16 samples.
     * @throws {Error} If `data` is not an Int16Array.
     */
    enqueue(data) {
        if (!(data instanceof Int16Array)) {
            throw new Error('JitterBuffer expects Int16Array data');
        }

        this.buffer.push(data);
        const queuedBytes = this.byteLength;

        if (!this.hasStartedPlayback) {
            // Still pre-rolling: wait for the initial threshold, then start.
            if (queuedBytes >= this.minBufferBeforePlayback) {
                this.hasStartedPlayback = true;
                this.flush();
            }
        } else if (queuedBytes >= this.maxByteLength) {
            // Playback already underway: flush whenever the queue fills up.
            this.flush();
        }
    }

    /** Emit all queued chunks via a 'flush' event, then empty the queue. */
    flush() {
        if (this.buffer.length === 0) return;

        const event = new CustomEvent('flush', { detail: this.buffer });
        this.dispatchEvent(event);
        this.buffer = [];
    }

    /** Discard all queued audio and re-arm the pre-roll gate. */
    clear() {
        this.buffer = [];
        this.hasStartedPlayback = false;
    }

    /** Flush whatever remains; intended for end-of-stream. */
    forceFlush() {
        if (this.buffer.length > 0) {
            console.log(`Force flushing ${this.byteLength} bytes at stream end`);
            this.flush();
        }
    }

    /** Snapshot of queue occupancy for debugging/metrics. */
    getBufferStatus() {
        return {
            chunks: this.buffer.length,
            byteLength: this.byteLength,
            durationMs: this.durationMs,
            fillPercentage: (this.byteLength / this.maxByteLength) * 100
        };
    }
}
LICENSE ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright [yyyy] [name of copyright owner]
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
PCMPlayer.js ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { EventEmitter, CustomEvent } from './EventEmitter.js';
2
+
3
export class PCMPlayer extends EventEmitter {
    /**
     * Schedules PCM audio chunks for back-to-back playback on a Web Audio
     * AudioContext, applying short gain fades at chunk edges to avoid clicks.
     *
     * Events emitted: 'audioStarted', 'audioEnded', 'volumeChange'.
     *
     * @param {AudioContext} audioContext - A running Web Audio context.
     */
    constructor(audioContext) {
        super();
        this.audioContext = audioContext;
        // Next free position on the AudioContext timeline to schedule at.
        this.playbackTime = 0;
        // Master chain: per-chunk gain -> gainNode -> destination,
        // with an analyser tapping the post-gain signal.
        this.gainNode = this.audioContext.createGain();
        this.gainNode.connect(this.audioContext.destination);
        this.analyser = this.audioContext.createAnalyser();
        this.gainNode.connect(this.analyser);

        // Default settings
        this.fadeInDuration = 0.01; // 10ms fade in to avoid clicks
        this.fadeOutDuration = 0.01; // 10ms fade out
    }

    /**
     * Schedule one audio chunk for playback immediately after any previously
     * scheduled audio (or "now" if the schedule has fallen behind).
     *
     * @param {Int16Array|Float32Array} data - PCM16 samples (converted) or
     *     float samples in [-1, 1), mono, at the context's sample rate.
     */
    playAudio(data) {
        if (this.audioContext.state !== 'running') {
            console.warn(`Audio context is in ${this.audioContext.state} state`);
            return;
        }

        // Accept raw PCM16 or already-converted float samples.
        const float32Array = data instanceof Int16Array
            ? this.pcm16ToFloat32(data)
            : data;

        const audioBuffer = this.audioContext.createBuffer(
            1, // mono
            float32Array.length,
            this.audioContext.sampleRate
        );

        audioBuffer.copyToChannel(float32Array, 0);

        const source = this.audioContext.createBufferSource();
        source.buffer = audioBuffer;

        // Per-chunk gain node to apply edge fades without disturbing the
        // master volume.
        const sourceGain = this.audioContext.createGain();
        source.connect(sourceGain);
        sourceGain.connect(this.gainNode);

        // Never schedule in the past; if playback fell behind, catch up.
        const currentTime = this.audioContext.currentTime;
        if (this.playbackTime < currentTime) {
            this.playbackTime = currentTime;
        }
        const startAt = this.playbackTime; // scheduled start of THIS chunk

        // Apply fade in
        sourceGain.gain.setValueAtTime(0, startAt);
        sourceGain.gain.linearRampToValueAtTime(1, startAt + this.fadeInDuration);

        // Apply fade out
        const duration = audioBuffer.duration;
        const fadeOutTime = startAt + duration - this.fadeOutDuration;
        sourceGain.gain.setValueAtTime(1, fadeOutTime);
        sourceGain.gain.linearRampToValueAtTime(0, startAt + duration);

        source.start(startAt);
        this.playbackTime = startAt + duration;

        // Fix: report the chunk's scheduled START time. Previously this field
        // carried the post-increment playbackTime, i.e. the chunk's END time.
        this.emit('audioStarted', {
            startTime: startAt,
            duration: duration,
            samples: float32Array.length
        });

        // Clean up after playback
        source.onended = () => {
            source.disconnect();
            sourceGain.disconnect();
            this.emit('audioEnded', {
                endTime: this.playbackTime
            });
        };
    }

    /**
     * Convert PCM16 samples to Float32 in the [-1, 1) range.
     * @param {Int16Array} pcm16
     * @returns {Float32Array}
     */
    pcm16ToFloat32(pcm16) {
        const float32 = new Float32Array(pcm16.length);
        for (let i = 0; i < pcm16.length; i++) {
            float32[i] = pcm16[i] / 32768; // Convert PCM16 to Float32 (-1 to 1 range)
        }
        return float32;
    }

    /** Master volume, 0..1. */
    get volume() {
        return this.gainNode.gain.value;
    }

    set volume(value) {
        // Clamp between 0 and 1
        const clampedValue = Math.max(0, Math.min(1, value));
        this.gainNode.gain.value = clampedValue;
        this.emit('volumeChange', { volume: clampedValue });
    }

    /** Master volume expressed as a percentage, 0..100. */
    get volumePercentage() {
        return this.volume * 100;
    }

    set volumePercentage(percentage) {
        this.volume = percentage / 100;
    }

    /** Frequency-domain snapshot from the analyser. @returns {Uint8Array} */
    getAnalyserData() {
        const bufferLength = this.analyser.frequencyBinCount;
        const dataArray = new Uint8Array(bufferLength);
        this.analyser.getByteFrequencyData(dataArray);
        return dataArray;
    }

    /** Time-domain (waveform) snapshot from the analyser. @returns {Uint8Array} */
    getTimeDomainData() {
        const bufferLength = this.analyser.frequencyBinCount;
        const dataArray = new Uint8Array(bufferLength);
        this.analyser.getByteTimeDomainData(dataArray);
        return dataArray;
    }

    /** Reset the scheduling clock and silence anything already scheduled. */
    reset() {
        this.playbackTime = 0;
        // Stop all currently playing sources
        this.stopAllSources();
    }

    /**
     * Silence all scheduled audio. Individual sources are not tracked, so a
     * fresh gain/analyser chain is swapped in and the old chain is faded out
     * quickly, then disconnected.
     */
    stopAllSources() {
        if (this.audioContext) {
            const oldGainNode = this.gainNode;
            this.gainNode = this.audioContext.createGain();
            this.gainNode.connect(this.audioContext.destination);
            this.analyser = this.audioContext.createAnalyser();
            this.gainNode.connect(this.analyser);

            // Fade out old audio quickly (50ms ramp), disconnect shortly after.
            if (oldGainNode) {
                const now = this.audioContext.currentTime;
                oldGainNode.gain.setValueAtTime(oldGainNode.gain.value, now);
                oldGainNode.gain.linearRampToValueAtTime(0, now + 0.05);
                setTimeout(() => {
                    oldGainNode.disconnect();
                }, 100);
            }
        }
    }

    /** Resume a suspended AudioContext (e.g. after a user gesture). */
    async resume() {
        if (this.audioContext.state === 'suspended') {
            await this.audioContext.resume();
        }
    }

    /** Snapshot of scheduling state for debugging/metrics. */
    getPlaybackStatus() {
        return {
            currentTime: this.audioContext.currentTime,
            scheduledTime: this.playbackTime,
            bufferedDuration: Math.max(0, this.playbackTime - this.audioContext.currentTime),
            state: this.audioContext.state
        };
    }
}
PCMPlayerWorklet.js ADDED
@@ -0,0 +1,563 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { EventEmitter, CustomEvent } from './EventEmitter.js';
2
+
3
+ /**
4
+ * PCMPlayerWorklet - Drop-in replacement for PCMPlayer using AudioWorklet
5
+ * Uses dynamic buffer management with backpressure for smooth playback
6
+ */
7
+ export class PCMPlayerWorklet extends EventEmitter {
8
+ constructor(audioContext, options = {}) {
9
+ super();
10
+ this.audioContext = audioContext;
11
+ this.options = options;
12
+ this.workletNode = null;
13
+ this.isInitialized = false;
14
+ this.playbackTime = 0; // For API compatibility
15
+
16
+ // Audio nodes
17
+ this.gainNode = this.audioContext.createGain();
18
+ this.gainNode.connect(this.audioContext.destination);
19
+ this.analyser = this.audioContext.createAnalyser();
20
+ this.gainNode.connect(this.analyser);
21
+
22
+ // Queue for chunks waiting to be sent
23
+ this.pendingChunks = [];
24
+ this.availableCapacity = 0;
25
+ this.isWorkletReady = false;
26
+ this.hasReceivedInitialCapacity = false;
27
+
28
+ // Metrics
29
+ this.metrics = {
30
+ chunksPlayed: 0,
31
+ underruns: 0,
32
+ bufferLevel: 0,
33
+ samplesPlayed: 0
34
+ };
35
+
36
+ // Initialize worklet
37
+ this.initPromise = this.initialize();
38
+ }
39
+
40
+ async initialize() {
41
+ if (this.isInitialized) return;
42
+
43
+ try {
44
+ // Calculate buffer parameters
45
+ const sampleRate = this.audioContext.sampleRate;
46
+ const minBufferMs = this.options.minBufferBeforePlaybackMs || 300;
47
+ const minBufferSamples = Math.floor(minBufferMs * sampleRate / 1000);
48
+
49
+ // Buffer size: enough for smooth playback but not excessive
50
+ // Target 60 seconds of buffer to prevent any overflow issues
51
+ const bufferSizeSamples = sampleRate * 60;
52
+
53
+ // Create the worklet processor code
54
+ const processorCode = `
55
+ class PCMProcessor extends AudioWorkletProcessor {
56
+ constructor() {
57
+ super();
58
+
59
+ // Ring buffer - sized appropriately
60
+ this.bufferSize = ${bufferSizeSamples};
61
+ this.ringBuffer = new Float32Array(this.bufferSize);
62
+ this.readPos = 0;
63
+ this.writePos = 0;
64
+ this.isPlaying = false;
65
+
66
+ // Configuration
67
+ this.minBufferSamples = ${minBufferSamples};
68
+ this.targetBufferSamples = ${minBufferSamples * 2}; // Target 2x min for stability
69
+
70
+ // State
71
+ this.streamEnded = false;
72
+ this.playbackCompleteReported = false;
73
+
74
+ // Stats reporting
75
+ this.frameCount = 0;
76
+ this.reportInterval = 256; // Report every ~5ms at 48kHz
77
+
78
+ this.port.onmessage = (e) => {
79
+ switch(e.data.type) {
80
+ case 'audio':
81
+ this.addAudio(e.data.data);
82
+ break;
83
+ case 'reset':
84
+ this.reset();
85
+ break;
86
+ case 'stream-ended':
87
+ this.streamEnded = true;
88
+ break;
89
+ }
90
+ };
91
+
92
+ // Send initial capacity
93
+ this.sendCapacityUpdate();
94
+ }
95
+
96
+ addAudio(float32Data) {
97
+ const samples = float32Data.length;
98
+ const available = this.getAvailableSpace();
99
+ const bufferedBefore = this.getBufferedSamples();
100
+
101
+
102
+ if (samples > available) {
103
+ // This shouldn't happen with proper backpressure
104
+ console.error('Buffer overflow - bug in backpressure. Samples:', samples, 'Available:', available, 'Buffered:', this.getBufferedSamples());
105
+ // Drop oldest data to recover
106
+ const overflow = samples - available;
107
+ this.readPos = (this.readPos + overflow) % this.bufferSize;
108
+ }
109
+
110
+ // Write to ring buffer
111
+ if (this.writePos + samples <= this.bufferSize) {
112
+ this.ringBuffer.set(float32Data, this.writePos);
113
+ this.writePos += samples;
114
+ if (this.writePos >= this.bufferSize) {
115
+ this.writePos = 0;
116
+ }
117
+ } else {
118
+ const firstPart = this.bufferSize - this.writePos;
119
+ const secondPart = samples - firstPart;
120
+ this.ringBuffer.set(float32Data.slice(0, firstPart), this.writePos);
121
+ this.ringBuffer.set(float32Data.slice(firstPart), 0);
122
+ this.writePos = secondPart;
123
+ }
124
+
125
+ // Auto-start when we have enough buffered
126
+ const buffered = this.getBufferedSamples();
127
+
128
+ if (!this.isPlaying && buffered >= this.minBufferSamples) {
129
+ const now = currentTime;
130
+ this.isPlaying = true;
131
+ // Notify that playback has started
132
+ this.port.postMessage({
133
+ type: 'playback-started',
134
+ buffered: buffered,
135
+ audioTime: now
136
+ });
137
+ }
138
+
139
+ // Report capacity after adding
140
+ this.sendCapacityUpdate();
141
+ }
142
+
143
+ getAvailableSpace() {
144
+ const used = this.getBufferedSamples();
145
+ return this.bufferSize - used - 128; // Leave small safety margin
146
+ }
147
+
148
+ getBufferedSamples() {
149
+ if (this.writePos >= this.readPos) {
150
+ return this.writePos - this.readPos;
151
+ } else {
152
+ return this.bufferSize - this.readPos + this.writePos;
153
+ }
154
+ }
155
+
156
+ sendCapacityUpdate() {
157
+ const buffered = this.getBufferedSamples();
158
+ const capacity = this.getAvailableSpace();
159
+
160
+ // Calculate how much we want to receive
161
+ // If buffer is low, request more; if it's full, request nothing
162
+ let requestSamples = 0;
163
+ if (buffered < this.targetBufferSamples) {
164
+ requestSamples = Math.min(capacity, this.targetBufferSamples - buffered);
165
+ }
166
+
167
+ this.port.postMessage({
168
+ type: 'capacity',
169
+ buffered: buffered,
170
+ capacity: capacity,
171
+ requestSamples: requestSamples,
172
+ isPlaying: this.isPlaying
173
+ });
174
+ }
175
+
176
+ process(inputs, outputs, parameters) {
177
+ const output = outputs[0];
178
+ if (!output || !output[0]) return true;
179
+
180
+ const outputChannel = output[0];
181
+ const numSamples = outputChannel.length;
182
+
183
+ // Report stats periodically
184
+ if (++this.frameCount % this.reportInterval === 0) {
185
+ this.sendCapacityUpdate();
186
+ }
187
+
188
+ if (!this.isPlaying) {
189
+ outputChannel.fill(0);
190
+ return true;
191
+ }
192
+
193
+ const buffered = this.getBufferedSamples();
194
+
195
+ if (buffered < numSamples) {
196
+ // Underrun - play what we have and fill rest with silence
197
+ let samplesRead = 0;
198
+
199
+ if (buffered > 0) {
200
+ // Play whatever samples we DO have
201
+ if (this.readPos + buffered <= this.bufferSize) {
202
+ for (let i = 0; i < buffered; i++) {
203
+ outputChannel[i] = this.ringBuffer[this.readPos + i];
204
+ }
205
+ this.readPos += buffered;
206
+ if (this.readPos >= this.bufferSize) {
207
+ this.readPos = 0;
208
+ }
209
+ } else {
210
+ // Wrap-around case
211
+ const firstPart = this.bufferSize - this.readPos;
212
+ const secondPart = buffered - firstPart;
213
+
214
+ for (let i = 0; i < firstPart; i++) {
215
+ outputChannel[i] = this.ringBuffer[this.readPos + i];
216
+ }
217
+ for (let i = 0; i < secondPart; i++) {
218
+ outputChannel[firstPart + i] = this.ringBuffer[i];
219
+ }
220
+
221
+ this.readPos = secondPart;
222
+ }
223
+ samplesRead = buffered;
224
+ }
225
+
226
+ // Fill remaining with silence
227
+ for (let i = samplesRead; i < numSamples; i++) {
228
+ outputChannel[i] = 0;
229
+ }
230
+
231
+ // Check for playback complete
232
+ if (this.streamEnded && buffered === 0) {
233
+ if (!this.playbackCompleteReported) {
234
+ this.port.postMessage({
235
+ type: 'playback-complete'
236
+ });
237
+ this.playbackCompleteReported = true;
238
+ }
239
+ this.isPlaying = false;
240
+ this.streamEnded = false;
241
+ } else {
242
+ // Request more data urgently
243
+ this.port.postMessage({
244
+ type: 'underrun',
245
+ buffered: buffered,
246
+ needed: numSamples
247
+ });
248
+ this.sendCapacityUpdate();
249
+ }
250
+ } else {
251
+ // Normal playback - read from ring buffer
252
+ if (this.readPos + numSamples <= this.bufferSize) {
253
+ for (let i = 0; i < numSamples; i++) {
254
+ outputChannel[i] = this.ringBuffer[this.readPos + i];
255
+ }
256
+ this.readPos += numSamples;
257
+ if (this.readPos >= this.bufferSize) {
258
+ this.readPos = 0;
259
+ }
260
+ } else {
261
+ // Wrap-around case
262
+ const firstPart = this.bufferSize - this.readPos;
263
+ const secondPart = numSamples - firstPart;
264
+
265
+ for (let i = 0; i < firstPart; i++) {
266
+ outputChannel[i] = this.ringBuffer[this.readPos + i];
267
+ }
268
+ for (let i = 0; i < secondPart; i++) {
269
+ outputChannel[firstPart + i] = this.ringBuffer[i];
270
+ }
271
+
272
+ this.readPos = secondPart;
273
+ }
274
+ }
275
+
276
+ return true;
277
+ }
278
+
279
+ reset() {
280
+ this.readPos = 0;
281
+ this.writePos = 0;
282
+ this.ringBuffer.fill(0);
283
+ this.isPlaying = false;
284
+ this.streamEnded = false;
285
+ this.playbackCompleteReported = false;
286
+ this.sendCapacityUpdate();
287
+ }
288
+ }
289
+
290
+ registerProcessor('pcm-processor', PCMProcessor);
291
+ `;
292
+
293
+ // Create and load worklet
294
+ const blob = new Blob([processorCode], { type: 'application/javascript' });
295
+ const workletUrl = URL.createObjectURL(blob);
296
+
297
+ await this.audioContext.audioWorklet.addModule(workletUrl);
298
+ URL.revokeObjectURL(workletUrl);
299
+
300
+ // Create worklet node
301
+ this.workletNode = new AudioWorkletNode(this.audioContext, 'pcm-processor');
302
+ this.workletNode.connect(this.gainNode);
303
+
304
+ // Handle messages from worklet
305
+ this.workletNode.port.onmessage = (e) => {
306
+ switch (e.data.type) {
307
+ case 'capacity':
308
+ this.handleCapacityUpdate(e.data);
309
+ break;
310
+
311
+ case 'underrun':
312
+ this.metrics.underruns++;
313
+ console.warn(`[MAIN THREAD] ⚠️ UNDERRUN #${this.metrics.underruns} detected! buffered=${e.data.buffered} samples, needed=${e.data.needed} samples`);
314
+ // Try to send more data immediately
315
+ this.processPendingChunks();
316
+ break;
317
+
318
+ case 'playback-started':
319
+ console.log(`[MAIN THREAD] Received playback-started at performance.now=${performance.now().toFixed(2)}ms, audioContext.currentTime=${this.audioContext.currentTime.toFixed(3)}s, worklet reported audioTime=${e.data.audioTime}s`);
320
+ this.emit('firstPlayback', {
321
+ startTime: this.audioContext.currentTime,
322
+ bufferedSamples: e.data.buffered
323
+ });
324
+ break;
325
+
326
+ case 'playback-complete':
327
+ this.emit('audioEnded', {
328
+ endTime: this.audioContext.currentTime
329
+ });
330
+ break;
331
+ }
332
+ };
333
+
334
+ this.isInitialized = true;
335
+ this.isWorkletReady = true;
336
+ } catch (error) {
337
+ console.error('Failed to initialize PCMPlayerWorklet:', error);
338
+ throw error;
339
+ }
340
+ }
341
+
342
+ handleCapacityUpdate(data) {
343
+ this.availableCapacity = data.capacity;
344
+ this.metrics.bufferLevel = data.buffered;
345
+
346
+ // console.log(`[CAPACITY] Update at ${performance.now().toFixed(2)}ms: capacity=${data.capacity}, buffered=${data.buffered}, pending=${this.pendingChunks.length}`);
347
+
348
+ // Mark that we've received initial capacity
349
+ if (!this.hasReceivedInitialCapacity) {
350
+ this.hasReceivedInitialCapacity = true;
351
+ // console.log(`[CAPACITY] *** FIRST capacity received at ${performance.now().toFixed(2)}ms, processing ${this.pendingChunks.length} pending chunks`);
352
+ // Process any chunks that were waiting for initial capacity
353
+ if (this.pendingChunks.length > 0) {
354
+ this.processPendingChunks();
355
+ }
356
+ }
357
+
358
+ // If worklet is requesting data, try to send it
359
+ if (data.requestSamples > 0 && this.pendingChunks.length > 0) {
360
+ this.processPendingChunks();
361
+ }
362
+ }
363
+
364
+ processPendingChunks() {
365
+ if (!this.isWorkletReady || this.pendingChunks.length === 0) {
366
+ return;
367
+ }
368
+
369
+ // Don't send if we don't know capacity yet
370
+ if (this.availableCapacity <= 0) {
371
+ return;
372
+ }
373
+
374
+ // Send ONE chunk if it fits, then wait for next capacity update
375
+ // This prevents race conditions from sending multiple chunks before worklet updates
376
+ const chunk = this.pendingChunks[0];
377
+
378
+ if (chunk.length <= this.availableCapacity) {
379
+ // Send the whole chunk
380
+ this.pendingChunks.shift();
381
+ this.workletNode.port.postMessage({
382
+ type: 'audio',
383
+ data: chunk
384
+ });
385
+ // Set capacity to 0 to prevent sending more until we get an update
386
+ this.availableCapacity = 0;
387
+ } else if (this.availableCapacity > 4096) {
388
+ // Send partial chunk only if we have significant space
389
+ const partial = chunk.slice(0, this.availableCapacity);
390
+ console.log(`Sending partial: ${partial.length} samples from ${chunk.length} (capacity: ${this.availableCapacity})`);
391
+ this.pendingChunks[0] = chunk.slice(this.availableCapacity);
392
+ this.workletNode.port.postMessage({
393
+ type: 'audio',
394
+ data: partial
395
+ });
396
+ // Set capacity to 0 to prevent sending more until we get an update
397
+ this.availableCapacity = 0;
398
+ } else {
399
+ console.log(`Not sending - chunk ${chunk.length} samples, capacity ${this.availableCapacity}`);
400
+ }
401
+ // else: Not enough space, wait for next capacity update
402
+
403
+ // If all chunks sent and stream ended, notify worklet
404
+ if (this.pendingChunks.length === 0 && this.pendingStreamEnd) {
405
+ this.workletNode.port.postMessage({ type: 'stream-ended' });
406
+ this.pendingStreamEnd = false;
407
+ }
408
+ }
409
+
410
+ playAudio(data) {
411
+ if (!this.isInitialized) {
412
+ // Queue the data if not initialized yet
413
+ if (!this.initPendingQueue) {
414
+ this.initPendingQueue = [];
415
+ this.initPromise.then(() => {
416
+ // Process queued data
417
+ const queue = this.initPendingQueue;
418
+ this.initPendingQueue = null;
419
+ for (const queuedData of queue) {
420
+ this.playAudio(queuedData);
421
+ }
422
+ });
423
+ }
424
+ this.initPendingQueue.push(data);
425
+ return;
426
+ }
427
+
428
+ if (this.audioContext.state !== 'running') {
429
+ return;
430
+ }
431
+
432
+ // Convert to Float32Array if needed
433
+ const float32Array = data instanceof Int16Array
434
+ ? this.pcm16ToFloat32(data)
435
+ : data;
436
+
437
+ // Add to pending queue
438
+ this.pendingChunks.push(float32Array);
439
+
440
+ // Only try to process if we've received initial capacity and have space
441
+ // Otherwise wait for capacity update from worklet
442
+ if (this.hasReceivedInitialCapacity && this.availableCapacity > 0) {
443
+ this.processPendingChunks();
444
+ }
445
+
446
+ // Update metrics
447
+ this.metrics.chunksPlayed++;
448
+
449
+ // Update playback time for compatibility
450
+ const duration = float32Array.length / this.audioContext.sampleRate;
451
+ this.playbackTime = this.audioContext.currentTime + duration;
452
+
453
+ // Emit events for compatibility
454
+ this.emit('audioStarted', {
455
+ startTime: this.audioContext.currentTime,
456
+ duration: duration,
457
+ samples: float32Array.length
458
+ });
459
+ }
460
+
461
+ notifyStreamEnded() {
462
+ if (this.pendingChunks.length > 0) {
463
+ // Still have chunks to send, mark for later
464
+ this.pendingStreamEnd = true;
465
+ } else {
466
+ // No chunks left, send immediately
467
+ if (this.workletNode) {
468
+ this.workletNode.port.postMessage({ type: 'stream-ended' });
469
+ }
470
+ }
471
+ }
472
+
473
+ pcm16ToFloat32(pcm16) {
474
+ const float32 = new Float32Array(pcm16.length);
475
+ for (let i = 0; i < pcm16.length; i++) {
476
+ float32[i] = pcm16[i] / 32768;
477
+ }
478
+ return float32;
479
+ }
480
+
481
+ reset() {
482
+ this.playbackTime = 0;
483
+ this.pendingChunks = [];
484
+ this.pendingStreamEnd = false;
485
+ this.availableCapacity = 0;
486
+
487
+ if (this.workletNode) {
488
+ this.workletNode.port.postMessage({ type: 'reset' });
489
+ }
490
+
491
+ // Quick fade out to avoid clicks
492
+ if (this.gainNode) {
493
+ const now = this.audioContext.currentTime;
494
+ this.gainNode.gain.setValueAtTime(this.gainNode.gain.value, now);
495
+ this.gainNode.gain.linearRampToValueAtTime(0, now + 0.05);
496
+ setTimeout(() => {
497
+ this.gainNode.gain.value = 1;
498
+ }, 100);
499
+ }
500
+ }
501
+
502
+ stopAllSources() {
503
+ this.reset();
504
+ }
505
+
506
+ async resume() {
507
+ if (this.audioContext.state === 'suspended') {
508
+ await this.audioContext.resume();
509
+ }
510
+ }
511
+
512
+ get volume() {
513
+ return this.gainNode.gain.value;
514
+ }
515
+
516
+ set volume(value) {
517
+ const clampedValue = Math.max(0, Math.min(1, value));
518
+ this.gainNode.gain.value = clampedValue;
519
+ this.emit('volumeChange', { volume: clampedValue });
520
+ }
521
+
522
+ get volumePercentage() {
523
+ return this.volume * 100;
524
+ }
525
+
526
+ set volumePercentage(percentage) {
527
+ this.volume = percentage / 100;
528
+ }
529
+
530
+ getAnalyserData() {
531
+ const bufferLength = this.analyser.frequencyBinCount;
532
+ const dataArray = new Uint8Array(bufferLength);
533
+ this.analyser.getByteFrequencyData(dataArray);
534
+ return dataArray;
535
+ }
536
+
537
+ getTimeDomainData() {
538
+ const bufferLength = this.analyser.frequencyBinCount;
539
+ const dataArray = new Uint8Array(bufferLength);
540
+ this.analyser.getByteTimeDomainData(dataArray);
541
+ return dataArray;
542
+ }
543
+
544
+ getPlaybackStatus() {
545
+ const bufferMs = this.metrics.bufferLevel
546
+ ? (this.metrics.bufferLevel / this.audioContext.sampleRate) * 1000
547
+ : 0;
548
+
549
+ return {
550
+ currentTime: this.audioContext.currentTime,
551
+ scheduledTime: this.playbackTime,
552
+ bufferedDuration: bufferMs / 1000,
553
+ state: this.audioContext.state,
554
+ worklet: {
555
+ bufferLevelSamples: this.metrics.bufferLevel,
556
+ bufferLevelMs: bufferMs,
557
+ underruns: this.metrics.underruns,
558
+ chunksPlayed: this.metrics.chunksPlayed,
559
+ pendingChunks: this.pendingChunks.length
560
+ }
561
+ };
562
+ }
563
+ }
README.md ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Soprano 1.1 ONNX Web Demo
3
+ emoji: 🎧
4
+ colorFrom: blue
5
+ colorTo: indigo
6
+ sdk: static
7
+ short_description: Real-time text-to-speech in the browser using ONNX
8
+ app_file: index.html
9
+ pinned: false
10
+ models:
11
+ - KevinAHM/soprano-1.1-onnx
12
+ license: apache-2.0
13
+ ---
14
+
15
+ <!-- Version 0.0.3 -->
16
+ <div align="center">
17
+
18
+ # Soprano 1.1 ONNX Streaming — Instant Text‑to‑Speech in the Browser (WASM)
19
+
20
+ [![Upstream](https://img.shields.io/badge/Upstream-ekwek1%2Fsoprano-black?logo=github)](https://github.com/ekwek1/soprano)
21
+ [![Hugging Face Model](https://img.shields.io/badge/HuggingFace-Model-orange?logo=huggingface)](https://huggingface.co/KevinAHM/soprano-onnx)
22
+ [![Hugging Face Demo for Soprano Web Onnx](https://img.shields.io/badge/HuggingFace-Demo-yellow?logo=huggingface)](https://huggingface.co/spaces/KevinAHM/soprano-web-onnx)
23
+
24
+ A **static, client-side** browser demo that runs the Soprano TTS pipeline using **onnxruntime-web**.
25
+
26
+ Soprano 1.1 features significant performance optimizations, including moving all heavy inference to a **Web Worker** and utilizing an **int8 quantized decoder** for superior real-time speeds on consumer CPUs.
27
+
28
+ ---
29
+
30
+ ## Requirements
31
+
32
+ - A modern browser (Chrome, Edge, Firefox, Safari).
33
+ - You must serve this folder over HTTP (opening `index.html` via `file://` usually breaks `fetch()` / module loading).
34
+ - The demo loads `onnxruntime-web` and `@huggingface/transformers` from a CDN by default (network required unless you vendor them).
35
+ - The model files are large; plan to use **Git LFS** or GitHub Releases if you publish them.
36
+
37
+ ---
38
+
39
+ ## Folder layout
40
+
41
+ Place model artifacts under `./models/`:
42
+
43
+ ```text
44
+ .
45
+ ├─ index.html
46
+ ├─ onnx-streaming.js (Main Thread Client)
47
+ ├─ inference-worker.js (Heavy Inference Engine)
48
+ ├─ PCMPlayerWorklet.js (Audio Playback Worklet)
49
+ ├─ style.css
50
+ ├─ onnx/
51
+ │ ├─ soprano_backbone_kv_fp32.onnx
52
+ │ └─ soprano_decoder_int8.onnx
53
+ ...
54
+ ```
55
+
56
+ Notes:
57
+ - ONNX models live in `onnx/` following HuggingFace convention.
58
+ - The decoder uses external weights (`.onnx.data` file must be present alongside the `.onnx` file).
59
+ - Tokenizer files are in the root directory.
60
+
61
+ ---
62
+
63
+ ## Run locally
64
+
65
+ Use any static file server from this directory, for example:
66
+
67
+ ```bash
68
+ python -m http.server 8085
69
+ ```
70
+
71
+ Then open `http://localhost:8085`.
72
+
73
+ ---
74
+
75
+ ## Configuration
76
+
77
+ Model paths are defined near the top of `onnx-streaming.js` in the `MODELS` object.
78
+
79
+ Sampling defaults are set in `onnx-streaming.js` (constructor):
80
+ - `temperature`
81
+ - `topK`
82
+ - `topP`
83
+ - `repetitionPenalty`
84
+
85
+ ---
86
+
87
+ ## Troubleshooting
88
+
89
+ - **"Load failed" / model never becomes Ready**
90
+ - Verify the `onnx/` filenames match `MODELS` in `onnx-streaming.js`
91
+ - Check DevTools → Network for a missing `.onnx.data` file (404)
92
+ - Confirm `/` contains `tokenizer.json` (and related files)
93
+ - **Performance notes**
94
+ - **Web Worker:** Keeps the UI responsive (no lag during generation).
95
+ - **int8 Decoder:** Optimized for high-throughput CPU inference.
96
+ - Achieves real-time streaming on modern hardware.
97
+
98
+ ---
99
+
100
+ ## License & attribution
101
+
102
+ Soprano is released under **Apache-2.0** in the upstream repository:
103
+ https://github.com/ekwek1/soprano
104
+
105
+
106
+
107
+
config.json ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Qwen3ForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 3,
8
+ "dtype": "bfloat16",
9
+ "eos_token_id": 3,
10
+ "head_dim": 128,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 512,
13
+ "initializer_range": 0.041666666666666664,
14
+ "intermediate_size": 2304,
15
+ "layer_types": [
16
+ "full_attention",
17
+ "full_attention",
18
+ "full_attention",
19
+ "full_attention",
20
+ "full_attention",
21
+ "full_attention",
22
+ "full_attention",
23
+ "full_attention",
24
+ "full_attention",
25
+ "full_attention",
26
+ "full_attention",
27
+ "full_attention",
28
+ "full_attention",
29
+ "full_attention",
30
+ "full_attention",
31
+ "full_attention",
32
+ "full_attention"
33
+ ],
34
+ "max_position_embeddings": 512,
35
+ "max_window_layers": 17,
36
+ "model_type": "qwen3",
37
+ "num_attention_heads": 4,
38
+ "num_hidden_layers": 17,
39
+ "num_key_value_heads": 1,
40
+ "rms_norm_eps": 1e-06,
41
+ "rope_scaling": null,
42
+ "rope_theta": 10000,
43
+ "sliding_window": null,
44
+ "tie_word_embeddings": false,
45
+ "transformers_version": "4.56.1",
46
+ "use_cache": true,
47
+ "use_sliding_window": false,
48
+ "vocab_size": 8192
49
+ }
generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 3,
4
+ "eos_token_id": 3,
5
+ "transformers_version": "4.56.1"
6
+ }
index.html ADDED
@@ -0,0 +1,168 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+
4
+ <head>
5
+ <meta charset="UTF-8">
6
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
7
+ <title>Soprano - Neural TTS in Your Browser</title>
8
+ <link rel="stylesheet" href="style.css">
9
+ <link rel="preconnect" href="https://fonts.googleapis.com">
10
+ <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
11
+ <link href="https://fonts.googleapis.com/css2?family=DM+Sans:wght@400;500;600&family=Syne:wght@500;600;700;800&family=Space+Mono:wght@400;700&display=swap" rel="stylesheet">
12
+ <!-- ONNX Runtime Web -->
13
+ <script src="https://cdn.jsdelivr.net/npm/onnxruntime-web/dist/ort.min.js"></script>
14
+ </head>
15
+
16
+ <body>
17
+ <!-- Ambient Background Effects -->
18
+ <div class="ambient-layer">
19
+ <div class="orb orb--primary"></div>
20
+ <div class="orb orb--secondary"></div>
21
+ <div class="orb orb--tertiary"></div>
22
+ <div class="grid-overlay"></div>
23
+ </div>
24
+
25
+ <div class="app-shell">
26
+ <!-- Hero Header -->
27
+ <header class="hero">
28
+ <div class="hero__brand">
29
+ <div class="logo">
30
+ <svg class="logo__icon" viewBox="0 0 32 32" fill="none">
31
+ <path d="M16 4C16 4 8 8 8 16C8 24 16 28 16 28" stroke="url(#logoGrad)" stroke-width="2" stroke-linecap="round"/>
32
+ <path d="M16 4C16 4 24 8 24 16C24 24 16 28 16 28" stroke="url(#logoGrad)" stroke-width="2" stroke-linecap="round"/>
33
+ <path d="M12 10V22" stroke="url(#logoGrad)" stroke-width="2" stroke-linecap="round"/>
34
+ <path d="M16 8V24" stroke="url(#logoGrad)" stroke-width="2" stroke-linecap="round"/>
35
+ <path d="M20 10V22" stroke="url(#logoGrad)" stroke-width="2" stroke-linecap="round"/>
36
+ <defs>
37
+ <linearGradient id="logoGrad" x1="8" y1="4" x2="24" y2="28" gradientUnits="userSpaceOnUse">
38
+ <stop stop-color="#3b82f6"/>
39
+ <stop offset="1" stop-color="#8b5cf6"/>
40
+ </linearGradient>
41
+ </defs>
42
+ </svg>
43
+ <span class="logo__text">Soprano</span>
44
+ </div>
45
+ <div class="hero__badge">
46
+ <span class="badge">ONNX Runtime</span>
47
+ </div>
48
+ </div>
49
+ <p class="hero__tagline">Real-time neural text-to-speech, running entirely in your browser</p>
50
+ </header>
51
+
52
+ <main class="main">
53
+ <!-- Input Section -->
54
+ <section class="input-section">
55
+ <div class="textarea-wrap">
56
+ <textarea
57
+ id="text-input"
58
+ placeholder="Type or paste text to synthesize..."
59
+ aria-label="Text to synthesize"
60
+ maxlength="500"
61
+ ></textarea>
62
+ <div class="textarea-meta">
63
+ <span class="char-count"><span id="char-count">0</span>/500</span>
64
+ </div>
65
+ </div>
66
+
67
+ <!-- Sample Texts -->
68
+ <div class="sample-texts">
69
+ <span class="sample-texts__label">Try:</span>
70
+ <button class="sample-btn" data-text="Hello, welcome to Soprano. This is a demonstration of real-time text to speech running entirely in your browser.">Demo greeting</button>
71
+ <button class="sample-btn" data-text="I completely understand how frustrating this must be for you. Let me take care of this right away and make sure we get it resolved.">Empathetic support</button>
72
+ <button class="sample-btn" data-text="Wow, congratulations! That's absolutely fantastic news! I'm so thrilled for you!">Excited</button>
73
+ <button class="sample-btn" data-text="I'm really sorry to hear about your loss. Please know that we're here for you, and take all the time you need.">Compassionate</button>
74
+ <button class="sample-btn" data-text="Great question! I'd be happy to walk you through this step by step. First, let's start with the basics.">Helpful guide</button>
75
+ </div>
76
+
77
+ <div class="controls">
78
+ <button id="generate-btn" class="btn btn--primary">
79
+ <svg class="btn__icon" viewBox="0 0 24 24" fill="currentColor">
80
+ <polygon points="5,3 19,12 5,21"/>
81
+ </svg>
82
+ <span class="btn__text">Generate Audio</span>
83
+ <div class="btn__loader" id="btn-loader"></div>
84
+ </button>
85
+ <button id="stop-btn" class="btn btn--secondary" disabled>
86
+ <svg class="btn__icon" viewBox="0 0 24 24" fill="currentColor">
87
+ <rect x="6" y="6" width="12" height="12" rx="1"/>
88
+ </svg>
89
+ <span class="btn__text">Stop</span>
90
+ </button>
91
+ </div>
92
+ </section>
93
+
94
+ <!-- Output Section: Visualizer + Metrics -->
95
+ <section class="output-section">
96
+ <div class="visualizer-panel">
97
+ <div class="visualizer-panel__header">
98
+ <span class="visualizer-panel__title">Audio Output</span>
99
+ <div class="status-indicator" id="status-indicator">
100
+ <span class="status-dot"></span>
101
+ <span class="status-text" id="stat-status">Idle</span>
102
+ </div>
103
+ </div>
104
+ <div class="visualizer-container">
105
+ <canvas id="visualizer-waveform"></canvas>
106
+ <canvas id="visualizer-bars" class="visualizer-bars"></canvas>
107
+ </div>
108
+ </div>
109
+
110
+ <div class="metrics-panel">
111
+ <h3 class="metrics-panel__title">Performance</h3>
112
+
113
+ <div class="metric">
114
+ <div class="metric__header">
115
+ <span class="metric__label">Time to First Byte</span>
116
+ <button class="metric__info" aria-label="TTFB explanation" data-tooltip="Time from request until first audio chunk is received">
117
+ <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
118
+ <circle cx="12" cy="12" r="10"/>
119
+ <path d="M12 16v-4M12 8h.01"/>
120
+ </svg>
121
+ </button>
122
+ </div>
123
+ <div class="metric__value">
124
+ <span class="metric__number" id="stat-ttfb">--</span>
125
+ <span class="metric__unit">ms</span>
126
+ </div>
127
+ <div class="metric__bar">
128
+ <div class="metric__bar-fill" id="ttfb-bar"></div>
129
+ </div>
130
+ </div>
131
+
132
+ <div class="metric metric--highlight">
133
+ <div class="metric__header">
134
+ <span class="metric__label">Real-Time Factor</span>
135
+ <button class="metric__info" aria-label="RTFx explanation" data-tooltip="Audio duration divided by processing time. Values above 1x mean faster than real-time playback.">
136
+ <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
137
+ <circle cx="12" cy="12" r="10"/>
138
+ <path d="M12 16v-4M12 8h.01"/>
139
+ </svg>
140
+ </button>
141
+ </div>
142
+ <div class="metric__value">
143
+ <span class="metric__number metric__number--large" id="stat-rtfx">--</span>
144
+ <span class="metric__unit">x</span>
145
+ </div>
146
+ <div class="metric__context" id="rtfx-context">&gt;1x = faster than real-time</div>
147
+ </div>
148
+
149
+ <div class="metric metric--status">
150
+ <span class="metric__label">Model</span>
151
+ <div class="model-status" id="model-status">
152
+ <span class="model-status__dot"></span>
153
+ <span class="model-status__text">Not loaded</span>
154
+ </div>
155
+ </div>
156
+ </div>
157
+ </section>
158
+ </main>
159
+
160
+ <footer class="footer">
161
+ <p>&copy; 2026 Soprano Audio Research</p>
162
+ </footer>
163
+ </div>
164
+
165
+ <script type="module" src="onnx-streaming.js?v=12"></script>
166
+ </body>
167
+
168
+ </html>
index.js ADDED
@@ -0,0 +1,203 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { PCMPlayer } from './PCMPlayer.js';
2
+ import { PCMPlayerWorklet } from './PCMPlayerWorklet.js';
3
+ import { TTSStreamingClient } from './TTSStreamingClient.js';
4
+ import { JitterBuffer } from './JitterBuffer.js';
5
+ import { EventEmitter } from './EventEmitter.js';
6
+
7
/**
 * High-level facade that couples a streaming TTS client to a PCM audio
 * player over a shared AudioContext. Audio chunks emitted by the client
 * are forwarded to the player, and client/player events are surfaced
 * through the overridable `onEvent(eventName, detail)` callback.
 */
export class TTSPCMPlayer {
    /**
     * @param {Object} options - Configuration; see defaults below. Any
     *   extra keys are carried through into `this.config` via the spread.
     */
    constructor(options = {}) {
        // Configuration with defaults. Nullish coalescing (??) is used so
        // explicit falsy-but-valid values (0, '') are honoured instead of
        // being silently replaced by the default, as `||` would do.
        this.config = {
            endpoint: options.endpoint ?? 'http://localhost:8000/v1',
            sampleRate: options.sampleRate ?? 24000,
            audioBufferingMs: options.audioBufferingMs ?? 50,
            minBufferBeforePlaybackMs: options.minBufferBeforePlaybackMs ?? 100,
            autoPlay: options.autoPlay !== false,
            volume: options.volume ?? 1.0,
            voice: options.voice ?? 'broom_salesman',
            model: options.model ?? 'echo-tts',
            temperature: options.temperature ?? 1.0,
            topP: options.topP ?? 0.95,
            topK: options.topK ?? 50,
            chatId: options.chatId ?? null,
            useWorklet: options.useWorklet !== false, // Default to true for better performance
            ...options
        };

        // Audio context pinned to the configured sample rate.
        this.audioContext = new (window.AudioContext || window.webkitAudioContext)({
            sampleRate: this.config.sampleRate
        });

        // Worklet-based player by default (lower latency, off-main-thread
        // mixing); legacy PCMPlayer as a fallback.
        if (this.config.useWorklet) {
            this.player = new PCMPlayerWorklet(this.audioContext, {
                minBufferBeforePlaybackMs: this.config.minBufferBeforePlaybackMs
            });
        } else {
            this.player = new PCMPlayer(this.audioContext);
        }

        this.client = new TTSStreamingClient({
            endpoint: this.config.endpoint,
            sampleRate: this.config.sampleRate,
            audioBufferingMs: this.config.audioBufferingMs,
            minBufferBeforePlaybackMs: this.config.minBufferBeforePlaybackMs,
            voice: this.config.voice,
            model: this.config.model,
            temperature: this.config.temperature,
            topP: this.config.topP,
            topK: this.config.topK
        });

        // Set initial volume
        this.player.volume = this.config.volume;

        // Wire client/player events into this facade.
        this.setupEventHandlers();
    }

    /** Forward client audio to the player and re-emit client/player events. */
    setupEventHandlers() {
        // Forward audio data from client to player (only while auto-playing
        // and the context is actually running).
        this.client.addEventListener('audio', (event) => {
            if (this.config.autoPlay && this.audioContext.state === 'running') {
                this.player.playAudio(event.detail.data);
            }
        });

        // Forward all client events through onEvent().
        ['synthesisStarted', 'synthesisCompleted', 'synthesisCancelled', 'error', 'progress', 'firstByte', 'firstAudioChunk'].forEach(eventName => {
            this.client.addEventListener(eventName, (event) => {
                if (this.onEvent) {
                    this.onEvent(eventName, event.detail);
                }

                // When synthesis completes, let the worklet player flush its
                // queue and report end-of-stream.
                if (eventName === 'synthesisCompleted' && this.config.useWorklet && this.player.notifyStreamEnded) {
                    this.player.notifyStreamEnded();
                }
            });
        });

        // Forward player events through onEvent().
        ['volumeChange', 'audioStarted', 'audioEnded', 'firstPlayback'].forEach(eventName => {
            this.player.addEventListener(eventName, (event) => {
                if (this.onEvent) {
                    this.onEvent(eventName, event.detail);
                }
            });
        });
    }

    /**
     * Start synthesizing `text`. Cancels any in-flight synthesis first so
     * a new request interrupts immediately.
     * @returns {Promise} the client's synthesis promise.
     */
    async synthesize(text, userVoice = null, userPrompt = null, userVoiceFormat = null, chatId = null, extraBody = null) {
        // Resume audio context if suspended (browser autoplay policies).
        if (this.audioContext.state === 'suspended') {
            await this.audioContext.resume();
        }

        // Cancel any ongoing synthesis before starting a new one; this
        // ensures immediate interrupt behavior.
        if (this.client.isStreaming) {
            console.log('[TTSPCMPlayer] Auto-cancelling ongoing synthesis before starting new one');
            this.cancel();
        }

        // Use provided chatId or fall back to the configured one.
        const effectiveChatId = chatId || this.config.chatId;

        return this.client.synthesize(text, userVoice, userPrompt, userVoiceFormat, effectiveChatId, extraBody);
    }

    /** Update the chat id on both config and client. */
    setChatId(chatId) {
        this.config.chatId = chatId;
        if (this.client) {
            this.client.setChatId(chatId);
        }
    }

    /** Abort the HTTP stream and stop audio playback immediately. */
    cancel() {
        this.client.cancel();
        this.player.reset();
    }

    /** Enable auto-play, resuming the context if needed. */
    async play() {
        if (this.audioContext.state === 'suspended') {
            await this.audioContext.resume();
        }
        this.config.autoPlay = true;
    }

    /** Disable auto-play; incoming audio is dropped rather than played. */
    pause() {
        this.config.autoPlay = false;
    }

    /** Change the synthesis voice. */
    setVoice(voice) {
        this.client.setVoice(voice);
        this.config.voice = voice;
    }

    /**
     * Change the API endpoint; config is only updated if the client
     * accepted the new endpoint.
     * @returns {boolean} whether the endpoint was accepted.
     */
    setEndpoint(endpoint) {
        const result = this.client.setEndpoint(endpoint);
        if (result) {
            this.config.endpoint = endpoint;
        }
        return result;
    }

    /** Merge partial config into both the client and this facade. */
    updateConfig(config) {
        this.client.updateConfig(config);
        Object.assign(this.config, config);
    }

    /** Linear volume in [0, 1] (delegated to the player). */
    get volume() {
        return this.player.volume;
    }

    set volume(value) {
        this.player.volume = value;
    }

    /** Volume on a 0–100 scale (delegated to the player). */
    get volumePercentage() {
        return this.player.volumePercentage;
    }

    set volumePercentage(value) {
        this.player.volumePercentage = value;
    }

    /** Combined status snapshot of client, player, and audio context. */
    getStatus() {
        return {
            client: this.client.getStatus(),
            player: this.player.getPlaybackStatus(),
            audioContext: {
                state: this.audioContext.state,
                sampleRate: this.audioContext.sampleRate,
                currentTime: this.audioContext.currentTime
            }
        };
    }

    /** Frequency-domain analyser data (delegated to the player). */
    getAnalyserData() {
        return this.player.getAnalyserData();
    }

    /** Time-domain analyser data (delegated to the player). */
    getTimeDomainData() {
        return this.player.getTimeDomainData();
    }

    /**
     * Event-handler callback; override to receive all client/player
     * events. Default implementation just logs.
     */
    onEvent(eventName, data) {
        console.log(`[TTSPCMPlayer] ${eventName}:`, data);
    }
}
196
+
197
+ // Export all components for advanced usage
198
+ export { PCMPlayer, PCMPlayerWorklet, TTSStreamingClient, JitterBuffer, EventEmitter };
199
+
200
+ // Create a simple factory function for easy instantiation
201
+ export function createTTSPlayer(options) {
202
+ return new TTSPCMPlayer(options);
203
+ }
inference-worker.js ADDED
@@ -0,0 +1,724 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
// ONNX Runtime Web Worker (classic script — loaded via importScripts).
console.log('Worker Script Starting (Classic)...');
self.postMessage({ type: 'status', status: 'Worker Thread Started', state: 'idle' });

// Pull onnxruntime-web in from the CDN. A failure here is logged but not
// fatal at load time; later session creation will surface the problem.
try {
    const ortVersion = '1.20.0';
    importScripts(`https://cdn.jsdelivr.net/npm/onnxruntime-web@${ortVersion}/dist/ort.min.js`);
} catch (e) {
    console.error('Failed to load ORT in worker:', e);
}
11
+
12
// Configuration: model artifact locations, relative to the worker script.
const MODELS = {
    backbone: './onnx/soprano_backbone_kv_fp32.onnx',
    decoder: './onnx/soprano_decoder_int8.onnx',
    tokenizer: './' // Tokenizer loading still needs context; may need to be passed in from the main thread
};

// The Hugging Face tokenizer library must be imported in a worker-friendly
// way. The main-thread import was dynamic; Transformers.js usually works
// inside workers, so the same approach is attempted here.

const RECEPTIVE_FIELD = 4;   // NOTE(review): presumably the decoder's context window in tokens — confirm
const TOKEN_SIZE = 2048;     // NOTE(review): presumably samples produced per token — confirm
const SAMPLE_RATE = 32000;   // audio sample rate constant (Hz)

// Mutable worker state
let backboneSession = null;  // ORT session for the backbone model
let decoderSession = null;   // ORT session for the decoder model
let tokenizer = null;        // lazily-loaded tokenizer instance
let isGenerating = false;    // true while a generation request is running
let isReady = false;         // true once models/tokenizer are loaded
33
+
34
// FP16 lookup table: maps every 16-bit half-float bit pattern to its
// Float32 value, so decoding becomes a single array read later.
let fp16Lookup = new Float32Array(65536);
let isFp16Backbone = false;

// Helpers
/**
 * Populate fp16Lookup per IEEE 754 binary16: 1 sign bit, 5 exponent bits
 * (bias 15), 10 fraction bits. Exponent 0 => subnormal/zero (no implicit
 * leading 1); exponent 31 => Infinity or NaN.
 */
function initFp16Lookup() {
    for (let bits = 0; bits < 65536; bits++) {
        const sign = (bits & 0x8000) ? -1 : 1;
        const exponent = (bits >> 10) & 0x1F;
        const fraction = bits & 0x03FF;
        let value;
        if (exponent === 0) {
            // Subnormal (or signed zero).
            value = sign * Math.pow(2, -14) * (fraction / 1024);
        } else if (exponent === 31) {
            value = fraction !== 0 ? NaN : sign * Infinity;
        } else {
            value = sign * Math.pow(2, exponent - 15) * (1 + fraction / 1024);
        }
        fp16Lookup[bits] = value;
    }
}
+
54
+ // ----------------------------------------------------------------------------
55
+ // Text Preprocessing (Ported from onnx-streaming.js)
56
+ // ----------------------------------------------------------------------------
57
+ // ... (Including the full text preprocessing logic here to keep worker self-contained) ...
58
+ // For brevity in this tool call, I will include the necessary functions.
59
+ // Ideally, these would be in a shared utils file, but I'll paste them to ensure it works.
60
+
61
+ const ONES = ['', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine', 'ten', 'eleven', 'twelve', 'thirteen', 'fourteen', 'fifteen', 'sixteen', 'seventeen', 'eighteen', 'nineteen'];
62
+ const TENS = ['', '', 'twenty', 'thirty', 'forty', 'fifty', 'sixty', 'seventy', 'eighty', 'ninety'];
63
+ const ORDINAL_ONES = ['', 'first', 'second', 'third', 'fourth', 'fifth', 'sixth', 'seventh', 'eighth', 'ninth', 'tenth', 'eleventh', 'twelfth', 'thirteenth', 'fourteenth', 'fifteenth', 'sixteenth', 'seventeenth', 'eighteenth', 'nineteenth'];
64
+ const ORDINAL_TENS = ['', '', 'twentieth', 'thirtieth', 'fortieth', 'fiftieth', 'sixtieth', 'seventieth', 'eightieth', 'ninetieth'];
65
+
66
+ function numberToWords(num, options = {}) {
67
+ const { andword = '', zero = 'zero', group = 0 } = options;
68
+ if (num === 0) return zero;
69
+ const convert = (n) => {
70
+ if (n < 20) return ONES[n];
71
+ if (n < 100) return TENS[Math.floor(n / 10)] + (n % 10 ? ' ' + ONES[n % 10] : '');
72
+ if (n < 1000) {
73
+ const remainder = n % 100;
74
+ return ONES[Math.floor(n / 100)] + ' hundred' + (remainder ? (andword ? ' ' + andword + ' ' : ' ') + convert(remainder) : '');
75
+ }
76
+ if (n < 1000000) {
77
+ const thousands = Math.floor(n / 1000);
78
+ const remainder = n % 1000;
79
+ return convert(thousands) + ' thousand' + (remainder ? ' ' + convert(remainder) : '');
80
+ }
81
+ if (n < 1000000000) {
82
+ const millions = Math.floor(n / 1000000);
83
+ const remainder = n % 1000000;
84
+ return convert(millions) + ' million' + (remainder ? ' ' + convert(remainder) : '');
85
+ }
86
+ const billions = Math.floor(n / 1000000000);
87
+ const remainder = n % 1000000000;
88
+ return convert(billions) + ' billion' + (remainder ? ' ' + convert(remainder) : '');
89
+ };
90
+ if (group === 2 && num > 1000 && num < 10000) {
91
+ const high = Math.floor(num / 100);
92
+ const low = num % 100;
93
+ if (low === 0) return convert(high) + ' hundred';
94
+ else if (low < 10) return convert(high) + ' ' + (zero === 'oh' ? 'oh' : zero) + ' ' + ONES[low];
95
+ else return convert(high) + ' ' + convert(low);
96
+ }
97
+ return convert(num);
98
+ }
99
+
100
+ function ordinalToWords(num) {
101
+ if (num < 20) return ORDINAL_ONES[num] || numberToWords(num) + 'th';
102
+ if (num < 100) {
103
+ const tens = Math.floor(num / 10);
104
+ const ones = num % 10;
105
+ if (ones === 0) return ORDINAL_TENS[tens];
106
+ return TENS[tens] + ' ' + ORDINAL_ONES[ones];
107
+ }
108
+ const cardinal = numberToWords(num);
109
+ if (cardinal.endsWith('y')) return cardinal.slice(0, -1) + 'ieth';
110
+ if (cardinal.endsWith('one')) return cardinal.slice(0, -3) + 'first';
111
+ if (cardinal.endsWith('two')) return cardinal.slice(0, -3) + 'second';
112
+ if (cardinal.endsWith('three')) return cardinal.slice(0, -5) + 'third';
113
+ if (cardinal.endsWith('ve')) return cardinal.slice(0, -2) + 'fth';
114
+ if (cardinal.endsWith('e')) return cardinal.slice(0, -1) + 'th';
115
+ if (cardinal.endsWith('t')) return cardinal + 'h';
116
+ return cardinal + 'th';
117
+ }
118
+
119
const UNICODE_MAP = {
  'à': 'a', 'á': 'a', 'â': 'a', 'ã': 'a', 'ä': 'a', 'å': 'a', 'æ': 'ae', 'ç': 'c', 'è': 'e', 'é': 'e', 'ê': 'e', 'ë': 'e', 'ì': 'i', 'í': 'i', 'î': 'i', 'ï': 'i', 'ñ': 'n', 'ò': 'o', 'ó': 'o', 'ô': 'o', 'õ': 'o', 'ö': 'o', 'ø': 'o', 'ù': 'u', 'ú': 'u', 'û': 'u', 'ü': 'u', 'ý': 'y', 'ÿ': 'y', 'ß': 'ss', 'œ': 'oe', 'ð': 'd', 'þ': 'th', 'À': 'A', 'Á': 'A', 'Â': 'A', 'Ã': 'A', 'Ä': 'A', 'Å': 'A', 'Æ': 'AE', 'Ç': 'C', 'È': 'E', 'É': 'E', 'Ê': 'E', 'Ë': 'E', 'Ì': 'I', 'Í': 'I', 'Î': 'I', 'Ï': 'I', 'Ñ': 'N', 'Ò': 'O', 'Ó': 'O', 'Ô': 'O', 'Õ': 'O', 'Ö': 'O', 'Ø': 'O', 'Ù': 'U', 'Ú': 'U', 'Û': 'U', 'Ü': 'U', 'Ý': 'Y', '\u201C': '"', '\u201D': '"', '\u2018': "'", '\u2019': "'", '\u2026': '...', '\u2013': '-', '\u2014': '-'
};

/**
 * Folds accented Latin characters and typographic punctuation to plain
 * ASCII: known characters go through UNICODE_MAP, anything left is
 * canonically decomposed (NFD) and stripped of combining diacritics.
 */
function convertToAscii(text) {
  let mapped = '';
  for (const ch of text) {
    mapped += UNICODE_MAP[ch] || ch;
  }
  return mapped.normalize('NFD').replace(/[\u0300-\u036f]/g, '');
}
126
+
127
// Case-insensitive, dot-terminated title abbreviations.
const ABBREVIATIONS = [
  [/\bmrs\./gi, 'misuss'], [/\bms\./gi, 'miss'], [/\bmr\./gi, 'mister'], [/\bdr\./gi, 'doctor'], [/\bst\./gi, 'saint'], [/\bco\./gi, 'company'], [/\bjr\./gi, 'junior'], [/\bmaj\./gi, 'major'], [/\bgen\./gi, 'general'], [/\bdrs\./gi, 'doctors'], [/\brev\./gi, 'reverend'], [/\blt\./gi, 'lieutenant'], [/\bhon\./gi, 'honorable'], [/\bsgt\./gi, 'sergeant'], [/\bcapt\./gi, 'captain'], [/\besq\./gi, 'esquire'], [/\bltd\./gi, 'limited'], [/\bcol\./gi, 'colonel'], [/\bft\./gi, 'fort']
];
// Case-sensitive technical acronyms and units.
const CASED_ABBREVIATIONS = [
  [/\bTTS\b/g, 'text to speech'], [/\bHz\b/g, 'hertz'], [/\bkHz\b/g, 'kilohertz'], [/\bKBs\b/g, 'kilobytes'], [/\bKB\b/g, 'kilobyte'], [/\bMBs\b/g, 'megabytes'], [/\bMB\b/g, 'megabyte'], [/\bGBs\b/g, 'gigabytes'], [/\bGB\b/g, 'gigabyte'], [/\bTBs\b/g, 'terabytes'], [/\bTB\b/g, 'terabyte'], [/\bAPIs\b/g, "a p i's"], [/\bAPI\b/g, 'a p i'], [/\bCLIs\b/g, "c l i's"], [/\bCLI\b/g, 'c l i'], [/\bCPUs\b/g, "c p u's"], [/\bCPU\b/g, 'c p u'], [/\bGPUs\b/g, "g p u's"], [/\bGPU\b/g, 'g p u'], [/\bAve\b/g, 'avenue'], [/\betc\b/g, 'etcetera']
];

/**
 * Replaces every known abbreviation with its spoken expansion.
 * Title abbreviations are applied first, then the cased acronym table.
 */
function expandAbbreviations(text) {
  let result = text;
  for (const [pattern, expansion] of ABBREVIATIONS) {
    result = result.replace(pattern, expansion);
  }
  for (const [pattern, expansion] of CASED_ABBREVIATIONS) {
    result = result.replace(pattern, expansion);
  }
  return result;
}
137
+
138
// --- Number-normalization regexes (applied in order by normalizeNumbers) ---
const NUM_PREFIX_RE = /#(\d)/g;                           // "#1" -> "number 1"
const NUM_SUFFIX_RE = /(\d)([KMBT])/gi;                   // "5K" -> "5 thousand", etc.
const NUM_LETTER_SPLIT_RE = /(\d)([a-z])|([a-z])(\d)/gi;  // split "3px" / "mp3" at the digit/letter boundary
const COMMA_NUMBER_RE = /(\d[\d,]+\d)/g;                  // "1,234" -> strip thousands separators
const DATE_RE = /(^|[^/])(\d\d?[/-]\d\d?[/-]\d\d(?:\d\d)?)($|[^/])/g; // "1/2/2024"-style dates
const PHONE_NUMBER_RE = /\(?\d{3}\)?[-.\s]\d{3}[-.\s]?\d{4}/g;        // US 10-digit phone numbers
const TIME_RE = /(\d\d?):(\d\d)(?::(\d\d))?/g;            // "h:mm" or "h:mm:ss"
const POUNDS_RE = /£([\d,]*\d+)/g;
const DOLLARS_RE = /\$([\d.,]*\d+)/g;
const DECIMAL_NUMBER_RE = /(\d+(?:\.\d+)+)/g;             // "3.14" (also version strings like "1.2.3")
const MULTIPLY_RE = /(\d)\s?\*\s?(\d)/g;
const DIVIDE_RE = /(\d)\s?\/\s?(\d)/g;
const ADD_RE = /(\d)\s?\+\s?(\d)/g;
const SUBTRACT_RE = /(\d)?\s?-\s?(\d)/g;
const FRACTION_RE = /(\d+)\/(\d+)/g;
const ORDINAL_RE = /(\d+)(st|nd|rd|th)/gi;
const NUMBER_RE = /\d+/g;

/**
 * Rewrites digits, dates, times, currency, arithmetic and ordinals into
 * speakable words. The replacement order matters: structured forms (phone
 * numbers, times, currency) must be consumed before the generic NUMBER_RE
 * pass spells out any remaining digit runs.
 */
function normalizeNumbers(text) {
  // "#7" -> "number 7".
  text = text.replace(NUM_PREFIX_RE, (_, d) => `number ${d}`);
  // "5K" / "2M" etc. -> "5 thousand" / "2 million".
  text = text.replace(NUM_SUFFIX_RE, (_, num, suffix) => {
    const map = { k: 'thousand', m: 'million', b: 'billion', t: 'trillion' };
    return `${num} ${map[suffix.toLowerCase()]}`;
  });
  // Run twice so alternating runs like "a1b2" are fully separated.
  for (let i = 0; i < 2; i++) {
    text = text.replace(NUM_LETTER_SPLIT_RE, (m, d1, l1, l2, d2) => {
      if (d1 && l1) return `${d1} ${l1}`;
      if (l2 && d2) return `${l2} ${d2}`;
      return m;
    });
  }
  // Drop thousands separators so "1,234" is read as one number.
  text = text.replace(COMMA_NUMBER_RE, m => m.replace(/,/g, ''));
  // Dates: read separators as "dash" (e.g. "1/2/2024" -> "1 dash 2 dash 2024").
  text = text.replace(DATE_RE, (_, pre, date, post) => pre + date.split(/[./-]/).join(' dash ') + post);
  // Phone numbers: read digit-by-digit in 3/3/4 groups separated by pauses.
  text = text.replace(PHONE_NUMBER_RE, m => {
    const digits = m.replace(/\D/g, '');
    return digits.length === 10 ? `${digits.slice(0, 3).split('').join(' ')}, ${digits.slice(3, 6).split('').join(' ')}, ${digits.slice(6).split('').join(' ')}` : m;
  });
  // Times: "o'clock" / "oh" conventions, e.g. "3:05" -> "3 oh 5".
  text = text.replace(TIME_RE, (_, hours, minutes, seconds) => {
    const h = parseInt(hours), m = parseInt(minutes), s = seconds ? parseInt(seconds) : 0;
    if (!seconds) return m === 0 ? (h === 0 ? '0' : h > 12 ? `${hours} minutes` : `${hours} o'clock`) : minutes.startsWith('0') ? `${hours} oh ${minutes[1]}` : `${hours} ${minutes}`;
    let res = '';
    if (h !== 0) res = hours + ' ' + (m === 0 ? 'oh oh' : minutes.startsWith('0') ? `oh ${minutes[1]}` : minutes);
    else if (m !== 0) res = minutes + ' ' + (s === 0 ? 'oh oh' : seconds.startsWith('0') ? `oh ${seconds[1]}` : seconds);
    else res = seconds;
    return res + ' ' + (s === 0 ? '' : seconds.startsWith('0') ? `oh ${seconds[1]}` : seconds);
  });
  // Currency.
  text = text.replace(POUNDS_RE, (_, amount) => `${amount.replace(/,/g, '')} pounds`);
  text = text.replace(DOLLARS_RE, (_, amount) => {
    const parts = amount.replace(/,/g, '').split('.');
    const dollars = parseInt(parts[0]) || 0;
    const cents = parts[1] ? parseInt(parts[1]) : 0;
    if (dollars && cents) return `${dollars} ${dollars === 1 ? 'dollar' : 'dollars'}, ${cents} ${cents === 1 ? 'cent' : 'cents'}`;
    if (dollars) return `${dollars} ${dollars === 1 ? 'dollar' : 'dollars'}`;
    if (cents) return `${cents} ${cents === 1 ? 'cent' : 'cents'}`;
    return 'zero dollars';
  });
  // Decimals: "3.14" -> char-spaced "3 . 1 4" so digits are read one-by-one.
  // NOTE(review): the final split('').join(' ') also spaces out the letters of
  // the inserted word "point" — presumably intentional for per-digit reading,
  // but verify the spoken output.
  text = text.replace(DECIMAL_NUMBER_RE, m => m.split('.').join(' point ').split('').join(' '));
  // Arithmetic operators between digits.
  text = text.replace(MULTIPLY_RE, '$1 times $2');
  text = text.replace(DIVIDE_RE, '$1 over $2');
  text = text.replace(ADD_RE, '$1 plus $2');
  text = text.replace(SUBTRACT_RE, (_, a, b) => (a ? a : '') + ' minus ' + b);
  text = text.replace(FRACTION_RE, '$1 over $2');
  // "1st" / "22nd" -> ordinal words.
  text = text.replace(ORDINAL_RE, (_, num) => ordinalToWords(parseInt(num)));
  // Remaining digit runs; values in (1000, 3000) are read year-style.
  text = text.replace(NUMBER_RE, m => {
    const num = parseInt(m);
    if (num > 1000 && num < 3000) {
      if (num === 2000) return 'two thousand';
      if (num > 2000 && num < 2010) return 'two thousand ' + numberToWords(num % 100);
      if (num % 100 === 0) return numberToWords(Math.floor(num / 100)) + ' hundred';
      return numberToWords(num, { zero: 'oh', group: 2 });
    }
    return numberToWords(num);
  });
  return text;
}
213
// Symbol -> spoken-word substitutions, applied in order (longest forms like
// "<=" must precede their single-character prefixes).
const SPECIAL_CHARACTERS = [
  [/@/g, ' at '], [/&/g, ' and '], [/%/g, ' percent '], [/:/g, '.'], [/;/g, ','], [/\+/g, ' plus '], [/\\/g, ' backslash '], [/~/g, ' about '], [/(^| )<3/g, ' heart '], [/<=/g, ' less than or equal to '], [/>=/g, ' greater than or equal to '], [/</g, ' less than '], [/>/g, ' greater than '], [/=/g, ' equals '], [/\//g, ' slash '], [/_/g, ' '],
];
const LINK_HEADER_RE = /https?:\/\//gi;
const DASH_RE = /(.) - (.)/g;
const DOT_RE = /([A-Z])\.([A-Z])/gi;
const PARENTHESES_RE = /[\(\[\{][^\)\]\}]*[\)\]\}](.)?/g;

/**
 * Rewrites structural punctuation into speakable text: URL schemes are
 * spelled out, " - " becomes a pause, letter.letter becomes "dot", and
 * bracketed asides are turned into comma-delimited phrases.
 */
function normalizeSpecial(text) {
  let result = text.replace(LINK_HEADER_RE, 'h t t p s colon slash slash ');
  result = result.replace(DASH_RE, '$1, $2');
  result = result.replace(DOT_RE, '$1 dot $2');
  result = result.replace(PARENTHESES_RE, (matched, trailing) => {
    let spoken = matched.replace(/[\(\[\{]/g, ', ').replace(/[\)\]\}]/g, ', ');
    // If punctuation immediately follows the bracket group, attach it directly.
    if (trailing && /[$.!?,]/.test(trailing)) spoken = spoken.slice(0, -2) + trailing;
    return spoken;
  });
  return result;
}

/**
 * Applies every SPECIAL_CHARACTERS substitution, left to right.
 */
function expandSpecialCharacters(text) {
  return SPECIAL_CHARACTERS.reduce(
    (acc, [pattern, expansion]) => acc.replace(pattern, expansion),
    text
  );
}
236
/**
 * Flattens a multi-line string into one line: each non-empty line is trimmed
 * and given terminal punctuation ('.') if it lacks one; empty lines become
 * empty strings; everything is re-joined with single spaces.
 */
function normalizeNewlines(text) {
  const pieces = [];
  for (const rawLine of text.split('\n')) {
    const line = rawLine.trim();
    if (!line) {
      pieces.push('');
      continue;
    }
    pieces.push(/[.!?]$/.test(line) ? line : line + '.');
  }
  return pieces.join(' ');
}
244
/**
 * Strips characters the model cannot voice: first everything outside the
 * allowed alphabet, then the marker characters (<, >, /, _, +) that earlier
 * passes may have left behind.
 */
function removeUnknownCharacters(text) {
  const allowedOnly = text.replace(/[^A-Za-z !\$%&'\*\+,\-./0123456789<>\?_]/g, '');
  return allowedOnly.replace(/[<>\/_+]/g, '');
}
248
/**
 * Squeezes all whitespace runs to single spaces, then removes the space
 * that squeezing can leave in front of sentence punctuation.
 */
function collapseWhitespace(text) {
  const squeezed = text.replace(/\s+/g, ' ');
  return squeezed.replace(/ ([.\?!,])/g, '$1');
}
251
/**
 * Collapses runs of repeated/mixed punctuation to a single terminator,
 * while preserving genuine ellipses ("..." and longer) via a placeholder.
 */
function dedupPunctuation(text) {
  let out = text.replace(/\.\.\.+/g, '[ELLIPSIS]'); // protect ellipses
  out = out.replace(/,+/g, ',');
  out = out.replace(/[.,]*\.[.,]*/g, '.');
  out = out.replace(/[.,!]*![.,!]*/g, '!');
  out = out.replace(/[.,!?]*\?[.,!?]*/g, '?');
  return out.replace(/\[ELLIPSIS\]/g, '...');     // restore ellipses
}
254
/**
 * Full text-normalization pipeline: ASCII folding, newline flattening,
 * number/abbreviation/symbol expansion, lowercasing and final cleanup.
 * Order matters — ASCII folding and newline handling must run before the
 * expansions, and lowercasing before the character whitelist.
 */
function cleanText(text) {
  const pipeline = [
    convertToAscii,
    normalizeNewlines,
    normalizeNumbers,
    normalizeSpecial,
    expandAbbreviations,
    expandSpecialCharacters,
    (t) => t.toLowerCase(),
    removeUnknownCharacters,
    collapseWhitespace,
    dedupPunctuation,
  ];
  const cleaned = pipeline.reduce((acc, step) => step(acc), text);
  return cleaned.trim();
}
267
/**
 * Converts raw user text into a list of model-ready prompt strings.
 *
 * The cleaned text is split into sentences, sentences shorter than
 * `minLength` characters are merged into a neighbour, and the result is
 * grouped `batchSize` sentences per prompt. Each prompt is wrapped in the
 * model's control tokens: "[STOP][TEXT]...[START]".
 *
 * @param {string} text - raw input text.
 * @param {number} [batchSize=3] - sentences per generated prompt.
 * @param {number} [minLength=30] - merge threshold in characters; 0 disables merging.
 * @returns {string[]} prompt strings (empty array for empty/cleaned-away input).
 */
function preprocessText(text, batchSize = 3, minLength = 30) {
  text = text.trim();
  const cleanedText = cleanText(text);
  // Split after sentence-final punctuation, keeping the punctuation attached.
  let sentences = cleanedText.split(/(?<=[.!?])\s+/).filter(s => s.trim());
  if (sentences.length === 0) return cleanedText ? [`[STOP][TEXT]${cleanedText}[START]`] : [];
  if (minLength > 0 && sentences.length > 1) {
    const merged = [];
    for (let i = 0; i < sentences.length; i++) {
      const cur = sentences[i];
      if (cur.length < minLength) {
        // Too short: append to the previously merged sentence if one exists;
        // otherwise prepend to the next raw sentence; keep as-is when it is
        // the only/last remaining sentence.
        if (merged.length > 0) merged[merged.length - 1] = (merged[merged.length - 1] + ' ' + cur).trim();
        else if (i + 1 < sentences.length) sentences[i + 1] = (cur + ' ' + sentences[i + 1]).trim();
        else merged.push(cur);
      } else merged.push(cur);
    }
    sentences = merged;
  }
  // Group sentences into fixed-size batches, one prompt per batch.
  const prompts = [];
  for (let i = 0; i < sentences.length; i += batchSize) {
    const batch = sentences.slice(i, i + batchSize).join(' ');
    prompts.push(`[STOP][TEXT]${batch}[START]`);
  }
  return prompts;
}
291
+
292
+ // ----------------------------------------------------------------------------
293
+ // Worker Logic
294
+ // ----------------------------------------------------------------------------
295
+
296
/**
 * Main-thread message dispatcher.
 * Supported message types:
 *   'load'     — load models/tokenizer, reply 'loaded' or 'error'.
 *   'generate' — start TTS for data.text (rejected while loading/already running).
 *   'stop'     — flag the current generation loop to halt.
 * Unknown types are ignored.
 */
self.onmessage = async (e) => {
  const { type, data } = e.data;
  console.log('Worker received message:', type);

  switch (type) {
    case 'load':
      try {
        await loadModels();
        postMessage({ type: 'loaded' });
      } catch (err) {
        postMessage({ type: 'error', error: err.toString() });
      }
      break;

    case 'generate':
      if (!isReady) {
        postMessage({ type: 'error', error: 'Models are not loaded yet.' });
        return;
      }
      // Ignore overlapping requests; one generation at a time.
      if (isGenerating) return;
      try {
        await startGeneration(data.text);
      } catch (err) {
        console.error('Generation Error:', err);
        postMessage({ type: 'error', error: err.toString() });
      }
      break;

    case 'stop':
      isGenerating = false;
      postMessage({ type: 'status', status: 'Stopped', state: 'idle' });
      break;
  }
};
325
+
326
/**
 * Loads the backbone and decoder ONNX sessions plus the tokenizer, then
 * reports readiness to the main thread. Idempotent: returns immediately if
 * the backbone session already exists. Errors are logged and rethrown so the
 * 'load' message handler can surface them.
 */
async function loadModels() {
  if (backboneSession) return;

  postMessage({ type: 'status', status: 'Loading models...', state: 'loading' });

  // Configure WASM Paths to use EXACT same version as loader
  const version = '1.20.0';
  const cdnBase = `https://cdn.jsdelivr.net/npm/onnxruntime-web@${version}/dist/`;
  ort.env.wasm.wasmPaths = cdnBase;

  // Disable multi-threading if not in cross-origin isolated environment to avoid ERR_WASM_FILE_NOT_FOUND
  if (!self.crossOriginIsolated) {
    console.warn('Environment is not cross-origin isolated. Disabling WASM multi-threading.');
    ort.env.wasm.numThreads = 1;
  } else if (typeof navigator !== 'undefined' && navigator.hardwareConcurrency) {
    // Cap the thread pool at 8 regardless of core count.
    ort.env.wasm.numThreads = Math.min(navigator.hardwareConcurrency, 8);
  }

  try {
    const backboneOptions = {
      executionProviders: ['wasm'],
      freeDimensionOverrides: { 'batch': 1 },
      graphOptimizationLevel: 'all'
    };

    // Initialize FP16 Lookup — only needed when the backbone model file is an
    // fp16 export (detected by filename convention).
    isFp16Backbone = MODELS.backbone.includes('fp16');
    if (isFp16Backbone) initFp16Lookup();

    console.log('Loading Backbone...');
    backboneSession = await ort.InferenceSession.create(MODELS.backbone, backboneOptions);

    console.log('Loading Decoder...');
    // Fetch the decoder model manually so an external-data sidecar can be attached.
    const decoderBuf = await fetch(MODELS.decoder).then(r => {
      if (!r.ok) throw new Error(`Failed to load decoder: ${r.statusText}`);
      return r.arrayBuffer();
    });

    // External data check: probe for a "<model>.data" sidecar holding weights
    // stored outside the .onnx file. Best-effort — absence is not an error.
    let dataBuf = null;
    try {
      const dataUrl = MODELS.decoder + '.data';
      const dataRes = await fetch(dataUrl);
      if (dataRes.ok) {
        dataBuf = await dataRes.arrayBuffer();
      }
    } catch (e) { }

    const decoderOptions = {
      executionProviders: ['wasm'],
      freeDimensionOverrides: { 'batch': 1 }
    };

    if (dataBuf) {
      // The path must match the filename referenced inside the .onnx graph.
      decoderOptions.externalData = [{
        data: new Uint8Array(dataBuf),
        path: MODELS.decoder.split('/').pop() + '.data'
      }];
    }

    decoderSession = await ort.InferenceSession.create(new Uint8Array(decoderBuf), decoderOptions);

    console.log('Loading Tokenizer...');
    const transformers = await import('https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.0');
    const { AutoTokenizer, env } = transformers;

    // Serve tokenizer files from alongside this worker script, never the Hub.
    env.allowLocalModels = true;
    env.allowRemoteModels = false;
    env.localModelPath = new URL('.', self.location.href).pathname;

    tokenizer = await AutoTokenizer.from_pretrained(MODELS.tokenizer, {
      local_files_only: true
    });

    isReady = true;
    postMessage({ type: 'status', status: 'Ready', state: 'idle' });
    postMessage({ type: 'model_status', status: 'ready', text: 'Ready' });
    postMessage({ type: 'loaded' });

  } catch (err) {
    console.error('Model load failed in worker:', err);
    throw err;
  }
}
410
+
411
/**
 * Runs a full TTS generation for the given text: preprocesses it into
 * prompts, tokenizes and generates each prompt in order, and notifies the
 * main thread when the stream ends (unless a 'stop' message cleared
 * isGenerating mid-run).
 */
async function startGeneration(text) {
  isGenerating = true;
  postMessage({ type: 'status', status: 'Generating...', state: 'running' });

  const prompts = preprocessText(text);
  const overallStartTime = performance.now();
  let cumulativeSamples = 0;

  for (let idx = 0; idx < prompts.length; idx++) {
    if (!isGenerating) break;
    // The tokenizer runs in-worker, so input_ids.data is directly readable.
    const { input_ids } = await tokenizer(prompts[idx]);
    const producedSamples = await generationLoop(
      input_ids.data,
      overallStartTime,
      idx === 0,          // first batch drives TTFB/RTF bookkeeping
      cumulativeSamples
    );
    cumulativeSamples += producedSamples;
  }

  if (isGenerating) {
    postMessage({ type: 'stream_ended' });
    postMessage({ type: 'status', status: 'Finished', state: 'idle' });
  }
  isGenerating = false;
}
438
+
439
// Sampling Cache — scratch buffers reused across sample() calls to avoid
// per-token allocations; (re)allocated lazily whenever topK changes.
let _topKIndices = null;  // Int32Array(k): token ids currently held in the heap
let _topKScores = null;   // Float32Array(k): temperature-scaled, penalty-adjusted scores
let _topKOrder = null;    // Array(k): permutation ordering the heap entries by descending score
let _topKExp = null;      // Float64Array(k): softmax weights in sorted order
const samplingParams = { temperature: 0.3, topK: 50, topP: 0.95, repetitionPenalty: 1.2 };
445
+
446
/**
 * Autoregressively generates audio for one tokenized prompt.
 *
 * Runs the backbone one token at a time with a KV cache, buffers the hidden
 * state of each generated token, and every `targetChunkSize` tokens feeds a
 * sliding window of hidden states to the decoder. Decoder runs are chained on
 * a promise so they overlap with backbone inference; audio chunks are posted
 * to the main thread as transferables. Generation stops at token id 3
 * (end-of-speech) or after `maxNewTokens`.
 *
 * @param {BigInt64Array|ArrayLike} promptTokens - prompt token ids.
 * @param {number} startTime - run start timestamp (currently unused in this function).
 * @param {boolean} [isFirstBatch=true] - first prompt of the run; triggers the
 *   'generation_started' message and tags the first chunk's metrics.
 * @param {number} [cumulativeSamples=0] - samples from earlier prompts (currently unused here).
 * @returns {Promise<number>} FIXME: always resolves to 0 — `totalSamples` is
 *   declared but never incremented, so the caller's sample accounting is a no-op.
 */
async function generationLoop(promptTokens, startTime, isFirstBatch = true, cumulativeSamples = 0) {
  const batch = 1;
  const numLayers = 17;
  // Per-layer KV width. NOTE(review): presumably the KV head dimension of the
  // exported backbone — confirm against the model config (the decoder's
  // hidden width below is 512, not this value).
  const hiddenDim = 128;
  const promptLen = promptTokens.length;
  const vocabSize = 8192;
  const maxNewTokens = 512;

  // Mark prompt tokens as "seen" so the repetition penalty applies to them.
  const seenTokenMask = new Uint8Array(vocabSize);
  for (let i = 0; i < promptTokens.length; i++) {
    const tid = Number(promptTokens[i]);
    if (tid >= 0 && tid < vocabSize) seenTokenMask[tid] = 1;
  }

  // Empty KV tensors to seed the cache; dtype must match the backbone export.
  const kvType = isFp16Backbone ? 'float16' : 'float32';
  const kvData = isFp16Backbone ? new Uint16Array(0) : new Float32Array(0);

  let pastKeyValues = {};
  for (let i = 0; i < numLayers; i++) {
    pastKeyValues[`past_key_values.${i}.key`] = new ort.Tensor(kvType, kvData, [batch, 1, 0, hiddenDim]);
    pastKeyValues[`past_key_values.${i}.value`] = new ort.Tensor(kvType, kvData, [batch, 1, 0, hiddenDim]);
  }

  // Preallocate the attention mask for the longest possible sequence; a
  // growing subarray view is passed each step.
  const maxSeqLen = promptLen + maxNewTokens;
  const attentionMaskData = new BigInt64Array(maxSeqLen);
  attentionMaskData.fill(1n);
  let currentSeqLen = promptLen;

  // Single-element tensors reused for every decode step (data mutated in place).
  const nextInputIdData = new BigInt64Array(1);
  const nextPositionIdData = new BigInt64Array(1);
  const nextInputIdsTensor = new ort.Tensor('int64', nextInputIdData, [batch, 1]);
  const nextPositionIdsTensor = new ort.Tensor('int64', nextPositionIdData, [batch, 1]);

  // First step processes the whole prompt; subsequent steps feed one token.
  let currentInputIds = new ort.Tensor('int64', BigInt64Array.from(promptTokens), [batch, promptLen]);
  let currentAttentionMask = new ort.Tensor('int64', attentionMaskData.subarray(0, currentSeqLen), [batch, currentSeqLen]);
  let currentPositionIds = new ort.Tensor('int64', BigInt64Array.from({ length: promptLen }, (_, i) => BigInt(i)), [batch, promptLen]);

  const hiddenStatesBuffer = [];
  let totalSamples = 0; // FIXME: never incremented — see @returns above.
  const targetChunkSize = 8;   // tokens per decoded audio chunk
  let chunkCounter = targetChunkSize;
  let firstChunk = true;

  // Pipelining: decoder runs are serialized on this promise chain so they
  // execute in order while the backbone keeps generating.
  let lastDecoderPromise = Promise.resolve();
  let chunkBackboneTime = 0;

  if (isFirstBatch) {
    postMessage({ type: 'generation_started', data: { time: performance.now() } });
  }

  for (let i = 0; i < maxNewTokens; i++) {
    if (!isGenerating) break; // honor a 'stop' message mid-generation

    // Yield to the worker's event loop periodically so 'stop' can arrive.
    if (i % 4 === 0) {
      await new Promise(resolve => setTimeout(resolve, 0));
    }

    const inputs = {
      input_ids: currentInputIds,
      attention_mask: currentAttentionMask,
      position_ids: currentPositionIds,
      ...pastKeyValues
    };

    const bbStart = performance.now();
    const outputs = await backboneSession.run(inputs);
    chunkBackboneTime += (performance.now() - bbStart);

    // Output order assumed: [logits, (key, value) x numLayers, ..., hidden_state last].
    const backboneNames = backboneSession.outputNames;
    const logits = outputs[backboneNames[0]];
    const lastHiddenState = outputs[backboneNames[backboneNames.length - 1]];

    // Feed the updated KV cache back in for the next step.
    for (let j = 0; j < numLayers; j++) {
      pastKeyValues[`past_key_values.${j}.key`] = outputs[backboneNames[1 + j * 2]];
      pastKeyValues[`past_key_values.${j}.value`] = outputs[backboneNames[2 + j * 2]];
    }

    const nextTokenId = sample(logits, seenTokenMask);
    const finished = (nextTokenId === 3n); // token id 3 terminates generation
    const nextTokenIdNum = Number(nextTokenId);
    if (nextTokenIdNum >= 0 && nextTokenIdNum < vocabSize) seenTokenMask[nextTokenIdNum] = 1;

    // Extract the hidden state of the last position of this step.
    const seqLen = lastHiddenState.dims[1];
    const hiddenDimSize = lastHiddenState.dims[2];
    const lastTokenStateRaw = lastHiddenState.data.subarray((seqLen - 1) * hiddenDimSize, seqLen * hiddenDimSize);

    // fp16 backbones expose raw uint16 bits — decode via the lookup table.
    let lastTokenState;
    if (lastTokenStateRaw instanceof Uint16Array) {
      lastTokenState = new Float32Array(hiddenDimSize);
      for (let j = 0; j < hiddenDimSize; j++) {
        lastTokenState[j] = fp16Lookup[lastTokenStateRaw[j]];
      }
    } else {
      lastTokenState = new Float32Array(lastTokenStateRaw);
    }

    // Buffer hidden states (skipping the prompt-processing step and EOS),
    // trimming to the maximum window the decoder ever needs.
    if (i > 0 && !finished) {
      hiddenStatesBuffer.push(new Float32Array(lastTokenState));
      if (hiddenStatesBuffer.length > 2 * RECEPTIVE_FIELD + targetChunkSize) {
        hiddenStatesBuffer.splice(0, hiddenStatesBuffer.length - (2 * RECEPTIVE_FIELD + targetChunkSize));
      }
    }

    // Decode Logic: once enough context exists, decode every targetChunkSize
    // tokens (or immediately on EOS).
    if (finished || hiddenStatesBuffer.length >= RECEPTIVE_FIELD + targetChunkSize) {
      if (finished || chunkCounter === targetChunkSize) {
        // NOTE(review): slice(-length) copies the ENTIRE buffer — equivalent
        // to slice(); also `window` shadows the global name (harmless in a worker).
        const window = hiddenStatesBuffer.slice(-hiddenStatesBuffer.length);
        const currentWindowSize = window.length;

        // Transpose [tokens][512] -> channel-major [1, 512, tokens] layout.
        const decoderInput = new Float32Array(512 * currentWindowSize);
        for (let w = 0; w < currentWindowSize; w++) {
          for (let d = 0; d < 512; d++) {
            decoderInput[d * currentWindowSize + w] = window[w][d];
          }
        }

        // Capture loop state for the async decoder closure below.
        const isLast = finished;
        const captureChunkCounter = chunkCounter;
        const captureFirstChunk = firstChunk;
        const captureBBTime = chunkBackboneTime;
        chunkBackboneTime = 0;

        // Send to decoder (in promise chain) — keeps decodes ordered while
        // the backbone loop continues.
        lastDecoderPromise = lastDecoderPromise.then(async () => {
          const decStart = performance.now();
          const decoderOutputs = await decoderSession.run({
            [decoderSession.inputNames[0]]: new ort.Tensor('float32', decoderInput, [1, 512, currentWindowSize])
          });
          const decDuration = performance.now() - decStart;

          // Slice out only the freshly decoded region, discarding the
          // receptive-field warm-up samples at both ends.
          const audio = decoderOutputs[decoderSession.outputNames[0]].data;
          let audioChunk;
          if (isLast) {
            const startIdx = audio.length - (RECEPTIVE_FIELD + captureChunkCounter - 1) * TOKEN_SIZE + TOKEN_SIZE;
            audioChunk = audio.subarray(startIdx);
          } else {
            const startIdx = audio.length - (RECEPTIVE_FIELD + targetChunkSize) * TOKEN_SIZE + TOKEN_SIZE;
            const endIdx = audio.length - RECEPTIVE_FIELD * TOKEN_SIZE + TOKEN_SIZE;
            audioChunk = audio.subarray(startIdx, endIdx);
          }

          // Post audio to main thread; the buffer is transferred (zero-copy)
          // and becomes unusable here afterwards.
          postMessage({
            type: 'audio_chunk',
            data: audioChunk,
            metrics: {
              bbTime: captureBBTime,
              decTime: decDuration,
              chunkDuration: audioChunk.length / SAMPLE_RATE,
              isFirst: captureFirstChunk && isFirstBatch
            }
          }, [audioChunk.buffer]); // Transferable
        });

        firstChunk = false;
        chunkCounter = 0;
      }
      chunkCounter++;
    }

    if (finished) break;

    // Prepare single-token inputs for the next step (tensors reused, data
    // mutated in place; the attention mask view simply grows by one).
    nextInputIdData[0] = nextTokenId;
    currentInputIds = nextInputIdsTensor;
    currentSeqLen += 1;
    currentAttentionMask = new ort.Tensor('int64', attentionMaskData.subarray(0, currentSeqLen), [1, currentSeqLen]);
    nextPositionIdData[0] = BigInt(currentSeqLen - 1);
    currentPositionIds = nextPositionIdsTensor;
  }

  // Wait for the final decode to flush before reporting completion.
  await lastDecoderPromise;
  return totalSamples;
}
622
+
623
/**
 * Samples the next token id from the last step of a logits tensor using
 * temperature scaling, repetition penalty, top-k filtering and top-p
 * (nucleus) truncation.
 *
 * Fixes over the previous version: the stray `Math.random() * sumExp` draw
 * whose result was immediately overwritten is gone (one RNG draw per call),
 * and the kept-prefix mass is accumulated during the top-p scan instead of
 * re-reduced via `_topKExp.slice(0, keep)` (no per-call allocation).
 *
 * @param {object} logitsTensor - ort.Tensor-like: `.data` is a Float32Array
 *   (or Uint16Array of raw fp16 bits) and `.dims` is [batch, steps, vocab];
 *   only the final step is read.
 * @param {Uint8Array} seenTokenMask - 1 per token already emitted or in the
 *   prompt; flagged tokens receive the repetition penalty.
 * @returns {bigint} sampled token id (0n if the top-k fast path is disabled).
 */
function sample(logitsTensor, seenTokenMask) {
  const rawData = logitsTensor.data;
  const vocabSize = logitsTensor.dims[2];
  const lastStepOffset = (logitsTensor.dims[1] - 1) * vocabSize;

  // Materialize the final step as float32 (fp16 backbones expose uint16 bits).
  let data;
  if (rawData instanceof Uint16Array) {
    data = new Float32Array(vocabSize);
    for (let j = 0; j < vocabSize; j++) {
      data[j] = fp16Lookup[rawData[lastStepOffset + j]];
    }
  } else {
    data = rawData.subarray ? rawData.subarray(lastStepOffset) : rawData.slice(lastStepOffset);
  }

  const { temperature, topK, topP, repetitionPenalty } = samplingParams;
  const useRepetitionPenalty = repetitionPenalty !== 1.0;
  const invTemperature = 1.0 / temperature;

  // Fast path: Top-K via a bounded min-heap over the scaled scores.
  const k = Math.min(topK, vocabSize);
  if (k > 0 && k < vocabSize) {
    // Lazily (re)allocate scratch buffers when k changes.
    if (!_topKIndices || _topKIndices.length !== k) {
      _topKIndices = new Int32Array(k);
      _topKScores = new Float32Array(k);
      _topKExp = new Float64Array(k);
      _topKOrder = Array.from({ length: k }, (_, i) => i);
    }
    const heapIndices = _topKIndices;
    const heapScores = _topKScores;

    // Min-heap of the k best scores (root = smallest kept score).
    let heapSize = 0;
    for (let tokenId = 0; tokenId < vocabSize; tokenId++) {
      let s = data[tokenId] * invTemperature;
      // Repetition penalty: scale seen tokens toward lower probability.
      if (useRepetitionPenalty && seenTokenMask[tokenId]) s = s < 0 ? (s * repetitionPenalty) : (s / repetitionPenalty);

      if (heapSize < k) {
        // Sift-up insert while the heap is still filling.
        let pos = heapSize++;
        while (pos > 0) {
          const parent = (pos - 1) >> 1;
          if (heapScores[parent] <= s) break;
          heapScores[pos] = heapScores[parent];
          heapIndices[pos] = heapIndices[parent];
          pos = parent;
        }
        heapScores[pos] = s;
        heapIndices[pos] = tokenId;
      } else if (s > heapScores[0]) {
        // Replace the smallest kept score and sift-down.
        let pos = 0;
        while (pos < (k >> 1)) {
          let left = (pos << 1) + 1;
          let right = left + 1;
          let smallest = left;
          if (right < k && heapScores[right] < heapScores[left]) smallest = right;
          if (heapScores[smallest] >= s) break;
          heapScores[pos] = heapScores[smallest];
          heapIndices[pos] = heapIndices[smallest];
          pos = smallest;
        }
        heapScores[pos] = s;
        heapIndices[pos] = tokenId;
      }
    }

    // Order the kept candidates by descending score.
    const expBuf = _topKExp;
    const order = _topKOrder;
    for (let i = 0; i < k; i++) order[i] = i;
    order.sort((a, b) => heapScores[b] - heapScores[a]);

    // Softmax weights, max-subtracted for numerical stability.
    const maxScore = heapScores[order[0]];
    let sumExp = 0;
    for (let i = 0; i < k; i++) {
      const w = Math.exp(heapScores[order[i]] - maxScore);
      expBuf[i] = w;
      sumExp += w;
    }

    // Top-p: keep the smallest prefix whose probability mass reaches topP,
    // tracking the kept mass directly so no second pass is needed.
    let keep = k;
    let keptMass = sumExp;
    if (topP < 1.0) {
      const threshold = topP * sumExp;
      let cumulative = 0;
      for (let i = 0; i < k; i++) {
        cumulative += expBuf[i];
        if (cumulative >= threshold) {
          keep = i + 1;
          keptMass = cumulative;
          break;
        }
      }
    }

    // Single draw from the renormalized kept distribution.
    let r = Math.random() * keptMass;
    for (let i = 0; i < keep; i++) {
      r -= expBuf[i];
      if (r <= 0) return BigInt(heapIndices[order[i]]);
    }
    return BigInt(heapIndices[order[0]]); // numeric-edge fallback
  }

  // k === 0 or k >= vocabSize: full-vocab sampling is not implemented (the
  // current samplingParams never take this path).
  return 0n; // Fallback
}
onnx-streaming.js ADDED
@@ -0,0 +1,380 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import { PCMPlayerWorklet as PCMPlayer } from './PCMPlayerWorklet.js';
3
+
4
+ // Configuration
5
+ const SAMPLE_RATE = 32000;
6
+
7
/**
 * Browser front-end for the Soprano TTS streaming demo.
 *
 * Owns the UI (status, metrics, visualizer), an AudioContext + worklet-based
 * PCM player, and a classic Worker that runs ONNX inference off the main
 * thread. Communication with the worker is message-based:
 *   main -> worker: { type: 'load' | 'generate' | 'stop', data? }
 *   worker -> main: { type: 'status' | 'model_status' | 'loaded' |
 *                     'generation_started' | 'audio_chunk' | 'stream_ended' |
 *                     'error', ... }
 */
export class SopranoONNXStreaming {
  constructor() {
    this.worker = null;
    this.player = null;
    this.audioContext = null;
    this.isGenerating = false;
    this.isWorkerReady = false;
    // Set when the user clicks Generate before the models finish loading;
    // generation is kicked off automatically once 'loaded' arrives.
    this.pendingGeneration = false;

    // Metrics state (all wall-clock, via performance.now()).
    this.generationStartTime = 0;
    this.lastChunkFinishTime = 0;
    this.rtfMovingAverage = 0;

    this.elements = {
      textInput: document.getElementById('text-input'),
      generateBtn: document.getElementById('generate-btn'),
      stopBtn: document.getElementById('stop-btn'),
      statusText: document.getElementById('stat-status'),
      statusIndicator: document.getElementById('status-indicator'),
      modelStatusIcon: document.querySelector('#model-status .model-status__dot'),
      modelStatusText: document.querySelector('#model-status .model-status__text'),
      btnLoader: document.getElementById('btn-loader'),
      statTTFB: document.getElementById('stat-ttfb'),
      statRTFx: document.getElementById('stat-rtfx'),
      ttfbBar: document.getElementById('ttfb-bar'),
      rtfxContext: document.getElementById('rtfx-context')
    };

    this.attachEventListeners();
    this.init();
    this.setupVisualization();
  }

  /**
   * Set up audio output and spawn the inference worker, then ask it to load
   * the models. The Generate button stays disabled until 'loaded' arrives.
   */
  async init() {
    console.log('Soprano v1.1 - Worker Edition');
    this.updateStatus('Initializing Worker...', 'running');

    // Initial button state while models load.
    this.elements.generateBtn.disabled = true;
    const btnText = this.elements.generateBtn.querySelector('.btn__text');
    if (btnText) btnText.textContent = 'Loading Models...';
    this.elements.btnLoader.style.display = 'block';

    // Audio context pinned to the model's output rate to avoid resampling.
    this.audioContext = new (window.AudioContext || window.webkitAudioContext)({
      sampleRate: SAMPLE_RATE,
      latencyHint: 'interactive'
    });

    await this.audioContext.audioWorklet.addModule('PCMPlayerWorklet.js');
    this.player = new PCMPlayer(this.audioContext);

    // Classic (non-module) worker hosts the ONNX sessions.
    console.log('Spawning Inference Worker (Classic)...');
    this.worker = new Worker('./inference-worker.js');

    this.worker.onmessage = (e) => {
      const { type, data, error, status, state, metrics, text } = e.data;

      switch (type) {
        case 'status':
          this.updateStatus(status, state);
          break;
        case 'model_status':
          this.updateModelStatus(status, text);
          break;
        case 'loaded': {
          console.log('Worker confirmed models loaded.');
          this.isWorkerReady = true;
          this.elements.generateBtn.disabled = false;
          this.elements.btnLoader.style.display = 'none';
          const loadedBtnText = this.elements.generateBtn.querySelector('.btn__text');
          if (loadedBtnText) loadedBtnText.textContent = 'Generate Audio';

          // Honor a Generate click made while models were still loading.
          if (this.pendingGeneration) {
            this.pendingGeneration = false;
            this.startGeneration();
          }
          break;
        }
        case 'generation_started':
          // The main thread already sets this in startGeneration for better precision
          break;
        case 'audio_chunk':
          this.handleAudioChunk(data, metrics);
          break;
        case 'stream_ended':
          this.handleStreamEnd();
          break;
        case 'error':
          console.error('Worker Error:', error);
          this.updateStatus(`Error: ${error}`, 'error');
          this.resetUI();
          break;
      }
    };

    // Trigger model load in the worker.
    this.worker.postMessage({ type: 'load' });
  }

  /** Wire up buttons, sample-text chips, character counter, and Ctrl/Cmd+Enter. */
  attachEventListeners() {
    this.elements.generateBtn.addEventListener('click', () => this.startGeneration());
    this.elements.stopBtn.addEventListener('click', () => this.stopGeneration());

    // Sample buttons
    document.querySelectorAll('.sample-btn').forEach(btn => {
      btn.addEventListener('click', () => {
        this.elements.textInput.value = btn.getAttribute('data-text');
        // Trigger input event to update character count
        this.elements.textInput.dispatchEvent(new Event('input'));
      });
    });

    // Character count
    this.elements.textInput.addEventListener('input', () => {
      const count = this.elements.textInput.value.length;
      const countEl = document.getElementById('char-count');
      if (countEl) countEl.textContent = count;
    });

    this.elements.textInput.addEventListener('keydown', (e) => {
      if ((e.ctrlKey || e.metaKey) && e.key === 'Enter') {
        this.startGeneration();
      }
    });
  }

  /**
   * Begin a generation run: validate state, resume the AudioContext (a user
   * gesture is required before playback), reset metrics, and post the text
   * to the worker. Queues the run via pendingGeneration if models are still
   * loading.
   */
  async startGeneration() {
    this.generationStartTime = performance.now();
    // BUGFIX: reset the chunk timestamp for this run. Previously it kept the
    // value from the prior generation (or 0), so the first chunk's RTF was
    // computed against a stale baseline and skewed the moving average.
    this.lastChunkFinishTime = this.generationStartTime;
    try {
      if (!this.isWorkerReady) {
        this.pendingGeneration = true;
        const btnText = this.elements.generateBtn.querySelector('.btn__text');
        if (btnText) btnText.textContent = 'Starting soon...';
        return;
      }

      if (this.isGenerating) return;

      if (this.audioContext && this.audioContext.state === 'suspended') {
        await this.audioContext.resume();
      }

      const text = this.elements.textInput.value.trim();
      if (!text) return;

      this.isGenerating = true;
      this.elements.generateBtn.disabled = true;
      this.elements.generateBtn.classList.add('btn--generating');
      this.elements.stopBtn.disabled = false;

      if (this.player) this.player.reset();

      // Reset metric displays for the new run.
      this.elements.statTTFB.textContent = '--';
      this.elements.statRTFx.textContent = '--';
      if (this.elements.ttfbBar) this.elements.ttfbBar.style.width = '0%';

      this.rtfMovingAverage = 0;

      this.worker.postMessage({ type: 'generate', data: { text } });
    } catch (err) {
      console.error('Error in startGeneration:', err);
      this.updateStatus(`Error: ${err.message}`, 'error');
      this.isGenerating = false;
      this.resetUI();
    }
  }

  /** Ask the worker to stop and reset the UI immediately (no wait for ack). */
  stopGeneration() {
    if (!this.isGenerating) return;
    this.worker.postMessage({ type: 'stop' });
    // Handle stop immediately in UI
    this.handleStreamEnd();
  }

  /**
   * Play one PCM chunk and update TTFB/RTF metrics.
   * @param {Float32Array} audioData - PCM samples at SAMPLE_RATE.
   * @param {{isFirst: boolean, chunkDuration: number}} metrics - chunkDuration in seconds.
   */
  handleAudioChunk(audioData, metrics) {
    if (!this.isGenerating) return;

    // Play audio immediately; DOM metric updates are deferred to the next frame.
    this.player.playAudio(audioData);

    requestAnimationFrame(() => {
      // TTFB (only on first chunk)
      if (metrics.isFirst) {
        const ttfb = performance.now() - this.generationStartTime;
        this.updateTTFB(ttfb);
      }

      // Real-Time Factor = audio seconds produced per wall-clock second,
      // smoothed with an exponential moving average (alpha = 0.2).
      const now = performance.now();
      const timeSinceLastChunk = (now - this.lastChunkFinishTime) / 1000;
      this.lastChunkFinishTime = now;

      if (timeSinceLastChunk > 0) {
        const chunkDurationSec = metrics.chunkDuration;
        const instantaneousRTF = chunkDurationSec / timeSinceLastChunk;

        if (this.rtfMovingAverage === 0) {
          this.rtfMovingAverage = instantaneousRTF;
        } else {
          this.rtfMovingAverage = this.rtfMovingAverage * 0.8 + instantaneousRTF * 0.2;
        }
        this.updateRTFx(this.rtfMovingAverage);
      }
    });
  }

  /** Flush the player's tail, mark generation finished, and restore the UI. */
  handleStreamEnd() {
    if (this.player.notifyStreamEnded) this.player.notifyStreamEnded();
    this.isGenerating = false;
    this.resetUI();
  }

  /** Restore the Generate/Stop buttons to their idle state. */
  resetUI() {
    this.elements.generateBtn.disabled = false;
    this.elements.generateBtn.classList.remove('btn--generating');
    const btnText = this.elements.generateBtn.querySelector('.btn__text');
    if (btnText) btnText.textContent = 'Generate Audio';
    this.elements.stopBtn.disabled = true;
  }

  /** Update the status line; state selects the indicator CSS modifier. */
  updateStatus(text, state) {
    this.elements.statusText.textContent = text;
    this.elements.statusIndicator.className = `status-indicator status-${state}`;
  }

  /** Update the model-loading badge; state selects the icon CSS modifier. */
  updateModelStatus(state, text) {
    this.elements.modelStatusText.textContent = text;
    this.elements.modelStatusIcon.className = `status-icon status-${state}`;
  }

  /** Display time-to-first-byte (ms) with a color-coded bar (green/amber/red). */
  updateTTFB(ms) {
    this.elements.statTTFB.textContent = Math.round(ms);
    const percentage = Math.min((ms / 2000) * 100, 100);
    this.elements.ttfbBar.style.width = `${percentage}%`;
    this.elements.ttfbBar.style.background = ms < 500 ? '#10b981' : ms < 1000 ? '#f59e0b' : '#ef4444';
  }

  /** Display the RTF multiplier; green when real-time (>= 1.0x), red otherwise. */
  updateRTFx(val) {
    this.elements.statRTFx.textContent = `${val.toFixed(2)}x`;
    this.elements.rtfxContext.style.color = val >= 1.0 ? '#10b981' : '#ef4444';
  }

  // -------------------------------------------------------------------------
  // Visualization
  // -------------------------------------------------------------------------

  /** Grab the two canvases, size them for the device, and start the draw loop. */
  setupVisualization() {
    this.waveformCanvas = document.getElementById('visualizer-waveform');
    this.barsCanvas = document.getElementById('visualizer-bars');
    if (!this.waveformCanvas || !this.barsCanvas) return;

    this.waveformCtx = this.waveformCanvas.getContext('2d');
    this.barsCtx = this.barsCanvas.getContext('2d');

    // Initial resize
    this.resizeCanvases();
    window.addEventListener('resize', () => this.resizeCanvases());

    // Start animation loop
    requestAnimationFrame(() => this.draw());
  }

  /** Match canvas backing-store size to CSS size * devicePixelRatio. */
  resizeCanvases() {
    if (!this.waveformCanvas || !this.barsCanvas) return;

    const parent = this.waveformCanvas.parentElement;
    const width = parent.clientWidth;
    const height = parent.clientHeight;

    const dpr = window.devicePixelRatio || 1;

    [this.waveformCanvas, this.barsCanvas].forEach(canvas => {
      canvas.width = width * dpr;
      canvas.height = height * dpr;
      canvas.style.width = `${width}px`;
      canvas.style.height = `${height}px`;
      const ctx = canvas.getContext('2d');
      ctx.scale(dpr, dpr);
    });
  }

  /** Per-frame render: bars from frequency data, line from time-domain data. */
  draw() {
    requestAnimationFrame(() => this.draw());

    if (!this.player || !this.player.analyser) return;

    const bufferLength = this.player.analyser.frequencyBinCount;
    // PERF: reuse one buffer across frames instead of allocating per frame;
    // recreate only if the analyser's bin count changes.
    if (!this._vizData || this._vizData.length !== bufferLength) {
      this._vizData = new Uint8Array(bufferLength);
    }
    const dataArray = this._vizData;

    // Draw Bars (Frequency)
    this.player.analyser.getByteFrequencyData(dataArray);
    this.drawBars(dataArray);

    // Draw Waveform (Time Domain)
    this.player.analyser.getByteTimeDomainData(dataArray);
    this.drawWaveform(dataArray);
  }

  /** Render the time-domain oscilloscope line (bytes centered on 128). */
  drawWaveform(dataArray) {
    const ctx = this.waveformCtx;
    const canvas = this.waveformCanvas;
    const width = canvas.width / (window.devicePixelRatio || 1);
    const height = canvas.height / (window.devicePixelRatio || 1);

    ctx.clearRect(0, 0, width, height);
    ctx.lineWidth = 2;
    ctx.strokeStyle = '#3b82f6'; // Blue primary
    ctx.beginPath();

    const sliceWidth = width / dataArray.length;
    let x = 0;

    for (let i = 0; i < dataArray.length; i++) {
      const v = dataArray[i] / 128.0;
      const y = (v * height) / 2;

      if (i === 0) ctx.moveTo(x, y);
      else ctx.lineTo(x, y);

      x += sliceWidth;
    }

    ctx.lineTo(width, height / 2);
    ctx.stroke();
  }

  /** Render the frequency bars, averaging adjacent bins into 120 buckets. */
  drawBars(dataArray) {
    const ctx = this.barsCtx;
    const canvas = this.barsCanvas;
    const width = canvas.width / (window.devicePixelRatio || 1);
    const height = canvas.height / (window.devicePixelRatio || 1);

    ctx.clearRect(0, 0, width, height);

    const barCount = 120; // Number of bars to display
    const barWidth = (width / barCount);
    // BUGFIX: with fewer than barCount bins, floor() yields 0 and the average
    // becomes 0/0 = NaN; clamp to at least 1 sample per bar.
    const samplesPerBar = Math.max(1, Math.floor(dataArray.length / barCount));

    for (let i = 0; i < barCount; i++) {
      let sum = 0;
      let counted = 0;
      for (let j = 0; j < samplesPerBar; j++) {
        const idx = i * samplesPerBar + j;
        if (idx < dataArray.length) {
          sum += dataArray[idx];
          counted++;
        }
      }
      const average = counted > 0 ? sum / counted : 0;
      const barHeight = (average / 255) * height * 0.8;

      // Gradient for bar
      const gradient = ctx.createLinearGradient(0, height, 0, height - barHeight);
      gradient.addColorStop(0, '#3b82f644');
      gradient.addColorStop(1, '#8b5cf6cc');

      ctx.fillStyle = gradient;

      // Rounded bars
      const x = i * barWidth;
      const y = height - barHeight;

      ctx.beginPath();
      ctx.roundRect(x + 1, y, barWidth - 2, barHeight, [2, 2, 0, 0]);
      ctx.fill();
    }
  }
}
376
+
377
// Bootstrap: create the app once the DOM is ready and expose it for debugging.
const bootstrap = () => {
  window.app = new SopranoONNXStreaming();
};
document.addEventListener('DOMContentLoaded', bootstrap);
onnx/soprano_backbone_kv_fp16.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9de7d23ba53431bfb3a3988cf11c7661865fde686a5d881eec3b3763a9a34596
3
+ size 169082390
onnx/soprano_backbone_kv_fp32.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4fc5e079488518a53caf55922f27bb3207fceb8f438413d58ec1876b1f28cee
3
+ size 319548418
onnx/soprano_backbone_kv_int8.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4b7b8406185396f14a0ee89e041e469edaeb7155efe7e5b6def57ff9acd15e4
3
+ size 80938986
onnx/soprano_decoder_fp32.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f6c3cd97c794d6c58c8edf4e3ce157c6465895098d062c5501d3170e837d947
3
+ size 262812
onnx/soprano_decoder_fp32.onnx.data ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02ba358c807c31966ec3f41ca1ceb9b5b82abf78786551b6067d7979e7dbf7fd
3
+ size 121503744
onnx/soprano_decoder_int8.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01f9bbe359a85e91fd47b7601d6030f4940a40d6c3677a58a7be82f57f6da11b
3
+ size 30793092
special_tokens_map.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "pad_token": {
3
+ "content": "[STOP]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ }
9
+ }
style.css ADDED
@@ -0,0 +1,978 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* ============================================
2
+ SOPRANO TTS DEMO - NEURAL OBSERVATORY THEME
3
+ ============================================ */
4
+
5
+ /* Design Tokens */
6
+ :root {
7
+ /* Colors - Space & Neural */
8
+ --color-void: #050810;
9
+ --color-deep: #0a0f1a;
10
+ --color-surface: rgba(15, 23, 42, 0.85);
11
+ --color-surface-elevated: rgba(30, 41, 59, 0.9);
12
+ --color-glass-border: rgba(255, 255, 255, 0.08);
13
+
14
+ /* Gradient Spectrum */
15
+ --gradient-primary: linear-gradient(135deg, #3b82f6 0%, #8b5cf6 100%);
16
+ --gradient-primary-vivid: linear-gradient(135deg, #60a5fa 0%, #a78bfa 100%);
17
+ --gradient-glow: linear-gradient(135deg, rgba(59, 130, 246, 0.4) 0%, rgba(139, 92, 246, 0.4) 100%);
18
+
19
+ /* Accent Colors */
20
+ --color-blue: #3b82f6;
21
+ --color-violet: #8b5cf6;
22
+ --color-cyan: #22d3ee;
23
+ --color-success: #10b981;
24
+ --color-warning: #f59e0b;
25
+ --color-error: #ef4444;
26
+
27
+ /* Text */
28
+ --color-text-primary: #f1f5f9;
29
+ --color-text-secondary: #94a3b8;
30
+ --color-text-muted: #64748b;
31
+
32
+ /* Typography */
33
+ --font-display: 'Syne', sans-serif;
34
+ --font-body: 'DM Sans', sans-serif;
35
+ --font-mono: 'Space Mono', monospace;
36
+
37
+ /* Spacing */
38
+ --space-1: 0.25rem;
39
+ --space-2: 0.5rem;
40
+ --space-3: 0.75rem;
41
+ --space-4: 1rem;
42
+ --space-5: 1.25rem;
43
+ --space-6: 1.5rem;
44
+ --space-8: 2rem;
45
+ --space-10: 2.5rem;
46
+ --space-12: 3rem;
47
+ --space-16: 4rem;
48
+
49
+ /* Radii */
50
+ --radius-sm: 8px;
51
+ --radius-md: 12px;
52
+ --radius-lg: 16px;
53
+ --radius-xl: 24px;
54
+ --radius-full: 9999px;
55
+
56
+ /* Shadows */
57
+ --shadow-sm: 0 2px 8px rgba(0, 0, 0, 0.3);
58
+ --shadow-md: 0 4px 16px rgba(0, 0, 0, 0.4);
59
+ --shadow-lg: 0 8px 32px rgba(0, 0, 0, 0.5);
60
+ --shadow-glow-blue: 0 0 40px rgba(59, 130, 246, 0.3);
61
+ --shadow-glow-violet: 0 0 40px rgba(139, 92, 246, 0.3);
62
+
63
+ /* Transitions */
64
+ --ease-out: cubic-bezier(0.16, 1, 0.3, 1);
65
+ --ease-bounce: cubic-bezier(0.34, 1.56, 0.64, 1);
66
+ --duration-fast: 150ms;
67
+ --duration-base: 250ms;
68
+ --duration-slow: 400ms;
69
+ }
70
+
71
+ /* Reset */
72
+ *, *::before, *::after {
73
+ box-sizing: border-box;
74
+ margin: 0;
75
+ padding: 0;
76
+ }
77
+
78
+ html {
79
+ font-size: 16px;
80
+ -webkit-font-smoothing: antialiased;
81
+ -moz-osx-font-smoothing: grayscale;
82
+ }
83
+
84
+ body {
85
+ font-family: var(--font-body);
86
+ background: var(--color-void);
87
+ color: var(--color-text-primary);
88
+ min-height: 100vh;
89
+ overflow-x: hidden;
90
+ line-height: 1.6;
91
+ }
92
+
93
+ /* Screen reader only */
94
+ .sr-only {
95
+ position: absolute;
96
+ width: 1px;
97
+ height: 1px;
98
+ padding: 0;
99
+ margin: -1px;
100
+ overflow: hidden;
101
+ clip: rect(0, 0, 0, 0);
102
+ white-space: nowrap;
103
+ border: 0;
104
+ }
105
+
106
+ /* ============================================
107
+ AMBIENT BACKGROUND
108
+ ============================================ */
109
+ .ambient-layer {
110
+ position: fixed;
111
+ inset: 0;
112
+ pointer-events: none;
113
+ z-index: 0;
114
+ overflow: hidden;
115
+ }
116
+
117
+ .orb {
118
+ position: absolute;
119
+ border-radius: 50%;
120
+ filter: blur(80px);
121
+ opacity: 0.6;
122
+ animation: orb-float 20s ease-in-out infinite;
123
+ }
124
+
125
+ .orb--primary {
126
+ width: 500px;
127
+ height: 500px;
128
+ background: radial-gradient(circle, rgba(59, 130, 246, 0.35) 0%, transparent 70%);
129
+ top: -15%;
130
+ right: -10%;
131
+ animation-delay: 0s;
132
+ }
133
+
134
+ .orb--secondary {
135
+ width: 400px;
136
+ height: 400px;
137
+ background: radial-gradient(circle, rgba(139, 92, 246, 0.3) 0%, transparent 70%);
138
+ bottom: -10%;
139
+ left: -10%;
140
+ animation-delay: -7s;
141
+ }
142
+
143
+ .orb--tertiary {
144
+ width: 300px;
145
+ height: 300px;
146
+ background: radial-gradient(circle, rgba(34, 211, 238, 0.2) 0%, transparent 70%);
147
+ top: 40%;
148
+ left: 50%;
149
+ transform: translateX(-50%);
150
+ animation-delay: -14s;
151
+ }
152
+
153
+ @keyframes orb-float {
154
+ 0%, 100% {
155
+ transform: translate(0, 0) scale(1);
156
+ }
157
+ 25% {
158
+ transform: translate(30px, -40px) scale(1.05);
159
+ }
160
+ 50% {
161
+ transform: translate(-20px, 20px) scale(0.95);
162
+ }
163
+ 75% {
164
+ transform: translate(40px, 30px) scale(1.02);
165
+ }
166
+ }
167
+
168
+ .grid-overlay {
169
+ position: absolute;
170
+ inset: 0;
171
+ background-image:
172
+ linear-gradient(rgba(59, 130, 246, 0.03) 1px, transparent 1px),
173
+ linear-gradient(90deg, rgba(59, 130, 246, 0.03) 1px, transparent 1px);
174
+ background-size: 60px 60px;
175
+ mask-image: radial-gradient(ellipse at center, black 0%, transparent 70%);
176
+ -webkit-mask-image: radial-gradient(ellipse at center, black 0%, transparent 70%);
177
+ }
178
+
179
+ /* ============================================
180
+ APP SHELL
181
+ ============================================ */
182
+ .app-shell {
183
+ position: relative;
184
+ z-index: 1;
185
+ max-width: 960px;
186
+ margin: 0 auto;
187
+ padding: var(--space-8) var(--space-6);
188
+ min-height: 100vh;
189
+ display: flex;
190
+ flex-direction: column;
191
+ animation: shell-enter 0.8s var(--ease-out) both;
192
+ }
193
+
194
+ @keyframes shell-enter {
195
+ from {
196
+ opacity: 0;
197
+ transform: translateY(30px);
198
+ }
199
+ to {
200
+ opacity: 1;
201
+ transform: translateY(0);
202
+ }
203
+ }
204
+
205
+ /* ============================================
206
+ HERO HEADER
207
+ ============================================ */
208
+ .hero {
209
+ text-align: center;
210
+ margin-bottom: var(--space-10);
211
+ }
212
+
213
+ .hero__brand {
214
+ display: flex;
215
+ align-items: center;
216
+ justify-content: center;
217
+ gap: var(--space-4);
218
+ margin-bottom: var(--space-4);
219
+ flex-wrap: wrap;
220
+ }
221
+
222
+ .logo {
223
+ display: flex;
224
+ align-items: center;
225
+ gap: var(--space-3);
226
+ }
227
+
228
+ .logo__icon {
229
+ width: 40px;
230
+ height: 40px;
231
+ filter: drop-shadow(0 0 12px rgba(139, 92, 246, 0.5));
232
+ }
233
+
234
+ .logo__text {
235
+ font-family: var(--font-display);
236
+ font-size: 2.5rem;
237
+ font-weight: 800;
238
+ background: var(--gradient-primary);
239
+ -webkit-background-clip: text;
240
+ -webkit-text-fill-color: transparent;
241
+ background-clip: text;
242
+ letter-spacing: -0.03em;
243
+ }
244
+
245
+ .badge {
246
+ display: inline-flex;
247
+ align-items: center;
248
+ padding: var(--space-1) var(--space-3);
249
+ background: var(--color-surface);
250
+ border: 1px solid var(--color-glass-border);
251
+ border-radius: var(--radius-full);
252
+ font-family: var(--font-mono);
253
+ font-size: 0.75rem;
254
+ font-weight: 700;
255
+ color: var(--color-text-secondary);
256
+ letter-spacing: 0.05em;
257
+ text-transform: uppercase;
258
+ }
259
+
260
+ .hero__tagline {
261
+ font-size: 1.1rem;
262
+ color: var(--color-text-secondary);
263
+ max-width: 500px;
264
+ margin: 0 auto;
265
+ }
266
+
267
+ /* ============================================
268
+ DEVICE SELECTION
269
+ ============================================ */
270
+ .device-section {
271
+ margin-bottom: var(--space-8);
272
+ }
273
+
274
+ .device-cards {
275
+ display: grid;
276
+ grid-template-columns: repeat(2, 1fr);
277
+ gap: var(--space-4);
278
+ margin-bottom: var(--space-4);
279
+ }
280
+
281
+ .device-card {
282
+ position: relative;
283
+ display: flex;
284
+ align-items: center;
285
+ gap: var(--space-4);
286
+ padding: var(--space-5);
287
+ background: var(--color-surface);
288
+ border: 2px solid transparent;
289
+ border-radius: var(--radius-lg);
290
+ cursor: pointer;
291
+ transition: all var(--duration-base) var(--ease-out);
292
+ text-align: left;
293
+ color: var(--color-text-primary);
294
+ font-family: inherit;
295
+ }
296
+
297
+ .device-card::before {
298
+ content: '';
299
+ position: absolute;
300
+ inset: -2px;
301
+ border-radius: inherit;
302
+ background: var(--gradient-primary);
303
+ opacity: 0;
304
+ z-index: -1;
305
+ transition: opacity var(--duration-base) var(--ease-out);
306
+ }
307
+
308
+ .device-card:hover {
309
+ background: var(--color-surface-elevated);
310
+ transform: translateY(-2px);
311
+ }
312
+
313
+ .device-card:focus-visible {
314
+ outline: 2px solid var(--color-blue);
315
+ outline-offset: 2px;
316
+ }
317
+
318
+ .device-card[aria-checked="true"] {
319
+ border-color: transparent;
320
+ background: rgba(59, 130, 246, 0.1);
321
+ }
322
+
323
+ .device-card[aria-checked="true"]::before {
324
+ opacity: 1;
325
+ }
326
+
327
+ .device-card[aria-checked="true"] .device-card__check {
328
+ opacity: 1;
329
+ transform: scale(1);
330
+ }
331
+
332
+ .device-card[aria-disabled="true"] {
333
+ opacity: 0.4;
334
+ cursor: not-allowed;
335
+ }
336
+
337
+ .device-card[aria-disabled="true"]:hover {
338
+ transform: none;
339
+ }
340
+
341
+ .device-card__icon {
342
+ width: 48px;
343
+ height: 48px;
344
+ display: flex;
345
+ align-items: center;
346
+ justify-content: center;
347
+ border-radius: var(--radius-md);
348
+ background: var(--color-surface-elevated);
349
+ color: var(--color-text-secondary);
350
+ flex-shrink: 0;
351
+ }
352
+
353
+ .device-card__icon svg {
354
+ width: 28px;
355
+ height: 28px;
356
+ }
357
+
358
+ .device-card[aria-checked="true"] .device-card__icon {
359
+ background: var(--gradient-primary);
360
+ color: white;
361
+ }
362
+
363
+ .device-card__content {
364
+ flex: 1;
365
+ display: flex;
366
+ flex-direction: column;
367
+ gap: var(--space-1);
368
+ }
369
+
370
+ .device-card__name {
371
+ font-family: var(--font-display);
372
+ font-size: 1.1rem;
373
+ font-weight: 600;
374
+ }
375
+
376
+ .device-card__sub {
377
+ font-size: 0.85rem;
378
+ color: var(--color-text-secondary);
379
+ }
380
+
381
+ .device-card__speed {
382
+ display: inline-flex;
383
+ align-items: center;
384
+ padding: 2px 8px;
385
+ background: var(--gradient-primary);
386
+ border-radius: var(--radius-full);
387
+ font-size: 0.7rem;
388
+ font-weight: 600;
389
+ color: white;
390
+ width: fit-content;
391
+ margin-top: var(--space-1);
392
+ }
393
+
394
+ .device-card__check {
395
+ width: 24px;
396
+ height: 24px;
397
+ color: var(--color-success);
398
+ opacity: 0;
399
+ transform: scale(0.8);
400
+ transition: all var(--duration-base) var(--ease-bounce);
401
+ }
402
+
403
+ /* GPU Banner */
404
+ .gpu-banner {
405
+ display: flex;
406
+ align-items: center;
407
+ gap: var(--space-3);
408
+ padding: var(--space-3) var(--space-4);
409
+ background: var(--color-surface);
410
+ border: 1px solid var(--color-glass-border);
411
+ border-radius: var(--radius-md);
412
+ font-size: 0.9rem;
413
+ }
414
+
415
+ .gpu-banner--available {
416
+ border-color: rgba(16, 185, 129, 0.3);
417
+ background: rgba(16, 185, 129, 0.08);
418
+ }
419
+
420
+ .gpu-banner--unavailable {
421
+ border-color: rgba(245, 158, 11, 0.3);
422
+ background: rgba(245, 158, 11, 0.08);
423
+ }
424
+
425
+ .gpu-banner__icon {
426
+ width: 24px;
427
+ height: 24px;
428
+ flex-shrink: 0;
429
+ }
430
+
431
+ .gpu-banner--available .gpu-banner__icon {
432
+ color: var(--color-success);
433
+ }
434
+
435
+ .gpu-banner--unavailable .gpu-banner__icon {
436
+ color: var(--color-warning);
437
+ }
438
+
439
+ .gpu-banner__content {
440
+ display: flex;
441
+ flex-direction: column;
442
+ gap: 2px;
443
+ }
444
+
445
+ .gpu-banner__content strong {
446
+ color: var(--color-text-primary);
447
+ }
448
+
449
+ .gpu-banner__content span {
450
+ color: var(--color-text-secondary);
451
+ font-size: 0.85rem;
452
+ }
453
+
454
+ /* ============================================
455
+ INPUT SECTION
456
+ ============================================ */
457
+ .input-section {
458
+ margin-bottom: var(--space-8);
459
+ }
460
+
461
+ .textarea-wrap {
462
+ position: relative;
463
+ margin-bottom: var(--space-4);
464
+ }
465
+
466
+ textarea {
467
+ width: 100%;
468
+ min-height: 120px;
469
+ padding: var(--space-4);
470
+ padding-bottom: var(--space-8);
471
+ background: var(--color-surface);
472
+ border: 1px solid var(--color-glass-border);
473
+ border-radius: var(--radius-lg);
474
+ color: var(--color-text-primary);
475
+ font-family: var(--font-body);
476
+ font-size: 1rem;
477
+ line-height: 1.6;
478
+ resize: vertical;
479
+ transition: all var(--duration-base) var(--ease-out);
480
+ }
481
+
482
+ textarea::placeholder {
483
+ color: var(--color-text-muted);
484
+ }
485
+
486
+ textarea:focus {
487
+ outline: none;
488
+ border-color: var(--color-blue);
489
+ box-shadow: 0 0 0 3px rgba(59, 130, 246, 0.15);
490
+ }
491
+
492
+ .textarea-meta {
493
+ position: absolute;
494
+ bottom: var(--space-3);
495
+ right: var(--space-4);
496
+ display: flex;
497
+ align-items: center;
498
+ gap: var(--space-4);
499
+ }
500
+
501
+ .char-count {
502
+ font-family: var(--font-mono);
503
+ font-size: 0.75rem;
504
+ color: var(--color-text-muted);
505
+ }
506
+
507
+ /* Sample Texts */
508
+ .sample-texts {
509
+ display: flex;
510
+ align-items: center;
511
+ flex-wrap: wrap;
512
+ gap: var(--space-2);
513
+ margin-bottom: var(--space-4);
514
+ }
515
+
516
+ .sample-texts__label {
517
+ font-size: 0.85rem;
518
+ color: var(--color-text-muted);
519
+ }
520
+
521
+ .sample-btn {
522
+ padding: var(--space-1) var(--space-3);
523
+ background: var(--color-surface);
524
+ border: 1px solid var(--color-glass-border);
525
+ border-radius: var(--radius-full);
526
+ color: var(--color-text-secondary);
527
+ font-family: var(--font-body);
528
+ font-size: 0.8rem;
529
+ cursor: pointer;
530
+ transition: all var(--duration-fast) var(--ease-out);
531
+ }
532
+
533
+ .sample-btn:hover {
534
+ background: var(--color-surface-elevated);
535
+ color: var(--color-text-primary);
536
+ border-color: rgba(255, 255, 255, 0.15);
537
+ }
538
+
539
+ .sample-btn:active {
540
+ transform: scale(0.97);
541
+ }
542
+
543
+ /* Controls */
544
+ .controls {
545
+ display: flex;
546
+ gap: var(--space-3);
547
+ }
548
+
549
+ .btn {
550
+ display: inline-flex;
551
+ align-items: center;
552
+ justify-content: center;
553
+ gap: var(--space-2);
554
+ padding: var(--space-4) var(--space-6);
555
+ border-radius: var(--radius-md);
556
+ font-family: var(--font-body);
557
+ font-size: 1rem;
558
+ font-weight: 600;
559
+ cursor: pointer;
560
+ border: none;
561
+ transition: all var(--duration-base) var(--ease-out);
562
+ }
563
+
564
+ .btn__icon {
565
+ width: 18px;
566
+ height: 18px;
567
+ }
568
+
569
+ .btn--primary {
570
+ flex: 1;
571
+ position: relative;
572
+ background: var(--gradient-primary);
573
+ color: white;
574
+ overflow: hidden;
575
+ }
576
+
577
+ .btn--primary::before {
578
+ content: '';
579
+ position: absolute;
580
+ inset: 0;
581
+ background: linear-gradient(135deg, rgba(255,255,255,0.2) 0%, transparent 50%);
582
+ opacity: 0;
583
+ transition: opacity var(--duration-fast);
584
+ }
585
+
586
+ .btn--primary:hover:not(:disabled)::before {
587
+ opacity: 1;
588
+ }
589
+
590
+ .btn--primary:hover:not(:disabled) {
591
+ transform: translateY(-2px);
592
+ box-shadow: var(--shadow-glow-blue);
593
+ }
594
+
595
+ .btn--primary:active:not(:disabled) {
596
+ transform: translateY(0);
597
+ }
598
+
599
+ .btn--primary.btn--generating {
600
+ animation: pulse-glow 2s ease-in-out infinite;
601
+ }
602
+
603
+ @keyframes pulse-glow {
604
+ 0%, 100% {
605
+ box-shadow: 0 0 0 0 rgba(59, 130, 246, 0.4);
606
+ }
607
+ 50% {
608
+ box-shadow: 0 0 0 15px rgba(59, 130, 246, 0);
609
+ }
610
+ }
611
+
612
+ .btn--secondary {
613
+ background: var(--color-surface);
614
+ color: var(--color-text-primary);
615
+ border: 1px solid var(--color-glass-border);
616
+ }
617
+
618
+ .btn--secondary:hover:not(:disabled) {
619
+ background: var(--color-surface-elevated);
620
+ border-color: rgba(255, 255, 255, 0.15);
621
+ }
622
+
623
+ .btn:disabled {
624
+ opacity: 0.4;
625
+ cursor: not-allowed;
626
+ }
627
+
628
+ .btn__loader {
629
+ width: 18px;
630
+ height: 18px;
631
+ border: 2px solid rgba(255, 255, 255, 0.3);
632
+ border-top-color: white;
633
+ border-radius: 50%;
634
+ animation: spin 0.8s linear infinite;
635
+ display: none;
636
+ }
637
+
638
+ @keyframes spin {
639
+ to { transform: rotate(360deg); }
640
+ }
641
+
642
+ /* ============================================
643
+ OUTPUT SECTION
644
+ ============================================ */
645
+ .output-section {
646
+ display: grid;
647
+ grid-template-columns: 1fr 280px;
648
+ gap: var(--space-6);
649
+ margin-bottom: var(--space-8);
650
+ }
651
+
652
+ /* Visualizer Panel */
653
+ .visualizer-panel {
654
+ background: var(--color-surface);
655
+ border: 1px solid var(--color-glass-border);
656
+ border-radius: var(--radius-xl);
657
+ overflow: hidden;
658
+ }
659
+
660
+ .visualizer-panel__header {
661
+ display: flex;
662
+ align-items: center;
663
+ justify-content: space-between;
664
+ padding: var(--space-4) var(--space-5);
665
+ border-bottom: 1px solid var(--color-glass-border);
666
+ }
667
+
668
+ .visualizer-panel__title {
669
+ font-family: var(--font-display);
670
+ font-size: 0.9rem;
671
+ font-weight: 600;
672
+ color: var(--color-text-secondary);
673
+ text-transform: uppercase;
674
+ letter-spacing: 0.05em;
675
+ }
676
+
677
+ .status-indicator {
678
+ display: flex;
679
+ align-items: center;
680
+ gap: var(--space-2);
681
+ }
682
+
683
+ .status-dot {
684
+ width: 8px;
685
+ height: 8px;
686
+ border-radius: 50%;
687
+ background: var(--color-text-muted);
688
+ }
689
+
690
+ .status-indicator--idle .status-dot {
691
+ background: var(--color-text-muted);
692
+ }
693
+
694
+ .status-indicator--running .status-dot {
695
+ background: var(--color-success);
696
+ animation: dot-pulse 1.5s ease-in-out infinite;
697
+ }
698
+
699
+ .status-indicator--error .status-dot {
700
+ background: var(--color-error);
701
+ }
702
+
703
+ @keyframes dot-pulse {
704
+ 0%, 100% { opacity: 1; }
705
+ 50% { opacity: 0.4; }
706
+ }
707
+
708
+ .status-text {
709
+ font-size: 0.85rem;
710
+ color: var(--color-text-secondary);
711
+ }
712
+
713
+ .visualizer-container {
714
+ position: relative;
715
+ height: 160px;
716
+ background: linear-gradient(180deg, transparent 0%, rgba(59, 130, 246, 0.03) 100%);
717
+ }
718
+
719
+ .visualizer-container canvas {
720
+ position: absolute;
721
+ inset: 0;
722
+ width: 100%;
723
+ height: 100%;
724
+ }
725
+
726
+ .visualizer-bars {
727
+ opacity: 0.7;
728
+ mix-blend-mode: screen;
729
+ }
730
+
731
+ /* Metrics Panel */
732
+ .metrics-panel {
733
+ display: flex;
734
+ flex-direction: column;
735
+ gap: var(--space-4);
736
+ }
737
+
738
+ .metrics-panel__title {
739
+ font-family: var(--font-display);
740
+ font-size: 0.8rem;
741
+ font-weight: 600;
742
+ color: var(--color-text-muted);
743
+ text-transform: uppercase;
744
+ letter-spacing: 0.08em;
745
+ }
746
+
747
+ .metric {
748
+ padding: var(--space-4);
749
+ background: var(--color-surface);
750
+ border: 1px solid var(--color-glass-border);
751
+ border-radius: var(--radius-md);
752
+ }
753
+
754
+ .metric--highlight {
755
+ background: linear-gradient(135deg, rgba(59, 130, 246, 0.08) 0%, rgba(139, 92, 246, 0.08) 100%);
756
+ border-color: rgba(139, 92, 246, 0.2);
757
+ }
758
+
759
+ .metric__header {
760
+ display: flex;
761
+ align-items: center;
762
+ justify-content: space-between;
763
+ margin-bottom: var(--space-2);
764
+ }
765
+
766
+ .metric__label {
767
+ font-size: 0.8rem;
768
+ color: var(--color-text-secondary);
769
+ }
770
+
771
+ .metric__info {
772
+ width: 16px;
773
+ height: 16px;
774
+ padding: 0;
775
+ background: none;
776
+ border: none;
777
+ color: var(--color-text-muted);
778
+ cursor: help;
779
+ position: relative;
780
+ }
781
+
782
+ .metric__info:hover {
783
+ color: var(--color-text-secondary);
784
+ }
785
+
786
+ .metric__info::after {
787
+ content: attr(data-tooltip);
788
+ position: absolute;
789
+ bottom: calc(100% + 8px);
790
+ right: 0;
791
+ width: 200px;
792
+ padding: var(--space-2) var(--space-3);
793
+ background: var(--color-surface-elevated);
794
+ border: 1px solid var(--color-glass-border);
795
+ border-radius: var(--radius-sm);
796
+ font-size: 0.75rem;
797
+ color: var(--color-text-secondary);
798
+ text-align: left;
799
+ line-height: 1.4;
800
+ opacity: 0;
801
+ visibility: hidden;
802
+ transform: translateY(4px);
803
+ transition: all var(--duration-fast) var(--ease-out);
804
+ z-index: 10;
805
+ pointer-events: none;
806
+ }
807
+
808
+ .metric__info:hover::after {
809
+ opacity: 1;
810
+ visibility: visible;
811
+ transform: translateY(0);
812
+ }
813
+
814
+ .metric__value {
815
+ display: flex;
816
+ align-items: baseline;
817
+ gap: var(--space-1);
818
+ }
819
+
820
+ .metric__number {
821
+ font-family: var(--font-mono);
822
+ font-size: 1.75rem;
823
+ font-weight: 700;
824
+ background: var(--gradient-primary);
825
+ -webkit-background-clip: text;
826
+ -webkit-text-fill-color: transparent;
827
+ background-clip: text;
828
+ }
829
+
830
+ .metric__number--large {
831
+ font-size: 2.25rem;
832
+ }
833
+
834
+ .metric__unit {
835
+ font-family: var(--font-mono);
836
+ font-size: 0.9rem;
837
+ color: var(--color-text-muted);
838
+ }
839
+
840
+ .metric__bar {
841
+ height: 4px;
842
+ background: rgba(255, 255, 255, 0.08);
843
+ border-radius: var(--radius-full);
844
+ margin-top: var(--space-3);
845
+ overflow: hidden;
846
+ }
847
+
848
+ .metric__bar-fill {
849
+ height: 100%;
850
+ background: var(--gradient-primary);
851
+ border-radius: var(--radius-full);
852
+ transition: width var(--duration-slow) var(--ease-out);
853
+ width: 0%;
854
+ }
855
+
856
+ .metric__context {
857
+ font-size: 0.75rem;
858
+ color: var(--color-text-muted);
859
+ margin-top: var(--space-2);
860
+ }
861
+
862
+ .metric--status {
863
+ display: flex;
864
+ align-items: center;
865
+ justify-content: space-between;
866
+ }
867
+
868
+ .model-status {
869
+ display: flex;
870
+ align-items: center;
871
+ gap: var(--space-2);
872
+ }
873
+
874
+ .model-status__dot {
875
+ width: 8px;
876
+ height: 8px;
877
+ border-radius: 50%;
878
+ background: var(--color-text-muted);
879
+ }
880
+
881
+ .model-status--loading .model-status__dot {
882
+ background: var(--color-warning);
883
+ animation: dot-pulse 1s ease-in-out infinite;
884
+ }
885
+
886
+ .model-status--ready .model-status__dot {
887
+ background: var(--color-success);
888
+ }
889
+
890
+ .model-status__text {
891
+ font-size: 0.85rem;
892
+ color: var(--color-text-secondary);
893
+ }
894
+
895
+ /* ============================================
896
+ FOOTER
897
+ ============================================ */
898
+ .footer {
899
+ margin-top: auto;
900
+ padding-top: var(--space-8);
901
+ text-align: center;
902
+ }
903
+
904
+ .footer p {
905
+ font-size: 0.8rem;
906
+ color: var(--color-text-muted);
907
+ }
908
+
909
+ /* ============================================
910
+ RESPONSIVE
911
+ ============================================ */
912
+ @media (max-width: 768px) {
913
+ .app-shell {
914
+ padding: var(--space-6) var(--space-4);
915
+ }
916
+
917
+ .logo__text {
918
+ font-size: 2rem;
919
+ }
920
+
921
+ .device-cards {
922
+ grid-template-columns: 1fr;
923
+ }
924
+
925
+ .output-section {
926
+ grid-template-columns: 1fr;
927
+ }
928
+
929
+ .metrics-panel {
930
+ display: grid;
931
+ grid-template-columns: repeat(2, 1fr);
932
+ }
933
+
934
+ .metric:last-child {
935
+ grid-column: span 2;
936
+ }
937
+ }
938
+
939
+ @media (max-width: 480px) {
940
+ .hero__brand {
941
+ flex-direction: column;
942
+ gap: var(--space-3);
943
+ }
944
+
945
+ .controls {
946
+ flex-direction: column;
947
+ }
948
+
949
+ .btn--primary {
950
+ width: 100%;
951
+ }
952
+
953
+ .sample-texts {
954
+ flex-direction: column;
955
+ align-items: flex-start;
956
+ }
957
+
958
+ .sample-btn {
959
+ width: 100%;
960
+ text-align: center;
961
+ }
962
+
963
+ .metrics-panel {
964
+ grid-template-columns: 1fr;
965
+ }
966
+
967
+ .metric:last-child {
968
+ grid-column: span 1;
969
+ }
970
+
971
+ .metric__number {
972
+ font-size: 1.5rem;
973
+ }
974
+
975
+ .metric__number--large {
976
+ font-size: 1.75rem;
977
+ }
978
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff