KevinAHM commited on
Commit
9b19787
·
0 Parent(s):

Soprano 1.1

Browse files
.gitattributes ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ *.onnx filter=lfs diff=lfs merge=lfs -text
2
+ *.onnx.data filter=lfs diff=lfs merge=lfs -text
EventEmitter.js ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
export class EventEmitter {
    /**
     * Minimal DOM-style event emitter.
     * Listeners are stored per event name as { callback, once } records;
     * `once` listeners are dropped after their first invocation.
     */
    constructor() {
        // Map of event name -> array of wrapped listeners.
        this.events = {};
    }

    /**
     * Register a listener for `event`.
     * @param {string} event - Event name.
     * @param {Function} listener - Callback invoked with the event object.
     * @param {{once?: boolean}} [options] - `once: true` removes the listener
     *     automatically after its first invocation.
     */
    addEventListener(event, listener, options = {}) {
        if (!this.events[event]) {
            this.events[event] = [];
        }

        this.events[event].push({
            callback: listener,
            once: options.once || false
        });
    }

    /**
     * Remove a previously registered listener, matched by callback identity.
     * No-op if the event or listener is unknown.
     */
    removeEventListener(event, listener) {
        if (!this.events[event]) return;

        this.events[event] = this.events[event].filter(
            (wrapped) => wrapped.callback !== listener
        );
    }

    /**
     * Dispatch an event object (must carry a `type` property) to every
     * listener registered for that type. `once` listeners are removed after
     * being invoked.
     *
     * Fix: `target`/`currentTarget` are now assigned to the dispatching
     * emitter. The companion CustomEvent class declares both fields but they
     * were previously left `null`, so handlers could not identify the source.
     */
    dispatchEvent(event) {
        const eventName = event.type;
        if (!this.events[eventName]) return;

        event.target = this;
        event.currentTarget = this;

        this.events[eventName] = this.events[eventName].filter((wrapped) => {
            wrapped.callback.call(this, event);
            return !wrapped.once; // keep only persistent listeners
        });
    }

    /**
     * Convenience wrapper: wraps `data` in a CustomEvent (as `detail`) and
     * dispatches it.
     */
    emit(eventName, data) {
        const event = new CustomEvent(eventName, { detail: data });
        this.dispatchEvent(event);
    }
}
42
+
43
export class CustomEvent {
    /**
     * Minimal event object modeled after the DOM CustomEvent interface,
     * for use with the in-house EventEmitter (e.g. in worker/worklet
     * contexts without the DOM event machinery).
     *
     * @param {string} type - Event name.
     * @param {{detail?: *, bubbles?: boolean, cancelable?: boolean}} [options]
     */
    constructor(type, options = {}) {
        this.type = type;
        this.detail = options.detail;
        // Dispatch bookkeeping; a dispatcher may fill these in later.
        this.target = null;
        this.currentTarget = null;
        this.defaultPrevented = false;
        this.bubbles = options.bubbles || false;
        this.cancelable = options.cancelable || false;
    }

    /**
     * Mark the event's default action as prevented.
     * Honored only when the event was created with `cancelable: true`;
     * otherwise this call is a no-op, matching DOM semantics.
     */
    preventDefault() {
        if (!this.cancelable) return;
        this.defaultPrevented = true;
    }
}
JitterBuffer.js ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { EventEmitter, CustomEvent } from './EventEmitter.js';
2
+
3
export class JitterBuffer extends EventEmitter {
    /**
     * Accumulates incoming PCM16 chunks and emits them in batches via a
     * 'flush' event: first once a pre-roll threshold is reached, then
     * whenever the queue fills to `maxByteLength`.
     *
     * @param {number} maxByteLength - Byte threshold that triggers a flush
     *     once playback has started.
     * @param {number} [sampleRate=24000] - Sample rate used for duration math.
     * @param {number} [minBufferBeforePlayback=0] - Bytes to accumulate
     *     before the very first flush (pre-roll).
     */
    constructor(maxByteLength, sampleRate = 24000, minBufferBeforePlayback = 0) {
        super();
        this.maxByteLength = maxByteLength;
        this.sampleRate = sampleRate;
        this.minBufferBeforePlayback = minBufferBeforePlayback;
        this.buffer = [];
        this.bytesPerSample = 2; // PCM16 = 2 bytes per sample
        this.hasStartedPlayback = false;
    }

    /** Total queued size in bytes across all chunks. */
    get byteLength() {
        let total = 0;
        for (const chunk of this.buffer) {
            total += chunk.byteLength;
        }
        return total;
    }

    /** Queued audio duration in milliseconds at the configured sample rate. */
    get durationMs() {
        const totalSamples = this.byteLength / this.bytesPerSample;
        return (totalSamples / this.sampleRate) * 1000;
    }

    /**
     * Append a PCM16 chunk and flush if a threshold is crossed.
     * @param {Int16Array} data - Raw PCM16 samples.
     * @throws {Error} If `data` is not an Int16Array.
     */
    enqueue(data) {
        if (!(data instanceof Int16Array)) {
            throw new Error('JitterBuffer expects Int16Array data');
        }

        this.buffer.push(data);
        const queuedBytes = this.byteLength;

        if (!this.hasStartedPlayback) {
            // Still pre-rolling: wait for the initial threshold, then start.
            if (queuedBytes >= this.minBufferBeforePlayback) {
                this.hasStartedPlayback = true;
                this.flush();
            }
        } else if (queuedBytes >= this.maxByteLength) {
            // Playback already underway: flush whenever the queue fills up.
            this.flush();
        }
    }

    /** Emit all queued chunks via a 'flush' event, then empty the queue. */
    flush() {
        if (this.buffer.length === 0) return;

        const event = new CustomEvent('flush', { detail: this.buffer });
        this.dispatchEvent(event);
        this.buffer = [];
    }

    /** Discard all queued audio and re-arm the pre-roll gate. */
    clear() {
        this.buffer = [];
        this.hasStartedPlayback = false;
    }

    /** Flush whatever remains; intended for end-of-stream. */
    forceFlush() {
        if (this.buffer.length > 0) {
            console.log(`Force flushing ${this.byteLength} bytes at stream end`);
            this.flush();
        }
    }

    /** Snapshot of queue occupancy for debugging/metrics. */
    getBufferStatus() {
        return {
            chunks: this.buffer.length,
            byteLength: this.byteLength,
            durationMs: this.durationMs,
            fillPercentage: (this.byteLength / this.maxByteLength) * 100
        };
    }
}
LICENSE ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright [yyyy] [name of copyright owner]
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
PCMPlayer.js ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { EventEmitter, CustomEvent } from './EventEmitter.js';
2
+
3
export class PCMPlayer extends EventEmitter {
    /**
     * Schedules PCM audio chunks for back-to-back playback on a Web Audio
     * AudioContext, applying short gain fades at chunk edges to avoid clicks.
     *
     * Events emitted: 'audioStarted', 'audioEnded', 'volumeChange'.
     *
     * @param {AudioContext} audioContext - A running Web Audio context.
     */
    constructor(audioContext) {
        super();
        this.audioContext = audioContext;
        // Next free position on the AudioContext timeline to schedule at.
        this.playbackTime = 0;
        // Master chain: per-chunk gain -> gainNode -> destination,
        // with an analyser tapping the post-gain signal.
        this.gainNode = this.audioContext.createGain();
        this.gainNode.connect(this.audioContext.destination);
        this.analyser = this.audioContext.createAnalyser();
        this.gainNode.connect(this.analyser);

        // Default settings
        this.fadeInDuration = 0.01; // 10ms fade in to avoid clicks
        this.fadeOutDuration = 0.01; // 10ms fade out
    }

    /**
     * Schedule one audio chunk for playback immediately after any previously
     * scheduled audio (or "now" if the schedule has fallen behind).
     *
     * @param {Int16Array|Float32Array} data - PCM16 samples (converted) or
     *     float samples in [-1, 1), mono, at the context's sample rate.
     */
    playAudio(data) {
        if (this.audioContext.state !== 'running') {
            console.warn(`Audio context is in ${this.audioContext.state} state`);
            return;
        }

        // Accept raw PCM16 or already-converted float samples.
        const float32Array = data instanceof Int16Array
            ? this.pcm16ToFloat32(data)
            : data;

        const audioBuffer = this.audioContext.createBuffer(
            1, // mono
            float32Array.length,
            this.audioContext.sampleRate
        );

        audioBuffer.copyToChannel(float32Array, 0);

        const source = this.audioContext.createBufferSource();
        source.buffer = audioBuffer;

        // Per-chunk gain node to apply edge fades without disturbing the
        // master volume.
        const sourceGain = this.audioContext.createGain();
        source.connect(sourceGain);
        sourceGain.connect(this.gainNode);

        // Never schedule in the past; if playback fell behind, catch up.
        const currentTime = this.audioContext.currentTime;
        if (this.playbackTime < currentTime) {
            this.playbackTime = currentTime;
        }
        const startAt = this.playbackTime; // scheduled start of THIS chunk

        // Apply fade in
        sourceGain.gain.setValueAtTime(0, startAt);
        sourceGain.gain.linearRampToValueAtTime(1, startAt + this.fadeInDuration);

        // Apply fade out
        const duration = audioBuffer.duration;
        const fadeOutTime = startAt + duration - this.fadeOutDuration;
        sourceGain.gain.setValueAtTime(1, fadeOutTime);
        sourceGain.gain.linearRampToValueAtTime(0, startAt + duration);

        source.start(startAt);
        this.playbackTime = startAt + duration;

        // Fix: report the chunk's scheduled START time. Previously this field
        // carried the post-increment playbackTime, i.e. the chunk's END time.
        this.emit('audioStarted', {
            startTime: startAt,
            duration: duration,
            samples: float32Array.length
        });

        // Clean up after playback
        source.onended = () => {
            source.disconnect();
            sourceGain.disconnect();
            this.emit('audioEnded', {
                endTime: this.playbackTime
            });
        };
    }

    /**
     * Convert PCM16 samples to Float32 in the [-1, 1) range.
     * @param {Int16Array} pcm16
     * @returns {Float32Array}
     */
    pcm16ToFloat32(pcm16) {
        const float32 = new Float32Array(pcm16.length);
        for (let i = 0; i < pcm16.length; i++) {
            float32[i] = pcm16[i] / 32768; // Convert PCM16 to Float32 (-1 to 1 range)
        }
        return float32;
    }

    /** Master volume, 0..1. */
    get volume() {
        return this.gainNode.gain.value;
    }

    set volume(value) {
        // Clamp between 0 and 1
        const clampedValue = Math.max(0, Math.min(1, value));
        this.gainNode.gain.value = clampedValue;
        this.emit('volumeChange', { volume: clampedValue });
    }

    /** Master volume expressed as a percentage, 0..100. */
    get volumePercentage() {
        return this.volume * 100;
    }

    set volumePercentage(percentage) {
        this.volume = percentage / 100;
    }

    /** Frequency-domain snapshot from the analyser. @returns {Uint8Array} */
    getAnalyserData() {
        const bufferLength = this.analyser.frequencyBinCount;
        const dataArray = new Uint8Array(bufferLength);
        this.analyser.getByteFrequencyData(dataArray);
        return dataArray;
    }

    /** Time-domain (waveform) snapshot from the analyser. @returns {Uint8Array} */
    getTimeDomainData() {
        const bufferLength = this.analyser.frequencyBinCount;
        const dataArray = new Uint8Array(bufferLength);
        this.analyser.getByteTimeDomainData(dataArray);
        return dataArray;
    }

    /** Reset the scheduling clock and silence anything already scheduled. */
    reset() {
        this.playbackTime = 0;
        // Stop all currently playing sources
        this.stopAllSources();
    }

    /**
     * Silence all scheduled audio. Individual sources are not tracked, so a
     * fresh gain/analyser chain is swapped in and the old chain is faded out
     * quickly, then disconnected.
     */
    stopAllSources() {
        if (this.audioContext) {
            const oldGainNode = this.gainNode;
            this.gainNode = this.audioContext.createGain();
            this.gainNode.connect(this.audioContext.destination);
            this.analyser = this.audioContext.createAnalyser();
            this.gainNode.connect(this.analyser);

            // Fade out old audio quickly (50ms ramp), disconnect shortly after.
            if (oldGainNode) {
                const now = this.audioContext.currentTime;
                oldGainNode.gain.setValueAtTime(oldGainNode.gain.value, now);
                oldGainNode.gain.linearRampToValueAtTime(0, now + 0.05);
                setTimeout(() => {
                    oldGainNode.disconnect();
                }, 100);
            }
        }
    }

    /** Resume a suspended AudioContext (e.g. after a user gesture). */
    async resume() {
        if (this.audioContext.state === 'suspended') {
            await this.audioContext.resume();
        }
    }

    /** Snapshot of scheduling state for debugging/metrics. */
    getPlaybackStatus() {
        return {
            currentTime: this.audioContext.currentTime,
            scheduledTime: this.playbackTime,
            bufferedDuration: Math.max(0, this.playbackTime - this.audioContext.currentTime),
            state: this.audioContext.state
        };
    }
}
PCMPlayerWorklet.js ADDED
@@ -0,0 +1,563 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { EventEmitter, CustomEvent } from './EventEmitter.js';
2
+
3
+ /**
4
+ * PCMPlayerWorklet - Drop-in replacement for PCMPlayer using AudioWorklet
5
+ * Uses dynamic buffer management with backpressure for smooth playback
6
+ */
7
+ export class PCMPlayerWorklet extends EventEmitter {
8
+ constructor(audioContext, options = {}) {
9
+ super();
10
+ this.audioContext = audioContext;
11
+ this.options = options;
12
+ this.workletNode = null;
13
+ this.isInitialized = false;
14
+ this.playbackTime = 0; // For API compatibility
15
+
16
+ // Audio nodes
17
+ this.gainNode = this.audioContext.createGain();
18
+ this.gainNode.connect(this.audioContext.destination);
19
+ this.analyser = this.audioContext.createAnalyser();
20
+ this.gainNode.connect(this.analyser);
21
+
22
+ // Queue for chunks waiting to be sent
23
+ this.pendingChunks = [];
24
+ this.availableCapacity = 0;
25
+ this.isWorkletReady = false;
26
+ this.hasReceivedInitialCapacity = false;
27
+
28
+ // Metrics
29
+ this.metrics = {
30
+ chunksPlayed: 0,
31
+ underruns: 0,
32
+ bufferLevel: 0,
33
+ samplesPlayed: 0
34
+ };
35
+
36
+ // Initialize worklet
37
+ this.initPromise = this.initialize();
38
+ }
39
+
40
+ async initialize() {
41
+ if (this.isInitialized) return;
42
+
43
+ try {
44
+ // Calculate buffer parameters
45
+ const sampleRate = this.audioContext.sampleRate;
46
+ const minBufferMs = this.options.minBufferBeforePlaybackMs || 300;
47
+ const minBufferSamples = Math.floor(minBufferMs * sampleRate / 1000);
48
+
49
+ // Buffer size: enough for smooth playback but not excessive
50
+ // Target 60 seconds of buffer to prevent any overflow issues
51
+ const bufferSizeSamples = sampleRate * 60;
52
+
53
+ // Create the worklet processor code
54
+ const processorCode = `
55
+ class PCMProcessor extends AudioWorkletProcessor {
56
+ constructor() {
57
+ super();
58
+
59
+ // Ring buffer - sized appropriately
60
+ this.bufferSize = ${bufferSizeSamples};
61
+ this.ringBuffer = new Float32Array(this.bufferSize);
62
+ this.readPos = 0;
63
+ this.writePos = 0;
64
+ this.isPlaying = false;
65
+
66
+ // Configuration
67
+ this.minBufferSamples = ${minBufferSamples};
68
+ this.targetBufferSamples = ${minBufferSamples * 2}; // Target 2x min for stability
69
+
70
+ // State
71
+ this.streamEnded = false;
72
+ this.playbackCompleteReported = false;
73
+
74
+ // Stats reporting
75
+ this.frameCount = 0;
76
+ this.reportInterval = 256; // Report every ~5ms at 48kHz
77
+
78
+ this.port.onmessage = (e) => {
79
+ switch(e.data.type) {
80
+ case 'audio':
81
+ this.addAudio(e.data.data);
82
+ break;
83
+ case 'reset':
84
+ this.reset();
85
+ break;
86
+ case 'stream-ended':
87
+ this.streamEnded = true;
88
+ break;
89
+ }
90
+ };
91
+
92
+ // Send initial capacity
93
+ this.sendCapacityUpdate();
94
+ }
95
+
96
+ addAudio(float32Data) {
97
+ const samples = float32Data.length;
98
+ const available = this.getAvailableSpace();
99
+ const bufferedBefore = this.getBufferedSamples();
100
+
101
+
102
+ if (samples > available) {
103
+ // This shouldn't happen with proper backpressure
104
+ console.error('Buffer overflow - bug in backpressure. Samples:', samples, 'Available:', available, 'Buffered:', this.getBufferedSamples());
105
+ // Drop oldest data to recover
106
+ const overflow = samples - available;
107
+ this.readPos = (this.readPos + overflow) % this.bufferSize;
108
+ }
109
+
110
+ // Write to ring buffer
111
+ if (this.writePos + samples <= this.bufferSize) {
112
+ this.ringBuffer.set(float32Data, this.writePos);
113
+ this.writePos += samples;
114
+ if (this.writePos >= this.bufferSize) {
115
+ this.writePos = 0;
116
+ }
117
+ } else {
118
+ const firstPart = this.bufferSize - this.writePos;
119
+ const secondPart = samples - firstPart;
120
+ this.ringBuffer.set(float32Data.slice(0, firstPart), this.writePos);
121
+ this.ringBuffer.set(float32Data.slice(firstPart), 0);
122
+ this.writePos = secondPart;
123
+ }
124
+
125
+ // Auto-start when we have enough buffered
126
+ const buffered = this.getBufferedSamples();
127
+
128
+ if (!this.isPlaying && buffered >= this.minBufferSamples) {
129
+ const now = currentTime;
130
+ this.isPlaying = true;
131
+ // Notify that playback has started
132
+ this.port.postMessage({
133
+ type: 'playback-started',
134
+ buffered: buffered,
135
+ audioTime: now
136
+ });
137
+ }
138
+
139
+ // Report capacity after adding
140
+ this.sendCapacityUpdate();
141
+ }
142
+
143
+ getAvailableSpace() {
144
+ const used = this.getBufferedSamples();
145
+ return this.bufferSize - used - 128; // Leave small safety margin
146
+ }
147
+
148
+ getBufferedSamples() {
149
+ if (this.writePos >= this.readPos) {
150
+ return this.writePos - this.readPos;
151
+ } else {
152
+ return this.bufferSize - this.readPos + this.writePos;
153
+ }
154
+ }
155
+
156
+ sendCapacityUpdate() {
157
+ const buffered = this.getBufferedSamples();
158
+ const capacity = this.getAvailableSpace();
159
+
160
+ // Calculate how much we want to receive
161
+ // If buffer is low, request more; if it's full, request nothing
162
+ let requestSamples = 0;
163
+ if (buffered < this.targetBufferSamples) {
164
+ requestSamples = Math.min(capacity, this.targetBufferSamples - buffered);
165
+ }
166
+
167
+ this.port.postMessage({
168
+ type: 'capacity',
169
+ buffered: buffered,
170
+ capacity: capacity,
171
+ requestSamples: requestSamples,
172
+ isPlaying: this.isPlaying
173
+ });
174
+ }
175
+
176
+ process(inputs, outputs, parameters) {
177
+ const output = outputs[0];
178
+ if (!output || !output[0]) return true;
179
+
180
+ const outputChannel = output[0];
181
+ const numSamples = outputChannel.length;
182
+
183
+ // Report stats periodically
184
+ if (++this.frameCount % this.reportInterval === 0) {
185
+ this.sendCapacityUpdate();
186
+ }
187
+
188
+ if (!this.isPlaying) {
189
+ outputChannel.fill(0);
190
+ return true;
191
+ }
192
+
193
+ const buffered = this.getBufferedSamples();
194
+
195
+ if (buffered < numSamples) {
196
+ // Underrun - play what we have and fill rest with silence
197
+ let samplesRead = 0;
198
+
199
+ if (buffered > 0) {
200
+ // Play whatever samples we DO have
201
+ if (this.readPos + buffered <= this.bufferSize) {
202
+ for (let i = 0; i < buffered; i++) {
203
+ outputChannel[i] = this.ringBuffer[this.readPos + i];
204
+ }
205
+ this.readPos += buffered;
206
+ if (this.readPos >= this.bufferSize) {
207
+ this.readPos = 0;
208
+ }
209
+ } else {
210
+ // Wrap-around case
211
+ const firstPart = this.bufferSize - this.readPos;
212
+ const secondPart = buffered - firstPart;
213
+
214
+ for (let i = 0; i < firstPart; i++) {
215
+ outputChannel[i] = this.ringBuffer[this.readPos + i];
216
+ }
217
+ for (let i = 0; i < secondPart; i++) {
218
+ outputChannel[firstPart + i] = this.ringBuffer[i];
219
+ }
220
+
221
+ this.readPos = secondPart;
222
+ }
223
+ samplesRead = buffered;
224
+ }
225
+
226
+ // Fill remaining with silence
227
+ for (let i = samplesRead; i < numSamples; i++) {
228
+ outputChannel[i] = 0;
229
+ }
230
+
231
+ // Check for playback complete
232
+ if (this.streamEnded && buffered === 0) {
233
+ if (!this.playbackCompleteReported) {
234
+ this.port.postMessage({
235
+ type: 'playback-complete'
236
+ });
237
+ this.playbackCompleteReported = true;
238
+ }
239
+ this.isPlaying = false;
240
+ this.streamEnded = false;
241
+ } else {
242
+ // Request more data urgently
243
+ this.port.postMessage({
244
+ type: 'underrun',
245
+ buffered: buffered,
246
+ needed: numSamples
247
+ });
248
+ this.sendCapacityUpdate();
249
+ }
250
+ } else {
251
+ // Normal playback - read from ring buffer
252
+ if (this.readPos + numSamples <= this.bufferSize) {
253
+ for (let i = 0; i < numSamples; i++) {
254
+ outputChannel[i] = this.ringBuffer[this.readPos + i];
255
+ }
256
+ this.readPos += numSamples;
257
+ if (this.readPos >= this.bufferSize) {
258
+ this.readPos = 0;
259
+ }
260
+ } else {
261
+ // Wrap-around case
262
+ const firstPart = this.bufferSize - this.readPos;
263
+ const secondPart = numSamples - firstPart;
264
+
265
+ for (let i = 0; i < firstPart; i++) {
266
+ outputChannel[i] = this.ringBuffer[this.readPos + i];
267
+ }
268
+ for (let i = 0; i < secondPart; i++) {
269
+ outputChannel[firstPart + i] = this.ringBuffer[i];
270
+ }
271
+
272
+ this.readPos = secondPart;
273
+ }
274
+ }
275
+
276
+ return true;
277
+ }
278
+
279
+ reset() {
280
+ this.readPos = 0;
281
+ this.writePos = 0;
282
+ this.ringBuffer.fill(0);
283
+ this.isPlaying = false;
284
+ this.streamEnded = false;
285
+ this.playbackCompleteReported = false;
286
+ this.sendCapacityUpdate();
287
+ }
288
+ }
289
+
290
+ registerProcessor('pcm-processor', PCMProcessor);
291
+ `;
292
+
293
+ // Create and load worklet
294
+ const blob = new Blob([processorCode], { type: 'application/javascript' });
295
+ const workletUrl = URL.createObjectURL(blob);
296
+
297
+ await this.audioContext.audioWorklet.addModule(workletUrl);
298
+ URL.revokeObjectURL(workletUrl);
299
+
300
+ // Create worklet node
301
+ this.workletNode = new AudioWorkletNode(this.audioContext, 'pcm-processor');
302
+ this.workletNode.connect(this.gainNode);
303
+
304
+ // Handle messages from worklet
305
+ this.workletNode.port.onmessage = (e) => {
306
+ switch (e.data.type) {
307
+ case 'capacity':
308
+ this.handleCapacityUpdate(e.data);
309
+ break;
310
+
311
+ case 'underrun':
312
+ this.metrics.underruns++;
313
+ console.warn(`[MAIN THREAD] ⚠️ UNDERRUN #${this.metrics.underruns} detected! buffered=${e.data.buffered} samples, needed=${e.data.needed} samples`);
314
+ // Try to send more data immediately
315
+ this.processPendingChunks();
316
+ break;
317
+
318
+ case 'playback-started':
319
+ console.log(`[MAIN THREAD] Received playback-started at performance.now=${performance.now().toFixed(2)}ms, audioContext.currentTime=${this.audioContext.currentTime.toFixed(3)}s, worklet reported audioTime=${e.data.audioTime}s`);
320
+ this.emit('firstPlayback', {
321
+ startTime: this.audioContext.currentTime,
322
+ bufferedSamples: e.data.buffered
323
+ });
324
+ break;
325
+
326
+ case 'playback-complete':
327
+ this.emit('audioEnded', {
328
+ endTime: this.audioContext.currentTime
329
+ });
330
+ break;
331
+ }
332
+ };
333
+
334
+ this.isInitialized = true;
335
+ this.isWorkletReady = true;
336
+ } catch (error) {
337
+ console.error('Failed to initialize PCMPlayerWorklet:', error);
338
+ throw error;
339
+ }
340
+ }
341
+
342
+ handleCapacityUpdate(data) {
343
+ this.availableCapacity = data.capacity;
344
+ this.metrics.bufferLevel = data.buffered;
345
+
346
+ // console.log(`[CAPACITY] Update at ${performance.now().toFixed(2)}ms: capacity=${data.capacity}, buffered=${data.buffered}, pending=${this.pendingChunks.length}`);
347
+
348
+ // Mark that we've received initial capacity
349
+ if (!this.hasReceivedInitialCapacity) {
350
+ this.hasReceivedInitialCapacity = true;
351
+ // console.log(`[CAPACITY] *** FIRST capacity received at ${performance.now().toFixed(2)}ms, processing ${this.pendingChunks.length} pending chunks`);
352
+ // Process any chunks that were waiting for initial capacity
353
+ if (this.pendingChunks.length > 0) {
354
+ this.processPendingChunks();
355
+ }
356
+ }
357
+
358
+ // If worklet is requesting data, try to send it
359
+ if (data.requestSamples > 0 && this.pendingChunks.length > 0) {
360
+ this.processPendingChunks();
361
+ }
362
+ }
363
+
364
+ processPendingChunks() {
365
+ if (!this.isWorkletReady || this.pendingChunks.length === 0) {
366
+ return;
367
+ }
368
+
369
+ // Don't send if we don't know capacity yet
370
+ if (this.availableCapacity <= 0) {
371
+ return;
372
+ }
373
+
374
+ // Send ONE chunk if it fits, then wait for next capacity update
375
+ // This prevents race conditions from sending multiple chunks before worklet updates
376
+ const chunk = this.pendingChunks[0];
377
+
378
+ if (chunk.length <= this.availableCapacity) {
379
+ // Send the whole chunk
380
+ this.pendingChunks.shift();
381
+ this.workletNode.port.postMessage({
382
+ type: 'audio',
383
+ data: chunk
384
+ });
385
+ // Set capacity to 0 to prevent sending more until we get an update
386
+ this.availableCapacity = 0;
387
+ } else if (this.availableCapacity > 4096) {
388
+ // Send partial chunk only if we have significant space
389
+ const partial = chunk.slice(0, this.availableCapacity);
390
+ console.log(`Sending partial: ${partial.length} samples from ${chunk.length} (capacity: ${this.availableCapacity})`);
391
+ this.pendingChunks[0] = chunk.slice(this.availableCapacity);
392
+ this.workletNode.port.postMessage({
393
+ type: 'audio',
394
+ data: partial
395
+ });
396
+ // Set capacity to 0 to prevent sending more until we get an update
397
+ this.availableCapacity = 0;
398
+ } else {
399
+ console.log(`Not sending - chunk ${chunk.length} samples, capacity ${this.availableCapacity}`);
400
+ }
401
+ // else: Not enough space, wait for next capacity update
402
+
403
+ // If all chunks sent and stream ended, notify worklet
404
+ if (this.pendingChunks.length === 0 && this.pendingStreamEnd) {
405
+ this.workletNode.port.postMessage({ type: 'stream-ended' });
406
+ this.pendingStreamEnd = false;
407
+ }
408
+ }
409
+
410
+ playAudio(data) {
411
+ if (!this.isInitialized) {
412
+ // Queue the data if not initialized yet
413
+ if (!this.initPendingQueue) {
414
+ this.initPendingQueue = [];
415
+ this.initPromise.then(() => {
416
+ // Process queued data
417
+ const queue = this.initPendingQueue;
418
+ this.initPendingQueue = null;
419
+ for (const queuedData of queue) {
420
+ this.playAudio(queuedData);
421
+ }
422
+ });
423
+ }
424
+ this.initPendingQueue.push(data);
425
+ return;
426
+ }
427
+
428
+ if (this.audioContext.state !== 'running') {
429
+ return;
430
+ }
431
+
432
+ // Convert to Float32Array if needed
433
+ const float32Array = data instanceof Int16Array
434
+ ? this.pcm16ToFloat32(data)
435
+ : data;
436
+
437
+ // Add to pending queue
438
+ this.pendingChunks.push(float32Array);
439
+
440
+ // Only try to process if we've received initial capacity and have space
441
+ // Otherwise wait for capacity update from worklet
442
+ if (this.hasReceivedInitialCapacity && this.availableCapacity > 0) {
443
+ this.processPendingChunks();
444
+ }
445
+
446
+ // Update metrics
447
+ this.metrics.chunksPlayed++;
448
+
449
+ // Update playback time for compatibility
450
+ const duration = float32Array.length / this.audioContext.sampleRate;
451
+ this.playbackTime = this.audioContext.currentTime + duration;
452
+
453
+ // Emit events for compatibility
454
+ this.emit('audioStarted', {
455
+ startTime: this.audioContext.currentTime,
456
+ duration: duration,
457
+ samples: float32Array.length
458
+ });
459
+ }
460
+
461
+ notifyStreamEnded() {
462
+ if (this.pendingChunks.length > 0) {
463
+ // Still have chunks to send, mark for later
464
+ this.pendingStreamEnd = true;
465
+ } else {
466
+ // No chunks left, send immediately
467
+ if (this.workletNode) {
468
+ this.workletNode.port.postMessage({ type: 'stream-ended' });
469
+ }
470
+ }
471
+ }
472
+
473
+ pcm16ToFloat32(pcm16) {
474
+ const float32 = new Float32Array(pcm16.length);
475
+ for (let i = 0; i < pcm16.length; i++) {
476
+ float32[i] = pcm16[i] / 32768;
477
+ }
478
+ return float32;
479
+ }
480
+
481
+ reset() {
482
+ this.playbackTime = 0;
483
+ this.pendingChunks = [];
484
+ this.pendingStreamEnd = false;
485
+ this.availableCapacity = 0;
486
+
487
+ if (this.workletNode) {
488
+ this.workletNode.port.postMessage({ type: 'reset' });
489
+ }
490
+
491
+ // Quick fade out to avoid clicks
492
+ if (this.gainNode) {
493
+ const now = this.audioContext.currentTime;
494
+ this.gainNode.gain.setValueAtTime(this.gainNode.gain.value, now);
495
+ this.gainNode.gain.linearRampToValueAtTime(0, now + 0.05);
496
+ setTimeout(() => {
497
+ this.gainNode.gain.value = 1;
498
+ }, 100);
499
+ }
500
+ }
501
+
502
+ stopAllSources() {
503
+ this.reset();
504
+ }
505
+
506
+ async resume() {
507
+ if (this.audioContext.state === 'suspended') {
508
+ await this.audioContext.resume();
509
+ }
510
+ }
511
+
512
+ get volume() {
513
+ return this.gainNode.gain.value;
514
+ }
515
+
516
+ set volume(value) {
517
+ const clampedValue = Math.max(0, Math.min(1, value));
518
+ this.gainNode.gain.value = clampedValue;
519
+ this.emit('volumeChange', { volume: clampedValue });
520
+ }
521
+
522
+ get volumePercentage() {
523
+ return this.volume * 100;
524
+ }
525
+
526
+ set volumePercentage(percentage) {
527
+ this.volume = percentage / 100;
528
+ }
529
+
530
+ getAnalyserData() {
531
+ const bufferLength = this.analyser.frequencyBinCount;
532
+ const dataArray = new Uint8Array(bufferLength);
533
+ this.analyser.getByteFrequencyData(dataArray);
534
+ return dataArray;
535
+ }
536
+
537
+ getTimeDomainData() {
538
+ const bufferLength = this.analyser.frequencyBinCount;
539
+ const dataArray = new Uint8Array(bufferLength);
540
+ this.analyser.getByteTimeDomainData(dataArray);
541
+ return dataArray;
542
+ }
543
+
544
+ getPlaybackStatus() {
545
+ const bufferMs = this.metrics.bufferLevel
546
+ ? (this.metrics.bufferLevel / this.audioContext.sampleRate) * 1000
547
+ : 0;
548
+
549
+ return {
550
+ currentTime: this.audioContext.currentTime,
551
+ scheduledTime: this.playbackTime,
552
+ bufferedDuration: bufferMs / 1000,
553
+ state: this.audioContext.state,
554
+ worklet: {
555
+ bufferLevelSamples: this.metrics.bufferLevel,
556
+ bufferLevelMs: bufferMs,
557
+ underruns: this.metrics.underruns,
558
+ chunksPlayed: this.metrics.chunksPlayed,
559
+ pendingChunks: this.pendingChunks.length
560
+ }
561
+ };
562
+ }
563
+ }
README.md ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Soprano 1.1 ONNX Web Demo
3
+ emoji: 🎧
4
+ colorFrom: blue
5
+ colorTo: indigo
6
+ sdk: static
7
+ short_description: Real-time text-to-speech in the browser using ONNX
8
+ app_file: index.html
9
+ pinned: false
10
+ models:
11
+ - KevinAHM/soprano-1.1-onnx
12
+ license: apache-2.0
13
+ ---
14
+
15
+ <!-- Version 0.0.3 -->
16
+ <div align="center">
17
+
18
+ # Soprano 1.1 ONNX Streaming — Instant Text‑to‑Speech in the Browser (WASM)
19
+
20
+ [![Upstream](https://img.shields.io/badge/Upstream-ekwek1%2Fsoprano-black?logo=github)](https://github.com/ekwek1/soprano)
21
+ [![Hugging Face Model](https://img.shields.io/badge/HuggingFace-Model-orange?logo=huggingface)](https://huggingface.co/KevinAHM/soprano-onnx)
22
+ [![Hugging Face Demo for Soprano Web Onnx](https://img.shields.io/badge/HuggingFace-Demo-yellow?logo=huggingface)](https://huggingface.co/spaces/KevinAHM/soprano-web-onnx)
23
+
24
+ A **static, client-side** browser demo that runs the Soprano TTS pipeline using **onnxruntime-web**.
25
+
26
+ Soprano 1.1 features significant performance optimizations, including moving all heavy inference to a **Web Worker** and utilizing an **int8 quantized decoder** for superior real-time speeds on consumer CPUs.
27
+
28
+ ---
29
+
30
+ ## Requirements
31
+
32
+ - A modern browser (Chrome, Edge, Firefox, Safari).
33
+ - You must serve this folder over HTTP (opening `index.html` via `file://` usually breaks `fetch()` / module loading).
34
+ - The demo loads `onnxruntime-web` and `@huggingface/transformers` from a CDN by default (network required unless you vendor them).
35
+ - The model files are large; plan to use **Git LFS** or GitHub Releases if you publish them.
36
+
37
+ ---
38
+
39
+ ## Folder layout
40
+
41
+ Place model artifacts under `./models/`:
42
+
43
+ ```text
44
+ .
45
+ ├─ index.html
46
+ ├─ onnx-streaming.js (Main Thread Client)
47
+ ├─ inference-worker.js (Heavy Inference Engine)
48
+ ├─ PCMPlayerWorklet.js (Audio Playback Worklet)
49
+ ├─ style.css
50
+ ├─ onnx/
51
+ │ ├─ soprano_backbone_kv_fp32.onnx
52
+ │ └─ soprano_decoder_int8.onnx
53
+ ...
54
+ ```
55
+
56
+ Notes:
57
+ - ONNX models live in `onnx/` following HuggingFace convention.
58
+ - The decoder uses external weights (`.onnx.data` file must be present alongside the `.onnx` file).
59
+ - Tokenizer files are in the root directory.
60
+
61
+ ---
62
+
63
+ ## Run locally
64
+
65
+ Use any static file server from this directory, for example:
66
+
67
+ ```bash
68
+ python -m http.server 8085
69
+ ```
70
+
71
+ Then open `http://localhost:8085`.
72
+
73
+ ---
74
+
75
+ ## Configuration
76
+
77
+ Model paths are defined near the top of `onnx-streaming.js` in the `MODELS` object.
78
+
79
+ Sampling defaults are set in `onnx-streaming.js` (constructor):
80
+ - `temperature`
81
+ - `topK`
82
+ - `topP`
83
+ - `repetitionPenalty`
84
+
85
+ ---
86
+
87
+ ## Troubleshooting
88
+
89
+ - **"Load failed" / model never becomes Ready**
90
+ - Verify the `onnx/` filenames match `MODELS` in `onnx-streaming.js`
91
+ - Check DevTools → Network for a missing `.onnx.data` file (404)
92
+ - Confirm `/` contains `tokenizer.json` (and related files)
93
+ - **Performance notes**
94
+ - **Web Worker:** Keeps the UI responsive (no lag during generation).
95
+ - **int8 Decoder:** Optimized for high-throughput CPU inference.
96
+ - Achieves real-time streaming on modern hardware.
97
+
98
+ ---
99
+
100
+ ## License & attribution
101
+
102
+ Soprano is released under **Apache-2.0** in the upstream repository:
103
+ https://github.com/ekwek1/soprano
104
+
105
+
106
+
107
+
config.json ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Qwen3ForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 3,
8
+ "dtype": "bfloat16",
9
+ "eos_token_id": 3,
10
+ "head_dim": 128,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 512,
13
+ "initializer_range": 0.041666666666666664,
14
+ "intermediate_size": 2304,
15
+ "layer_types": [
16
+ "full_attention",
17
+ "full_attention",
18
+ "full_attention",
19
+ "full_attention",
20
+ "full_attention",
21
+ "full_attention",
22
+ "full_attention",
23
+ "full_attention",
24
+ "full_attention",
25
+ "full_attention",
26
+ "full_attention",
27
+ "full_attention",
28
+ "full_attention",
29
+ "full_attention",
30
+ "full_attention",
31
+ "full_attention",
32
+ "full_attention"
33
+ ],
34
+ "max_position_embeddings": 512,
35
+ "max_window_layers": 17,
36
+ "model_type": "qwen3",
37
+ "num_attention_heads": 4,
38
+ "num_hidden_layers": 17,
39
+ "num_key_value_heads": 1,
40
+ "rms_norm_eps": 1e-06,
41
+ "rope_scaling": null,
42
+ "rope_theta": 10000,
43
+ "sliding_window": null,
44
+ "tie_word_embeddings": false,
45
+ "transformers_version": "4.56.1",
46
+ "use_cache": true,
47
+ "use_sliding_window": false,
48
+ "vocab_size": 8192
49
+ }
generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 3,
4
+ "eos_token_id": 3,
5
+ "transformers_version": "4.56.1"
6
+ }
index.html ADDED
@@ -0,0 +1,168 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+
4
+ <head>
5
+ <meta charset="UTF-8">
6
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
7
+ <title>Soprano - Neural TTS in Your Browser</title>
8
+ <link rel="stylesheet" href="style.css">
9
+ <link rel="preconnect" href="https://fonts.googleapis.com">
10
+ <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
11
+ <link href="https://fonts.googleapis.com/css2?family=DM+Sans:wght@400;500;600&family=Syne:wght@500;600;700;800&family=Space+Mono:wght@400;700&display=swap" rel="stylesheet">
12
+ <!-- ONNX Runtime Web -->
13
+ <script src="https://cdn.jsdelivr.net/npm/onnxruntime-web/dist/ort.min.js"></script>
14
+ </head>
15
+
16
+ <body>
17
+ <!-- Ambient Background Effects -->
18
+ <div class="ambient-layer">
19
+ <div class="orb orb--primary"></div>
20
+ <div class="orb orb--secondary"></div>
21
+ <div class="orb orb--tertiary"></div>
22
+ <div class="grid-overlay"></div>
23
+ </div>
24
+
25
+ <div class="app-shell">
26
+ <!-- Hero Header -->
27
+ <header class="hero">
28
+ <div class="hero__brand">
29
+ <div class="logo">
30
+ <svg class="logo__icon" viewBox="0 0 32 32" fill="none">
31
+ <path d="M16 4C16 4 8 8 8 16C8 24 16 28 16 28" stroke="url(#logoGrad)" stroke-width="2" stroke-linecap="round"/>
32
+ <path d="M16 4C16 4 24 8 24 16C24 24 16 28 16 28" stroke="url(#logoGrad)" stroke-width="2" stroke-linecap="round"/>
33
+ <path d="M12 10V22" stroke="url(#logoGrad)" stroke-width="2" stroke-linecap="round"/>
34
+ <path d="M16 8V24" stroke="url(#logoGrad)" stroke-width="2" stroke-linecap="round"/>
35
+ <path d="M20 10V22" stroke="url(#logoGrad)" stroke-width="2" stroke-linecap="round"/>
36
+ <defs>
37
+ <linearGradient id="logoGrad" x1="8" y1="4" x2="24" y2="28" gradientUnits="userSpaceOnUse">
38
+ <stop stop-color="#3b82f6"/>
39
+ <stop offset="1" stop-color="#8b5cf6"/>
40
+ </linearGradient>
41
+ </defs>
42
+ </svg>
43
+ <span class="logo__text">Soprano</span>
44
+ </div>
45
+ <div class="hero__badge">
46
+ <span class="badge">ONNX Runtime</span>
47
+ </div>
48
+ </div>
49
+ <p class="hero__tagline">Real-time neural text-to-speech, running entirely in your browser</p>
50
+ </header>
51
+
52
+ <main class="main">
53
+ <!-- Input Section -->
54
+ <section class="input-section">
55
+ <div class="textarea-wrap">
56
+ <textarea
57
+ id="text-input"
58
+ placeholder="Type or paste text to synthesize..."
59
+ aria-label="Text to synthesize"
60
+ maxlength="500"
61
+ ></textarea>
62
+ <div class="textarea-meta">
63
+ <span class="char-count"><span id="char-count">0</span>/500</span>
64
+ </div>
65
+ </div>
66
+
67
+ <!-- Sample Texts -->
68
+ <div class="sample-texts">
69
+ <span class="sample-texts__label">Try:</span>
70
+ <button class="sample-btn" data-text="Hello, welcome to Soprano. This is a demonstration of real-time text to speech running entirely in your browser.">Demo greeting</button>
71
+ <button class="sample-btn" data-text="I completely understand how frustrating this must be for you. Let me take care of this right away and make sure we get it resolved.">Empathetic support</button>
72
+ <button class="sample-btn" data-text="Wow, congratulations! That's absolutely fantastic news! I'm so thrilled for you!">Excited</button>
73
+ <button class="sample-btn" data-text="I'm really sorry to hear about your loss. Please know that we're here for you, and take all the time you need.">Compassionate</button>
74
+ <button class="sample-btn" data-text="Great question! I'd be happy to walk you through this step by step. First, let's start with the basics.">Helpful guide</button>
75
+ </div>
76
+
77
+ <div class="controls">
78
+ <button id="generate-btn" class="btn btn--primary">
79
+ <svg class="btn__icon" viewBox="0 0 24 24" fill="currentColor">
80
+ <polygon points="5,3 19,12 5,21"/>
81
+ </svg>
82
+ <span class="btn__text">Generate Audio</span>
83
+ <div class="btn__loader" id="btn-loader"></div>
84
+ </button>
85
+ <button id="stop-btn" class="btn btn--secondary" disabled>
86
+ <svg class="btn__icon" viewBox="0 0 24 24" fill="currentColor">
87
+ <rect x="6" y="6" width="12" height="12" rx="1"/>
88
+ </svg>
89
+ <span class="btn__text">Stop</span>
90
+ </button>
91
+ </div>
92
+ </section>
93
+
94
+ <!-- Output Section: Visualizer + Metrics -->
95
+ <section class="output-section">
96
+ <div class="visualizer-panel">
97
+ <div class="visualizer-panel__header">
98
+ <span class="visualizer-panel__title">Audio Output</span>
99
+ <div class="status-indicator" id="status-indicator">
100
+ <span class="status-dot"></span>
101
+ <span class="status-text" id="stat-status">Idle</span>
102
+ </div>
103
+ </div>
104
+ <div class="visualizer-container">
105
+ <canvas id="visualizer-waveform"></canvas>
106
+ <canvas id="visualizer-bars" class="visualizer-bars"></canvas>
107
+ </div>
108
+ </div>
109
+
110
+ <div class="metrics-panel">
111
+ <h3 class="metrics-panel__title">Performance</h3>
112
+
113
+ <div class="metric">
114
+ <div class="metric__header">
115
+ <span class="metric__label">Time to First Byte</span>
116
+ <button class="metric__info" aria-label="TTFB explanation" data-tooltip="Time from request until first audio chunk is received">
117
+ <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
118
+ <circle cx="12" cy="12" r="10"/>
119
+ <path d="M12 16v-4M12 8h.01"/>
120
+ </svg>
121
+ </button>
122
+ </div>
123
+ <div class="metric__value">
124
+ <span class="metric__number" id="stat-ttfb">--</span>
125
+ <span class="metric__unit">ms</span>
126
+ </div>
127
+ <div class="metric__bar">
128
+ <div class="metric__bar-fill" id="ttfb-bar"></div>
129
+ </div>
130
+ </div>
131
+
132
+ <div class="metric metric--highlight">
133
+ <div class="metric__header">
134
+ <span class="metric__label">Real-Time Factor</span>
135
+ <button class="metric__info" aria-label="RTFx explanation" data-tooltip="Audio duration divided by processing time. Values above 1x mean faster than real-time playback.">
136
+ <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
137
+ <circle cx="12" cy="12" r="10"/>
138
+ <path d="M12 16v-4M12 8h.01"/>
139
+ </svg>
140
+ </button>
141
+ </div>
142
+ <div class="metric__value">
143
+ <span class="metric__number metric__number--large" id="stat-rtfx">--</span>
144
+ <span class="metric__unit">x</span>
145
+ </div>
146
+ <div class="metric__context" id="rtfx-context">&gt;1x = faster than real-time</div>
147
+ </div>
148
+
149
+ <div class="metric metric--status">
150
+ <span class="metric__label">Model</span>
151
+ <div class="model-status" id="model-status">
152
+ <span class="model-status__dot"></span>
153
+ <span class="model-status__text">Not loaded</span>
154
+ </div>
155
+ </div>
156
+ </div>
157
+ </section>
158
+ </main>
159
+
160
+ <footer class="footer">
161
+ <p>&copy; 2026 Soprano Audio Research</p>
162
+ </footer>
163
+ </div>
164
+
165
+ <script type="module" src="onnx-streaming.js?v=12"></script>
166
+ </body>
167
+
168
+ </html>
index.js ADDED
@@ -0,0 +1,203 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { PCMPlayer } from './PCMPlayer.js';
2
+ import { PCMPlayerWorklet } from './PCMPlayerWorklet.js';
3
+ import { TTSStreamingClient } from './TTSStreamingClient.js';
4
+ import { JitterBuffer } from './JitterBuffer.js';
5
+ import { EventEmitter } from './EventEmitter.js';
6
+
7
/**
 * High-level facade that couples a streaming TTS client to a PCM audio
 * player over a shared AudioContext. Audio chunks emitted by the client
 * are forwarded to the player, and client/player events are surfaced
 * through the overridable `onEvent(eventName, detail)` callback.
 */
export class TTSPCMPlayer {
    /**
     * @param {Object} options - Configuration; see defaults below. Any
     *   extra keys are carried through into `this.config` via the spread.
     */
    constructor(options = {}) {
        // Configuration with defaults. Nullish coalescing (??) is used so
        // explicit falsy-but-valid values (0, '') are honoured instead of
        // being silently replaced by the default, as `||` would do.
        this.config = {
            endpoint: options.endpoint ?? 'http://localhost:8000/v1',
            sampleRate: options.sampleRate ?? 24000,
            audioBufferingMs: options.audioBufferingMs ?? 50,
            minBufferBeforePlaybackMs: options.minBufferBeforePlaybackMs ?? 100,
            autoPlay: options.autoPlay !== false,
            volume: options.volume ?? 1.0,
            voice: options.voice ?? 'broom_salesman',
            model: options.model ?? 'echo-tts',
            temperature: options.temperature ?? 1.0,
            topP: options.topP ?? 0.95,
            topK: options.topK ?? 50,
            chatId: options.chatId ?? null,
            useWorklet: options.useWorklet !== false, // Default to true for better performance
            ...options
        };

        // Audio context pinned to the configured sample rate.
        this.audioContext = new (window.AudioContext || window.webkitAudioContext)({
            sampleRate: this.config.sampleRate
        });

        // Worklet-based player by default (lower latency, off-main-thread
        // mixing); legacy PCMPlayer as a fallback.
        if (this.config.useWorklet) {
            this.player = new PCMPlayerWorklet(this.audioContext, {
                minBufferBeforePlaybackMs: this.config.minBufferBeforePlaybackMs
            });
        } else {
            this.player = new PCMPlayer(this.audioContext);
        }

        this.client = new TTSStreamingClient({
            endpoint: this.config.endpoint,
            sampleRate: this.config.sampleRate,
            audioBufferingMs: this.config.audioBufferingMs,
            minBufferBeforePlaybackMs: this.config.minBufferBeforePlaybackMs,
            voice: this.config.voice,
            model: this.config.model,
            temperature: this.config.temperature,
            topP: this.config.topP,
            topK: this.config.topK
        });

        // Set initial volume
        this.player.volume = this.config.volume;

        // Wire client/player events into this facade.
        this.setupEventHandlers();
    }

    /** Forward client audio to the player and re-emit client/player events. */
    setupEventHandlers() {
        // Forward audio data from client to player (only while auto-playing
        // and the context is actually running).
        this.client.addEventListener('audio', (event) => {
            if (this.config.autoPlay && this.audioContext.state === 'running') {
                this.player.playAudio(event.detail.data);
            }
        });

        // Forward all client events through onEvent().
        ['synthesisStarted', 'synthesisCompleted', 'synthesisCancelled', 'error', 'progress', 'firstByte', 'firstAudioChunk'].forEach(eventName => {
            this.client.addEventListener(eventName, (event) => {
                if (this.onEvent) {
                    this.onEvent(eventName, event.detail);
                }

                // When synthesis completes, let the worklet player flush its
                // queue and report end-of-stream.
                if (eventName === 'synthesisCompleted' && this.config.useWorklet && this.player.notifyStreamEnded) {
                    this.player.notifyStreamEnded();
                }
            });
        });

        // Forward player events through onEvent().
        ['volumeChange', 'audioStarted', 'audioEnded', 'firstPlayback'].forEach(eventName => {
            this.player.addEventListener(eventName, (event) => {
                if (this.onEvent) {
                    this.onEvent(eventName, event.detail);
                }
            });
        });
    }

    /**
     * Start synthesizing `text`. Cancels any in-flight synthesis first so
     * a new request interrupts immediately.
     * @returns {Promise} the client's synthesis promise.
     */
    async synthesize(text, userVoice = null, userPrompt = null, userVoiceFormat = null, chatId = null, extraBody = null) {
        // Resume audio context if suspended (browser autoplay policies).
        if (this.audioContext.state === 'suspended') {
            await this.audioContext.resume();
        }

        // Cancel any ongoing synthesis before starting a new one; this
        // ensures immediate interrupt behavior.
        if (this.client.isStreaming) {
            console.log('[TTSPCMPlayer] Auto-cancelling ongoing synthesis before starting new one');
            this.cancel();
        }

        // Use provided chatId or fall back to the configured one.
        const effectiveChatId = chatId || this.config.chatId;

        return this.client.synthesize(text, userVoice, userPrompt, userVoiceFormat, effectiveChatId, extraBody);
    }

    /** Update the chat id on both config and client. */
    setChatId(chatId) {
        this.config.chatId = chatId;
        if (this.client) {
            this.client.setChatId(chatId);
        }
    }

    /** Abort the HTTP stream and stop audio playback immediately. */
    cancel() {
        this.client.cancel();
        this.player.reset();
    }

    /** Enable auto-play, resuming the context if needed. */
    async play() {
        if (this.audioContext.state === 'suspended') {
            await this.audioContext.resume();
        }
        this.config.autoPlay = true;
    }

    /** Disable auto-play; incoming audio is dropped rather than played. */
    pause() {
        this.config.autoPlay = false;
    }

    /** Change the synthesis voice. */
    setVoice(voice) {
        this.client.setVoice(voice);
        this.config.voice = voice;
    }

    /**
     * Change the API endpoint; config is only updated if the client
     * accepted the new endpoint.
     * @returns {boolean} whether the endpoint was accepted.
     */
    setEndpoint(endpoint) {
        const result = this.client.setEndpoint(endpoint);
        if (result) {
            this.config.endpoint = endpoint;
        }
        return result;
    }

    /** Merge partial config into both the client and this facade. */
    updateConfig(config) {
        this.client.updateConfig(config);
        Object.assign(this.config, config);
    }

    /** Linear volume in [0, 1] (delegated to the player). */
    get volume() {
        return this.player.volume;
    }

    set volume(value) {
        this.player.volume = value;
    }

    /** Volume on a 0–100 scale (delegated to the player). */
    get volumePercentage() {
        return this.player.volumePercentage;
    }

    set volumePercentage(value) {
        this.player.volumePercentage = value;
    }

    /** Combined status snapshot of client, player, and audio context. */
    getStatus() {
        return {
            client: this.client.getStatus(),
            player: this.player.getPlaybackStatus(),
            audioContext: {
                state: this.audioContext.state,
                sampleRate: this.audioContext.sampleRate,
                currentTime: this.audioContext.currentTime
            }
        };
    }

    /** Frequency-domain analyser data (delegated to the player). */
    getAnalyserData() {
        return this.player.getAnalyserData();
    }

    /** Time-domain analyser data (delegated to the player). */
    getTimeDomainData() {
        return this.player.getTimeDomainData();
    }

    /**
     * Event-handler callback; override to receive all client/player
     * events. Default implementation just logs.
     */
    onEvent(eventName, data) {
        console.log(`[TTSPCMPlayer] ${eventName}:`, data);
    }
}
196
+
197
+ // Export all components for advanced usage
198
+ export { PCMPlayer, PCMPlayerWorklet, TTSStreamingClient, JitterBuffer, EventEmitter };
199
+
200
+ // Create a simple factory function for easy instantiation
201
+ export function createTTSPlayer(options) {
202
+ return new TTSPCMPlayer(options);
203
+ }
inference-worker.js ADDED
@@ -0,0 +1,724 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
// ONNX Runtime Web Worker (classic script — loaded via importScripts).
console.log('Worker Script Starting (Classic)...');
self.postMessage({ type: 'status', status: 'Worker Thread Started', state: 'idle' });

// Pull onnxruntime-web in from the CDN. A failure here is logged but not
// fatal at load time; later session creation will surface the problem.
try {
    const ortVersion = '1.20.0';
    importScripts(`https://cdn.jsdelivr.net/npm/onnxruntime-web@${ortVersion}/dist/ort.min.js`);
} catch (e) {
    console.error('Failed to load ORT in worker:', e);
}
11
+
12
// Configuration: model artifact locations, relative to the worker script.
const MODELS = {
    backbone: './onnx/soprano_backbone_kv_fp32.onnx',
    decoder: './onnx/soprano_decoder_int8.onnx',
    tokenizer: './' // Tokenizer loading still needs context; may need to be passed in from the main thread
};

// The Hugging Face tokenizer library must be imported in a worker-friendly
// way. The main-thread import was dynamic; Transformers.js usually works
// inside workers, so the same approach is attempted here.

const RECEPTIVE_FIELD = 4;   // NOTE(review): presumably the decoder's context window in tokens — confirm
const TOKEN_SIZE = 2048;     // NOTE(review): presumably samples produced per token — confirm
const SAMPLE_RATE = 32000;   // audio sample rate constant (Hz)

// Mutable worker state
let backboneSession = null;  // ORT session for the backbone model
let decoderSession = null;   // ORT session for the decoder model
let tokenizer = null;        // lazily-loaded tokenizer instance
let isGenerating = false;    // true while a generation request is running
let isReady = false;         // true once models/tokenizer are loaded
33
+
34
// FP16 lookup table: maps every 16-bit half-float bit pattern to its
// Float32 value, so decoding becomes a single array read later.
let fp16Lookup = new Float32Array(65536);
let isFp16Backbone = false;

// Helpers
/**
 * Populate fp16Lookup per IEEE 754 binary16: 1 sign bit, 5 exponent bits
 * (bias 15), 10 fraction bits. Exponent 0 => subnormal/zero (no implicit
 * leading 1); exponent 31 => Infinity or NaN.
 */
function initFp16Lookup() {
    for (let bits = 0; bits < 65536; bits++) {
        const sign = (bits & 0x8000) ? -1 : 1;
        const exponent = (bits >> 10) & 0x1F;
        const fraction = bits & 0x03FF;
        let value;
        if (exponent === 0) {
            // Subnormal (or signed zero).
            value = sign * Math.pow(2, -14) * (fraction / 1024);
        } else if (exponent === 31) {
            value = fraction !== 0 ? NaN : sign * Infinity;
        } else {
            value = sign * Math.pow(2, exponent - 15) * (1 + fraction / 1024);
        }
        fp16Lookup[bits] = value;
    }
}
+
54
+ // ----------------------------------------------------------------------------
55
+ // Text Preprocessing (Ported from onnx-streaming.js)
56
+ // ----------------------------------------------------------------------------
57
+ // ... (Including the full text preprocessing logic here to keep worker self-contained) ...
58
+ // For brevity in this tool call, I will include the necessary functions.
59
+ // Ideally, these would be in a shared utils file, but I'll paste them to ensure it works.
60
+
61
+ const ONES = ['', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine', 'ten', 'eleven', 'twelve', 'thirteen', 'fourteen', 'fifteen', 'sixteen', 'seventeen', 'eighteen', 'nineteen'];
62
+ const TENS = ['', '', 'twenty', 'thirty', 'forty', 'fifty', 'sixty', 'seventy', 'eighty', 'ninety'];
63
+ const ORDINAL_ONES = ['', 'first', 'second', 'third', 'fourth', 'fifth', 'sixth', 'seventh', 'eighth', 'ninth', 'tenth', 'eleventh', 'twelfth', 'thirteenth', 'fourteenth', 'fifteenth', 'sixteenth', 'seventeenth', 'eighteenth', 'nineteenth'];
64
+ const ORDINAL_TENS = ['', '', 'twentieth', 'thirtieth', 'fortieth', 'fiftieth', 'sixtieth', 'seventieth', 'eightieth', 'ninetieth'];
65
+
66
+ function numberToWords(num, options = {}) {
67
+ const { andword = '', zero = 'zero', group = 0 } = options;
68
+ if (num === 0) return zero;
69
+ const convert = (n) => {
70
+ if (n < 20) return ONES[n];
71
+ if (n < 100) return TENS[Math.floor(n / 10)] + (n % 10 ? ' ' + ONES[n % 10] : '');
72
+ if (n < 1000) {
73
+ const remainder = n % 100;
74
+ return ONES[Math.floor(n / 100)] + ' hundred' + (remainder ? (andword ? ' ' + andword + ' ' : ' ') + convert(remainder) : '');
75
+ }
76
+ if (n < 1000000) {
77
+ const thousands = Math.floor(n / 1000);
78
+ const remainder = n % 1000;
79
+ return convert(thousands) + ' thousand' + (remainder ? ' ' + convert(remainder) : '');
80
+ }
81
+ if (n < 1000000000) {
82
+ const millions = Math.floor(n / 1000000);
83
+ const remainder = n % 1000000;
84
+ return convert(millions) + ' million' + (remainder ? ' ' + convert(remainder) : '');
85
+ }
86
+ const billions = Math.floor(n / 1000000000);
87
+ const remainder = n % 1000000000;
88
+ return convert(billions) + ' billion' + (remainder ? ' ' + convert(remainder) : '');
89
+ };
90
+ if (group === 2 && num > 1000 && num < 10000) {
91
+ const high = Math.floor(num / 100);
92
+ const low = num % 100;
93
+ if (low === 0) return convert(high) + ' hundred';
94
+ else if (low < 10) return convert(high) + ' ' + (zero === 'oh' ? 'oh' : zero) + ' ' + ONES[low];
95
+ else return convert(high) + ' ' + convert(low);
96
+ }
97
+ return convert(num);
98
+ }
99
+
100
+ function ordinalToWords(num) {
101
+ if (num < 20) return ORDINAL_ONES[num] || numberToWords(num) + 'th';
102
+ if (num < 100) {
103
+ const tens = Math.floor(num / 10);
104
+ const ones = num % 10;
105
+ if (ones === 0) return ORDINAL_TENS[tens];
106
+ return TENS[tens] + ' ' + ORDINAL_ONES[ones];
107
+ }
108
+ const cardinal = numberToWords(num);
109
+ if (cardinal.endsWith('y')) return cardinal.slice(0, -1) + 'ieth';
110
+ if (cardinal.endsWith('one')) return cardinal.slice(0, -3) + 'first';
111
+ if (cardinal.endsWith('two')) return cardinal.slice(0, -3) + 'second';
112
+ if (cardinal.endsWith('three')) return cardinal.slice(0, -5) + 'third';
113
+ if (cardinal.endsWith('ve')) return cardinal.slice(0, -2) + 'fth';
114
+ if (cardinal.endsWith('e')) return cardinal.slice(0, -1) + 'th';
115
+ if (cardinal.endsWith('t')) return cardinal + 'h';
116
+ return cardinal + 'th';
117
+ }
118
+
119
const UNICODE_MAP = {
  'à': 'a', 'á': 'a', 'â': 'a', 'ã': 'a', 'ä': 'a', 'å': 'a', 'æ': 'ae', 'ç': 'c', 'è': 'e', 'é': 'e', 'ê': 'e', 'ë': 'e', 'ì': 'i', 'í': 'i', 'î': 'i', 'ï': 'i', 'ñ': 'n', 'ò': 'o', 'ó': 'o', 'ô': 'o', 'õ': 'o', 'ö': 'o', 'ø': 'o', 'ù': 'u', 'ú': 'u', 'û': 'u', 'ü': 'u', 'ý': 'y', 'ÿ': 'y', 'ß': 'ss', 'œ': 'oe', 'ð': 'd', 'þ': 'th', 'À': 'A', 'Á': 'A', 'Â': 'A', 'Ã': 'A', 'Ä': 'A', 'Å': 'A', 'Æ': 'AE', 'Ç': 'C', 'È': 'E', 'É': 'E', 'Ê': 'E', 'Ë': 'E', 'Ì': 'I', 'Í': 'I', 'Î': 'I', 'Ï': 'I', 'Ñ': 'N', 'Ò': 'O', 'Ó': 'O', 'Ô': 'O', 'Õ': 'O', 'Ö': 'O', 'Ø': 'O', 'Ù': 'U', 'Ú': 'U', 'Û': 'U', 'Ü': 'U', 'Ý': 'Y', '\u201C': '"', '\u201D': '"', '\u2018': "'", '\u2019': "'", '\u2026': '...', '\u2013': '-', '\u2014': '-'
};

/**
 * Folds accented Latin characters and typographic punctuation to plain
 * ASCII: known characters go through UNICODE_MAP, anything left is
 * canonically decomposed (NFD) and stripped of combining diacritics.
 */
function convertToAscii(text) {
  let mapped = '';
  for (const ch of text) {
    mapped += UNICODE_MAP[ch] || ch;
  }
  return mapped.normalize('NFD').replace(/[\u0300-\u036f]/g, '');
}
126
+
127
// Case-insensitive, dot-terminated title abbreviations.
const ABBREVIATIONS = [
  [/\bmrs\./gi, 'misuss'], [/\bms\./gi, 'miss'], [/\bmr\./gi, 'mister'], [/\bdr\./gi, 'doctor'], [/\bst\./gi, 'saint'], [/\bco\./gi, 'company'], [/\bjr\./gi, 'junior'], [/\bmaj\./gi, 'major'], [/\bgen\./gi, 'general'], [/\bdrs\./gi, 'doctors'], [/\brev\./gi, 'reverend'], [/\blt\./gi, 'lieutenant'], [/\bhon\./gi, 'honorable'], [/\bsgt\./gi, 'sergeant'], [/\bcapt\./gi, 'captain'], [/\besq\./gi, 'esquire'], [/\bltd\./gi, 'limited'], [/\bcol\./gi, 'colonel'], [/\bft\./gi, 'fort']
];
// Case-sensitive technical acronyms and units.
const CASED_ABBREVIATIONS = [
  [/\bTTS\b/g, 'text to speech'], [/\bHz\b/g, 'hertz'], [/\bkHz\b/g, 'kilohertz'], [/\bKBs\b/g, 'kilobytes'], [/\bKB\b/g, 'kilobyte'], [/\bMBs\b/g, 'megabytes'], [/\bMB\b/g, 'megabyte'], [/\bGBs\b/g, 'gigabytes'], [/\bGB\b/g, 'gigabyte'], [/\bTBs\b/g, 'terabytes'], [/\bTB\b/g, 'terabyte'], [/\bAPIs\b/g, "a p i's"], [/\bAPI\b/g, 'a p i'], [/\bCLIs\b/g, "c l i's"], [/\bCLI\b/g, 'c l i'], [/\bCPUs\b/g, "c p u's"], [/\bCPU\b/g, 'c p u'], [/\bGPUs\b/g, "g p u's"], [/\bGPU\b/g, 'g p u'], [/\bAve\b/g, 'avenue'], [/\betc\b/g, 'etcetera']
];

/**
 * Replaces every known abbreviation with its spoken expansion.
 * Title abbreviations are applied first, then the cased acronym table.
 */
function expandAbbreviations(text) {
  let result = text;
  for (const [pattern, expansion] of ABBREVIATIONS) {
    result = result.replace(pattern, expansion);
  }
  for (const [pattern, expansion] of CASED_ABBREVIATIONS) {
    result = result.replace(pattern, expansion);
  }
  return result;
}
137
+
138
// --- Number-normalization regexes (applied in order by normalizeNumbers) ---
const NUM_PREFIX_RE = /#(\d)/g;                           // "#1" -> "number 1"
const NUM_SUFFIX_RE = /(\d)([KMBT])/gi;                   // "5K" -> "5 thousand", etc.
const NUM_LETTER_SPLIT_RE = /(\d)([a-z])|([a-z])(\d)/gi;  // split "3px" / "mp3" at the digit/letter boundary
const COMMA_NUMBER_RE = /(\d[\d,]+\d)/g;                  // "1,234" -> strip thousands separators
const DATE_RE = /(^|[^/])(\d\d?[/-]\d\d?[/-]\d\d(?:\d\d)?)($|[^/])/g; // "1/2/2024"-style dates
const PHONE_NUMBER_RE = /\(?\d{3}\)?[-.\s]\d{3}[-.\s]?\d{4}/g;        // US 10-digit phone numbers
const TIME_RE = /(\d\d?):(\d\d)(?::(\d\d))?/g;            // "h:mm" or "h:mm:ss"
const POUNDS_RE = /£([\d,]*\d+)/g;
const DOLLARS_RE = /\$([\d.,]*\d+)/g;
const DECIMAL_NUMBER_RE = /(\d+(?:\.\d+)+)/g;             // "3.14" (also version strings like "1.2.3")
const MULTIPLY_RE = /(\d)\s?\*\s?(\d)/g;
const DIVIDE_RE = /(\d)\s?\/\s?(\d)/g;
const ADD_RE = /(\d)\s?\+\s?(\d)/g;
const SUBTRACT_RE = /(\d)?\s?-\s?(\d)/g;
const FRACTION_RE = /(\d+)\/(\d+)/g;
const ORDINAL_RE = /(\d+)(st|nd|rd|th)/gi;
const NUMBER_RE = /\d+/g;

/**
 * Rewrites digits, dates, times, currency, arithmetic and ordinals into
 * speakable words. The replacement order matters: structured forms (phone
 * numbers, times, currency) must be consumed before the generic NUMBER_RE
 * pass spells out any remaining digit runs.
 */
function normalizeNumbers(text) {
  // "#7" -> "number 7".
  text = text.replace(NUM_PREFIX_RE, (_, d) => `number ${d}`);
  // "5K" / "2M" etc. -> "5 thousand" / "2 million".
  text = text.replace(NUM_SUFFIX_RE, (_, num, suffix) => {
    const map = { k: 'thousand', m: 'million', b: 'billion', t: 'trillion' };
    return `${num} ${map[suffix.toLowerCase()]}`;
  });
  // Run twice so alternating runs like "a1b2" are fully separated.
  for (let i = 0; i < 2; i++) {
    text = text.replace(NUM_LETTER_SPLIT_RE, (m, d1, l1, l2, d2) => {
      if (d1 && l1) return `${d1} ${l1}`;
      if (l2 && d2) return `${l2} ${d2}`;
      return m;
    });
  }
  // Drop thousands separators so "1,234" is read as one number.
  text = text.replace(COMMA_NUMBER_RE, m => m.replace(/,/g, ''));
  // Dates: read separators as "dash" (e.g. "1/2/2024" -> "1 dash 2 dash 2024").
  text = text.replace(DATE_RE, (_, pre, date, post) => pre + date.split(/[./-]/).join(' dash ') + post);
  // Phone numbers: read digit-by-digit in 3/3/4 groups separated by pauses.
  text = text.replace(PHONE_NUMBER_RE, m => {
    const digits = m.replace(/\D/g, '');
    return digits.length === 10 ? `${digits.slice(0, 3).split('').join(' ')}, ${digits.slice(3, 6).split('').join(' ')}, ${digits.slice(6).split('').join(' ')}` : m;
  });
  // Times: "o'clock" / "oh" conventions, e.g. "3:05" -> "3 oh 5".
  text = text.replace(TIME_RE, (_, hours, minutes, seconds) => {
    const h = parseInt(hours), m = parseInt(minutes), s = seconds ? parseInt(seconds) : 0;
    if (!seconds) return m === 0 ? (h === 0 ? '0' : h > 12 ? `${hours} minutes` : `${hours} o'clock`) : minutes.startsWith('0') ? `${hours} oh ${minutes[1]}` : `${hours} ${minutes}`;
    let res = '';
    if (h !== 0) res = hours + ' ' + (m === 0 ? 'oh oh' : minutes.startsWith('0') ? `oh ${minutes[1]}` : minutes);
    else if (m !== 0) res = minutes + ' ' + (s === 0 ? 'oh oh' : seconds.startsWith('0') ? `oh ${seconds[1]}` : seconds);
    else res = seconds;
    return res + ' ' + (s === 0 ? '' : seconds.startsWith('0') ? `oh ${seconds[1]}` : seconds);
  });
  // Currency.
  text = text.replace(POUNDS_RE, (_, amount) => `${amount.replace(/,/g, '')} pounds`);
  text = text.replace(DOLLARS_RE, (_, amount) => {
    const parts = amount.replace(/,/g, '').split('.');
    const dollars = parseInt(parts[0]) || 0;
    const cents = parts[1] ? parseInt(parts[1]) : 0;
    if (dollars && cents) return `${dollars} ${dollars === 1 ? 'dollar' : 'dollars'}, ${cents} ${cents === 1 ? 'cent' : 'cents'}`;
    if (dollars) return `${dollars} ${dollars === 1 ? 'dollar' : 'dollars'}`;
    if (cents) return `${cents} ${cents === 1 ? 'cent' : 'cents'}`;
    return 'zero dollars';
  });
  // Decimals: "3.14" -> char-spaced "3 . 1 4" so digits are read one-by-one.
  // NOTE(review): the final split('').join(' ') also spaces out the letters of
  // the inserted word "point" — presumably intentional for per-digit reading,
  // but verify the spoken output.
  text = text.replace(DECIMAL_NUMBER_RE, m => m.split('.').join(' point ').split('').join(' '));
  // Arithmetic operators between digits.
  text = text.replace(MULTIPLY_RE, '$1 times $2');
  text = text.replace(DIVIDE_RE, '$1 over $2');
  text = text.replace(ADD_RE, '$1 plus $2');
  text = text.replace(SUBTRACT_RE, (_, a, b) => (a ? a : '') + ' minus ' + b);
  text = text.replace(FRACTION_RE, '$1 over $2');
  // "1st" / "22nd" -> ordinal words.
  text = text.replace(ORDINAL_RE, (_, num) => ordinalToWords(parseInt(num)));
  // Remaining digit runs; values in (1000, 3000) are read year-style.
  text = text.replace(NUMBER_RE, m => {
    const num = parseInt(m);
    if (num > 1000 && num < 3000) {
      if (num === 2000) return 'two thousand';
      if (num > 2000 && num < 2010) return 'two thousand ' + numberToWords(num % 100);
      if (num % 100 === 0) return numberToWords(Math.floor(num / 100)) + ' hundred';
      return numberToWords(num, { zero: 'oh', group: 2 });
    }
    return numberToWords(num);
  });
  return text;
}
213
// Symbol -> spoken-word substitutions, applied in order (longest forms like
// "<=" must precede their single-character prefixes).
const SPECIAL_CHARACTERS = [
  [/@/g, ' at '], [/&/g, ' and '], [/%/g, ' percent '], [/:/g, '.'], [/;/g, ','], [/\+/g, ' plus '], [/\\/g, ' backslash '], [/~/g, ' about '], [/(^| )<3/g, ' heart '], [/<=/g, ' less than or equal to '], [/>=/g, ' greater than or equal to '], [/</g, ' less than '], [/>/g, ' greater than '], [/=/g, ' equals '], [/\//g, ' slash '], [/_/g, ' '],
];
const LINK_HEADER_RE = /https?:\/\//gi;
const DASH_RE = /(.) - (.)/g;
const DOT_RE = /([A-Z])\.([A-Z])/gi;
const PARENTHESES_RE = /[\(\[\{][^\)\]\}]*[\)\]\}](.)?/g;

/**
 * Rewrites structural punctuation into speakable text: URL schemes are
 * spelled out, " - " becomes a pause, letter.letter becomes "dot", and
 * bracketed asides are turned into comma-delimited phrases.
 */
function normalizeSpecial(text) {
  let result = text.replace(LINK_HEADER_RE, 'h t t p s colon slash slash ');
  result = result.replace(DASH_RE, '$1, $2');
  result = result.replace(DOT_RE, '$1 dot $2');
  result = result.replace(PARENTHESES_RE, (matched, trailing) => {
    let spoken = matched.replace(/[\(\[\{]/g, ', ').replace(/[\)\]\}]/g, ', ');
    // If punctuation immediately follows the bracket group, attach it directly.
    if (trailing && /[$.!?,]/.test(trailing)) spoken = spoken.slice(0, -2) + trailing;
    return spoken;
  });
  return result;
}

/**
 * Applies every SPECIAL_CHARACTERS substitution, left to right.
 */
function expandSpecialCharacters(text) {
  return SPECIAL_CHARACTERS.reduce(
    (acc, [pattern, expansion]) => acc.replace(pattern, expansion),
    text
  );
}
236
/**
 * Flattens a multi-line string into one line: each non-empty line is trimmed
 * and given terminal punctuation ('.') if it lacks one; empty lines become
 * empty strings; everything is re-joined with single spaces.
 */
function normalizeNewlines(text) {
  const pieces = [];
  for (const rawLine of text.split('\n')) {
    const line = rawLine.trim();
    if (!line) {
      pieces.push('');
      continue;
    }
    pieces.push(/[.!?]$/.test(line) ? line : line + '.');
  }
  return pieces.join(' ');
}
244
/**
 * Strips characters the model cannot voice: first everything outside the
 * allowed alphabet, then the marker characters (<, >, /, _, +) that earlier
 * passes may have left behind.
 */
function removeUnknownCharacters(text) {
  const allowedOnly = text.replace(/[^A-Za-z !\$%&'\*\+,\-./0123456789<>\?_]/g, '');
  return allowedOnly.replace(/[<>\/_+]/g, '');
}
248
/**
 * Squeezes all whitespace runs to single spaces, then removes the space
 * that squeezing can leave in front of sentence punctuation.
 */
function collapseWhitespace(text) {
  const squeezed = text.replace(/\s+/g, ' ');
  return squeezed.replace(/ ([.\?!,])/g, '$1');
}
251
/**
 * Collapses runs of repeated/mixed punctuation to a single terminator,
 * while preserving genuine ellipses ("..." and longer) via a placeholder.
 */
function dedupPunctuation(text) {
  let out = text.replace(/\.\.\.+/g, '[ELLIPSIS]'); // protect ellipses
  out = out.replace(/,+/g, ',');
  out = out.replace(/[.,]*\.[.,]*/g, '.');
  out = out.replace(/[.,!]*![.,!]*/g, '!');
  out = out.replace(/[.,!?]*\?[.,!?]*/g, '?');
  return out.replace(/\[ELLIPSIS\]/g, '...');     // restore ellipses
}
254
/**
 * Full text-normalization pipeline: ASCII folding, newline flattening,
 * number/abbreviation/symbol expansion, lowercasing and final cleanup.
 * Order matters — ASCII folding and newline handling must run before the
 * expansions, and lowercasing before the character whitelist.
 */
function cleanText(text) {
  const pipeline = [
    convertToAscii,
    normalizeNewlines,
    normalizeNumbers,
    normalizeSpecial,
    expandAbbreviations,
    expandSpecialCharacters,
    (t) => t.toLowerCase(),
    removeUnknownCharacters,
    collapseWhitespace,
    dedupPunctuation,
  ];
  const cleaned = pipeline.reduce((acc, step) => step(acc), text);
  return cleaned.trim();
}
267
/**
 * Converts raw user text into a list of model-ready prompt strings.
 *
 * The cleaned text is split into sentences, sentences shorter than
 * `minLength` characters are merged into a neighbour, and the result is
 * grouped `batchSize` sentences per prompt. Each prompt is wrapped in the
 * model's control tokens: "[STOP][TEXT]...[START]".
 *
 * @param {string} text - raw input text.
 * @param {number} [batchSize=3] - sentences per generated prompt.
 * @param {number} [minLength=30] - merge threshold in characters; 0 disables merging.
 * @returns {string[]} prompt strings (empty array for empty/cleaned-away input).
 */
function preprocessText(text, batchSize = 3, minLength = 30) {
  text = text.trim();
  const cleanedText = cleanText(text);
  // Split after sentence-final punctuation, keeping the punctuation attached.
  let sentences = cleanedText.split(/(?<=[.!?])\s+/).filter(s => s.trim());
  if (sentences.length === 0) return cleanedText ? [`[STOP][TEXT]${cleanedText}[START]`] : [];
  if (minLength > 0 && sentences.length > 1) {
    const merged = [];
    for (let i = 0; i < sentences.length; i++) {
      const cur = sentences[i];
      if (cur.length < minLength) {
        // Too short: append to the previously merged sentence if one exists;
        // otherwise prepend to the next raw sentence; keep as-is when it is
        // the only/last remaining sentence.
        if (merged.length > 0) merged[merged.length - 1] = (merged[merged.length - 1] + ' ' + cur).trim();
        else if (i + 1 < sentences.length) sentences[i + 1] = (cur + ' ' + sentences[i + 1]).trim();
        else merged.push(cur);
      } else merged.push(cur);
    }
    sentences = merged;
  }
  // Group sentences into fixed-size batches, one prompt per batch.
  const prompts = [];
  for (let i = 0; i < sentences.length; i += batchSize) {
    const batch = sentences.slice(i, i + batchSize).join(' ');
    prompts.push(`[STOP][TEXT]${batch}[START]`);
  }
  return prompts;
}
291
+
292
+ // ----------------------------------------------------------------------------
293
+ // Worker Logic
294
+ // ----------------------------------------------------------------------------
295
+
296
/**
 * Main-thread message dispatcher.
 * Supported message types:
 *   'load'     — load models/tokenizer, reply 'loaded' or 'error'.
 *   'generate' — start TTS for data.text (rejected while loading/already running).
 *   'stop'     — flag the current generation loop to halt.
 * Unknown types are ignored.
 */
self.onmessage = async (e) => {
  const { type, data } = e.data;
  console.log('Worker received message:', type);

  switch (type) {
    case 'load':
      try {
        await loadModels();
        postMessage({ type: 'loaded' });
      } catch (err) {
        postMessage({ type: 'error', error: err.toString() });
      }
      break;

    case 'generate':
      if (!isReady) {
        postMessage({ type: 'error', error: 'Models are not loaded yet.' });
        return;
      }
      // Ignore overlapping requests; one generation at a time.
      if (isGenerating) return;
      try {
        await startGeneration(data.text);
      } catch (err) {
        console.error('Generation Error:', err);
        postMessage({ type: 'error', error: err.toString() });
      }
      break;

    case 'stop':
      isGenerating = false;
      postMessage({ type: 'status', status: 'Stopped', state: 'idle' });
      break;
  }
};
325
+
326
/**
 * Loads the backbone and decoder ONNX sessions plus the tokenizer, then
 * reports readiness to the main thread. Idempotent: returns immediately if
 * the backbone session already exists. Errors are logged and rethrown so the
 * 'load' message handler can surface them.
 */
async function loadModels() {
  if (backboneSession) return;

  postMessage({ type: 'status', status: 'Loading models...', state: 'loading' });

  // Configure WASM Paths to use EXACT same version as loader
  const version = '1.20.0';
  const cdnBase = `https://cdn.jsdelivr.net/npm/onnxruntime-web@${version}/dist/`;
  ort.env.wasm.wasmPaths = cdnBase;

  // Disable multi-threading if not in cross-origin isolated environment to avoid ERR_WASM_FILE_NOT_FOUND
  if (!self.crossOriginIsolated) {
    console.warn('Environment is not cross-origin isolated. Disabling WASM multi-threading.');
    ort.env.wasm.numThreads = 1;
  } else if (typeof navigator !== 'undefined' && navigator.hardwareConcurrency) {
    // Cap the thread pool at 8 regardless of core count.
    ort.env.wasm.numThreads = Math.min(navigator.hardwareConcurrency, 8);
  }

  try {
    const backboneOptions = {
      executionProviders: ['wasm'],
      freeDimensionOverrides: { 'batch': 1 },
      graphOptimizationLevel: 'all'
    };

    // Initialize FP16 Lookup — only needed when the backbone model file is an
    // fp16 export (detected by filename convention).
    isFp16Backbone = MODELS.backbone.includes('fp16');
    if (isFp16Backbone) initFp16Lookup();

    console.log('Loading Backbone...');
    backboneSession = await ort.InferenceSession.create(MODELS.backbone, backboneOptions);

    console.log('Loading Decoder...');
    // Fetch the decoder model manually so an external-data sidecar can be attached.
    const decoderBuf = await fetch(MODELS.decoder).then(r => {
      if (!r.ok) throw new Error(`Failed to load decoder: ${r.statusText}`);
      return r.arrayBuffer();
    });

    // External data check: probe for a "<model>.data" sidecar holding weights
    // stored outside the .onnx file. Best-effort — absence is not an error.
    let dataBuf = null;
    try {
      const dataUrl = MODELS.decoder + '.data';
      const dataRes = await fetch(dataUrl);
      if (dataRes.ok) {
        dataBuf = await dataRes.arrayBuffer();
      }
    } catch (e) { }

    const decoderOptions = {
      executionProviders: ['wasm'],
      freeDimensionOverrides: { 'batch': 1 }
    };

    if (dataBuf) {
      // The path must match the filename referenced inside the .onnx graph.
      decoderOptions.externalData = [{
        data: new Uint8Array(dataBuf),
        path: MODELS.decoder.split('/').pop() + '.data'
      }];
    }

    decoderSession = await ort.InferenceSession.create(new Uint8Array(decoderBuf), decoderOptions);

    console.log('Loading Tokenizer...');
    const transformers = await import('https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.0');
    const { AutoTokenizer, env } = transformers;

    // Serve tokenizer files from alongside this worker script, never the Hub.
    env.allowLocalModels = true;
    env.allowRemoteModels = false;
    env.localModelPath = new URL('.', self.location.href).pathname;

    tokenizer = await AutoTokenizer.from_pretrained(MODELS.tokenizer, {
      local_files_only: true
    });

    isReady = true;
    postMessage({ type: 'status', status: 'Ready', state: 'idle' });
    postMessage({ type: 'model_status', status: 'ready', text: 'Ready' });
    postMessage({ type: 'loaded' });

  } catch (err) {
    console.error('Model load failed in worker:', err);
    throw err;
  }
}
410
+
411
/**
 * Runs a full TTS generation for the given text: preprocesses it into
 * prompts, tokenizes and generates each prompt in order, and notifies the
 * main thread when the stream ends (unless a 'stop' message cleared
 * isGenerating mid-run).
 */
async function startGeneration(text) {
  isGenerating = true;
  postMessage({ type: 'status', status: 'Generating...', state: 'running' });

  const prompts = preprocessText(text);
  const overallStartTime = performance.now();
  let cumulativeSamples = 0;

  for (let idx = 0; idx < prompts.length; idx++) {
    if (!isGenerating) break;
    // The tokenizer runs in-worker, so input_ids.data is directly readable.
    const { input_ids } = await tokenizer(prompts[idx]);
    const producedSamples = await generationLoop(
      input_ids.data,
      overallStartTime,
      idx === 0,          // first batch drives TTFB/RTF bookkeeping
      cumulativeSamples
    );
    cumulativeSamples += producedSamples;
  }

  if (isGenerating) {
    postMessage({ type: 'stream_ended' });
    postMessage({ type: 'status', status: 'Finished', state: 'idle' });
  }
  isGenerating = false;
}
438
+
439
// Sampling Cache — scratch buffers reused across sample() calls to avoid
// per-token allocations; (re)allocated lazily whenever topK changes.
let _topKIndices = null;  // Int32Array(k): token ids currently held in the heap
let _topKScores = null;   // Float32Array(k): temperature-scaled, penalty-adjusted scores
let _topKOrder = null;    // Array(k): permutation ordering the heap entries by descending score
let _topKExp = null;      // Float64Array(k): softmax weights in sorted order
const samplingParams = { temperature: 0.3, topK: 50, topP: 0.95, repetitionPenalty: 1.2 };
445
+
446
/**
 * Autoregressively generates audio for one tokenized prompt.
 *
 * Runs the backbone one token at a time with a KV cache, buffers the hidden
 * state of each generated token, and every `targetChunkSize` tokens feeds a
 * sliding window of hidden states to the decoder. Decoder runs are chained on
 * a promise so they overlap with backbone inference; audio chunks are posted
 * to the main thread as transferables. Generation stops at token id 3
 * (end-of-speech) or after `maxNewTokens`.
 *
 * @param {BigInt64Array|ArrayLike} promptTokens - prompt token ids.
 * @param {number} startTime - run start timestamp (currently unused in this function).
 * @param {boolean} [isFirstBatch=true] - first prompt of the run; triggers the
 *   'generation_started' message and tags the first chunk's metrics.
 * @param {number} [cumulativeSamples=0] - samples from earlier prompts (currently unused here).
 * @returns {Promise<number>} FIXME: always resolves to 0 — `totalSamples` is
 *   declared but never incremented, so the caller's sample accounting is a no-op.
 */
async function generationLoop(promptTokens, startTime, isFirstBatch = true, cumulativeSamples = 0) {
  const batch = 1;
  const numLayers = 17;
  // Per-layer KV width. NOTE(review): presumably the KV head dimension of the
  // exported backbone — confirm against the model config (the decoder's
  // hidden width below is 512, not this value).
  const hiddenDim = 128;
  const promptLen = promptTokens.length;
  const vocabSize = 8192;
  const maxNewTokens = 512;

  // Mark prompt tokens as "seen" so the repetition penalty applies to them.
  const seenTokenMask = new Uint8Array(vocabSize);
  for (let i = 0; i < promptTokens.length; i++) {
    const tid = Number(promptTokens[i]);
    if (tid >= 0 && tid < vocabSize) seenTokenMask[tid] = 1;
  }

  // Empty KV tensors to seed the cache; dtype must match the backbone export.
  const kvType = isFp16Backbone ? 'float16' : 'float32';
  const kvData = isFp16Backbone ? new Uint16Array(0) : new Float32Array(0);

  let pastKeyValues = {};
  for (let i = 0; i < numLayers; i++) {
    pastKeyValues[`past_key_values.${i}.key`] = new ort.Tensor(kvType, kvData, [batch, 1, 0, hiddenDim]);
    pastKeyValues[`past_key_values.${i}.value`] = new ort.Tensor(kvType, kvData, [batch, 1, 0, hiddenDim]);
  }

  // Preallocate the attention mask for the longest possible sequence; a
  // growing subarray view is passed each step.
  const maxSeqLen = promptLen + maxNewTokens;
  const attentionMaskData = new BigInt64Array(maxSeqLen);
  attentionMaskData.fill(1n);
  let currentSeqLen = promptLen;

  // Single-element tensors reused for every decode step (data mutated in place).
  const nextInputIdData = new BigInt64Array(1);
  const nextPositionIdData = new BigInt64Array(1);
  const nextInputIdsTensor = new ort.Tensor('int64', nextInputIdData, [batch, 1]);
  const nextPositionIdsTensor = new ort.Tensor('int64', nextPositionIdData, [batch, 1]);

  // First step processes the whole prompt; subsequent steps feed one token.
  let currentInputIds = new ort.Tensor('int64', BigInt64Array.from(promptTokens), [batch, promptLen]);
  let currentAttentionMask = new ort.Tensor('int64', attentionMaskData.subarray(0, currentSeqLen), [batch, currentSeqLen]);
  let currentPositionIds = new ort.Tensor('int64', BigInt64Array.from({ length: promptLen }, (_, i) => BigInt(i)), [batch, promptLen]);

  const hiddenStatesBuffer = [];
  let totalSamples = 0; // FIXME: never incremented — see @returns above.
  const targetChunkSize = 8;   // tokens per decoded audio chunk
  let chunkCounter = targetChunkSize;
  let firstChunk = true;

  // Pipelining: decoder runs are serialized on this promise chain so they
  // execute in order while the backbone keeps generating.
  let lastDecoderPromise = Promise.resolve();
  let chunkBackboneTime = 0;

  if (isFirstBatch) {
    postMessage({ type: 'generation_started', data: { time: performance.now() } });
  }

  for (let i = 0; i < maxNewTokens; i++) {
    if (!isGenerating) break; // honor a 'stop' message mid-generation

    // Yield to the worker's event loop periodically so 'stop' can arrive.
    if (i % 4 === 0) {
      await new Promise(resolve => setTimeout(resolve, 0));
    }

    const inputs = {
      input_ids: currentInputIds,
      attention_mask: currentAttentionMask,
      position_ids: currentPositionIds,
      ...pastKeyValues
    };

    const bbStart = performance.now();
    const outputs = await backboneSession.run(inputs);
    chunkBackboneTime += (performance.now() - bbStart);

    // Output order assumed: [logits, (key, value) x numLayers, ..., hidden_state last].
    const backboneNames = backboneSession.outputNames;
    const logits = outputs[backboneNames[0]];
    const lastHiddenState = outputs[backboneNames[backboneNames.length - 1]];

    // Feed the updated KV cache back in for the next step.
    for (let j = 0; j < numLayers; j++) {
      pastKeyValues[`past_key_values.${j}.key`] = outputs[backboneNames[1 + j * 2]];
      pastKeyValues[`past_key_values.${j}.value`] = outputs[backboneNames[2 + j * 2]];
    }

    const nextTokenId = sample(logits, seenTokenMask);
    const finished = (nextTokenId === 3n); // token id 3 terminates generation
    const nextTokenIdNum = Number(nextTokenId);
    if (nextTokenIdNum >= 0 && nextTokenIdNum < vocabSize) seenTokenMask[nextTokenIdNum] = 1;

    // Extract the hidden state of the last position of this step.
    const seqLen = lastHiddenState.dims[1];
    const hiddenDimSize = lastHiddenState.dims[2];
    const lastTokenStateRaw = lastHiddenState.data.subarray((seqLen - 1) * hiddenDimSize, seqLen * hiddenDimSize);

    // fp16 backbones expose raw uint16 bits — decode via the lookup table.
    let lastTokenState;
    if (lastTokenStateRaw instanceof Uint16Array) {
      lastTokenState = new Float32Array(hiddenDimSize);
      for (let j = 0; j < hiddenDimSize; j++) {
        lastTokenState[j] = fp16Lookup[lastTokenStateRaw[j]];
      }
    } else {
      lastTokenState = new Float32Array(lastTokenStateRaw);
    }

    // Buffer hidden states (skipping the prompt-processing step and EOS),
    // trimming to the maximum window the decoder ever needs.
    if (i > 0 && !finished) {
      hiddenStatesBuffer.push(new Float32Array(lastTokenState));
      if (hiddenStatesBuffer.length > 2 * RECEPTIVE_FIELD + targetChunkSize) {
        hiddenStatesBuffer.splice(0, hiddenStatesBuffer.length - (2 * RECEPTIVE_FIELD + targetChunkSize));
      }
    }

    // Decode Logic: once enough context exists, decode every targetChunkSize
    // tokens (or immediately on EOS).
    if (finished || hiddenStatesBuffer.length >= RECEPTIVE_FIELD + targetChunkSize) {
      if (finished || chunkCounter === targetChunkSize) {
        // NOTE(review): slice(-length) copies the ENTIRE buffer — equivalent
        // to slice(); also `window` shadows the global name (harmless in a worker).
        const window = hiddenStatesBuffer.slice(-hiddenStatesBuffer.length);
        const currentWindowSize = window.length;

        // Transpose [tokens][512] -> channel-major [1, 512, tokens] layout.
        const decoderInput = new Float32Array(512 * currentWindowSize);
        for (let w = 0; w < currentWindowSize; w++) {
          for (let d = 0; d < 512; d++) {
            decoderInput[d * currentWindowSize + w] = window[w][d];
          }
        }

        // Capture loop state for the async decoder closure below.
        const isLast = finished;
        const captureChunkCounter = chunkCounter;
        const captureFirstChunk = firstChunk;
        const captureBBTime = chunkBackboneTime;
        chunkBackboneTime = 0;

        // Send to decoder (in promise chain) — keeps decodes ordered while
        // the backbone loop continues.
        lastDecoderPromise = lastDecoderPromise.then(async () => {
          const decStart = performance.now();
          const decoderOutputs = await decoderSession.run({
            [decoderSession.inputNames[0]]: new ort.Tensor('float32', decoderInput, [1, 512, currentWindowSize])
          });
          const decDuration = performance.now() - decStart;

          // Slice out only the freshly decoded region, discarding the
          // receptive-field warm-up samples at both ends.
          const audio = decoderOutputs[decoderSession.outputNames[0]].data;
          let audioChunk;
          if (isLast) {
            const startIdx = audio.length - (RECEPTIVE_FIELD + captureChunkCounter - 1) * TOKEN_SIZE + TOKEN_SIZE;
            audioChunk = audio.subarray(startIdx);
          } else {
            const startIdx = audio.length - (RECEPTIVE_FIELD + targetChunkSize) * TOKEN_SIZE + TOKEN_SIZE;
            const endIdx = audio.length - RECEPTIVE_FIELD * TOKEN_SIZE + TOKEN_SIZE;
            audioChunk = audio.subarray(startIdx, endIdx);
          }

          // Post audio to main thread; the buffer is transferred (zero-copy)
          // and becomes unusable here afterwards.
          postMessage({
            type: 'audio_chunk',
            data: audioChunk,
            metrics: {
              bbTime: captureBBTime,
              decTime: decDuration,
              chunkDuration: audioChunk.length / SAMPLE_RATE,
              isFirst: captureFirstChunk && isFirstBatch
            }
          }, [audioChunk.buffer]); // Transferable
        });

        firstChunk = false;
        chunkCounter = 0;
      }
      chunkCounter++;
    }

    if (finished) break;

    // Prepare single-token inputs for the next step (tensors reused, data
    // mutated in place; the attention mask view simply grows by one).
    nextInputIdData[0] = nextTokenId;
    currentInputIds = nextInputIdsTensor;
    currentSeqLen += 1;
    currentAttentionMask = new ort.Tensor('int64', attentionMaskData.subarray(0, currentSeqLen), [1, currentSeqLen]);
    nextPositionIdData[0] = BigInt(currentSeqLen - 1);
    currentPositionIds = nextPositionIdsTensor;
  }

  // Wait for the final decode to flush before reporting completion.
  await lastDecoderPromise;
  return totalSamples;
}
622
+
623
/**
 * Samples the next token id from the last step of a logits tensor using
 * temperature scaling, repetition penalty, top-k filtering and top-p
 * (nucleus) truncation.
 *
 * Fixes over the previous version: the stray `Math.random() * sumExp` draw
 * whose result was immediately overwritten is gone (one RNG draw per call),
 * and the kept-prefix mass is accumulated during the top-p scan instead of
 * re-reduced via `_topKExp.slice(0, keep)` (no per-call allocation).
 *
 * @param {object} logitsTensor - ort.Tensor-like: `.data` is a Float32Array
 *   (or Uint16Array of raw fp16 bits) and `.dims` is [batch, steps, vocab];
 *   only the final step is read.
 * @param {Uint8Array} seenTokenMask - 1 per token already emitted or in the
 *   prompt; flagged tokens receive the repetition penalty.
 * @returns {bigint} sampled token id (0n if the top-k fast path is disabled).
 */
function sample(logitsTensor, seenTokenMask) {
  const rawData = logitsTensor.data;
  const vocabSize = logitsTensor.dims[2];
  const lastStepOffset = (logitsTensor.dims[1] - 1) * vocabSize;

  // Materialize the final step as float32 (fp16 backbones expose uint16 bits).
  let data;
  if (rawData instanceof Uint16Array) {
    data = new Float32Array(vocabSize);
    for (let j = 0; j < vocabSize; j++) {
      data[j] = fp16Lookup[rawData[lastStepOffset + j]];
    }
  } else {
    data = rawData.subarray ? rawData.subarray(lastStepOffset) : rawData.slice(lastStepOffset);
  }

  const { temperature, topK, topP, repetitionPenalty } = samplingParams;
  const useRepetitionPenalty = repetitionPenalty !== 1.0;
  const invTemperature = 1.0 / temperature;

  // Fast path: Top-K via a bounded min-heap over the scaled scores.
  const k = Math.min(topK, vocabSize);
  if (k > 0 && k < vocabSize) {
    // Lazily (re)allocate scratch buffers when k changes.
    if (!_topKIndices || _topKIndices.length !== k) {
      _topKIndices = new Int32Array(k);
      _topKScores = new Float32Array(k);
      _topKExp = new Float64Array(k);
      _topKOrder = Array.from({ length: k }, (_, i) => i);
    }
    const heapIndices = _topKIndices;
    const heapScores = _topKScores;

    // Min-heap of the k best scores (root = smallest kept score).
    let heapSize = 0;
    for (let tokenId = 0; tokenId < vocabSize; tokenId++) {
      let s = data[tokenId] * invTemperature;
      // Repetition penalty: scale seen tokens toward lower probability.
      if (useRepetitionPenalty && seenTokenMask[tokenId]) s = s < 0 ? (s * repetitionPenalty) : (s / repetitionPenalty);

      if (heapSize < k) {
        // Sift-up insert while the heap is still filling.
        let pos = heapSize++;
        while (pos > 0) {
          const parent = (pos - 1) >> 1;
          if (heapScores[parent] <= s) break;
          heapScores[pos] = heapScores[parent];
          heapIndices[pos] = heapIndices[parent];
          pos = parent;
        }
        heapScores[pos] = s;
        heapIndices[pos] = tokenId;
      } else if (s > heapScores[0]) {
        // Replace the smallest kept score and sift-down.
        let pos = 0;
        while (pos < (k >> 1)) {
          let left = (pos << 1) + 1;
          let right = left + 1;
          let smallest = left;
          if (right < k && heapScores[right] < heapScores[left]) smallest = right;
          if (heapScores[smallest] >= s) break;
          heapScores[pos] = heapScores[smallest];
          heapIndices[pos] = heapIndices[smallest];
          pos = smallest;
        }
        heapScores[pos] = s;
        heapIndices[pos] = tokenId;
      }
    }

    // Order the kept candidates by descending score.
    const expBuf = _topKExp;
    const order = _topKOrder;
    for (let i = 0; i < k; i++) order[i] = i;
    order.sort((a, b) => heapScores[b] - heapScores[a]);

    // Softmax weights, max-subtracted for numerical stability.
    const maxScore = heapScores[order[0]];
    let sumExp = 0;
    for (let i = 0; i < k; i++) {
      const w = Math.exp(heapScores[order[i]] - maxScore);
      expBuf[i] = w;
      sumExp += w;
    }

    // Top-p: keep the smallest prefix whose probability mass reaches topP,
    // tracking the kept mass directly so no second pass is needed.
    let keep = k;
    let keptMass = sumExp;
    if (topP < 1.0) {
      const threshold = topP * sumExp;
      let cumulative = 0;
      for (let i = 0; i < k; i++) {
        cumulative += expBuf[i];
        if (cumulative >= threshold) {
          keep = i + 1;
          keptMass = cumulative;
          break;
        }
      }
    }

    // Single draw from the renormalized kept distribution.
    let r = Math.random() * keptMass;
    for (let i = 0; i < keep; i++) {
      r -= expBuf[i];
      if (r <= 0) return BigInt(heapIndices[order[i]]);
    }
    return BigInt(heapIndices[order[0]]); // numeric-edge fallback
  }

  // k === 0 or k >= vocabSize: full-vocab sampling is not implemented (the
  // current samplingParams never take this path).
  return 0n; // Fallback
}
onnx-streaming.js ADDED
@@ -0,0 +1,380 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import { PCMPlayerWorklet as PCMPlayer } from './PCMPlayerWorklet.js';
3
+
4
+ // Configuration
5
+ const SAMPLE_RATE = 32000;
6
+
7
/**
 * Browser front-end for the Soprano TTS streaming demo.
 *
 * Owns the UI (status, metrics, visualizer), an AudioContext + worklet-based
 * PCM player, and a classic Worker that runs ONNX inference off the main
 * thread. Communication with the worker is message-based:
 *   main -> worker: { type: 'load' | 'generate' | 'stop', data? }
 *   worker -> main: { type: 'status' | 'model_status' | 'loaded' |
 *                     'generation_started' | 'audio_chunk' | 'stream_ended' |
 *                     'error', ... }
 */
export class SopranoONNXStreaming {
  constructor() {
    this.worker = null;
    this.player = null;
    this.audioContext = null;
    this.isGenerating = false;
    this.isWorkerReady = false;
    // Set when the user clicks Generate before the models finish loading;
    // generation is kicked off automatically once 'loaded' arrives.
    this.pendingGeneration = false;

    // Metrics state (all wall-clock, via performance.now()).
    this.generationStartTime = 0;
    this.lastChunkFinishTime = 0;
    this.rtfMovingAverage = 0;

    this.elements = {
      textInput: document.getElementById('text-input'),
      generateBtn: document.getElementById('generate-btn'),
      stopBtn: document.getElementById('stop-btn'),
      statusText: document.getElementById('stat-status'),
      statusIndicator: document.getElementById('status-indicator'),
      modelStatusIcon: document.querySelector('#model-status .model-status__dot'),
      modelStatusText: document.querySelector('#model-status .model-status__text'),
      btnLoader: document.getElementById('btn-loader'),
      statTTFB: document.getElementById('stat-ttfb'),
      statRTFx: document.getElementById('stat-rtfx'),
      ttfbBar: document.getElementById('ttfb-bar'),
      rtfxContext: document.getElementById('rtfx-context')
    };

    this.attachEventListeners();
    this.init();
    this.setupVisualization();
  }

  /**
   * Set up audio output and spawn the inference worker, then ask it to load
   * the models. The Generate button stays disabled until 'loaded' arrives.
   */
  async init() {
    console.log('Soprano v1.1 - Worker Edition');
    this.updateStatus('Initializing Worker...', 'running');

    // Initial button state while models load.
    this.elements.generateBtn.disabled = true;
    const btnText = this.elements.generateBtn.querySelector('.btn__text');
    if (btnText) btnText.textContent = 'Loading Models...';
    this.elements.btnLoader.style.display = 'block';

    // Audio context pinned to the model's output rate to avoid resampling.
    this.audioContext = new (window.AudioContext || window.webkitAudioContext)({
      sampleRate: SAMPLE_RATE,
      latencyHint: 'interactive'
    });

    await this.audioContext.audioWorklet.addModule('PCMPlayerWorklet.js');
    this.player = new PCMPlayer(this.audioContext);

    // Classic (non-module) worker hosts the ONNX sessions.
    console.log('Spawning Inference Worker (Classic)...');
    this.worker = new Worker('./inference-worker.js');

    this.worker.onmessage = (e) => {
      const { type, data, error, status, state, metrics, text } = e.data;

      switch (type) {
        case 'status':
          this.updateStatus(status, state);
          break;
        case 'model_status':
          this.updateModelStatus(status, text);
          break;
        case 'loaded': {
          console.log('Worker confirmed models loaded.');
          this.isWorkerReady = true;
          this.elements.generateBtn.disabled = false;
          this.elements.btnLoader.style.display = 'none';
          const loadedBtnText = this.elements.generateBtn.querySelector('.btn__text');
          if (loadedBtnText) loadedBtnText.textContent = 'Generate Audio';

          // Honor a Generate click made while models were still loading.
          if (this.pendingGeneration) {
            this.pendingGeneration = false;
            this.startGeneration();
          }
          break;
        }
        case 'generation_started':
          // The main thread already sets this in startGeneration for better precision
          break;
        case 'audio_chunk':
          this.handleAudioChunk(data, metrics);
          break;
        case 'stream_ended':
          this.handleStreamEnd();
          break;
        case 'error':
          console.error('Worker Error:', error);
          this.updateStatus(`Error: ${error}`, 'error');
          this.resetUI();
          break;
      }
    };

    // Trigger model load in the worker.
    this.worker.postMessage({ type: 'load' });
  }

  /** Wire up buttons, sample-text chips, character counter, and Ctrl/Cmd+Enter. */
  attachEventListeners() {
    this.elements.generateBtn.addEventListener('click', () => this.startGeneration());
    this.elements.stopBtn.addEventListener('click', () => this.stopGeneration());

    // Sample buttons
    document.querySelectorAll('.sample-btn').forEach(btn => {
      btn.addEventListener('click', () => {
        this.elements.textInput.value = btn.getAttribute('data-text');
        // Trigger input event to update character count
        this.elements.textInput.dispatchEvent(new Event('input'));
      });
    });

    // Character count
    this.elements.textInput.addEventListener('input', () => {
      const count = this.elements.textInput.value.length;
      const countEl = document.getElementById('char-count');
      if (countEl) countEl.textContent = count;
    });

    this.elements.textInput.addEventListener('keydown', (e) => {
      if ((e.ctrlKey || e.metaKey) && e.key === 'Enter') {
        this.startGeneration();
      }
    });
  }

  /**
   * Begin a generation run: validate state, resume the AudioContext (a user
   * gesture is required before playback), reset metrics, and post the text
   * to the worker. Queues the run via pendingGeneration if models are still
   * loading.
   */
  async startGeneration() {
    this.generationStartTime = performance.now();
    // BUGFIX: reset the chunk timestamp for this run. Previously it kept the
    // value from the prior generation (or 0), so the first chunk's RTF was
    // computed against a stale baseline and skewed the moving average.
    this.lastChunkFinishTime = this.generationStartTime;
    try {
      if (!this.isWorkerReady) {
        this.pendingGeneration = true;
        const btnText = this.elements.generateBtn.querySelector('.btn__text');
        if (btnText) btnText.textContent = 'Starting soon...';
        return;
      }

      if (this.isGenerating) return;

      if (this.audioContext && this.audioContext.state === 'suspended') {
        await this.audioContext.resume();
      }

      const text = this.elements.textInput.value.trim();
      if (!text) return;

      this.isGenerating = true;
      this.elements.generateBtn.disabled = true;
      this.elements.generateBtn.classList.add('btn--generating');
      this.elements.stopBtn.disabled = false;

      if (this.player) this.player.reset();

      // Reset metric displays for the new run.
      this.elements.statTTFB.textContent = '--';
      this.elements.statRTFx.textContent = '--';
      if (this.elements.ttfbBar) this.elements.ttfbBar.style.width = '0%';

      this.rtfMovingAverage = 0;

      this.worker.postMessage({ type: 'generate', data: { text } });
    } catch (err) {
      console.error('Error in startGeneration:', err);
      this.updateStatus(`Error: ${err.message}`, 'error');
      this.isGenerating = false;
      this.resetUI();
    }
  }

  /** Ask the worker to stop and reset the UI immediately (no wait for ack). */
  stopGeneration() {
    if (!this.isGenerating) return;
    this.worker.postMessage({ type: 'stop' });
    // Handle stop immediately in UI
    this.handleStreamEnd();
  }

  /**
   * Play one PCM chunk and update TTFB/RTF metrics.
   * @param {Float32Array} audioData - PCM samples at SAMPLE_RATE.
   * @param {{isFirst: boolean, chunkDuration: number}} metrics - chunkDuration in seconds.
   */
  handleAudioChunk(audioData, metrics) {
    if (!this.isGenerating) return;

    // Play audio immediately; DOM metric updates are deferred to the next frame.
    this.player.playAudio(audioData);

    requestAnimationFrame(() => {
      // TTFB (only on first chunk)
      if (metrics.isFirst) {
        const ttfb = performance.now() - this.generationStartTime;
        this.updateTTFB(ttfb);
      }

      // Real-Time Factor = audio seconds produced per wall-clock second,
      // smoothed with an exponential moving average (alpha = 0.2).
      const now = performance.now();
      const timeSinceLastChunk = (now - this.lastChunkFinishTime) / 1000;
      this.lastChunkFinishTime = now;

      if (timeSinceLastChunk > 0) {
        const chunkDurationSec = metrics.chunkDuration;
        const instantaneousRTF = chunkDurationSec / timeSinceLastChunk;

        if (this.rtfMovingAverage === 0) {
          this.rtfMovingAverage = instantaneousRTF;
        } else {
          this.rtfMovingAverage = this.rtfMovingAverage * 0.8 + instantaneousRTF * 0.2;
        }
        this.updateRTFx(this.rtfMovingAverage);
      }
    });
  }

  /** Flush the player's tail, mark generation finished, and restore the UI. */
  handleStreamEnd() {
    if (this.player.notifyStreamEnded) this.player.notifyStreamEnded();
    this.isGenerating = false;
    this.resetUI();
  }

  /** Restore the Generate/Stop buttons to their idle state. */
  resetUI() {
    this.elements.generateBtn.disabled = false;
    this.elements.generateBtn.classList.remove('btn--generating');
    const btnText = this.elements.generateBtn.querySelector('.btn__text');
    if (btnText) btnText.textContent = 'Generate Audio';
    this.elements.stopBtn.disabled = true;
  }

  /** Update the status line; state selects the indicator CSS modifier. */
  updateStatus(text, state) {
    this.elements.statusText.textContent = text;
    this.elements.statusIndicator.className = `status-indicator status-${state}`;
  }

  /** Update the model-loading badge; state selects the icon CSS modifier. */
  updateModelStatus(state, text) {
    this.elements.modelStatusText.textContent = text;
    this.elements.modelStatusIcon.className = `status-icon status-${state}`;
  }

  /** Display time-to-first-byte (ms) with a color-coded bar (green/amber/red). */
  updateTTFB(ms) {
    this.elements.statTTFB.textContent = Math.round(ms);
    const percentage = Math.min((ms / 2000) * 100, 100);
    this.elements.ttfbBar.style.width = `${percentage}%`;
    this.elements.ttfbBar.style.background = ms < 500 ? '#10b981' : ms < 1000 ? '#f59e0b' : '#ef4444';
  }

  /** Display the RTF multiplier; green when real-time (>= 1.0x), red otherwise. */
  updateRTFx(val) {
    this.elements.statRTFx.textContent = `${val.toFixed(2)}x`;
    this.elements.rtfxContext.style.color = val >= 1.0 ? '#10b981' : '#ef4444';
  }

  // -------------------------------------------------------------------------
  // Visualization
  // -------------------------------------------------------------------------

  /** Grab the two canvases, size them for the device, and start the draw loop. */
  setupVisualization() {
    this.waveformCanvas = document.getElementById('visualizer-waveform');
    this.barsCanvas = document.getElementById('visualizer-bars');
    if (!this.waveformCanvas || !this.barsCanvas) return;

    this.waveformCtx = this.waveformCanvas.getContext('2d');
    this.barsCtx = this.barsCanvas.getContext('2d');

    // Initial resize
    this.resizeCanvases();
    window.addEventListener('resize', () => this.resizeCanvases());

    // Start animation loop
    requestAnimationFrame(() => this.draw());
  }

  /** Match canvas backing-store size to CSS size * devicePixelRatio. */
  resizeCanvases() {
    if (!this.waveformCanvas || !this.barsCanvas) return;

    const parent = this.waveformCanvas.parentElement;
    const width = parent.clientWidth;
    const height = parent.clientHeight;

    const dpr = window.devicePixelRatio || 1;

    [this.waveformCanvas, this.barsCanvas].forEach(canvas => {
      canvas.width = width * dpr;
      canvas.height = height * dpr;
      canvas.style.width = `${width}px`;
      canvas.style.height = `${height}px`;
      const ctx = canvas.getContext('2d');
      ctx.scale(dpr, dpr);
    });
  }

  /** Per-frame render: bars from frequency data, line from time-domain data. */
  draw() {
    requestAnimationFrame(() => this.draw());

    if (!this.player || !this.player.analyser) return;

    const bufferLength = this.player.analyser.frequencyBinCount;
    // PERF: reuse one buffer across frames instead of allocating per frame;
    // recreate only if the analyser's bin count changes.
    if (!this._vizData || this._vizData.length !== bufferLength) {
      this._vizData = new Uint8Array(bufferLength);
    }
    const dataArray = this._vizData;

    // Draw Bars (Frequency)
    this.player.analyser.getByteFrequencyData(dataArray);
    this.drawBars(dataArray);

    // Draw Waveform (Time Domain)
    this.player.analyser.getByteTimeDomainData(dataArray);
    this.drawWaveform(dataArray);
  }

  /** Render the time-domain oscilloscope line (bytes centered on 128). */
  drawWaveform(dataArray) {
    const ctx = this.waveformCtx;
    const canvas = this.waveformCanvas;
    const width = canvas.width / (window.devicePixelRatio || 1);
    const height = canvas.height / (window.devicePixelRatio || 1);

    ctx.clearRect(0, 0, width, height);
    ctx.lineWidth = 2;
    ctx.strokeStyle = '#3b82f6'; // Blue primary
    ctx.beginPath();

    const sliceWidth = width / dataArray.length;
    let x = 0;

    for (let i = 0; i < dataArray.length; i++) {
      const v = dataArray[i] / 128.0;
      const y = (v * height) / 2;

      if (i === 0) ctx.moveTo(x, y);
      else ctx.lineTo(x, y);

      x += sliceWidth;
    }

    ctx.lineTo(width, height / 2);
    ctx.stroke();
  }

  /** Render the frequency bars, averaging adjacent bins into 120 buckets. */
  drawBars(dataArray) {
    const ctx = this.barsCtx;
    const canvas = this.barsCanvas;
    const width = canvas.width / (window.devicePixelRatio || 1);
    const height = canvas.height / (window.devicePixelRatio || 1);

    ctx.clearRect(0, 0, width, height);

    const barCount = 120; // Number of bars to display
    const barWidth = (width / barCount);
    // BUGFIX: with fewer than barCount bins, floor() yields 0 and the average
    // becomes 0/0 = NaN; clamp to at least 1 sample per bar.
    const samplesPerBar = Math.max(1, Math.floor(dataArray.length / barCount));

    for (let i = 0; i < barCount; i++) {
      let sum = 0;
      let counted = 0;
      for (let j = 0; j < samplesPerBar; j++) {
        const idx = i * samplesPerBar + j;
        if (idx < dataArray.length) {
          sum += dataArray[idx];
          counted++;
        }
      }
      const average = counted > 0 ? sum / counted : 0;
      const barHeight = (average / 255) * height * 0.8;

      // Gradient for bar
      const gradient = ctx.createLinearGradient(0, height, 0, height - barHeight);
      gradient.addColorStop(0, '#3b82f644');
      gradient.addColorStop(1, '#8b5cf6cc');

      ctx.fillStyle = gradient;

      // Rounded bars
      const x = i * barWidth;
      const y = height - barHeight;

      ctx.beginPath();
      ctx.roundRect(x + 1, y, barWidth - 2, barHeight, [2, 2, 0, 0]);
      ctx.fill();
    }
  }
}
376
+
377
// Bootstrap: create the app once the DOM is ready and expose it for debugging.
const bootstrap = () => {
  window.app = new SopranoONNXStreaming();
};
document.addEventListener('DOMContentLoaded', bootstrap);
onnx/soprano_backbone_kv_fp16.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9de7d23ba53431bfb3a3988cf11c7661865fde686a5d881eec3b3763a9a34596
3
+ size 169082390
onnx/soprano_backbone_kv_fp32.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4fc5e079488518a53caf55922f27bb3207fceb8f438413d58ec1876b1f28cee
3
+ size 319548418
onnx/soprano_backbone_kv_int8.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4b7b8406185396f14a0ee89e041e469edaeb7155efe7e5b6def57ff9acd15e4
3
+ size 80938986
onnx/soprano_decoder_fp32.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f6c3cd97c794d6c58c8edf4e3ce157c6465895098d062c5501d3170e837d947
3
+ size 262812
onnx/soprano_decoder_fp32.onnx.data ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02ba358c807c31966ec3f41ca1ceb9b5b82abf78786551b6067d7979e7dbf7fd
3
+ size 121503744
onnx/soprano_decoder_int8.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01f9bbe359a85e91fd47b7601d6030f4940a40d6c3677a58a7be82f57f6da11b
3
+ size 30793092
special_tokens_map.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "pad_token": {
3
+ "content": "[STOP]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ }
9
+ }
style.css ADDED
@@ -0,0 +1,978 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* ============================================
2
+ SOPRANO TTS DEMO - NEURAL OBSERVATORY THEME
3
+ ============================================ */
4
+
5
+ /* Design Tokens */
6
+ :root {
7
+ /* Colors - Space & Neural */
8
+ --color-void: #050810;
9
+ --color-deep: #0a0f1a;
10
+ --color-surface: rgba(15, 23, 42, 0.85);
11
+ --color-surface-elevated: rgba(30, 41, 59, 0.9);
12
+ --color-glass-border: rgba(255, 255, 255, 0.08);
13
+
14
+ /* Gradient Spectrum */
15
+ --gradient-primary: linear-gradient(135deg, #3b82f6 0%, #8b5cf6 100%);
16
+ --gradient-primary-vivid: linear-gradient(135deg, #60a5fa 0%, #a78bfa 100%);
17
+ --gradient-glow: linear-gradient(135deg, rgba(59, 130, 246, 0.4) 0%, rgba(139, 92, 246, 0.4) 100%);
18
+
19
+ /* Accent Colors */
20
+ --color-blue: #3b82f6;
21
+ --color-violet: #8b5cf6;
22
+ --color-cyan: #22d3ee;
23
+ --color-success: #10b981;
24
+ --color-warning: #f59e0b;
25
+ --color-error: #ef4444;
26
+
27
+ /* Text */
28
+ --color-text-primary: #f1f5f9;
29
+ --color-text-secondary: #94a3b8;
30
+ --color-text-muted: #64748b;
31
+
32
+ /* Typography */
33
+ --font-display: 'Syne', sans-serif;
34
+ --font-body: 'DM Sans', sans-serif;
35
+ --font-mono: 'Space Mono', monospace;
36
+
37
+ /* Spacing */
38
+ --space-1: 0.25rem;
39
+ --space-2: 0.5rem;
40
+ --space-3: 0.75rem;
41
+ --space-4: 1rem;
42
+ --space-5: 1.25rem;
43
+ --space-6: 1.5rem;
44
+ --space-8: 2rem;
45
+ --space-10: 2.5rem;
46
+ --space-12: 3rem;
47
+ --space-16: 4rem;
48
+
49
+ /* Radii */
50
+ --radius-sm: 8px;
51
+ --radius-md: 12px;
52
+ --radius-lg: 16px;
53
+ --radius-xl: 24px;
54
+ --radius-full: 9999px;
55
+
56
+ /* Shadows */
57
+ --shadow-sm: 0 2px 8px rgba(0, 0, 0, 0.3);
58
+ --shadow-md: 0 4px 16px rgba(0, 0, 0, 0.4);
59
+ --shadow-lg: 0 8px 32px rgba(0, 0, 0, 0.5);
60
+ --shadow-glow-blue: 0 0 40px rgba(59, 130, 246, 0.3);
61
+ --shadow-glow-violet: 0 0 40px rgba(139, 92, 246, 0.3);
62
+
63
+ /* Transitions */
64
+ --ease-out: cubic-bezier(0.16, 1, 0.3, 1);
65
+ --ease-bounce: cubic-bezier(0.34, 1.56, 0.64, 1);
66
+ --duration-fast: 150ms;
67
+ --duration-base: 250ms;
68
+ --duration-slow: 400ms;
69
+ }
70
+
71
+ /* Reset */
72
+ *, *::before, *::after {
73
+ box-sizing: border-box;
74
+ margin: 0;
75
+ padding: 0;
76
+ }
77
+
78
+ html {
79
+ font-size: 16px;
80
+ -webkit-font-smoothing: antialiased;
81
+ -moz-osx-font-smoothing: grayscale;
82
+ }
83
+
84
+ body {
85
+ font-family: var(--font-body);
86
+ background: var(--color-void);
87
+ color: var(--color-text-primary);
88
+ min-height: 100vh;
89
+ overflow-x: hidden;
90
+ line-height: 1.6;
91
+ }
92
+
93
+ /* Screen reader only */
94
+ .sr-only {
95
+ position: absolute;
96
+ width: 1px;
97
+ height: 1px;
98
+ padding: 0;
99
+ margin: -1px;
100
+ overflow: hidden;
101
+ clip: rect(0, 0, 0, 0);
102
+ white-space: nowrap;
103
+ border: 0;
104
+ }
105
+
106
+ /* ============================================
107
+ AMBIENT BACKGROUND
108
+ ============================================ */
109
+ .ambient-layer {
110
+ position: fixed;
111
+ inset: 0;
112
+ pointer-events: none;
113
+ z-index: 0;
114
+ overflow: hidden;
115
+ }
116
+
117
+ .orb {
118
+ position: absolute;
119
+ border-radius: 50%;
120
+ filter: blur(80px);
121
+ opacity: 0.6;
122
+ animation: orb-float 20s ease-in-out infinite;
123
+ }
124
+
125
+ .orb--primary {
126
+ width: 500px;
127
+ height: 500px;
128
+ background: radial-gradient(circle, rgba(59, 130, 246, 0.35) 0%, transparent 70%);
129
+ top: -15%;
130
+ right: -10%;
131
+ animation-delay: 0s;
132
+ }
133
+
134
+ .orb--secondary {
135
+ width: 400px;
136
+ height: 400px;
137
+ background: radial-gradient(circle, rgba(139, 92, 246, 0.3) 0%, transparent 70%);
138
+ bottom: -10%;
139
+ left: -10%;
140
+ animation-delay: -7s;
141
+ }
142
+
143
+ .orb--tertiary {
144
+ width: 300px;
145
+ height: 300px;
146
+ background: radial-gradient(circle, rgba(34, 211, 238, 0.2) 0%, transparent 70%);
147
+ top: 40%;
148
+ left: 50%;
149
+ transform: translateX(-50%);
150
+ animation-delay: -14s;
151
+ }
152
+
153
+ @keyframes orb-float {
154
+ 0%, 100% {
155
+ transform: translate(0, 0) scale(1);
156
+ }
157
+ 25% {
158
+ transform: translate(30px, -40px) scale(1.05);
159
+ }
160
+ 50% {
161
+ transform: translate(-20px, 20px) scale(0.95);
162
+ }
163
+ 75% {
164
+ transform: translate(40px, 30px) scale(1.02);
165
+ }
166
+ }
167
+
168
+ .grid-overlay {
169
+ position: absolute;
170
+ inset: 0;
171
+ background-image:
172
+ linear-gradient(rgba(59, 130, 246, 0.03) 1px, transparent 1px),
173
+ linear-gradient(90deg, rgba(59, 130, 246, 0.03) 1px, transparent 1px);
174
+ background-size: 60px 60px;
175
+ mask-image: radial-gradient(ellipse at center, black 0%, transparent 70%);
176
+ -webkit-mask-image: radial-gradient(ellipse at center, black 0%, transparent 70%);
177
+ }
178
+
179
+ /* ============================================
180
+ APP SHELL
181
+ ============================================ */
182
+ .app-shell {
183
+ position: relative;
184
+ z-index: 1;
185
+ max-width: 960px;
186
+ margin: 0 auto;
187
+ padding: var(--space-8) var(--space-6);
188
+ min-height: 100vh;
189
+ display: flex;
190
+ flex-direction: column;
191
+ animation: shell-enter 0.8s var(--ease-out) both;
192
+ }
193
+
194
+ @keyframes shell-enter {
195
+ from {
196
+ opacity: 0;
197
+ transform: translateY(30px);
198
+ }
199
+ to {
200
+ opacity: 1;
201
+ transform: translateY(0);
202
+ }
203
+ }
204
+
205
+ /* ============================================
206
+ HERO HEADER
207
+ ============================================ */
208
+ .hero {
209
+ text-align: center;
210
+ margin-bottom: var(--space-10);
211
+ }
212
+
213
+ .hero__brand {
214
+ display: flex;
215
+ align-items: center;
216
+ justify-content: center;
217
+ gap: var(--space-4);
218
+ margin-bottom: var(--space-4);
219
+ flex-wrap: wrap;
220
+ }
221
+
222
+ .logo {
223
+ display: flex;
224
+ align-items: center;
225
+ gap: var(--space-3);
226
+ }
227
+
228
+ .logo__icon {
229
+ width: 40px;
230
+ height: 40px;
231
+ filter: drop-shadow(0 0 12px rgba(139, 92, 246, 0.5));
232
+ }
233
+
234
+ .logo__text {
235
+ font-family: var(--font-display);
236
+ font-size: 2.5rem;
237
+ font-weight: 800;
238
+ background: var(--gradient-primary);
239
+ -webkit-background-clip: text;
240
+ -webkit-text-fill-color: transparent;
241
+ background-clip: text;
242
+ letter-spacing: -0.03em;
243
+ }
244
+
245
+ .badge {
246
+ display: inline-flex;
247
+ align-items: center;
248
+ padding: var(--space-1) var(--space-3);
249
+ background: var(--color-surface);
250
+ border: 1px solid var(--color-glass-border);
251
+ border-radius: var(--radius-full);
252
+ font-family: var(--font-mono);
253
+ font-size: 0.75rem;
254
+ font-weight: 700;
255
+ color: var(--color-text-secondary);
256
+ letter-spacing: 0.05em;
257
+ text-transform: uppercase;
258
+ }
259
+
260
+ .hero__tagline {
261
+ font-size: 1.1rem;
262
+ color: var(--color-text-secondary);
263
+ max-width: 500px;
264
+ margin: 0 auto;
265
+ }
266
+
267
+ /* ============================================
268
+ DEVICE SELECTION
269
+ ============================================ */
270
+ .device-section {
271
+ margin-bottom: var(--space-8);
272
+ }
273
+
274
+ .device-cards {
275
+ display: grid;
276
+ grid-template-columns: repeat(2, 1fr);
277
+ gap: var(--space-4);
278
+ margin-bottom: var(--space-4);
279
+ }
280
+
281
+ .device-card {
282
+ position: relative;
283
+ display: flex;
284
+ align-items: center;
285
+ gap: var(--space-4);
286
+ padding: var(--space-5);
287
+ background: var(--color-surface);
288
+ border: 2px solid transparent;
289
+ border-radius: var(--radius-lg);
290
+ cursor: pointer;
291
+ transition: all var(--duration-base) var(--ease-out);
292
+ text-align: left;
293
+ color: var(--color-text-primary);
294
+ font-family: inherit;
295
+ }
296
+
297
+ .device-card::before {
298
+ content: '';
299
+ position: absolute;
300
+ inset: -2px;
301
+ border-radius: inherit;
302
+ background: var(--gradient-primary);
303
+ opacity: 0;
304
+ z-index: -1;
305
+ transition: opacity var(--duration-base) var(--ease-out);
306
+ }
307
+
308
+ .device-card:hover {
309
+ background: var(--color-surface-elevated);
310
+ transform: translateY(-2px);
311
+ }
312
+
313
+ .device-card:focus-visible {
314
+ outline: 2px solid var(--color-blue);
315
+ outline-offset: 2px;
316
+ }
317
+
318
+ .device-card[aria-checked="true"] {
319
+ border-color: transparent;
320
+ background: rgba(59, 130, 246, 0.1);
321
+ }
322
+
323
+ .device-card[aria-checked="true"]::before {
324
+ opacity: 1;
325
+ }
326
+
327
+ .device-card[aria-checked="true"] .device-card__check {
328
+ opacity: 1;
329
+ transform: scale(1);
330
+ }
331
+
332
+ .device-card[aria-disabled="true"] {
333
+ opacity: 0.4;
334
+ cursor: not-allowed;
335
+ }
336
+
337
+ .device-card[aria-disabled="true"]:hover {
338
+ transform: none;
339
+ }
340
+
341
+ .device-card__icon {
342
+ width: 48px;
343
+ height: 48px;
344
+ display: flex;
345
+ align-items: center;
346
+ justify-content: center;
347
+ border-radius: var(--radius-md);
348
+ background: var(--color-surface-elevated);
349
+ color: var(--color-text-secondary);
350
+ flex-shrink: 0;
351
+ }
352
+
353
+ .device-card__icon svg {
354
+ width: 28px;
355
+ height: 28px;
356
+ }
357
+
358
+ .device-card[aria-checked="true"] .device-card__icon {
359
+ background: var(--gradient-primary);
360
+ color: white;
361
+ }
362
+
363
+ .device-card__content {
364
+ flex: 1;
365
+ display: flex;
366
+ flex-direction: column;
367
+ gap: var(--space-1);
368
+ }
369
+
370
+ .device-card__name {
371
+ font-family: var(--font-display);
372
+ font-size: 1.1rem;
373
+ font-weight: 600;
374
+ }
375
+
376
+ .device-card__sub {
377
+ font-size: 0.85rem;
378
+ color: var(--color-text-secondary);
379
+ }
380
+
381
+ .device-card__speed {
382
+ display: inline-flex;
383
+ align-items: center;
384
+ padding: 2px 8px;
385
+ background: var(--gradient-primary);
386
+ border-radius: var(--radius-full);
387
+ font-size: 0.7rem;
388
+ font-weight: 600;
389
+ color: white;
390
+ width: fit-content;
391
+ margin-top: var(--space-1);
392
+ }
393
+
394
+ .device-card__check {
395
+ width: 24px;
396
+ height: 24px;
397
+ color: var(--color-success);
398
+ opacity: 0;
399
+ transform: scale(0.8);
400
+ transition: all var(--duration-base) var(--ease-bounce);
401
+ }
402
+
403
+ /* GPU Banner */
404
+ .gpu-banner {
405
+ display: flex;
406
+ align-items: center;
407
+ gap: var(--space-3);
408
+ padding: var(--space-3) var(--space-4);
409
+ background: var(--color-surface);
410
+ border: 1px solid var(--color-glass-border);
411
+ border-radius: var(--radius-md);
412
+ font-size: 0.9rem;
413
+ }
414
+
415
+ .gpu-banner--available {
416
+ border-color: rgba(16, 185, 129, 0.3);
417
+ background: rgba(16, 185, 129, 0.08);
418
+ }
419
+
420
+ .gpu-banner--unavailable {
421
+ border-color: rgba(245, 158, 11, 0.3);
422
+ background: rgba(245, 158, 11, 0.08);
423
+ }
424
+
425
+ .gpu-banner__icon {
426
+ width: 24px;
427
+ height: 24px;
428
+ flex-shrink: 0;
429
+ }
430
+
431
+ .gpu-banner--available .gpu-banner__icon {
432
+ color: var(--color-success);
433
+ }
434
+
435
+ .gpu-banner--unavailable .gpu-banner__icon {
436
+ color: var(--color-warning);
437
+ }
438
+
439
+ .gpu-banner__content {
440
+ display: flex;
441
+ flex-direction: column;
442
+ gap: 2px;
443
+ }
444
+
445
+ .gpu-banner__content strong {
446
+ color: var(--color-text-primary);
447
+ }
448
+
449
+ .gpu-banner__content span {
450
+ color: var(--color-text-secondary);
451
+ font-size: 0.85rem;
452
+ }
453
+
454
+ /* ============================================
455
+ INPUT SECTION
456
+ ============================================ */
457
+ .input-section {
458
+ margin-bottom: var(--space-8);
459
+ }
460
+
461
+ .textarea-wrap {
462
+ position: relative;
463
+ margin-bottom: var(--space-4);
464
+ }
465
+
466
+ textarea {
467
+ width: 100%;
468
+ min-height: 120px;
469
+ padding: var(--space-4);
470
+ padding-bottom: var(--space-8);
471
+ background: var(--color-surface);
472
+ border: 1px solid var(--color-glass-border);
473
+ border-radius: var(--radius-lg);
474
+ color: var(--color-text-primary);
475
+ font-family: var(--font-body);
476
+ font-size: 1rem;
477
+ line-height: 1.6;
478
+ resize: vertical;
479
+ transition: all var(--duration-base) var(--ease-out);
480
+ }
481
+
482
+ textarea::placeholder {
483
+ color: var(--color-text-muted);
484
+ }
485
+
486
+ textarea:focus {
487
+ outline: none;
488
+ border-color: var(--color-blue);
489
+ box-shadow: 0 0 0 3px rgba(59, 130, 246, 0.15);
490
+ }
491
+
492
+ .textarea-meta {
493
+ position: absolute;
494
+ bottom: var(--space-3);
495
+ right: var(--space-4);
496
+ display: flex;
497
+ align-items: center;
498
+ gap: var(--space-4);
499
+ }
500
+
501
+ .char-count {
502
+ font-family: var(--font-mono);
503
+ font-size: 0.75rem;
504
+ color: var(--color-text-muted);
505
+ }
506
+
507
+ /* Sample Texts */
508
+ .sample-texts {
509
+ display: flex;
510
+ align-items: center;
511
+ flex-wrap: wrap;
512
+ gap: var(--space-2);
513
+ margin-bottom: var(--space-4);
514
+ }
515
+
516
+ .sample-texts__label {
517
+ font-size: 0.85rem;
518
+ color: var(--color-text-muted);
519
+ }
520
+
521
+ .sample-btn {
522
+ padding: var(--space-1) var(--space-3);
523
+ background: var(--color-surface);
524
+ border: 1px solid var(--color-glass-border);
525
+ border-radius: var(--radius-full);
526
+ color: var(--color-text-secondary);
527
+ font-family: var(--font-body);
528
+ font-size: 0.8rem;
529
+ cursor: pointer;
530
+ transition: all var(--duration-fast) var(--ease-out);
531
+ }
532
+
533
+ .sample-btn:hover {
534
+ background: var(--color-surface-elevated);
535
+ color: var(--color-text-primary);
536
+ border-color: rgba(255, 255, 255, 0.15);
537
+ }
538
+
539
+ .sample-btn:active {
540
+ transform: scale(0.97);
541
+ }
542
+
543
+ /* Controls */
544
+ .controls {
545
+ display: flex;
546
+ gap: var(--space-3);
547
+ }
548
+
549
+ .btn {
550
+ display: inline-flex;
551
+ align-items: center;
552
+ justify-content: center;
553
+ gap: var(--space-2);
554
+ padding: var(--space-4) var(--space-6);
555
+ border-radius: var(--radius-md);
556
+ font-family: var(--font-body);
557
+ font-size: 1rem;
558
+ font-weight: 600;
559
+ cursor: pointer;
560
+ border: none;
561
+ transition: all var(--duration-base) var(--ease-out);
562
+ }
563
+
564
+ .btn__icon {
565
+ width: 18px;
566
+ height: 18px;
567
+ }
568
+
569
+ .btn--primary {
570
+ flex: 1;
571
+ position: relative;
572
+ background: var(--gradient-primary);
573
+ color: white;
574
+ overflow: hidden;
575
+ }
576
+
577
+ .btn--primary::before {
578
+ content: '';
579
+ position: absolute;
580
+ inset: 0;
581
+ background: linear-gradient(135deg, rgba(255,255,255,0.2) 0%, transparent 50%);
582
+ opacity: 0;
583
+ transition: opacity var(--duration-fast);
584
+ }
585
+
586
+ .btn--primary:hover:not(:disabled)::before {
587
+ opacity: 1;
588
+ }
589
+
590
+ .btn--primary:hover:not(:disabled) {
591
+ transform: translateY(-2px);
592
+ box-shadow: var(--shadow-glow-blue);
593
+ }
594
+
595
+ .btn--primary:active:not(:disabled) {
596
+ transform: translateY(0);
597
+ }
598
+
599
+ .btn--primary.btn--generating {
600
+ animation: pulse-glow 2s ease-in-out infinite;
601
+ }
602
+
603
+ @keyframes pulse-glow {
604
+ 0%, 100% {
605
+ box-shadow: 0 0 0 0 rgba(59, 130, 246, 0.4);
606
+ }
607
+ 50% {
608
+ box-shadow: 0 0 0 15px rgba(59, 130, 246, 0);
609
+ }
610
+ }
611
+
612
+ .btn--secondary {
613
+ background: var(--color-surface);
614
+ color: var(--color-text-primary);
615
+ border: 1px solid var(--color-glass-border);
616
+ }
617
+
618
+ .btn--secondary:hover:not(:disabled) {
619
+ background: var(--color-surface-elevated);
620
+ border-color: rgba(255, 255, 255, 0.15);
621
+ }
622
+
623
+ .btn:disabled {
624
+ opacity: 0.4;
625
+ cursor: not-allowed;
626
+ }
627
+
628
+ .btn__loader {
629
+ width: 18px;
630
+ height: 18px;
631
+ border: 2px solid rgba(255, 255, 255, 0.3);
632
+ border-top-color: white;
633
+ border-radius: 50%;
634
+ animation: spin 0.8s linear infinite;
635
+ display: none;
636
+ }
637
+
638
+ @keyframes spin {
639
+ to { transform: rotate(360deg); }
640
+ }
641
+
642
+ /* ============================================
643
+ OUTPUT SECTION
644
+ ============================================ */
645
+ .output-section {
646
+ display: grid;
647
+ grid-template-columns: 1fr 280px;
648
+ gap: var(--space-6);
649
+ margin-bottom: var(--space-8);
650
+ }
651
+
652
+ /* Visualizer Panel */
653
+ .visualizer-panel {
654
+ background: var(--color-surface);
655
+ border: 1px solid var(--color-glass-border);
656
+ border-radius: var(--radius-xl);
657
+ overflow: hidden;
658
+ }
659
+
660
+ .visualizer-panel__header {
661
+ display: flex;
662
+ align-items: center;
663
+ justify-content: space-between;
664
+ padding: var(--space-4) var(--space-5);
665
+ border-bottom: 1px solid var(--color-glass-border);
666
+ }
667
+
668
+ .visualizer-panel__title {
669
+ font-family: var(--font-display);
670
+ font-size: 0.9rem;
671
+ font-weight: 600;
672
+ color: var(--color-text-secondary);
673
+ text-transform: uppercase;
674
+ letter-spacing: 0.05em;
675
+ }
676
+
677
+ .status-indicator {
678
+ display: flex;
679
+ align-items: center;
680
+ gap: var(--space-2);
681
+ }
682
+
683
+ .status-dot {
684
+ width: 8px;
685
+ height: 8px;
686
+ border-radius: 50%;
687
+ background: var(--color-text-muted);
688
+ }
689
+
690
+ .status-indicator--idle .status-dot {
691
+ background: var(--color-text-muted);
692
+ }
693
+
694
+ .status-indicator--running .status-dot {
695
+ background: var(--color-success);
696
+ animation: dot-pulse 1.5s ease-in-out infinite;
697
+ }
698
+
699
+ .status-indicator--error .status-dot {
700
+ background: var(--color-error);
701
+ }
702
+
703
+ @keyframes dot-pulse {
704
+ 0%, 100% { opacity: 1; }
705
+ 50% { opacity: 0.4; }
706
+ }
707
+
708
+ .status-text {
709
+ font-size: 0.85rem;
710
+ color: var(--color-text-secondary);
711
+ }
712
+
713
+ .visualizer-container {
714
+ position: relative;
715
+ height: 160px;
716
+ background: linear-gradient(180deg, transparent 0%, rgba(59, 130, 246, 0.03) 100%);
717
+ }
718
+
719
+ .visualizer-container canvas {
720
+ position: absolute;
721
+ inset: 0;
722
+ width: 100%;
723
+ height: 100%;
724
+ }
725
+
726
+ .visualizer-bars {
727
+ opacity: 0.7;
728
+ mix-blend-mode: screen;
729
+ }
730
+
731
+ /* Metrics Panel */
732
+ .metrics-panel {
733
+ display: flex;
734
+ flex-direction: column;
735
+ gap: var(--space-4);
736
+ }
737
+
738
+ .metrics-panel__title {
739
+ font-family: var(--font-display);
740
+ font-size: 0.8rem;
741
+ font-weight: 600;
742
+ color: var(--color-text-muted);
743
+ text-transform: uppercase;
744
+ letter-spacing: 0.08em;
745
+ }
746
+
747
+ .metric {
748
+ padding: var(--space-4);
749
+ background: var(--color-surface);
750
+ border: 1px solid var(--color-glass-border);
751
+ border-radius: var(--radius-md);
752
+ }
753
+
754
+ .metric--highlight {
755
+ background: linear-gradient(135deg, rgba(59, 130, 246, 0.08) 0%, rgba(139, 92, 246, 0.08) 100%);
756
+ border-color: rgba(139, 92, 246, 0.2);
757
+ }
758
+
759
+ .metric__header {
760
+ display: flex;
761
+ align-items: center;
762
+ justify-content: space-between;
763
+ margin-bottom: var(--space-2);
764
+ }
765
+
766
+ .metric__label {
767
+ font-size: 0.8rem;
768
+ color: var(--color-text-secondary);
769
+ }
770
+
771
+ .metric__info {
772
+ width: 16px;
773
+ height: 16px;
774
+ padding: 0;
775
+ background: none;
776
+ border: none;
777
+ color: var(--color-text-muted);
778
+ cursor: help;
779
+ position: relative;
780
+ }
781
+
782
+ .metric__info:hover {
783
+ color: var(--color-text-secondary);
784
+ }
785
+
786
+ .metric__info::after {
787
+ content: attr(data-tooltip);
788
+ position: absolute;
789
+ bottom: calc(100% + 8px);
790
+ right: 0;
791
+ width: 200px;
792
+ padding: var(--space-2) var(--space-3);
793
+ background: var(--color-surface-elevated);
794
+ border: 1px solid var(--color-glass-border);
795
+ border-radius: var(--radius-sm);
796
+ font-size: 0.75rem;
797
+ color: var(--color-text-secondary);
798
+ text-align: left;
799
+ line-height: 1.4;
800
+ opacity: 0;
801
+ visibility: hidden;
802
+ transform: translateY(4px);
803
+ transition: all var(--duration-fast) var(--ease-out);
804
+ z-index: 10;
805
+ pointer-events: none;
806
+ }
807
+
808
+ .metric__info:hover::after {
809
+ opacity: 1;
810
+ visibility: visible;
811
+ transform: translateY(0);
812
+ }
813
+
814
+ .metric__value {
815
+ display: flex;
816
+ align-items: baseline;
817
+ gap: var(--space-1);
818
+ }
819
+
820
+ .metric__number {
821
+ font-family: var(--font-mono);
822
+ font-size: 1.75rem;
823
+ font-weight: 700;
824
+ background: var(--gradient-primary);
825
+ -webkit-background-clip: text;
826
+ -webkit-text-fill-color: transparent;
827
+ background-clip: text;
828
+ }
829
+
830
+ .metric__number--large {
831
+ font-size: 2.25rem;
832
+ }
833
+
834
+ .metric__unit {
835
+ font-family: var(--font-mono);
836
+ font-size: 0.9rem;
837
+ color: var(--color-text-muted);
838
+ }
839
+
840
+ .metric__bar {
841
+ height: 4px;
842
+ background: rgba(255, 255, 255, 0.08);
843
+ border-radius: var(--radius-full);
844
+ margin-top: var(--space-3);
845
+ overflow: hidden;
846
+ }
847
+
848
+ .metric__bar-fill {
849
+ height: 100%;
850
+ background: var(--gradient-primary);
851
+ border-radius: var(--radius-full);
852
+ transition: width var(--duration-slow) var(--ease-out);
853
+ width: 0%;
854
+ }
855
+
856
+ .metric__context {
857
+ font-size: 0.75rem;
858
+ color: var(--color-text-muted);
859
+ margin-top: var(--space-2);
860
+ }
861
+
862
+ .metric--status {
863
+ display: flex;
864
+ align-items: center;
865
+ justify-content: space-between;
866
+ }
867
+
868
+ .model-status {
869
+ display: flex;
870
+ align-items: center;
871
+ gap: var(--space-2);
872
+ }
873
+
874
+ .model-status__dot {
875
+ width: 8px;
876
+ height: 8px;
877
+ border-radius: 50%;
878
+ background: var(--color-text-muted);
879
+ }
880
+
881
+ .model-status--loading .model-status__dot {
882
+ background: var(--color-warning);
883
+ animation: dot-pulse 1s ease-in-out infinite;
884
+ }
885
+
886
+ .model-status--ready .model-status__dot {
887
+ background: var(--color-success);
888
+ }
889
+
890
+ .model-status__text {
891
+ font-size: 0.85rem;
892
+ color: var(--color-text-secondary);
893
+ }
894
+
895
+ /* ============================================
896
+ FOOTER
897
+ ============================================ */
898
+ .footer {
899
+ margin-top: auto;
900
+ padding-top: var(--space-8);
901
+ text-align: center;
902
+ }
903
+
904
+ .footer p {
905
+ font-size: 0.8rem;
906
+ color: var(--color-text-muted);
907
+ }
908
+
909
+ /* ============================================
910
+ RESPONSIVE
911
+ ============================================ */
912
+ @media (max-width: 768px) {
913
+ .app-shell {
914
+ padding: var(--space-6) var(--space-4);
915
+ }
916
+
917
+ .logo__text {
918
+ font-size: 2rem;
919
+ }
920
+
921
+ .device-cards {
922
+ grid-template-columns: 1fr;
923
+ }
924
+
925
+ .output-section {
926
+ grid-template-columns: 1fr;
927
+ }
928
+
929
+ .metrics-panel {
930
+ display: grid;
931
+ grid-template-columns: repeat(2, 1fr);
932
+ }
933
+
934
+ .metric:last-child {
935
+ grid-column: span 2;
936
+ }
937
+ }
938
+
939
+ @media (max-width: 480px) {
940
+ .hero__brand {
941
+ flex-direction: column;
942
+ gap: var(--space-3);
943
+ }
944
+
945
+ .controls {
946
+ flex-direction: column;
947
+ }
948
+
949
+ .btn--primary {
950
+ width: 100%;
951
+ }
952
+
953
+ .sample-texts {
954
+ flex-direction: column;
955
+ align-items: flex-start;
956
+ }
957
+
958
+ .sample-btn {
959
+ width: 100%;
960
+ text-align: center;
961
+ }
962
+
963
+ .metrics-panel {
964
+ grid-template-columns: 1fr;
965
+ }
966
+
967
+ .metric:last-child {
968
+ grid-column: span 1;
969
+ }
970
+
971
+ .metric__number {
972
+ font-size: 1.5rem;
973
+ }
974
+
975
+ .metric__number--large {
976
+ font-size: 1.75rem;
977
+ }
978
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff