Spaces:
Running
Running
Initial commit
Browse files
- CODE-LICENSE +201 -0
- EventEmitter.js +59 -0
- PCMPlayerWorklet.js +563 -0
- README.md +77 -12
- index.html +193 -19
- inference-worker.js +1044 -0
- onnx-streaming.js +638 -0
- onnx/ONNX-LICENSE +408 -0
- onnx/flow_lm_flow_int8.onnx +3 -0
- onnx/flow_lm_main_int8.onnx +3 -0
- onnx/mimi_decoder_int8.onnx +3 -0
- onnx/mimi_encoder.onnx +3 -0
- onnx/text_conditioner.onnx +3 -0
- sentencepiece-browser.js +60 -0
- sentencepiece.js +0 -0
- server.py +36 -0
- style.css +948 -18
- tokenizer.model +3 -0
- voices.bin +3 -0
CODE-LICENSE
ADDED
|
@@ -0,0 +1,201 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Apache License
|
| 2 |
+
Version 2.0, January 2004
|
| 3 |
+
http://www.apache.org/licenses/
|
| 4 |
+
|
| 5 |
+
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
| 6 |
+
|
| 7 |
+
1. Definitions.
|
| 8 |
+
|
| 9 |
+
"License" shall mean the terms and conditions for use, reproduction,
|
| 10 |
+
and distribution as defined by Sections 1 through 9 of this document.
|
| 11 |
+
|
| 12 |
+
"Licensor" shall mean the copyright owner or entity authorized by
|
| 13 |
+
the copyright owner that is granting the License.
|
| 14 |
+
|
| 15 |
+
"Legal Entity" shall mean the union of the acting entity and all
|
| 16 |
+
other entities that control, are controlled by, or are under common
|
| 17 |
+
control with that entity. For the purposes of this definition,
|
| 18 |
+
"control" means (i) the power, direct or indirect, to cause the
|
| 19 |
+
direction or management of such entity, whether by contract or
|
| 20 |
+
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
| 21 |
+
outstanding shares, or (iii) beneficial ownership of such entity.
|
| 22 |
+
|
| 23 |
+
"You" (or "Your") shall mean an individual or Legal Entity
|
| 24 |
+
exercising permissions granted by this License.
|
| 25 |
+
|
| 26 |
+
"Source" form shall mean the preferred form for making modifications,
|
| 27 |
+
including but not limited to software source code, documentation
|
| 28 |
+
source, and configuration files.
|
| 29 |
+
|
| 30 |
+
"Object" form shall mean any form resulting from mechanical
|
| 31 |
+
transformation or translation of a Source form, including but
|
| 32 |
+
not limited to compiled object code, generated documentation,
|
| 33 |
+
and conversions to other media types.
|
| 34 |
+
|
| 35 |
+
"Work" shall mean the work of authorship, whether in Source or
|
| 36 |
+
Object form, made available under the License, as indicated by a
|
| 37 |
+
copyright notice that is included in or attached to the work
|
| 38 |
+
(an example is provided in the Appendix below).
|
| 39 |
+
|
| 40 |
+
"Derivative Works" shall mean any work, whether in Source or Object
|
| 41 |
+
form, that is based on (or derived from) the Work and for which the
|
| 42 |
+
editorial revisions, annotations, elaborations, or other modifications
|
| 43 |
+
represent, as a whole, an original work of authorship. For the purposes
|
| 44 |
+
of this License, Derivative Works shall not include works that remain
|
| 45 |
+
separable from, or merely link (or bind by name) to the interfaces of,
|
| 46 |
+
the Work and Derivative Works thereof.
|
| 47 |
+
|
| 48 |
+
"Contribution" shall mean any work of authorship, including
|
| 49 |
+
the original version of the Work and any modifications or additions
|
| 50 |
+
to that Work or Derivative Works thereof, that is intentionally
|
| 51 |
+
submitted to Licensor for inclusion in the Work by the copyright owner
|
| 52 |
+
or by an individual or Legal Entity authorized to submit on behalf of
|
| 53 |
+
the copyright owner. For the purposes of this definition, "submitted"
|
| 54 |
+
means any form of electronic, verbal, or written communication sent
|
| 55 |
+
to the Licensor or its representatives, including but not limited to
|
| 56 |
+
communication on electronic mailing lists, source code control systems,
|
| 57 |
+
and issue tracking systems that are managed by, or on behalf of, the
|
| 58 |
+
Licensor for the purpose of discussing and improving the Work, but
|
| 59 |
+
excluding communication that is conspicuously marked or otherwise
|
| 60 |
+
designated in writing by the copyright owner as "Not a Contribution."
|
| 61 |
+
|
| 62 |
+
"Contributor" shall mean Licensor and any individual or Legal Entity
|
| 63 |
+
on behalf of whom a Contribution has been received by Licensor and
|
| 64 |
+
subsequently incorporated within the Work.
|
| 65 |
+
|
| 66 |
+
2. Grant of Copyright License. Subject to the terms and conditions of
|
| 67 |
+
this License, each Contributor hereby grants to You a perpetual,
|
| 68 |
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
| 69 |
+
copyright license to reproduce, prepare Derivative Works of,
|
| 70 |
+
publicly display, publicly perform, sublicense, and distribute the
|
| 71 |
+
Work and such Derivative Works in Source or Object form.
|
| 72 |
+
|
| 73 |
+
3. Grant of Patent License. Subject to the terms and conditions of
|
| 74 |
+
this License, each Contributor hereby grants to You a perpetual,
|
| 75 |
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
| 76 |
+
(except as stated in this section) patent license to make, have made,
|
| 77 |
+
use, offer to sell, sell, import, and otherwise transfer the Work,
|
| 78 |
+
where such license applies only to those patent claims licensable
|
| 79 |
+
by such Contributor that are necessarily infringed by their
|
| 80 |
+
Contribution(s) alone or by combination of their Contribution(s)
|
| 81 |
+
with the Work to which such Contribution(s) was submitted. If You
|
| 82 |
+
institute patent litigation against any entity (including a
|
| 83 |
+
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
| 84 |
+
or a Contribution incorporated within the Work constitutes direct
|
| 85 |
+
or contributory patent infringement, then any patent licenses
|
| 86 |
+
granted to You under this License for that Work shall terminate
|
| 87 |
+
as of the date such litigation is filed.
|
| 88 |
+
|
| 89 |
+
4. Redistribution. You may reproduce and distribute copies of the
|
| 90 |
+
Work or Derivative Works thereof in any medium, with or without
|
| 91 |
+
modifications, and in Source or Object form, provided that You
|
| 92 |
+
meet the following conditions:
|
| 93 |
+
|
| 94 |
+
(a) You must give any other recipients of the Work or
|
| 95 |
+
Derivative Works a copy of this License; and
|
| 96 |
+
|
| 97 |
+
(b) You must cause any modified files to carry prominent notices
|
| 98 |
+
stating that You changed the files; and
|
| 99 |
+
|
| 100 |
+
(c) You must retain, in the Source form of any Derivative Works
|
| 101 |
+
that You distribute, all copyright, patent, trademark, and
|
| 102 |
+
attribution notices from the Source form of the Work,
|
| 103 |
+
excluding those notices that do not pertain to any part of
|
| 104 |
+
the Derivative Works; and
|
| 105 |
+
|
| 106 |
+
(d) If the Work includes a "NOTICE" text file as part of its
|
| 107 |
+
distribution, then any Derivative Works that You distribute must
|
| 108 |
+
include a readable copy of the attribution notices contained
|
| 109 |
+
within such NOTICE file, excluding those notices that do not
|
| 110 |
+
pertain to any part of the Derivative Works, in at least one
|
| 111 |
+
of the following places: within a NOTICE text file distributed
|
| 112 |
+
as part of the Derivative Works; within the Source form or
|
| 113 |
+
documentation, if provided along with the Derivative Works; or,
|
| 114 |
+
within a display generated by the Derivative Works, if and
|
| 115 |
+
wherever such third-party notices normally appear. The contents
|
| 116 |
+
of the NOTICE file are for informational purposes only and
|
| 117 |
+
do not modify the License. You may add Your own attribution
|
| 118 |
+
notices within Derivative Works that You distribute, alongside
|
| 119 |
+
or as an addendum to the NOTICE text from the Work, provided
|
| 120 |
+
that such additional attribution notices cannot be construed
|
| 121 |
+
as modifying the License.
|
| 122 |
+
|
| 123 |
+
You may add Your own copyright statement to Your modifications and
|
| 124 |
+
may provide additional or different license terms and conditions
|
| 125 |
+
for use, reproduction, or distribution of Your modifications, or
|
| 126 |
+
for any such Derivative Works as a whole, provided Your use,
|
| 127 |
+
reproduction, and distribution of the Work otherwise complies with
|
| 128 |
+
the conditions stated in this License.
|
| 129 |
+
|
| 130 |
+
5. Submission of Contributions. Unless You explicitly state otherwise,
|
| 131 |
+
any Contribution intentionally submitted for inclusion in the Work
|
| 132 |
+
by You to the Licensor shall be under the terms and conditions of
|
| 133 |
+
this License, without any additional terms or conditions.
|
| 134 |
+
Notwithstanding the above, nothing herein shall supersede or modify
|
| 135 |
+
the terms of any separate license agreement you may have executed
|
| 136 |
+
with Licensor regarding such Contributions.
|
| 137 |
+
|
| 138 |
+
6. Trademarks. This License does not grant permission to use the trade
|
| 139 |
+
names, trademarks, service marks, or product names of the Licensor,
|
| 140 |
+
except as required for reasonable and customary use in describing the
|
| 141 |
+
origin of the Work and reproducing the content of the NOTICE file.
|
| 142 |
+
|
| 143 |
+
7. Disclaimer of Warranty. Unless required by applicable law or
|
| 144 |
+
agreed to in writing, Licensor provides the Work (and each
|
| 145 |
+
Contributor provides its Contributions) on an "AS IS" BASIS,
|
| 146 |
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
| 147 |
+
implied, including, without limitation, any warranties or conditions
|
| 148 |
+
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
| 149 |
+
PARTICULAR PURPOSE. You are solely responsible for determining the
|
| 150 |
+
appropriateness of using or redistributing the Work and assume any
|
| 151 |
+
risks associated with Your exercise of permissions under this License.
|
| 152 |
+
|
| 153 |
+
8. Limitation of Liability. In no event and under no legal theory,
|
| 154 |
+
whether in tort (including negligence), contract, or otherwise,
|
| 155 |
+
unless required by applicable law (such as deliberate and grossly
|
| 156 |
+
negligent acts) or agreed to in writing, shall any Contributor be
|
| 157 |
+
liable to You for damages, including any direct, indirect, special,
|
| 158 |
+
incidental, or consequential damages of any character arising as a
|
| 159 |
+
result of this License or out of the use or inability to use the
|
| 160 |
+
Work (including but not limited to damages for loss of goodwill,
|
| 161 |
+
work stoppage, computer failure or malfunction, or any and all
|
| 162 |
+
other commercial damages or losses), even if such Contributor
|
| 163 |
+
has been advised of the possibility of such damages.
|
| 164 |
+
|
| 165 |
+
9. Accepting Warranty or Additional Liability. While redistributing
|
| 166 |
+
the Work or Derivative Works thereof, You may choose to offer,
|
| 167 |
+
and charge a fee for, acceptance of support, warranty, indemnity,
|
| 168 |
+
or other liability obligations and/or rights consistent with this
|
| 169 |
+
License. However, in accepting such obligations, You may act only
|
| 170 |
+
on Your own behalf and on Your sole responsibility, not on behalf
|
| 171 |
+
of any other Contributor, and only if You agree to indemnify,
|
| 172 |
+
defend, and hold each Contributor harmless for any liability
|
| 173 |
+
incurred by, or claims asserted against, such Contributor by reason
|
| 174 |
+
of your accepting any such warranty or additional liability.
|
| 175 |
+
|
| 176 |
+
END OF TERMS AND CONDITIONS
|
| 177 |
+
|
| 178 |
+
APPENDIX: How to apply the Apache License to your work.
|
| 179 |
+
|
| 180 |
+
To apply the Apache License to your work, attach the following
|
| 181 |
+
boilerplate notice, with the fields enclosed by brackets "[]"
|
| 182 |
+
replaced with your own identifying information. (Don't include
|
| 183 |
+
the brackets!) The text should be enclosed in the appropriate
|
| 184 |
+
comment syntax for the file format. We also recommend that a
|
| 185 |
+
file or class name and description of purpose be included on the
|
| 186 |
+
same "printed page" as the copyright notice for easier
|
| 187 |
+
identification within third-party archives.
|
| 188 |
+
|
| 189 |
+
Copyright [yyyy] [name of copyright owner]
|
| 190 |
+
|
| 191 |
+
Licensed under the Apache License, Version 2.0 (the "License");
|
| 192 |
+
you may not use this file except in compliance with the License.
|
| 193 |
+
You may obtain a copy of the License at
|
| 194 |
+
|
| 195 |
+
http://www.apache.org/licenses/LICENSE-2.0
|
| 196 |
+
|
| 197 |
+
Unless required by applicable law or agreed to in writing, software
|
| 198 |
+
distributed under the License is distributed on an "AS IS" BASIS,
|
| 199 |
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 200 |
+
See the License for the specific language governing permissions and
|
| 201 |
+
limitations under the License.
|
EventEmitter.js
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/**
 * Minimal DOM-style event emitter.
 *
 * Listeners are stored per event name as `{ callback, once }` records in
 * `this.events`. Dispatch is synchronous and invokes listeners in
 * registration order with `this` bound to the emitter.
 */
export class EventEmitter {
  constructor() {
    // Prototype-less map: with a plain `{}` an event named "constructor" or
    // "toString" would resolve to an inherited Object.prototype member and be
    // mistaken for an existing listener list.
    this.events = Object.create(null);
  }

  /**
   * Register a listener for an event name.
   *
   * @param {string} event - Event name to listen for.
   * @param {Function} listener - Called with the event object on dispatch.
   * @param {{once?: boolean}} [options] - `once: true` removes the listener
   *   after its first invocation.
   */
  addEventListener(event, listener, options = {}) {
    if (!this.events[event]) {
      this.events[event] = [];
    }

    this.events[event].push({
      callback: listener,
      once: options.once || false,
    });
  }

  /**
   * Remove every registration of `listener` for the given event name.
   * Does nothing when the event has no listeners.
   *
   * @param {string} event - Event name the listener was registered under.
   * @param {Function} listener - The callback originally registered.
   */
  removeEventListener(event, listener) {
    if (!this.events[event]) return;

    this.events[event] = this.events[event].filter(
      (entry) => entry.callback !== listener
    );
  }

  /**
   * Synchronously invoke the listeners registered for `event.type`.
   *
   * Iterates over a snapshot of the listener list so that listeners may add
   * or remove listeners while the event is being dispatched:
   *  - a listener added during dispatch is kept and first runs on the next
   *    dispatch;
   *  - a listener removed during dispatch stays removed and is skipped if it
   *    has not run yet;
   *  - a `once` listener is unregistered just before it is called.
   *
   * @param {{type: string}} event - Event object; only `type` is read here.
   */
  dispatchEvent(event) {
    const eventName = event.type;
    const registered = this.events[eventName];
    if (!registered) return;

    for (const entry of [...registered]) {
      // Re-read the live list each time: an earlier listener in this same
      // dispatch may have removed this entry.
      const live = this.events[eventName];
      if (!live || !live.includes(entry)) continue;

      if (entry.once) {
        this.events[eventName] = live.filter((other) => other !== entry);
      }

      entry.callback.call(this, event);
    }
  }

  /**
   * Convenience wrapper: build a CustomEvent carrying `data` as its `detail`
   * and dispatch it.
   *
   * @param {string} eventName - Event name (becomes `event.type`).
   * @param {*} data - Payload exposed to listeners as `event.detail`.
   */
  emit(eventName, data) {
    const event = new CustomEvent(eventName, { detail: data });
    this.dispatchEvent(event);
  }
}
|
| 42 |
+
|
| 43 |
+
/**
 * Lightweight event object that mirrors a subset of the DOM CustomEvent shape
 * (`type`, `detail`, `target`, `currentTarget`, `defaultPrevented`, `bubbles`,
 * `cancelable`). It is a plain data carrier: nothing in this class sets
 * `target`/`currentTarget` or propagates the event.
 */
export class CustomEvent {
  /**
   * @param {string} type - Event name.
   * @param {{detail?: *, bubbles?: boolean, cancelable?: boolean}|null} [options]
   *   Optional init values. `null` is treated the same as omitting the
   *   argument instead of throwing.
   */
  constructor(type, options = {}) {
    // The default parameter only covers `undefined`; guard against an
    // explicit `null` as well.
    const init = options || {};

    this.type = type;
    this.detail = init.detail;
    this.target = null;
    this.currentTarget = null;
    this.defaultPrevented = false;
    this.bubbles = init.bubbles || false;
    this.cancelable = init.cancelable || false;
  }

  /**
   * Flag the event as default-prevented. Has no effect unless the event was
   * created with `cancelable` set.
   */
  preventDefault() {
    if (this.cancelable) {
      this.defaultPrevented = true;
    }
  }
}
|
PCMPlayerWorklet.js
ADDED
|
@@ -0,0 +1,563 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { EventEmitter, CustomEvent } from './EventEmitter.js';
|
| 2 |
+
|
| 3 |
+
/**
|
| 4 |
+
* PCMPlayerWorklet - Drop-in replacement for PCMPlayer using AudioWorklet
|
| 5 |
+
* Uses dynamic buffer management with backpressure for smooth playback
|
| 6 |
+
*/
|
| 7 |
+
export class PCMPlayerWorklet extends EventEmitter {
|
| 8 |
+
constructor(audioContext, options = {}) {
|
| 9 |
+
super();
|
| 10 |
+
this.audioContext = audioContext;
|
| 11 |
+
this.options = options;
|
| 12 |
+
this.workletNode = null;
|
| 13 |
+
this.isInitialized = false;
|
| 14 |
+
this.playbackTime = 0; // For API compatibility
|
| 15 |
+
|
| 16 |
+
// Audio nodes
|
| 17 |
+
this.gainNode = this.audioContext.createGain();
|
| 18 |
+
this.gainNode.connect(this.audioContext.destination);
|
| 19 |
+
this.analyser = this.audioContext.createAnalyser();
|
| 20 |
+
this.gainNode.connect(this.analyser);
|
| 21 |
+
|
| 22 |
+
// Queue for chunks waiting to be sent
|
| 23 |
+
this.pendingChunks = [];
|
| 24 |
+
this.availableCapacity = 0;
|
| 25 |
+
this.isWorkletReady = false;
|
| 26 |
+
this.hasReceivedInitialCapacity = false;
|
| 27 |
+
|
| 28 |
+
// Metrics
|
| 29 |
+
this.metrics = {
|
| 30 |
+
chunksPlayed: 0,
|
| 31 |
+
underruns: 0,
|
| 32 |
+
bufferLevel: 0,
|
| 33 |
+
samplesPlayed: 0
|
| 34 |
+
};
|
| 35 |
+
|
| 36 |
+
// Initialize worklet
|
| 37 |
+
this.initPromise = this.initialize();
|
| 38 |
+
}
|
| 39 |
+
|
| 40 |
+
async initialize() {
|
| 41 |
+
if (this.isInitialized) return;
|
| 42 |
+
|
| 43 |
+
try {
|
| 44 |
+
// Calculate buffer parameters
|
| 45 |
+
const sampleRate = this.audioContext.sampleRate;
|
| 46 |
+
const minBufferMs = this.options.minBufferBeforePlaybackMs || 300;
|
| 47 |
+
const minBufferSamples = Math.floor(minBufferMs * sampleRate / 1000);
|
| 48 |
+
|
| 49 |
+
// Buffer size: enough for smooth playback but not excessive
|
| 50 |
+
// Target 60 seconds of buffer to prevent any overflow issues
|
| 51 |
+
const bufferSizeSamples = sampleRate * 60;
|
| 52 |
+
|
| 53 |
+
// Create the worklet processor code
|
| 54 |
+
const processorCode = `
|
| 55 |
+
class PCMProcessor extends AudioWorkletProcessor {
|
| 56 |
+
constructor() {
|
| 57 |
+
super();
|
| 58 |
+
|
| 59 |
+
// Ring buffer - sized appropriately
|
| 60 |
+
this.bufferSize = ${bufferSizeSamples};
|
| 61 |
+
this.ringBuffer = new Float32Array(this.bufferSize);
|
| 62 |
+
this.readPos = 0;
|
| 63 |
+
this.writePos = 0;
|
| 64 |
+
this.isPlaying = false;
|
| 65 |
+
|
| 66 |
+
// Configuration
|
| 67 |
+
this.minBufferSamples = ${minBufferSamples};
|
| 68 |
+
this.targetBufferSamples = ${minBufferSamples * 2}; // Target 2x min for stability
|
| 69 |
+
|
| 70 |
+
// State
|
| 71 |
+
this.streamEnded = false;
|
| 72 |
+
this.playbackCompleteReported = false;
|
| 73 |
+
|
| 74 |
+
// Stats reporting
|
| 75 |
+
this.frameCount = 0;
|
| 76 |
+
this.reportInterval = 256; // Report every ~5ms at 48kHz
|
| 77 |
+
|
| 78 |
+
this.port.onmessage = (e) => {
|
| 79 |
+
switch(e.data.type) {
|
| 80 |
+
case 'audio':
|
| 81 |
+
this.addAudio(e.data.data);
|
| 82 |
+
break;
|
| 83 |
+
case 'reset':
|
| 84 |
+
this.reset();
|
| 85 |
+
break;
|
| 86 |
+
case 'stream-ended':
|
| 87 |
+
this.streamEnded = true;
|
| 88 |
+
break;
|
| 89 |
+
}
|
| 90 |
+
};
|
| 91 |
+
|
| 92 |
+
// Send initial capacity
|
| 93 |
+
this.sendCapacityUpdate();
|
| 94 |
+
}
|
| 95 |
+
|
| 96 |
+
addAudio(float32Data) {
|
| 97 |
+
const samples = float32Data.length;
|
| 98 |
+
const available = this.getAvailableSpace();
|
| 99 |
+
const bufferedBefore = this.getBufferedSamples();
|
| 100 |
+
|
| 101 |
+
|
| 102 |
+
if (samples > available) {
|
| 103 |
+
// This shouldn't happen with proper backpressure
|
| 104 |
+
console.error('Buffer overflow - bug in backpressure. Samples:', samples, 'Available:', available, 'Buffered:', this.getBufferedSamples());
|
| 105 |
+
// Drop oldest data to recover
|
| 106 |
+
const overflow = samples - available;
|
| 107 |
+
this.readPos = (this.readPos + overflow) % this.bufferSize;
|
| 108 |
+
}
|
| 109 |
+
|
| 110 |
+
// Write to ring buffer
|
| 111 |
+
if (this.writePos + samples <= this.bufferSize) {
|
| 112 |
+
this.ringBuffer.set(float32Data, this.writePos);
|
| 113 |
+
this.writePos += samples;
|
| 114 |
+
if (this.writePos >= this.bufferSize) {
|
| 115 |
+
this.writePos = 0;
|
| 116 |
+
}
|
| 117 |
+
} else {
|
| 118 |
+
const firstPart = this.bufferSize - this.writePos;
|
| 119 |
+
const secondPart = samples - firstPart;
|
| 120 |
+
this.ringBuffer.set(float32Data.slice(0, firstPart), this.writePos);
|
| 121 |
+
this.ringBuffer.set(float32Data.slice(firstPart), 0);
|
| 122 |
+
this.writePos = secondPart;
|
| 123 |
+
}
|
| 124 |
+
|
| 125 |
+
// Auto-start when we have enough buffered
|
| 126 |
+
const buffered = this.getBufferedSamples();
|
| 127 |
+
|
| 128 |
+
if (!this.isPlaying && buffered >= this.minBufferSamples) {
|
| 129 |
+
const now = currentTime;
|
| 130 |
+
this.isPlaying = true;
|
| 131 |
+
// Notify that playback has started
|
| 132 |
+
this.port.postMessage({
|
| 133 |
+
type: 'playback-started',
|
| 134 |
+
buffered: buffered,
|
| 135 |
+
audioTime: now
|
| 136 |
+
});
|
| 137 |
+
}
|
| 138 |
+
|
| 139 |
+
// Report capacity after adding
|
| 140 |
+
this.sendCapacityUpdate();
|
| 141 |
+
}
|
| 142 |
+
|
| 143 |
+
getAvailableSpace() {
|
| 144 |
+
const used = this.getBufferedSamples();
|
| 145 |
+
return this.bufferSize - used - 128; // Leave small safety margin
|
| 146 |
+
}
|
| 147 |
+
|
| 148 |
+
getBufferedSamples() {
|
| 149 |
+
if (this.writePos >= this.readPos) {
|
| 150 |
+
return this.writePos - this.readPos;
|
| 151 |
+
} else {
|
| 152 |
+
return this.bufferSize - this.readPos + this.writePos;
|
| 153 |
+
}
|
| 154 |
+
}
|
| 155 |
+
|
| 156 |
+
sendCapacityUpdate() {
|
| 157 |
+
const buffered = this.getBufferedSamples();
|
| 158 |
+
const capacity = this.getAvailableSpace();
|
| 159 |
+
|
| 160 |
+
// Calculate how much we want to receive
|
| 161 |
+
// If buffer is low, request more; if it's full, request nothing
|
| 162 |
+
let requestSamples = 0;
|
| 163 |
+
if (buffered < this.targetBufferSamples) {
|
| 164 |
+
requestSamples = Math.min(capacity, this.targetBufferSamples - buffered);
|
| 165 |
+
}
|
| 166 |
+
|
| 167 |
+
this.port.postMessage({
|
| 168 |
+
type: 'capacity',
|
| 169 |
+
buffered: buffered,
|
| 170 |
+
capacity: capacity,
|
| 171 |
+
requestSamples: requestSamples,
|
| 172 |
+
isPlaying: this.isPlaying
|
| 173 |
+
});
|
| 174 |
+
}
|
| 175 |
+
|
| 176 |
+
process(inputs, outputs, parameters) {
|
| 177 |
+
const output = outputs[0];
|
| 178 |
+
if (!output || !output[0]) return true;
|
| 179 |
+
|
| 180 |
+
const outputChannel = output[0];
|
| 181 |
+
const numSamples = outputChannel.length;
|
| 182 |
+
|
| 183 |
+
// Report stats periodically
|
| 184 |
+
if (++this.frameCount % this.reportInterval === 0) {
|
| 185 |
+
this.sendCapacityUpdate();
|
| 186 |
+
}
|
| 187 |
+
|
| 188 |
+
if (!this.isPlaying) {
|
| 189 |
+
outputChannel.fill(0);
|
| 190 |
+
return true;
|
| 191 |
+
}
|
| 192 |
+
|
| 193 |
+
const buffered = this.getBufferedSamples();
|
| 194 |
+
|
| 195 |
+
if (buffered < numSamples) {
|
| 196 |
+
// Underrun - play what we have and fill rest with silence
|
| 197 |
+
let samplesRead = 0;
|
| 198 |
+
|
| 199 |
+
if (buffered > 0) {
|
| 200 |
+
// Play whatever samples we DO have
|
| 201 |
+
if (this.readPos + buffered <= this.bufferSize) {
|
| 202 |
+
for (let i = 0; i < buffered; i++) {
|
| 203 |
+
outputChannel[i] = this.ringBuffer[this.readPos + i];
|
| 204 |
+
}
|
| 205 |
+
this.readPos += buffered;
|
| 206 |
+
if (this.readPos >= this.bufferSize) {
|
| 207 |
+
this.readPos = 0;
|
| 208 |
+
}
|
| 209 |
+
} else {
|
| 210 |
+
// Wrap-around case
|
| 211 |
+
const firstPart = this.bufferSize - this.readPos;
|
| 212 |
+
const secondPart = buffered - firstPart;
|
| 213 |
+
|
| 214 |
+
for (let i = 0; i < firstPart; i++) {
|
| 215 |
+
outputChannel[i] = this.ringBuffer[this.readPos + i];
|
| 216 |
+
}
|
| 217 |
+
for (let i = 0; i < secondPart; i++) {
|
| 218 |
+
outputChannel[firstPart + i] = this.ringBuffer[i];
|
| 219 |
+
}
|
| 220 |
+
|
| 221 |
+
this.readPos = secondPart;
|
| 222 |
+
}
|
| 223 |
+
samplesRead = buffered;
|
| 224 |
+
}
|
| 225 |
+
|
| 226 |
+
// Fill remaining with silence
|
| 227 |
+
for (let i = samplesRead; i < numSamples; i++) {
|
| 228 |
+
outputChannel[i] = 0;
|
| 229 |
+
}
|
| 230 |
+
|
| 231 |
+
// Check for playback complete
|
| 232 |
+
if (this.streamEnded && buffered === 0) {
|
| 233 |
+
if (!this.playbackCompleteReported) {
|
| 234 |
+
this.port.postMessage({
|
| 235 |
+
type: 'playback-complete'
|
| 236 |
+
});
|
| 237 |
+
this.playbackCompleteReported = true;
|
| 238 |
+
}
|
| 239 |
+
this.isPlaying = false;
|
| 240 |
+
this.streamEnded = false;
|
| 241 |
+
} else {
|
| 242 |
+
// Request more data urgently
|
| 243 |
+
this.port.postMessage({
|
| 244 |
+
type: 'underrun',
|
| 245 |
+
buffered: buffered,
|
| 246 |
+
needed: numSamples
|
| 247 |
+
});
|
| 248 |
+
this.sendCapacityUpdate();
|
| 249 |
+
}
|
| 250 |
+
} else {
|
| 251 |
+
// Normal playback - read from ring buffer
|
| 252 |
+
if (this.readPos + numSamples <= this.bufferSize) {
|
| 253 |
+
for (let i = 0; i < numSamples; i++) {
|
| 254 |
+
outputChannel[i] = this.ringBuffer[this.readPos + i];
|
| 255 |
+
}
|
| 256 |
+
this.readPos += numSamples;
|
| 257 |
+
if (this.readPos >= this.bufferSize) {
|
| 258 |
+
this.readPos = 0;
|
| 259 |
+
}
|
| 260 |
+
} else {
|
| 261 |
+
// Wrap-around case
|
| 262 |
+
const firstPart = this.bufferSize - this.readPos;
|
| 263 |
+
const secondPart = numSamples - firstPart;
|
| 264 |
+
|
| 265 |
+
for (let i = 0; i < firstPart; i++) {
|
| 266 |
+
outputChannel[i] = this.ringBuffer[this.readPos + i];
|
| 267 |
+
}
|
| 268 |
+
for (let i = 0; i < secondPart; i++) {
|
| 269 |
+
outputChannel[firstPart + i] = this.ringBuffer[i];
|
| 270 |
+
}
|
| 271 |
+
|
| 272 |
+
this.readPos = secondPart;
|
| 273 |
+
}
|
| 274 |
+
}
|
| 275 |
+
|
| 276 |
+
return true;
|
| 277 |
+
}
|
| 278 |
+
|
| 279 |
+
reset() {
|
| 280 |
+
this.readPos = 0;
|
| 281 |
+
this.writePos = 0;
|
| 282 |
+
this.ringBuffer.fill(0);
|
| 283 |
+
this.isPlaying = false;
|
| 284 |
+
this.streamEnded = false;
|
| 285 |
+
this.playbackCompleteReported = false;
|
| 286 |
+
this.sendCapacityUpdate();
|
| 287 |
+
}
|
| 288 |
+
}
|
| 289 |
+
|
| 290 |
+
registerProcessor('pcm-processor', PCMProcessor);
|
| 291 |
+
`;
|
| 292 |
+
|
| 293 |
+
// Create and load worklet
|
| 294 |
+
const blob = new Blob([processorCode], { type: 'application/javascript' });
|
| 295 |
+
const workletUrl = URL.createObjectURL(blob);
|
| 296 |
+
|
| 297 |
+
await this.audioContext.audioWorklet.addModule(workletUrl);
|
| 298 |
+
URL.revokeObjectURL(workletUrl);
|
| 299 |
+
|
| 300 |
+
// Create worklet node
|
| 301 |
+
this.workletNode = new AudioWorkletNode(this.audioContext, 'pcm-processor');
|
| 302 |
+
this.workletNode.connect(this.gainNode);
|
| 303 |
+
|
| 304 |
+
// Handle messages from worklet
|
| 305 |
+
this.workletNode.port.onmessage = (e) => {
|
| 306 |
+
switch (e.data.type) {
|
| 307 |
+
case 'capacity':
|
| 308 |
+
this.handleCapacityUpdate(e.data);
|
| 309 |
+
break;
|
| 310 |
+
|
| 311 |
+
case 'underrun':
|
| 312 |
+
this.metrics.underruns++;
|
| 313 |
+
console.warn(`[MAIN THREAD] ⚠️ UNDERRUN #${this.metrics.underruns} detected! buffered=${e.data.buffered} samples, needed=${e.data.needed} samples`);
|
| 314 |
+
// Try to send more data immediately
|
| 315 |
+
this.processPendingChunks();
|
| 316 |
+
break;
|
| 317 |
+
|
| 318 |
+
case 'playback-started':
|
| 319 |
+
console.log(`[MAIN THREAD] Received playback-started at performance.now=${performance.now().toFixed(2)}ms, audioContext.currentTime=${this.audioContext.currentTime.toFixed(3)}s, worklet reported audioTime=${e.data.audioTime}s`);
|
| 320 |
+
this.emit('firstPlayback', {
|
| 321 |
+
startTime: this.audioContext.currentTime,
|
| 322 |
+
bufferedSamples: e.data.buffered
|
| 323 |
+
});
|
| 324 |
+
break;
|
| 325 |
+
|
| 326 |
+
case 'playback-complete':
|
| 327 |
+
this.emit('audioEnded', {
|
| 328 |
+
endTime: this.audioContext.currentTime
|
| 329 |
+
});
|
| 330 |
+
break;
|
| 331 |
+
}
|
| 332 |
+
};
|
| 333 |
+
|
| 334 |
+
this.isInitialized = true;
|
| 335 |
+
this.isWorkletReady = true;
|
| 336 |
+
} catch (error) {
|
| 337 |
+
console.error('Failed to initialize PCMPlayerWorklet:', error);
|
| 338 |
+
throw error;
|
| 339 |
+
}
|
| 340 |
+
}
|
| 341 |
+
|
| 342 |
+
handleCapacityUpdate(data) {
|
| 343 |
+
this.availableCapacity = data.capacity;
|
| 344 |
+
this.metrics.bufferLevel = data.buffered;
|
| 345 |
+
|
| 346 |
+
// console.log(`[CAPACITY] Update at ${performance.now().toFixed(2)}ms: capacity=${data.capacity}, buffered=${data.buffered}, pending=${this.pendingChunks.length}`);
|
| 347 |
+
|
| 348 |
+
// Mark that we've received initial capacity
|
| 349 |
+
if (!this.hasReceivedInitialCapacity) {
|
| 350 |
+
this.hasReceivedInitialCapacity = true;
|
| 351 |
+
// console.log(`[CAPACITY] *** FIRST capacity received at ${performance.now().toFixed(2)}ms, processing ${this.pendingChunks.length} pending chunks`);
|
| 352 |
+
// Process any chunks that were waiting for initial capacity
|
| 353 |
+
if (this.pendingChunks.length > 0) {
|
| 354 |
+
this.processPendingChunks();
|
| 355 |
+
}
|
| 356 |
+
}
|
| 357 |
+
|
| 358 |
+
// If worklet is requesting data, try to send it
|
| 359 |
+
if (data.requestSamples > 0 && this.pendingChunks.length > 0) {
|
| 360 |
+
this.processPendingChunks();
|
| 361 |
+
}
|
| 362 |
+
}
|
| 363 |
+
|
| 364 |
+
processPendingChunks() {
|
| 365 |
+
if (!this.isWorkletReady || this.pendingChunks.length === 0) {
|
| 366 |
+
return;
|
| 367 |
+
}
|
| 368 |
+
|
| 369 |
+
// Don't send if we don't know capacity yet
|
| 370 |
+
if (this.availableCapacity <= 0) {
|
| 371 |
+
return;
|
| 372 |
+
}
|
| 373 |
+
|
| 374 |
+
// Send ONE chunk if it fits, then wait for next capacity update
|
| 375 |
+
// This prevents race conditions from sending multiple chunks before worklet updates
|
| 376 |
+
const chunk = this.pendingChunks[0];
|
| 377 |
+
|
| 378 |
+
if (chunk.length <= this.availableCapacity) {
|
| 379 |
+
// Send the whole chunk
|
| 380 |
+
this.pendingChunks.shift();
|
| 381 |
+
this.workletNode.port.postMessage({
|
| 382 |
+
type: 'audio',
|
| 383 |
+
data: chunk
|
| 384 |
+
});
|
| 385 |
+
// Set capacity to 0 to prevent sending more until we get an update
|
| 386 |
+
this.availableCapacity = 0;
|
| 387 |
+
} else if (this.availableCapacity > 4096) {
|
| 388 |
+
// Send partial chunk only if we have significant space
|
| 389 |
+
const partial = chunk.slice(0, this.availableCapacity);
|
| 390 |
+
console.log(`Sending partial: ${partial.length} samples from ${chunk.length} (capacity: ${this.availableCapacity})`);
|
| 391 |
+
this.pendingChunks[0] = chunk.slice(this.availableCapacity);
|
| 392 |
+
this.workletNode.port.postMessage({
|
| 393 |
+
type: 'audio',
|
| 394 |
+
data: partial
|
| 395 |
+
});
|
| 396 |
+
// Set capacity to 0 to prevent sending more until we get an update
|
| 397 |
+
this.availableCapacity = 0;
|
| 398 |
+
} else {
|
| 399 |
+
console.log(`Not sending - chunk ${chunk.length} samples, capacity ${this.availableCapacity}`);
|
| 400 |
+
}
|
| 401 |
+
// else: Not enough space, wait for next capacity update
|
| 402 |
+
|
| 403 |
+
// If all chunks sent and stream ended, notify worklet
|
| 404 |
+
if (this.pendingChunks.length === 0 && this.pendingStreamEnd) {
|
| 405 |
+
this.workletNode.port.postMessage({ type: 'stream-ended' });
|
| 406 |
+
this.pendingStreamEnd = false;
|
| 407 |
+
}
|
| 408 |
+
}
|
| 409 |
+
|
| 410 |
+
playAudio(data) {
|
| 411 |
+
if (!this.isInitialized) {
|
| 412 |
+
// Queue the data if not initialized yet
|
| 413 |
+
if (!this.initPendingQueue) {
|
| 414 |
+
this.initPendingQueue = [];
|
| 415 |
+
this.initPromise.then(() => {
|
| 416 |
+
// Process queued data
|
| 417 |
+
const queue = this.initPendingQueue;
|
| 418 |
+
this.initPendingQueue = null;
|
| 419 |
+
for (const queuedData of queue) {
|
| 420 |
+
this.playAudio(queuedData);
|
| 421 |
+
}
|
| 422 |
+
});
|
| 423 |
+
}
|
| 424 |
+
this.initPendingQueue.push(data);
|
| 425 |
+
return;
|
| 426 |
+
}
|
| 427 |
+
|
| 428 |
+
if (this.audioContext.state !== 'running') {
|
| 429 |
+
return;
|
| 430 |
+
}
|
| 431 |
+
|
| 432 |
+
// Convert to Float32Array if needed
|
| 433 |
+
const float32Array = data instanceof Int16Array
|
| 434 |
+
? this.pcm16ToFloat32(data)
|
| 435 |
+
: data;
|
| 436 |
+
|
| 437 |
+
// Add to pending queue
|
| 438 |
+
this.pendingChunks.push(float32Array);
|
| 439 |
+
|
| 440 |
+
// Only try to process if we've received initial capacity and have space
|
| 441 |
+
// Otherwise wait for capacity update from worklet
|
| 442 |
+
if (this.hasReceivedInitialCapacity && this.availableCapacity > 0) {
|
| 443 |
+
this.processPendingChunks();
|
| 444 |
+
}
|
| 445 |
+
|
| 446 |
+
// Update metrics
|
| 447 |
+
this.metrics.chunksPlayed++;
|
| 448 |
+
|
| 449 |
+
// Update playback time for compatibility
|
| 450 |
+
const duration = float32Array.length / this.audioContext.sampleRate;
|
| 451 |
+
this.playbackTime = this.audioContext.currentTime + duration;
|
| 452 |
+
|
| 453 |
+
// Emit events for compatibility
|
| 454 |
+
this.emit('audioStarted', {
|
| 455 |
+
startTime: this.audioContext.currentTime,
|
| 456 |
+
duration: duration,
|
| 457 |
+
samples: float32Array.length
|
| 458 |
+
});
|
| 459 |
+
}
|
| 460 |
+
|
| 461 |
+
notifyStreamEnded() {
|
| 462 |
+
if (this.pendingChunks.length > 0) {
|
| 463 |
+
// Still have chunks to send, mark for later
|
| 464 |
+
this.pendingStreamEnd = true;
|
| 465 |
+
} else {
|
| 466 |
+
// No chunks left, send immediately
|
| 467 |
+
if (this.workletNode) {
|
| 468 |
+
this.workletNode.port.postMessage({ type: 'stream-ended' });
|
| 469 |
+
}
|
| 470 |
+
}
|
| 471 |
+
}
|
| 472 |
+
|
| 473 |
+
pcm16ToFloat32(pcm16) {
|
| 474 |
+
const float32 = new Float32Array(pcm16.length);
|
| 475 |
+
for (let i = 0; i < pcm16.length; i++) {
|
| 476 |
+
float32[i] = pcm16[i] / 32768;
|
| 477 |
+
}
|
| 478 |
+
return float32;
|
| 479 |
+
}
|
| 480 |
+
|
| 481 |
+
reset() {
|
| 482 |
+
this.playbackTime = 0;
|
| 483 |
+
this.pendingChunks = [];
|
| 484 |
+
this.pendingStreamEnd = false;
|
| 485 |
+
this.availableCapacity = 0;
|
| 486 |
+
|
| 487 |
+
if (this.workletNode) {
|
| 488 |
+
this.workletNode.port.postMessage({ type: 'reset' });
|
| 489 |
+
}
|
| 490 |
+
|
| 491 |
+
// Quick fade out to avoid clicks
|
| 492 |
+
if (this.gainNode) {
|
| 493 |
+
const now = this.audioContext.currentTime;
|
| 494 |
+
this.gainNode.gain.setValueAtTime(this.gainNode.gain.value, now);
|
| 495 |
+
this.gainNode.gain.linearRampToValueAtTime(0, now + 0.05);
|
| 496 |
+
setTimeout(() => {
|
| 497 |
+
this.gainNode.gain.value = 1;
|
| 498 |
+
}, 100);
|
| 499 |
+
}
|
| 500 |
+
}
|
| 501 |
+
|
| 502 |
+
stopAllSources() {
|
| 503 |
+
this.reset();
|
| 504 |
+
}
|
| 505 |
+
|
| 506 |
+
async resume() {
|
| 507 |
+
if (this.audioContext.state === 'suspended') {
|
| 508 |
+
await this.audioContext.resume();
|
| 509 |
+
}
|
| 510 |
+
}
|
| 511 |
+
|
| 512 |
+
get volume() {
|
| 513 |
+
return this.gainNode.gain.value;
|
| 514 |
+
}
|
| 515 |
+
|
| 516 |
+
set volume(value) {
|
| 517 |
+
const clampedValue = Math.max(0, Math.min(1, value));
|
| 518 |
+
this.gainNode.gain.value = clampedValue;
|
| 519 |
+
this.emit('volumeChange', { volume: clampedValue });
|
| 520 |
+
}
|
| 521 |
+
|
| 522 |
+
get volumePercentage() {
|
| 523 |
+
return this.volume * 100;
|
| 524 |
+
}
|
| 525 |
+
|
| 526 |
+
set volumePercentage(percentage) {
|
| 527 |
+
this.volume = percentage / 100;
|
| 528 |
+
}
|
| 529 |
+
|
| 530 |
+
getAnalyserData() {
|
| 531 |
+
const bufferLength = this.analyser.frequencyBinCount;
|
| 532 |
+
const dataArray = new Uint8Array(bufferLength);
|
| 533 |
+
this.analyser.getByteFrequencyData(dataArray);
|
| 534 |
+
return dataArray;
|
| 535 |
+
}
|
| 536 |
+
|
| 537 |
+
getTimeDomainData() {
|
| 538 |
+
const bufferLength = this.analyser.frequencyBinCount;
|
| 539 |
+
const dataArray = new Uint8Array(bufferLength);
|
| 540 |
+
this.analyser.getByteTimeDomainData(dataArray);
|
| 541 |
+
return dataArray;
|
| 542 |
+
}
|
| 543 |
+
|
| 544 |
+
getPlaybackStatus() {
|
| 545 |
+
const bufferMs = this.metrics.bufferLevel
|
| 546 |
+
? (this.metrics.bufferLevel / this.audioContext.sampleRate) * 1000
|
| 547 |
+
: 0;
|
| 548 |
+
|
| 549 |
+
return {
|
| 550 |
+
currentTime: this.audioContext.currentTime,
|
| 551 |
+
scheduledTime: this.playbackTime,
|
| 552 |
+
bufferedDuration: bufferMs / 1000,
|
| 553 |
+
state: this.audioContext.state,
|
| 554 |
+
worklet: {
|
| 555 |
+
bufferLevelSamples: this.metrics.bufferLevel,
|
| 556 |
+
bufferLevelMs: bufferMs,
|
| 557 |
+
underruns: this.metrics.underruns,
|
| 558 |
+
chunksPlayed: this.metrics.chunksPlayed,
|
| 559 |
+
pendingChunks: this.pendingChunks.length
|
| 560 |
+
}
|
| 561 |
+
};
|
| 562 |
+
}
|
| 563 |
+
}
|
README.md
CHANGED
|
@@ -1,12 +1,77 @@
|
|
| 1 |
-
---
|
| 2 |
-
title: Pocket
|
| 3 |
-
emoji: 🌖
|
| 4 |
-
colorFrom: yellow
|
| 5 |
-
colorTo: pink
|
| 6 |
-
sdk: static
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
-
|
| 11 |
-
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: Pocket TTS ONNX Web Demo
|
| 3 |
+
emoji: 🌖
|
| 4 |
+
colorFrom: yellow
|
| 5 |
+
colorTo: pink
|
| 6 |
+
sdk: static
|
| 7 |
+
app_file: index.html
|
| 8 |
+
pinned: false
|
| 9 |
+
license: cc-by-4.0
|
| 10 |
+
short_description: Real-time voice cloning entirely in your browser! (CPU)
|
| 11 |
+
---
|
| 12 |
+
|
| 13 |
+
# Pocket TTS Web Demo
|
| 14 |
+
|
| 15 |
+
Real-time neural text-to-speech with voice cloning, running entirely in your browser.
|
| 16 |
+
|
| 17 |
+
## Features
|
| 18 |
+
|
| 19 |
+
- **Voice Cloning**: Clone any voice from a short audio sample
|
| 20 |
+
- **Predefined Voices**: 3 bundled voices (Cosette, Jean, Fantine)
|
| 21 |
+
- **Streaming Audio**: Real-time audio generation with low latency
|
| 22 |
+
- **Pure Browser**: No server required, runs entirely in WebAssembly
|
| 23 |
+
|
| 24 |
+
## Model Files
|
| 25 |
+
|
| 26 |
+
The demo requires the following ONNX models in the `onnx/` directory:
|
| 27 |
+
|
| 28 |
+
| File | Size | Purpose |
|
| 29 |
+
|------|------|---------|
|
| 30 |
+
| `mimi_encoder.onnx` | ~70 MB | Voice audio → embeddings |
|
| 31 |
+
| `text_conditioner.onnx` | ~16 MB | Text tokens → embeddings |
|
| 32 |
+
| `flow_lm_main_int8.onnx` | ~73 MB | AR transformer (INT8) |
|
| 33 |
+
| `flow_lm_flow_int8.onnx` | ~10 MB | Flow matching network (INT8) |
|
| 34 |
+
| `mimi_decoder_int8.onnx` | ~22 MB | Latents → audio decoder (INT8) |
|
| 35 |
+
|
| 36 |
+
Additional files:
|
| 37 |
+
- `tokenizer.model` - SentencePiece tokenizer (~60 KB)
|
| 38 |
+
- `voices.bin` - Predefined voice embeddings (~1.5 MB)
|
| 39 |
+
|
| 40 |
+
## Browser Requirements
|
| 41 |
+
|
| 42 |
+
- Modern browser with WebAssembly support
|
| 43 |
+
- Chrome, Edge, Firefox, or Safari (latest versions)
|
| 44 |
+
- ~200 MB RAM for model loading
|
| 45 |
+
|
| 46 |
+
## Voice Cloning
|
| 47 |
+
|
| 48 |
+
1. Click "Upload Voice" or select "Custom (Upload)" from the dropdown
|
| 49 |
+
2. Upload an audio file (WAV, MP3, etc.) with clear speech
|
| 50 |
+
3. Best results with 3-10 seconds of clean audio
|
| 51 |
+
4. The voice will be encoded and used for all subsequent generations
|
| 52 |
+
|
| 53 |
+
## File Structure
|
| 54 |
+
|
| 55 |
+
```
|
| 56 |
+
pocket-tts-web/
|
| 57 |
+
├── index.html # Main HTML page
|
| 58 |
+
├── onnx-streaming.js # Main thread controller
|
| 59 |
+
├── inference-worker.js # Web Worker for ONNX inference
|
| 60 |
+
├── PCMPlayerWorklet.js # Audio playback worklet
|
| 61 |
+
├── EventEmitter.js # Event utilities
|
| 62 |
+
├── sentencepiece.js # SentencePiece tokenizer library
|
| 63 |
+
├── style.css # Styles
|
| 64 |
+
├── tokenizer.model # SentencePiece model
|
| 65 |
+
├── voices.bin # Predefined voice embeddings
|
| 66 |
+
└── onnx/
|
| 67 |
+
├── mimi_encoder.onnx
|
| 68 |
+
├── text_conditioner.onnx
|
| 69 |
+
├── flow_lm_main_int8.onnx
|
| 70 |
+
├── flow_lm_flow_int8.onnx
|
| 71 |
+
└── mimi_decoder_int8.onnx
|
| 72 |
+
```
|
| 73 |
+
|
| 74 |
+
## License
|
| 75 |
+
|
| 76 |
+
- **Models & Voice Embeddings**: CC BY 4.0 (inherited from [kyutai/pocket-tts](https://huggingface.co/kyutai/pocket-tts))
|
| 77 |
+
- **Code**: Apache 2.0
|
index.html
CHANGED
|
@@ -1,19 +1,193 @@
|
|
| 1 |
-
<!
|
| 2 |
-
<html>
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
</
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html lang="en">
|
| 3 |
+
|
| 4 |
+
<head>
|
| 5 |
+
<meta charset="UTF-8">
|
| 6 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 7 |
+
<title>Pocket TTS - Neural Voice Cloning in Your Browser</title>
|
| 8 |
+
<link rel="stylesheet" href="style.css">
|
| 9 |
+
<link rel="preconnect" href="https://fonts.googleapis.com">
|
| 10 |
+
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
|
| 11 |
+
<link href="https://fonts.googleapis.com/css2?family=Quicksand:wght@500;600;700&family=Nunito:wght@400;500;600;700&family=JetBrains+Mono:wght@400;500;600;700&display=swap" rel="stylesheet">
|
| 12 |
+
<!-- ONNX Runtime Web (loaded by worker, kept here for potential main thread usage) -->
|
| 13 |
+
<script src="https://cdn.jsdelivr.net/npm/onnxruntime-web@1.20.0/dist/ort.min.js"></script>
|
| 14 |
+
</head>
|
| 15 |
+
|
| 16 |
+
<body>
|
| 17 |
+
<!-- Ambient Background Effects -->
|
| 18 |
+
<div class="ambient-layer">
|
| 19 |
+
<div class="orb orb--primary"></div>
|
| 20 |
+
<div class="orb orb--secondary"></div>
|
| 21 |
+
<div class="orb orb--tertiary"></div>
|
| 22 |
+
<div class="grid-overlay"></div>
|
| 23 |
+
</div>
|
| 24 |
+
|
| 25 |
+
<div class="app-shell">
|
| 26 |
+
<!-- Hero Header -->
|
| 27 |
+
<header class="hero">
|
| 28 |
+
<div class="hero__brand">
|
| 29 |
+
<div class="logo">
|
| 30 |
+
<svg class="logo__icon" viewBox="0 0 32 32" fill="none">
|
| 31 |
+
<path d="M16 4C16 4 8 8 8 16C8 24 16 28 16 28" stroke="url(#logoGrad)" stroke-width="2.5" stroke-linecap="round"/>
|
| 32 |
+
<path d="M16 4C16 4 24 8 24 16C24 24 16 28 16 28" stroke="url(#logoGrad)" stroke-width="2.5" stroke-linecap="round"/>
|
| 33 |
+
<path d="M12 10V22" stroke="url(#logoGrad)" stroke-width="2.5" stroke-linecap="round"/>
|
| 34 |
+
<path d="M16 8V24" stroke="url(#logoGrad)" stroke-width="2.5" stroke-linecap="round"/>
|
| 35 |
+
<path d="M20 10V22" stroke="url(#logoGrad)" stroke-width="2.5" stroke-linecap="round"/>
|
| 36 |
+
<defs>
|
| 37 |
+
<linearGradient id="logoGrad" x1="8" y1="4" x2="24" y2="28" gradientUnits="userSpaceOnUse">
|
| 38 |
+
<stop stop-color="#3eb489"/>
|
| 39 |
+
<stop offset="0.5" stop-color="#00d4aa"/>
|
| 40 |
+
<stop offset="1" stop-color="#7fffd4"/>
|
| 41 |
+
</linearGradient>
|
| 42 |
+
</defs>
|
| 43 |
+
</svg>
|
| 44 |
+
<span class="logo__text">Pocket TTS</span>
|
| 45 |
+
</div>
|
| 46 |
+
<div class="hero__badge">
|
| 47 |
+
<span class="badge">ONNX Runtime</span>
|
| 48 |
+
</div>
|
| 49 |
+
</div>
|
| 50 |
+
<p class="hero__tagline">Real-time neural text-to-speech with voice cloning, running entirely in your browser</p>
|
| 51 |
+
</header>
|
| 52 |
+
|
| 53 |
+
<main class="main">
|
| 54 |
+
<!-- Input Section -->
|
| 55 |
+
<section class="input-section">
|
| 56 |
+
<!-- Voice Selection -->
|
| 57 |
+
<div class="voice-section">
|
| 58 |
+
<div class="voice-selector">
|
| 59 |
+
<label for="voice-select" class="voice-selector__label">Voice</label>
|
| 60 |
+
<select id="voice-select" class="voice-selector__dropdown">
|
| 61 |
+
<option value="">Loading voices...</option>
|
| 62 |
+
</select>
|
| 63 |
+
</div>
|
| 64 |
+
<div class="voice-upload">
|
| 65 |
+
<button id="voice-upload-btn" class="btn btn--outline btn--small">
|
| 66 |
+
<svg class="btn__icon" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
|
| 67 |
+
<path d="M21 15v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4"/>
|
| 68 |
+
<polyline points="17 8 12 3 7 8"/>
|
| 69 |
+
<line x1="12" y1="3" x2="12" y2="15"/>
|
| 70 |
+
</svg>
|
| 71 |
+
<span>Upload Voice</span>
|
| 72 |
+
</button>
|
| 73 |
+
<input type="file" id="voice-upload" accept="audio/*" hidden>
|
| 74 |
+
<span id="voice-upload-status" class="voice-upload-status"></span>
|
| 75 |
+
</div>
|
| 76 |
+
</div>
|
| 77 |
+
|
| 78 |
+
<div class="textarea-wrap">
|
| 79 |
+
<textarea
|
| 80 |
+
id="text-input"
|
| 81 |
+
placeholder="Type or paste text to synthesize..."
|
| 82 |
+
aria-label="Text to synthesize"
|
| 83 |
+
maxlength="500"
|
| 84 |
+
></textarea>
|
| 85 |
+
<div class="textarea-meta">
|
| 86 |
+
<span class="char-count"><span id="char-count">0</span>/500</span>
|
| 87 |
+
</div>
|
| 88 |
+
</div>
|
| 89 |
+
|
| 90 |
+
<!-- Sample Texts -->
|
| 91 |
+
<div class="sample-texts">
|
| 92 |
+
<span class="sample-texts__label">Try:</span>
|
| 93 |
+
<button class="sample-btn" data-text="Hello, welcome to Pocket TTS. This is a demonstration of real-time voice cloning running entirely in your browser.">Demo greeting</button>
|
| 94 |
+
<button class="sample-btn" data-text="I completely understand how frustrating this must be for you. Let me take care of this right away and make sure we get it resolved.">Empathetic support</button>
|
| 95 |
+
<button class="sample-btn" data-text="Wow, congratulations! That's absolutely fantastic news! I'm so thrilled for you!">Excited</button>
|
| 96 |
+
<button class="sample-btn" data-text="I'm really sorry to hear about your loss. Please know that we're here for you, and take all the time you need.">Compassionate</button>
|
| 97 |
+
<button class="sample-btn" data-text="Great question! I'd be happy to walk you through this step by step. First, let's start with the basics.">Helpful guide</button>
|
| 98 |
+
</div>
|
| 99 |
+
|
| 100 |
+
<div class="controls">
|
| 101 |
+
<button id="generate-btn" class="btn btn--primary">
|
| 102 |
+
<svg class="btn__icon" viewBox="0 0 24 24" fill="currentColor">
|
| 103 |
+
<polygon points="5,3 19,12 5,21"/>
|
| 104 |
+
</svg>
|
| 105 |
+
<span class="btn__text">Generate Audio</span>
|
| 106 |
+
<div class="btn__loader" id="btn-loader"></div>
|
| 107 |
+
</button>
|
| 108 |
+
<button id="stop-btn" class="btn btn--secondary" disabled>
|
| 109 |
+
<svg class="btn__icon" viewBox="0 0 24 24" fill="currentColor">
|
| 110 |
+
<rect x="6" y="6" width="12" height="12" rx="1"/>
|
| 111 |
+
</svg>
|
| 112 |
+
<span class="btn__text">Stop</span>
|
| 113 |
+
</button>
|
| 114 |
+
</div>
|
| 115 |
+
</section>
|
| 116 |
+
|
| 117 |
+
<!-- Output Section: Visualizer + Metrics -->
|
| 118 |
+
<section class="output-section">
|
| 119 |
+
<div class="visualizer-panel">
|
| 120 |
+
<div class="visualizer-panel__header">
|
| 121 |
+
<span class="visualizer-panel__title">Audio Output</span>
|
| 122 |
+
<div class="status-indicator" id="status-indicator">
|
| 123 |
+
<span class="status-dot"></span>
|
| 124 |
+
<span class="status-text" id="stat-status">Idle</span>
|
| 125 |
+
</div>
|
| 126 |
+
</div>
|
| 127 |
+
<div class="visualizer-container">
|
| 128 |
+
<canvas id="visualizer-waveform"></canvas>
|
| 129 |
+
<canvas id="visualizer-bars" class="visualizer-bars"></canvas>
|
| 130 |
+
</div>
|
| 131 |
+
</div>
|
| 132 |
+
|
| 133 |
+
<div class="metrics-panel">
|
| 134 |
+
<h3 class="metrics-panel__title">Performance</h3>
|
| 135 |
+
|
| 136 |
+
<div class="metric">
|
| 137 |
+
<div class="metric__header">
|
| 138 |
+
<span class="metric__label">Time to First Byte</span>
|
| 139 |
+
<button class="metric__info" aria-label="TTFB explanation" data-tooltip="Time from request until first audio chunk is received">
|
| 140 |
+
<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
|
| 141 |
+
<circle cx="12" cy="12" r="10"/>
|
| 142 |
+
<path d="M12 16v-4M12 8h.01"/>
|
| 143 |
+
</svg>
|
| 144 |
+
</button>
|
| 145 |
+
</div>
|
| 146 |
+
<div class="metric__value">
|
| 147 |
+
<span class="metric__number" id="stat-ttfb">--</span>
|
| 148 |
+
<span class="metric__unit">ms</span>
|
| 149 |
+
</div>
|
| 150 |
+
<div class="metric__bar">
|
| 151 |
+
<div class="metric__bar-fill" id="ttfb-bar"></div>
|
| 152 |
+
</div>
|
| 153 |
+
</div>
|
| 154 |
+
|
| 155 |
+
<div class="metric metric--highlight">
|
| 156 |
+
<div class="metric__header">
|
| 157 |
+
<span class="metric__label">Real-Time Factor</span>
|
| 158 |
+
<button class="metric__info" aria-label="RTFx explanation" data-tooltip="Audio duration divided by processing time. Values above 1x mean faster than real-time playback.">
|
| 159 |
+
<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
|
| 160 |
+
<circle cx="12" cy="12" r="10"/>
|
| 161 |
+
<path d="M12 16v-4M12 8h.01"/>
|
| 162 |
+
</svg>
|
| 163 |
+
</button>
|
| 164 |
+
</div>
|
| 165 |
+
<div class="metric__value">
|
| 166 |
+
<span class="metric__number metric__number--large" id="stat-rtfx">--</span>
|
| 167 |
+
<span class="metric__unit">x</span>
|
| 168 |
+
</div>
|
| 169 |
+
<div class="metric__context" id="rtfx-context">>1x = faster than real-time</div>
|
| 170 |
+
<div class="metric__note" id="edge-opt-note" style="display: none;">(edge optimization applied)</div>
|
| 171 |
+
</div>
|
| 172 |
+
|
| 173 |
+
<div class="metric metric--status">
|
| 174 |
+
<span class="metric__label">Model</span>
|
| 175 |
+
<div class="model-status" id="model-status">
|
| 176 |
+
<span class="model-status__dot"></span>
|
| 177 |
+
<span class="model-status__text">Not loaded</span>
|
| 178 |
+
</div>
|
| 179 |
+
</div>
|
| 180 |
+
</div>
|
| 181 |
+
</section>
|
| 182 |
+
</main>
|
| 183 |
+
|
| 184 |
+
<footer class="footer">
|
| 185 |
+
<p><a href="https://github.com/kyutai-labs/pocket-tts" target="_blank" rel="noopener">Pocket TTS</a> by <a href="https://kyutai.org" target="_blank" rel="noopener">Kyutai</a>. This is an unofficial demo.</p>
|
| 186 |
+
<p class="footer__disclaimer">Do not use for voice cloning without consent, misinformation, fraud, or any harmful/illegal purpose. All liability disclaimed.</p>
|
| 187 |
+
</footer>
|
| 188 |
+
</div>
|
| 189 |
+
|
| 190 |
+
<script type="module" src="onnx-streaming.js?v=1"></script>
|
| 191 |
+
</body>
|
| 192 |
+
|
| 193 |
+
</html>
|
inference-worker.js
ADDED
|
@@ -0,0 +1,1044 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// Pocket TTS ONNX Web Worker
//
// Module-level configuration and mutable state shared by the message handler,
// the model loader and the generation code in this worker.
console.log('Pocket TTS Worker Starting...');
self.postMessage({ type: 'status', status: 'Worker Thread Started', state: 'idle' });

// ONNX Runtime module; stays null until loadModels() imports it dynamically
// (this file runs as a module worker).
let ort = null;

// Configuration: relative URLs of every asset the worker fetches.
const MODELS = {
    mimi_encoder: './onnx/mimi_encoder.onnx',
    text_conditioner: './onnx/text_conditioner.onnx',
    flow_lm_main: './onnx/flow_lm_main_int8.onnx',
    flow_lm_flow: './onnx/flow_lm_flow_int8.onnx',
    mimi_decoder: './onnx/mimi_decoder_int8.onnx',
    tokenizer: './tokenizer.model',
    voices: './voices.bin'
};

const SAMPLE_RATE = 24000;
const SAMPLES_PER_FRAME = 1920;
const MAX_FRAMES = 500;

// State: inference sessions and tokenizer, all populated by loadModels().
let mimiEncoderSession = null;
let textConditionerSession = null;
let flowLmMainSession = null;
let flowLmFlowSession = null;
let mimiDecoderSession = null;
let tokenizerProcessor = null;
let tokenizerModelB64 = null;
// Voice name -> { data: Float32Array, shape: number[] }, parsed from voices.bin.
let predefinedVoices = {};
// Optimization: pre-allocated s/t tensors. loadModels() fills this as an
// object keyed by LSD step count (1..MAX_LSD), so it starts as an object too.
let stTensors = {};
let isGenerating = false;
let isReady = false;

// Dynamic LSD (Latent Solver/Diffusion steps)
const MAX_LSD = 10; // Default/max quality
let currentLSD = MAX_LSD;

// Current voice embedding (cached)
let currentVoiceEmbedding = null;
let currentVoiceName = null;
|
| 43 |
+
|
| 44 |
+
// Text preprocessing utilities

// Cardinal words for 0-19. Index 0 is empty: zero is handled by the caller.
const ONES = ['', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine', 'ten', 'eleven', 'twelve', 'thirteen', 'fourteen', 'fifteen', 'sixteen', 'seventeen', 'eighteen', 'nineteen'];
// Cardinal words for multiples of ten, indexed by the tens digit (0 and 1 unused).
const TENS = ['', '', 'twenty', 'thirty', 'forty', 'fifty', 'sixty', 'seventy', 'eighty', 'ninety'];
// Ordinal counterparts of ONES and TENS.
const ORDINAL_ONES = ['', 'first', 'second', 'third', 'fourth', 'fifth', 'sixth', 'seventh', 'eighth', 'ninth', 'tenth', 'eleventh', 'twelfth', 'thirteenth', 'fourteenth', 'fifteenth', 'sixteenth', 'seventeenth', 'eighteenth', 'nineteenth'];
const ORDINAL_TENS = ['', '', 'twentieth', 'thirtieth', 'fortieth', 'fiftieth', 'sixtieth', 'seventieth', 'eightieth', 'ninetieth'];

/**
 * Spell an integer out in English words (no hyphens, no commas).
 *
 * @param {number} num - Value to spell. Fractions are truncated, negative
 *   values are prefixed with "minus", and NaN/Infinity are returned via
 *   String(num) instead of recursing forever.
 * @param {object} [options]
 * @param {string} [options.andword=''] - Word inserted between "hundred" and
 *   the remainder (e.g. 'and'); empty means no joining word.
 * @param {string} [options.zero='zero'] - Word used for 0, and for the filler
 *   digit in pair grouping ("nineteen oh five").
 * @param {number} [options.group=0] - When 2, values strictly between 1000 and
 *   10000 are read as two pairs of digits ("nineteen ninety nine").
 * @returns {string} The spelled-out number.
 */
function numberToWords(num, options = {}) {
    const { andword = '', zero = 'zero', group = 0 } = options;
    if (!Number.isFinite(num)) return String(num);

    const value = Math.trunc(num);
    if (value === 0) return zero;
    if (value < 0) return `minus ${numberToWords(-value, options)}`;

    const hundredJoiner = andword ? ` ${andword} ` : ' ';
    // Largest scale first; anything above a billion is expressed in billions.
    const scales = [[1000000000, 'billion'], [1000000, 'million'], [1000, 'thousand']];

    const convert = (n) => {
        if (n < 20) return ONES[n];
        if (n < 100) {
            const unit = n % 10;
            return TENS[Math.floor(n / 10)] + (unit ? ` ${ONES[unit]}` : '');
        }
        if (n < 1000) {
            const rest = n % 100;
            return `${ONES[Math.floor(n / 100)]} hundred` + (rest ? hundredJoiner + convert(rest) : '');
        }
        for (const [size, label] of scales) {
            if (n >= size) {
                const rest = n % size;
                return `${convert(Math.floor(n / size))} ${label}` + (rest ? ` ${convert(rest)}` : '');
            }
        }
        return ''; // unreachable: n >= 1000 always matches the last scale
    };

    if (group === 2 && value > 1000 && value < 10000) {
        const high = Math.floor(value / 100);
        const low = value % 100;
        if (low === 0) return `${convert(high)} hundred`;
        if (low < 10) return `${convert(high)} ${zero} ${ONES[low]}`;
        return `${convert(high)} ${convert(low)}`;
    }
    return convert(value);
}

/**
 * Spell a non-negative integer as an English ordinal ("twenty first").
 *
 * Values below 100 come from the ordinal tables; larger values are spelled as
 * a cardinal whose final word is then rewritten by suffix.
 *
 * @param {number} num - Non-negative integer.
 * @returns {string} The spelled-out ordinal.
 */
function ordinalToWords(num) {
    // ORDINAL_ONES[0] is '', so 0 falls through to "zero" + "th".
    if (num < 20) return ORDINAL_ONES[num] || numberToWords(num) + 'th';
    if (num < 100) {
        const tens = Math.floor(num / 10);
        const unit = num % 10;
        return unit === 0 ? ORDINAL_TENS[tens] : `${TENS[tens]} ${ORDINAL_ONES[unit]}`;
    }

    const cardinal = numberToWords(num);
    // [ending of the cardinal, text that replaces that ending]; first match wins,
    // so the specific endings must stay ahead of the generic 'e' and 't'.
    const endingRules = [
        ['y', 'ieth'],
        ['one', 'first'],
        ['two', 'second'],
        ['three', 'third'],
        ['ve', 'fth'],
        ['e', 'th'],
        ['t', 'th'],
    ];
    for (const [ending, replacement] of endingRules) {
        if (cardinal.endsWith(ending)) {
            return cardinal.slice(0, cardinal.length - ending.length) + replacement;
        }
    }
    return cardinal + 'th';
}
|
| 102 |
+
|
| 103 |
+
// Replacements for characters that Unicode decomposition cannot reduce to
// ASCII on its own (ligatures, stroked letters, typographic punctuation),
// plus common accented letters.
const UNICODE_MAP = {
    'à': 'a', 'á': 'a', 'â': 'a', 'ã': 'a', 'ä': 'a', 'å': 'a', 'æ': 'ae', 'ç': 'c',
    'è': 'e', 'é': 'e', 'ê': 'e', 'ë': 'e', 'ì': 'i', 'í': 'i', 'î': 'i', 'ï': 'i',
    'ñ': 'n', 'ò': 'o', 'ó': 'o', 'ô': 'o', 'õ': 'o', 'ö': 'o', 'ø': 'o',
    'ù': 'u', 'ú': 'u', 'û': 'u', 'ü': 'u', 'ý': 'y', 'ÿ': 'y',
    'ß': 'ss', 'œ': 'oe', 'ð': 'd', 'þ': 'th',
    'À': 'A', 'Á': 'A', 'Â': 'A', 'Ã': 'A', 'Ä': 'A', 'Å': 'A', 'Æ': 'AE', 'Ç': 'C',
    'È': 'E', 'É': 'E', 'Ê': 'E', 'Ë': 'E', 'Ì': 'I', 'Í': 'I', 'Î': 'I', 'Ï': 'I',
    'Ñ': 'N', 'Ò': 'O', 'Ó': 'O', 'Ô': 'O', 'Õ': 'O', 'Ö': 'O', 'Ø': 'O',
    'Ù': 'U', 'Ú': 'U', 'Û': 'U', 'Ü': 'U', 'Ý': 'Y',
    // Uppercase forms of the ligature/eth/thorn entries mapped above.
    'Œ': 'OE', 'Ð': 'D', 'Þ': 'TH',
    '\u201C': '"', '\u201D': '"', '\u2018': "'", '\u2019': "'", '\u2026': '...', '\u2013': '-', '\u2014': '-'
};

/**
 * Reduce accented and typographic characters to plain ASCII where possible.
 *
 * Characters listed in UNICODE_MAP are substituted first; the result is then
 * NFD-normalized and combining marks (U+0300-U+036F) are dropped, which covers
 * accented letters not listed in the map. Characters with neither a mapping
 * nor a decomposition (e.g. emoji) pass through unchanged.
 *
 * @param {string} text
 * @returns {string}
 */
function convertToAscii(text) {
    const substituted = Array.from(text, (ch) => UNICODE_MAP[ch] ?? ch).join('');
    return substituted.normalize('NFD').replace(/[\u0300-\u036f]/g, '');
}
|
| 110 |
+
|
| 111 |
+
// Dotted abbreviations, matched case-insensitively. The trailing period is
// part of the match and is consumed by the replacement.
const ABBREVIATIONS = [
    [/\bmrs\./gi, 'misuss'], [/\bms\./gi, 'miss'], [/\bmr\./gi, 'mister'], [/\bdr\./gi, 'doctor'],
    [/\bst\./gi, 'saint'], [/\bco\./gi, 'company'], [/\bjr\./gi, 'junior'], [/\bmaj\./gi, 'major'],
    [/\bgen\./gi, 'general'], [/\bdrs\./gi, 'doctors'], [/\brev\./gi, 'reverend'], [/\blt\./gi, 'lieutenant'],
    [/\bhon\./gi, 'honorable'], [/\bsgt\./gi, 'sergeant'], [/\bcapt\./gi, 'captain'], [/\besq\./gi, 'esquire'],
    [/\bltd\./gi, 'limited'], [/\bcol\./gi, 'colonel'], [/\bft\./gi, 'fort']
];
// Case-sensitive abbreviations and acronyms. Plural forms are listed before
// their singular so both get their own spelling.
const CASED_ABBREVIATIONS = [
    [/\bTTS\b/g, 'text to speech'], [/\bHz\b/g, 'hertz'], [/\bkHz\b/g, 'kilohertz'],
    [/\bKBs\b/g, 'kilobytes'], [/\bKB\b/g, 'kilobyte'], [/\bMBs\b/g, 'megabytes'], [/\bMB\b/g, 'megabyte'],
    [/\bGBs\b/g, 'gigabytes'], [/\bGB\b/g, 'gigabyte'], [/\bTBs\b/g, 'terabytes'], [/\bTB\b/g, 'terabyte'],
    [/\bAPIs\b/g, "a p i's"], [/\bAPI\b/g, 'a p i'], [/\bCLIs\b/g, "c l i's"], [/\bCLI\b/g, 'c l i'],
    [/\bCPUs\b/g, "c p u's"], [/\bCPU\b/g, 'c p u'], [/\bGPUs\b/g, "g p u's"], [/\bGPU\b/g, 'g p u'],
    [/\bAve\b/g, 'avenue'], [/\betc\b/g, 'etcetera']
];

/**
 * Replace known abbreviations with their spoken form.
 *
 * The case-insensitive dotted rules run first, then the case-sensitive ones,
 * each in table order.
 *
 * @param {string} text
 * @returns {string}
 */
function expandAbbreviations(text) {
    let expanded = text;
    for (const rules of [ABBREVIATIONS, CASED_ABBREVIATIONS]) {
        for (const [pattern, spoken] of rules) {
            expanded = expanded.replace(pattern, spoken);
        }
    }
    return expanded;
}
|
| 122 |
+
|
| 123 |
+
const NUM_PREFIX_RE = /#(\d)/g;
// Magnitude suffix directly after a digit (5K, 3M, 2B, 1T). The lookahead
// rejects a suffix letter that is followed by another letter, so "4th",
// "100MB", "44kHz" or "5kg" are not misread as "trillion"/"million"/"thousand".
const NUM_SUFFIX_RE = /(\d)([KMBT])(?![a-z])/gi;
// Digit/letter boundary in either order. The lookahead keeps ordinal suffixes
// (1st, 22nd, 3rd, 4th) attached so ORDINAL_RE can still match them later.
const NUM_LETTER_SPLIT_RE = /(\d)(?!(?:st|nd|rd|th)\b)([a-z])|([a-z])(\d)/gi;
const COMMA_NUMBER_RE = /(\d[\d,]+\d)/g;
const DATE_RE = /(^|[^/])(\d\d?[/-]\d\d?[/-]\d\d(?:\d\d)?)($|[^/])/g;
const PHONE_NUMBER_RE = /\(?\d{3}\)?[-.\s]\d{3}[-.\s]?\d{4}/g;
const TIME_RE = /(\d\d?):(\d\d)(?::(\d\d))?/g;
const POUNDS_RE = /£([\d,]*\d+)/g;
const DOLLARS_RE = /\$([\d.,]*\d+)/g;
const DECIMAL_NUMBER_RE = /(\d+(?:\.\d+)+)/g;
const MULTIPLY_RE = /(\d)\s?\*\s?(\d)/g;
const DIVIDE_RE = /(\d)\s?\/\s?(\d)/g;
const ADD_RE = /(\d)\s?\+\s?(\d)/g;
const SUBTRACT_RE = /(\d)?\s?-\s?(\d)/g;
const FRACTION_RE = /(\d+)\/(\d+)/g;
const ORDINAL_RE = /(\d+)(st|nd|rd|th)/gi;
const NUMBER_RE = /\d+/g;

// Spoken word for each magnitude suffix matched by NUM_SUFFIX_RE.
const NUM_SUFFIX_WORDS = { k: 'thousand', m: 'million', b: 'billion', t: 'trillion' };

/**
 * Replacement callback for TIME_RE: rewrite "h:mm" or "h:mm:ss" as
 * space-separated digit groups, with a leading zero spoken as "oh".
 *
 * The output still contains digits; NUMBER_RE spells them out afterwards.
 *
 * @param {string} _ - Full match (unused).
 * @param {string} hours
 * @param {string} minutes - Always two digits.
 * @param {string} [seconds] - Two digits, or undefined for "h:mm".
 * @returns {string}
 */
function spokenTime(_, hours, minutes, seconds) {
    const h = Number.parseInt(hours, 10);
    const m = Number.parseInt(minutes, 10);
    const withOh = (pair) => (pair.startsWith('0') ? `oh ${pair[1]}` : pair);

    if (seconds === undefined) {
        if (m !== 0) return `${hours} ${withOh(minutes)}`;
        if (h === 0) return '0';
        // NOTE(review): hour values above 12 with ":00" are read as a duration
        // in minutes; this mirrors the existing behavior. Confirm it is intended.
        return h > 12 ? `${hours} minutes` : `${hours} o'clock`;
    }

    const s = Number.parseInt(seconds, 10);
    if (h !== 0) {
        const minutePart = m === 0 ? 'oh oh' : withOh(minutes);
        const secondPart = s === 0 ? '' : withOh(seconds);
        return `${hours} ${minutePart} ${secondPart}`;
    }
    // Zero hours: the seconds are spoken exactly once (previously they were
    // appended a second time after the minutes/seconds pair).
    if (m !== 0) return `${minutes} ${s === 0 ? 'oh oh' : withOh(seconds)}`;
    return seconds;
}

/**
 * Replacement callback for DOLLARS_RE: "1,234.5" -> "1234 dollars, 50 cents".
 *
 * The fractional part is read as hundredths, so ".5" is 50 cents and ".05" is
 * 5 cents; digits beyond the second are dropped.
 *
 * @param {string} _ - Full match (unused).
 * @param {string} amount - Digits with optional commas and decimal point.
 * @returns {string}
 */
function spokenDollars(_, amount) {
    const [whole, fraction] = amount.replace(/,/g, '').split('.');
    const dollars = Number.parseInt(whole, 10) || 0;
    const cents = fraction ? Number.parseInt(fraction.padEnd(2, '0').slice(0, 2), 10) : 0;
    const dollarText = `${dollars} ${dollars === 1 ? 'dollar' : 'dollars'}`;
    const centText = `${cents} ${cents === 1 ? 'cent' : 'cents'}`;

    if (dollars && cents) return `${dollarText}, ${centText}`;
    if (dollars) return dollarText;
    if (cents) return centText;
    return 'zero dollars';
}

/**
 * Replacement callback for DECIMAL_NUMBER_RE: "3.14" -> "3 point 1 4".
 *
 * The part before the first dot is kept whole; every later part is split into
 * single digits. The word "point" itself is left intact.
 *
 * @param {string} match - Digits separated by one or more dots.
 * @returns {string}
 */
function spokenDecimal(match) {
    const [whole, ...fractions] = match.split('.');
    const spelled = fractions.map((digits) => digits.split('').join(' '));
    return [whole, ...spelled].join(' point ');
}

/**
 * Rewrite numeric expressions (counts, dates, phone numbers, times, money,
 * decimals, arithmetic, ordinals) into words.
 *
 * The replacements run in a fixed order because each one consumes patterns
 * that later ones would otherwise misread; NUMBER_RE runs last and spells out
 * every remaining digit run.
 *
 * @param {string} text
 * @returns {string}
 */
function normalizeNumbers(text) {
    text = text.replace(NUM_PREFIX_RE, (_, d) => `number ${d}`);
    text = text.replace(NUM_SUFFIX_RE, (_, num, suffix) => `${num} ${NUM_SUFFIX_WORDS[suffix.toLowerCase()]}`);

    // Two passes: matches cannot overlap, so "a1b" needs a second pass to
    // split the "1b" left behind after "a1" was handled.
    for (let pass = 0; pass < 2; pass++) {
        text = text.replace(NUM_LETTER_SPLIT_RE, (m, d1, l1, l2, d2) => {
            if (d1 && l1) return `${d1} ${l1}`;
            if (l2 && d2) return `${l2} ${d2}`;
            return m;
        });
    }

    text = text.replace(COMMA_NUMBER_RE, (m) => m.replace(/,/g, ''));
    text = text.replace(DATE_RE, (_, pre, date, post) => pre + date.split(/[./-]/).join(' dash ') + post);
    text = text.replace(PHONE_NUMBER_RE, (m) => {
        const digits = m.replace(/\D/g, '');
        if (digits.length !== 10) return m;
        const spaced = (part) => part.split('').join(' ');
        return `${spaced(digits.slice(0, 3))}, ${spaced(digits.slice(3, 6))}, ${spaced(digits.slice(6))}`;
    });
    text = text.replace(TIME_RE, spokenTime);
    text = text.replace(POUNDS_RE, (_, amount) => `${amount.replace(/,/g, '')} pounds`);
    text = text.replace(DOLLARS_RE, spokenDollars);
    text = text.replace(DECIMAL_NUMBER_RE, spokenDecimal);
    text = text.replace(MULTIPLY_RE, '$1 times $2');
    text = text.replace(DIVIDE_RE, '$1 over $2');
    text = text.replace(ADD_RE, '$1 plus $2');
    text = text.replace(SUBTRACT_RE, (_, a, b) => (a ? a : '') + ' minus ' + b);
    // Catches slashes DIVIDE_RE could not take because its matches overlapped
    // (e.g. the second slash in "1/2/3").
    text = text.replace(FRACTION_RE, '$1 over $2');
    text = text.replace(ORDINAL_RE, (_, num) => ordinalToWords(Number.parseInt(num, 10)));
    text = text.replace(NUMBER_RE, (m) => {
        const num = Number.parseInt(m, 10);
        // Values in the year range are read the way years are spoken.
        if (num > 1000 && num < 3000) {
            if (num === 2000) return 'two thousand';
            if (num > 2000 && num < 2010) return 'two thousand ' + numberToWords(num % 100);
            if (num % 100 === 0) return numberToWords(Math.floor(num / 100)) + ' hundred';
            return numberToWords(num, { zero: 'oh', group: 2 });
        }
        return numberToWords(num);
    });
    return text;
}
|
| 198 |
+
|
| 199 |
+
// Symbol -> spoken replacement, applied in order by expandSpecialCharacters().
// Multi-character operators ("<=", ">=") precede their single-character parts.
const SPECIAL_CHARACTERS = [
    [/@/g, ' at '], [/&/g, ' and '], [/%/g, ' percent '], [/:/g, '.'], [/;/g, ','],
    [/\+/g, ' plus '], [/\\/g, ' backslash '], [/~/g, ' about '], [/(^| )<3/g, ' heart '],
    [/<=/g, ' less than or equal to '], [/>=/g, ' greater than or equal to '],
    [/</g, ' less than '], [/>/g, ' greater than '], [/=/g, ' equals '],
    [/\//g, ' slash '], [/_/g, ' '],
];
const LINK_HEADER_RE = /https?:\/\//gi;
// The right-hand neighbour is only looked at, not consumed, so consecutive
// separators ("a - b - c", "U.S.A") are all rewritten in a single pass.
const DASH_RE = /(.) - (?=.)/g;
const DOT_RE = /([A-Z])\.(?=[A-Z])/gi;
const PARENTHESES_RE = /[\(\[\{][^\)\]\}]*[\)\]\}](.)?/g;

/**
 * Rewrite structural punctuation: URL schemes, spaced dashes, dots between
 * letters, and bracketed asides.
 *
 * @param {string} text
 * @returns {string}
 */
function normalizeSpecial(text) {
    // Speak the scheme that was actually written ("http" vs "https").
    text = text.replace(LINK_HEADER_RE, (scheme) => (
        scheme.toLowerCase().startsWith('https')
            ? 'h t t p s colon slash slash '
            : 'h t t p colon slash slash '
    ));
    text = text.replace(DASH_RE, '$1, ');
    text = text.replace(DOT_RE, '$1 dot ');
    text = text.replace(PARENTHESES_RE, (m, after) => {
        // Brackets become comma pauses around the aside.
        let result = m.replace(/[\(\[\{]/g, ', ').replace(/[\)\]\}]/g, ', ');
        // When punctuation follows the closing bracket, drop the space and
        // the captured character, then re-append that character directly.
        if (after && /[$.!?,]/.test(after)) result = result.slice(0, -2) + after;
        return result;
    });
    return text;
}

/**
 * Replace symbols with their spoken words using SPECIAL_CHARACTERS, in order.
 *
 * @param {string} text
 * @returns {string}
 */
function expandSpecialCharacters(text) {
    return SPECIAL_CHARACTERS.reduce(
        (current, [pattern, spoken]) => current.replace(pattern, spoken),
        text,
    );
}
|
| 223 |
+
|
| 224 |
+
/**
 * Collapse every whitespace run to a single space and remove the space that
 * precedes '.', '?', '!' or ','.
 *
 * @param {string} text
 * @returns {string}
 */
function collapseWhitespace(text) {
    const singleSpaced = text.replace(/\s+/g, ' ');
    return singleSpaced.replace(/ ([.\?!,])/g, '$1');
}
|
| 227 |
+
|
| 228 |
+
/**
 * Collapse runs of mixed punctuation to a single mark.
 *
 * Three or more dots are parked behind a placeholder first so they come back
 * as "..." at the end. After that, repeated commas become one comma, and any
 * run containing '.', '!' or '?' collapses to that mark; because the '?' rule
 * runs last, a run that mixes '!' and '?' ends up as '?'.
 *
 * @param {string} text
 * @returns {string}
 */
function dedupPunctuation(text) {
    const ELLIPSIS_TOKEN = '[ELLIPSIS]';
    return text
        .replace(/\.\.\.+/g, ELLIPSIS_TOKEN)
        .replace(/,+/g, ',')
        .replace(/[.,]*\.[.,]*/g, '.')
        .replace(/[.,!]*![.,!]*/g, '!')
        .replace(/[.,!?]*\?[.,!?]*/g, '?')
        .replace(/\[ELLIPSIS\]/g, '...');
}
|
| 231 |
+
|
| 232 |
+
/**
 * Normalize `text` and split it into chunks of whole sentences, each holding
 * at most 50 tokens where possible.
 *
 * Sentence boundaries are found on the token stream: a new sentence starts at
 * the first token that follows a run of end-of-sentence tokens. The
 * end-of-sentence set is whatever ids the tokenizer produces for the string
 * '.!...?'. Consecutive sentences are merged greedily while their combined
 * token count stays within the limit. A single sentence longer than the limit
 * is emitted as its own oversized chunk; it is never split.
 *
 * @param {string} text - Raw input text.
 * @returns {string[]} Chunk texts in reading order; empty when nothing remains
 *   after normalization.
 * @throws {Error} If the tokenizer has not been loaded yet.
 */
function splitIntoBestSentences(text) {
    const MAX_TOKENS = 50;

    const prepared = prepareText(text);
    if (!prepared) return [];
    if (!tokenizerProcessor) {
        throw new Error('Tokenizer is not loaded; load the models before splitting text.');
    }

    const tokenIds = tokenizerProcessor.encodeIds(prepared);
    const eosIds = new Set(tokenizerProcessor.encodeIds('.!...?'));

    // Index of the first token of every sentence, closed off by the total length.
    const sentenceStarts = [0];
    let insideEosRun = false;
    for (let i = 0; i < tokenIds.length; i++) {
        const isEos = eosIds.has(tokenIds[i]);
        if (!isEos && insideEosRun) sentenceStarts.push(i);
        insideEosRun = isEos;
    }
    sentenceStarts.push(tokenIds.length);

    const chunks = [];
    let chunkText = '';
    let chunkTokens = 0;

    for (let i = 0; i + 1 < sentenceStarts.length; i++) {
        const from = sentenceStarts[i];
        const to = sentenceStarts[i + 1];
        const sentence = tokenizerProcessor.decodeIds(tokenIds.slice(from, to)).trim();
        // A sentence that decodes to nothing carries no speech; skip it so it
        // neither pads a chunk nor counts against the token budget.
        if (!sentence) continue;

        const tokenCount = to - from;
        if (chunkText !== '' && chunkTokens + tokenCount > MAX_TOKENS) {
            chunks.push(chunkText);
            chunkText = '';
            chunkTokens = 0;
        }
        chunkText = chunkText === '' ? sentence : `${chunkText} ${sentence}`;
        chunkTokens += tokenCount;
    }

    if (chunkText !== '') chunks.push(chunkText);
    return chunks;
}
|
| 298 |
+
|
| 299 |
+
/**
 * Pocket TTS specific text preprocessing.
 *
 * Runs the normalization stages in a fixed order, then makes sure the result
 * ends with punctuation and starts with a capital letter.
 *
 * NOTE(review): numbers are spelled out before symbols are expanded, so by the
 * time expandSpecialCharacters() runs a literal "<3" no longer contains a
 * digit. Confirm whether the "heart" rule is expected to fire.
 *
 * @param {string} text - Raw input; null/undefined are treated as empty.
 * @returns {string} Normalized text, or '' when the input is blank.
 */
function prepareText(text) {
    let prepared = String(text ?? '').trim();
    if (!prepared) return '';

    const stages = [
        convertToAscii,          // Convert to ASCII
        normalizeNumbers,        // Normalize numbers first
        normalizeSpecial,        // Normalize special characters
        expandAbbreviations,     // Expand abbreviations
        expandSpecialCharacters, // Expand special characters
        collapseWhitespace,      // Collapse whitespace
        dedupPunctuation,        // Deduplicate punctuation
    ];
    for (const stage of stages) {
        prepared = stage(prepared);
    }

    // Final cleanup
    prepared = prepared.trim();
    if (!prepared) return prepared;

    // Ensure proper punctuation at end
    if (/[a-zA-Z0-9]$/.test(prepared)) {
        prepared += '.';
    }

    // Capitalize first letter
    if (!/^[A-Z]/.test(prepared)) {
        prepared = prepared[0].toUpperCase() + prepared.slice(1);
    }

    return prepared;
}
|
| 340 |
+
|
| 341 |
+
// ----------------------------------------------------------------------------
|
| 342 |
+
// Worker Logic
|
| 343 |
+
// ----------------------------------------------------------------------------
|
| 344 |
+
|
| 345 |
+
// Embedding produced by the most recent successful 'encode_voice' request.
// Kept separately so 'set_voice' with 'custom' can restore it after a
// predefined voice has replaced currentVoiceEmbedding.
let customVoiceEmbedding = null;

/**
 * Worker message dispatcher.
 *
 * Handled message types (`e.data.type`):
 *  - 'load'         : load runtime, models, tokenizer and voices; replies 'loaded' or 'error'.
 *  - 'generate'     : start generation for `data.text` / `data.voice`; ignored while one is running.
 *  - 'encode_voice' : encode `data.audio` into a custom voice; replies 'voice_encoded' or 'error'.
 *  - 'set_voice'    : select `data.voiceName` ('custom' or a predefined voice); replies 'voice_set' or 'error'.
 *  - 'set_lsd'      : set the step count from `data.lsd`, rounded and clamped to 1..MAX_LSD.
 *  - 'stop'         : clear the generating flag and report an idle status.
 */
self.onmessage = async (e) => {
    const { type, data } = e.data ?? {};
    console.log('Worker received message:', type);

    // Posts the standard error and returns false while models are not ready.
    const ensureReady = () => {
        if (isReady) return true;
        postMessage({ type: 'error', error: 'Models are not loaded yet.' });
        return false;
    };

    switch (type) {
        case 'load':
            try {
                await loadModels();
                postMessage({ type: 'loaded' });
            } catch (err) {
                postMessage({ type: 'error', error: err.toString() });
            }
            break;

        case 'generate':
            if (!ensureReady()) return;
            if (isGenerating) return;
            try {
                await startGeneration(data.text, data.voice);
            } catch (err) {
                console.error('Generation Error:', err);
                postMessage({ type: 'error', error: err.toString() });
            }
            break;

        case 'encode_voice':
            if (!ensureReady()) return;
            try {
                const embedding = await encodeVoiceAudio(data.audio);
                customVoiceEmbedding = embedding;
                currentVoiceEmbedding = embedding;
                currentVoiceName = 'custom';
                postMessage({ type: 'voice_encoded', voiceName: 'custom' });
            } catch (err) {
                console.error('Voice encoding error:', err);
                postMessage({ type: 'error', error: 'Failed to encode voice: ' + err.toString() });
            }
            break;

        case 'set_voice': {
            if (!ensureReady()) return;
            const voiceName = data?.voiceName;
            if (voiceName === 'custom') {
                // Re-activate the encoded custom voice if one exists. The
                // acknowledgement is sent either way, as before.
                if (customVoiceEmbedding) {
                    currentVoiceEmbedding = customVoiceEmbedding;
                    currentVoiceName = 'custom';
                }
                postMessage({ type: 'voice_set', voiceName: 'custom' });
            } else if (Object.prototype.hasOwnProperty.call(predefinedVoices, voiceName)) {
                // Own-property check: names such as "constructor" must not
                // resolve to inherited Object members.
                currentVoiceEmbedding = predefinedVoices[voiceName];
                currentVoiceName = voiceName;
                postMessage({ type: 'voice_set', voiceName });
            } else {
                postMessage({ type: 'error', error: `Unknown voice: ${voiceName}` });
            }
            break;
        }

        case 'set_lsd': {
            // Dynamic LSD adjustment for edge devices. The step count indexes
            // the pre-allocated tensors, so it must be a whole number in range.
            const requested = Math.round(Number(data?.lsd));
            if (!Number.isFinite(requested)) {
                console.warn('Ignoring invalid LSD value:', data?.lsd);
                return;
            }
            const newLSD = Math.max(1, Math.min(MAX_LSD, requested));
            if (newLSD !== currentLSD) {
                console.log(`LSD adjusted: ${currentLSD} → ${newLSD}`);
                currentLSD = newLSD;
            }
            break;
        }

        case 'stop':
            isGenerating = false;
            postMessage({ type: 'status', status: 'Stopped', state: 'idle' });
            break;

        default:
            console.warn('Worker ignoring unknown message type:', type);
    }
};
|
| 409 |
+
|
| 410 |
+
// In-flight load, shared by concurrent loadModels() calls; reset on failure
// so a later call can retry.
let loadModelsPromise = null;

/**
 * Base64-encode an ArrayBuffer.
 *
 * The bytes are converted in fixed-size slices: passing the whole buffer to
 * String.fromCharCode in one call can exceed the engine's argument limit and
 * throw a RangeError on large files.
 *
 * @param {ArrayBuffer} buffer
 * @returns {string} Base64 text ('' for an empty buffer).
 */
function arrayBufferToBase64(buffer) {
    const bytes = new Uint8Array(buffer);
    const SLICE = 0x8000;
    let binary = '';
    for (let start = 0; start < bytes.length; start += SLICE) {
        binary += String.fromCharCode.apply(null, bytes.subarray(start, start + SLICE));
    }
    return btoa(binary);
}

/**
 * Load ONNX Runtime, the five model sessions, the tokenizer and the
 * predefined voices, then mark the worker ready.
 *
 * Safe to call repeatedly: it returns immediately once the worker is ready,
 * and concurrent calls share a single in-flight load. Progress is reported
 * through 'status' messages; 'voices_loaded', 'model_status' and 'loaded' are
 * posted on success.
 *
 * @returns {Promise<void>}
 * @throws {Error} If the runtime, a model or the tokenizer cannot be loaded.
 *   A failure to load voices.bin is only logged.
 */
async function loadModels() {
    if (isReady) return;
    if (loadModelsPromise === null) {
        loadModelsPromise = loadModelsOnce().catch((err) => {
            loadModelsPromise = null;
            throw err;
        });
    }
    return loadModelsPromise;
}

// Performs one complete load attempt; see loadModels() for the contract.
async function loadModelsOnce() {
    postMessage({ type: 'status', status: 'Loading ONNX Runtime...', state: 'loading' });

    // Load ONNX Runtime dynamically
    const version = '1.20.0';
    const cdnBase = `https://cdn.jsdelivr.net/npm/onnxruntime-web@${version}/dist/`;

    try {
        const ortModule = await import(`${cdnBase}ort.min.mjs`);
        ort = ortModule.default || ortModule;
    } catch (e) {
        console.error('Failed to load ONNX Runtime:', e);
        throw new Error('Failed to load ONNX Runtime: ' + e.message, { cause: e });
    }

    if (!ort) {
        throw new Error('ONNX Runtime failed to load');
    }

    postMessage({ type: 'status', status: 'Loading models...', state: 'loading' });

    // Configure WASM Paths
    ort.env.wasm.wasmPaths = cdnBase;

    // Enable SIMD for significant performance boost (2-4x faster)
    ort.env.wasm.simd = true;

    // Configure multi-threading
    if (!self.crossOriginIsolated) {
        console.warn('Environment is not cross-origin isolated. Disabling WASM multi-threading.');
        console.warn('To enable multi-threading, serve with headers:');
        console.warn('  Cross-Origin-Opener-Policy: same-origin');
        console.warn('  Cross-Origin-Embedder-Policy: require-corp');
        ort.env.wasm.numThreads = 1;
    } else {
        const threads = Math.min(navigator.hardwareConcurrency || 4, 8);
        ort.env.wasm.numThreads = threads;
        console.log(`Multi-threading enabled with ${threads} threads`);
    }

    console.log(`ONNX Runtime Web config: simd=${ort.env.wasm.simd}, threads=${ort.env.wasm.numThreads}`);

    try {
        const sessionOptions = {
            executionProviders: ['wasm'],
            graphOptimizationLevel: 'all'
        };

        // Load all models in parallel
        postMessage({ type: 'status', status: 'Loading MIMI encoder...', state: 'loading' });
        console.log('Loading MIMI encoder...');

        const modelUrls = [
            MODELS.mimi_encoder,
            MODELS.text_conditioner,
            MODELS.flow_lm_main,
            MODELS.flow_lm_flow,
            MODELS.mimi_decoder
        ];
        const [encoderRes, textCondRes, flowMainRes, flowFlowRes, decoderRes] = await Promise.all(
            modelUrls.map((url) => ort.InferenceSession.create(url, sessionOptions))
        );

        mimiEncoderSession = encoderRes;
        textConditionerSession = textCondRes;
        flowLmMainSession = flowMainRes;
        flowLmFlowSession = flowFlowRes;
        mimiDecoderSession = decoderRes;

        console.log('All models loaded successfully');
        console.log('Flow LM Main inputs:', flowLmMainSession.inputNames);
        console.log('Flow LM Main outputs:', flowLmMainSession.outputNames);
        console.log('MIMI decoder inputs:', mimiDecoderSession.inputNames);
        console.log('MIMI decoder outputs:', mimiDecoderSession.outputNames);

        // Load tokenizer
        postMessage({ type: 'status', status: 'Loading tokenizer...', state: 'loading' });
        console.log('Loading tokenizer...');

        const tokenizerResponse = await fetch(MODELS.tokenizer);
        if (!tokenizerResponse.ok) {
            throw new Error(`Failed to load tokenizer: ${tokenizerResponse.statusText}`);
        }
        const tokenizerBuffer = await tokenizerResponse.arrayBuffer();
        tokenizerModelB64 = arrayBufferToBase64(tokenizerBuffer);

        // Import and initialize sentencepiece processor
        const spModule = await import('./sentencepiece.js?v=2');
        const SentencePieceProcessor = spModule.SentencePieceProcessor;
        if (!SentencePieceProcessor) {
            throw new Error('SentencePieceProcessor not found in sentencepiece.js');
        }
        tokenizerProcessor = new SentencePieceProcessor();
        await tokenizerProcessor.loadFromB64StringModel(tokenizerModelB64);
        console.log('Tokenizer loaded');

        // Load predefined voices (best effort: a failure leaves the list empty)
        postMessage({ type: 'status', status: 'Loading voices...', state: 'loading' });
        console.log('Loading predefined voices...');

        try {
            const voicesResponse = await fetch(MODELS.voices);
            if (voicesResponse.ok) {
                const voicesData = await voicesResponse.arrayBuffer();
                predefinedVoices = parseVoicesBin(voicesData);
                console.log('Loaded voices:', Object.keys(predefinedVoices));

                // Set default voice: 'cosette' when present, else the first one.
                const defaultVoice = predefinedVoices['cosette']
                    ? 'cosette'
                    : Object.keys(predefinedVoices)[0];
                if (defaultVoice) {
                    currentVoiceEmbedding = predefinedVoices[defaultVoice];
                    currentVoiceName = defaultVoice;
                }
            } else {
                console.warn(`Could not load predefined voices: HTTP ${voicesResponse.status}`);
            }
        } catch (e) {
            console.warn('Could not load predefined voices:', e);
        }

        // Send list of available voices
        postMessage({
            type: 'voices_loaded',
            voices: Object.keys(predefinedVoices),
            defaultVoice: currentVoiceName
        });

        // Pre-allocate s/t tensors for Flow Matching Loop (Optimization)
        // Pre-allocate for MAX_LSD to support dynamic switching
        console.log(`Pre-allocating Flow Matching tensors for LSD 1-${MAX_LSD}...`);
        stTensors = {};

        for (let lsd = 1; lsd <= MAX_LSD; lsd++) {
            // Step j of an lsd-step schedule covers the interval [j/lsd, j/lsd + 1/lsd].
            const dt = 1.0 / lsd;
            stTensors[lsd] = [];
            for (let j = 0; j < lsd; j++) {
                const s = j / lsd;
                const t = s + dt;
                stTensors[lsd].push({
                    s: new ort.Tensor('float32', new Float32Array([s]), [1, 1]),
                    t: new ort.Tensor('float32', new Float32Array([t]), [1, 1])
                });
            }
        }

        isReady = true;
        postMessage({ type: 'status', status: 'Ready', state: 'idle' });
        postMessage({ type: 'model_status', status: 'ready', text: 'Ready' });
        postMessage({ type: 'loaded' });

    } catch (err) {
        console.error('Model load failed:', err);
        throw err;
    }
}
|
| 568 |
+
|
| 569 |
+
/**
 * Parse the packed voices file into a map of voice embeddings.
 *
 * Binary layout (all integers little-endian):
 *   Header: 4 bytes (uint32) = number of voices
 *   For each voice:
 *     - 32 bytes: voice name (null-terminated string)
 *     - 4 bytes (uint32): number of frames
 *     - 4 bytes (uint32): embedding dim
 *     - frames * dim * 4 bytes: float32 embeddings
 *
 * @param {ArrayBuffer} buffer - Raw file contents.
 * @returns {Object<string, {data: Float32Array, shape: number[]}>} Voice name
 *   mapped to a private copy of its embeddings, with shape [1, frames, dim].
 * @throws {RangeError} If the buffer ends before the declared data does.
 */
function parseVoicesBin(buffer) {
    const NAME_BYTES = 32;
    const FLOAT_BYTES = 4;

    const voices = {};
    const view = new DataView(buffer);
    const decoder = new TextDecoder();
    let offset = 0;

    const numVoices = view.getUint32(offset, true);
    offset += 4;

    for (let i = 0; i < numVoices; i++) {
        // Read voice name: text up to the first NUL, or all 32 bytes if none.
        const nameBytes = new Uint8Array(buffer, offset, NAME_BYTES);
        const nameEnd = nameBytes.indexOf(0);
        const nameLength = nameEnd === -1 ? NAME_BYTES : nameEnd;
        const name = decoder.decode(nameBytes.subarray(0, nameLength)).trim();
        offset += NAME_BYTES;

        // Read dimensions
        const numFrames = view.getUint32(offset, true);
        offset += 4;
        const embDim = view.getUint32(offset, true);
        offset += 4;

        // Read embeddings
        const embBytes = numFrames * embDim * FLOAT_BYTES;
        if (offset + embBytes > buffer.byteLength) {
            throw new RangeError(
                `voices data is truncated: voice '${name}' needs ${embBytes} bytes at offset ${offset}, ` +
                `but the buffer is ${buffer.byteLength} bytes long`
            );
        }
        // slice() copies the bytes into a fresh buffer, so the result does
        // not depend on `offset` being 4-byte aligned and owns its memory.
        const data = new Float32Array(buffer.slice(offset, offset + embBytes));
        offset += embBytes;

        // Store as [1, numFrames, embDim] shaped array info
        voices[name] = {
            data,
            shape: [1, numFrames, embDim]
        };

        console.log(`Loaded voice '${name}': ${numFrames} frames, ${embDim} dim`);
    }

    return voices;
}
|
| 614 |
+
|
| 615 |
+
/**
 * Encode reference audio into a voice embedding with the MIMI encoder.
 *
 * @param {Float32Array} audioData - Audio samples; should be 24kHz, mono.
 * @returns {Promise<{data: Float32Array, shape: number[]}>} A copy of the
 *   encoder's first output and its dimensions.
 * @throws {Error} If `audioData` is missing or empty, or if inference fails.
 */
async function encodeVoiceAudio(audioData) {
    if (!audioData || audioData.length === 0) {
        throw new Error('Voice audio is empty.');
    }

    // Reshape to [1, 1, samples]
    const input = new ort.Tensor('float32', audioData, [1, 1, audioData.length]);

    const outputs = await mimiEncoderSession.run({ audio: input });
    const embeddings = outputs[mimiEncoderSession.outputNames[0]];

    // Copy both the data and the dims so the result does not alias objects
    // owned by the runtime's output tensor.
    return {
        data: new Float32Array(embeddings.data),
        shape: [...embeddings.dims]
    };
}
|
| 628 |
+
|
| 629 |
+
// Hardcoded state shapes extracted from ONNX model metadata
// These are the initial shapes - dynamic dimensions start at 0
//
// The 18 states form six consecutive groups of three:
//   state_{3k}     KV cache: [kv=2, batch=1, max_seq=1000, heads=16, head_dim=64]
//   state_{3k + 1} dynamic (starts empty)
//   state_{3k + 2} step counter
const FLOW_LM_STATE_SHAPES = (() => {
    const GROUPS = 6;
    const shapes = {};
    for (let group = 0; group < GROUPS; group++) {
        const first = group * 3;
        shapes[`state_${first}`] = { shape: [2, 1, 1000, 16, 64], dtype: 'float32' };
        shapes[`state_${first + 1}`] = { shape: [0], dtype: 'float32' }; // dynamic
        shapes[`state_${first + 2}`] = { shape: [1], dtype: 'int64' }; // step counter
    }
    return shapes;
})();
|
| 652 |
+
|
| 653 |
+
const MIMI_DECODER_STATE_SHAPES = {
|
| 654 |
+
state_0: { shape: [1], dtype: 'bool' },
|
| 655 |
+
state_1: { shape: [1, 512, 6], dtype: 'float32' },
|
| 656 |
+
state_2: { shape: [1], dtype: 'bool' },
|
| 657 |
+
state_3: { shape: [1, 64, 2], dtype: 'float32' },
|
| 658 |
+
state_4: { shape: [1, 256, 6], dtype: 'float32' },
|
| 659 |
+
state_5: { shape: [1], dtype: 'bool' },
|
| 660 |
+
state_6: { shape: [1, 256, 2], dtype: 'float32' },
|
| 661 |
+
state_7: { shape: [1], dtype: 'bool' },
|
| 662 |
+
state_8: { shape: [1, 128, 0], dtype: 'float32' }, // dynamic
|
| 663 |
+
state_9: { shape: [1, 128, 5], dtype: 'float32' },
|
| 664 |
+
state_10: { shape: [1], dtype: 'bool' },
|
| 665 |
+
state_11: { shape: [1, 128, 2], dtype: 'float32' },
|
| 666 |
+
state_12: { shape: [1], dtype: 'bool' },
|
| 667 |
+
state_13: { shape: [1, 64, 0], dtype: 'float32' }, // dynamic
|
| 668 |
+
state_14: { shape: [1, 64, 4], dtype: 'float32' },
|
| 669 |
+
state_15: { shape: [1], dtype: 'bool' },
|
| 670 |
+
state_16: { shape: [1, 64, 2], dtype: 'float32' },
|
| 671 |
+
state_17: { shape: [1], dtype: 'bool' },
|
| 672 |
+
state_18: { shape: [1, 32, 0], dtype: 'float32' }, // dynamic
|
| 673 |
+
state_19: { shape: [2, 1, 8, 1000, 64], dtype: 'float32' },
|
| 674 |
+
state_20: { shape: [1], dtype: 'int64' },
|
| 675 |
+
state_21: { shape: [1], dtype: 'int64' },
|
| 676 |
+
state_22: { shape: [2, 1, 8, 1000, 64], dtype: 'float32' },
|
| 677 |
+
state_23: { shape: [1], dtype: 'int64' },
|
| 678 |
+
state_24: { shape: [1], dtype: 'int64' },
|
| 679 |
+
state_25: { shape: [1], dtype: 'bool' },
|
| 680 |
+
state_26: { shape: [1, 512, 16], dtype: 'float32' },
|
| 681 |
+
state_27: { shape: [1], dtype: 'bool' },
|
| 682 |
+
state_28: { shape: [1, 1, 6], dtype: 'float32' },
|
| 683 |
+
state_29: { shape: [1], dtype: 'bool' },
|
| 684 |
+
state_30: { shape: [1, 64, 2], dtype: 'float32' },
|
| 685 |
+
state_31: { shape: [1], dtype: 'bool' },
|
| 686 |
+
state_32: { shape: [1, 32, 0], dtype: 'float32' }, // dynamic
|
| 687 |
+
state_33: { shape: [1], dtype: 'bool' },
|
| 688 |
+
state_34: { shape: [1, 512, 2], dtype: 'float32' },
|
| 689 |
+
state_35: { shape: [1], dtype: 'bool' },
|
| 690 |
+
state_36: { shape: [1, 64, 4], dtype: 'float32' },
|
| 691 |
+
state_37: { shape: [1], dtype: 'bool' },
|
| 692 |
+
state_38: { shape: [1, 128, 2], dtype: 'float32' },
|
| 693 |
+
state_39: { shape: [1], dtype: 'bool' },
|
| 694 |
+
state_40: { shape: [1, 64, 0], dtype: 'float32' }, // dynamic
|
| 695 |
+
state_41: { shape: [1], dtype: 'bool' },
|
| 696 |
+
state_42: { shape: [1, 128, 5], dtype: 'float32' },
|
| 697 |
+
state_43: { shape: [1], dtype: 'bool' },
|
| 698 |
+
state_44: { shape: [1, 256, 2], dtype: 'float32' },
|
| 699 |
+
state_45: { shape: [1], dtype: 'bool' },
|
| 700 |
+
state_46: { shape: [1, 128, 0], dtype: 'float32' }, // dynamic
|
| 701 |
+
state_47: { shape: [1], dtype: 'bool' },
|
| 702 |
+
state_48: { shape: [1, 256, 6], dtype: 'float32' },
|
| 703 |
+
state_49: { shape: [2, 1, 8, 1000, 64], dtype: 'float32' },
|
| 704 |
+
state_50: { shape: [1], dtype: 'int64' },
|
| 705 |
+
state_51: { shape: [1], dtype: 'int64' },
|
| 706 |
+
state_52: { shape: [2, 1, 8, 1000, 64], dtype: 'float32' },
|
| 707 |
+
state_53: { shape: [1], dtype: 'int64' },
|
| 708 |
+
state_54: { shape: [1], dtype: 'int64' },
|
| 709 |
+
state_55: { shape: [1, 512, 16], dtype: 'float32' },
|
| 710 |
+
};
|
| 711 |
+
|
| 712 |
+
/**
 * Build zero-filled initial state tensors for a stateful ONNX model from a
 * table of hardcoded shapes.
 *
 * Only session inputs whose name starts with `state_` are considered. Inputs
 * missing from the table are skipped with a console warning.
 *
 * @param {{inputNames: string[]}} session - Session whose inputs are scanned.
 * @param {Object<string, {shape: number[], dtype: string}>} stateShapes -
 *   Shape/dtype descriptor per state input name.
 * @returns {Object<string, ort.Tensor>} Map of state input name to tensor.
 */
function initState(session, stateShapes) {
  // int64 -> BigInt64Array, bool -> Uint8Array, anything else -> Float32Array.
  const storageFor = (dtype) => {
    switch (dtype) {
      case 'int64':
        return BigInt64Array;
      case 'bool':
        return Uint8Array;
      default:
        return Float32Array;
    }
  };

  const tensors = {};

  session.inputNames
    .filter((name) => name.startsWith('state_'))
    .forEach((name) => {
      const spec = stateShapes[name];
      if (!spec) {
        console.warn(`Unknown state input: ${name}, skipping`);
        return;
      }

      // A dimension of 0 yields an element count of 0 (empty dynamic state).
      let elementCount = 1;
      for (const dim of spec.shape) {
        elementCount *= dim;
      }

      const Storage = storageFor(spec.dtype);
      tensors[name] = new ort.Tensor(spec.dtype, new Storage(elementCount), spec.shape);
      console.log(`Init state ${name}: shape=${JSON.stringify(spec.shape)}, dtype=${spec.dtype}`);
    });

  return tensors;
}
|
| 745 |
+
|
| 746 |
+
/**
 * Entry point for one text-to-speech request: splits the text into chunks,
 * resolves the voice embedding, and runs the generation pipeline.
 *
 * Progress is reported via `postMessage`: a 'Generating...' status and
 * `generation_started` up front, then `error` on failure. When generation was
 * not stopped externally, `stream_ended` is posted at the end, followed by a
 * 'Finished' status only if no error occurred.
 *
 * @param {string} text - Text to synthesize.
 * @param {string} [voiceName] - Predefined voice to switch to; when omitted or
 *   equal to the current voice, the current embedding is reused.
 * @returns {Promise<void>} Resolves when generation ends; never rejects
 *   (failures are reported as an `error` message).
 */
async function startGeneration(text, voiceName) {
  isGenerating = true;
  currentLSD = MAX_LSD; // Reset to max quality for each new generation
  postMessage({ type: 'status', status: 'Generating...', state: 'running' });
  postMessage({ type: 'generation_started', data: { time: performance.now() } });

  let failed = false;

  try {
    // Split text into sentence chunks (max 50 tokens each)
    const chunks = splitIntoBestSentences(text);
    console.log(`Split into ${chunks.length} chunks:`, chunks);

    if (chunks.length === 0) {
      throw new Error('No text to generate');
    }

    // Get voice embedding
    let voiceEmb = currentVoiceEmbedding;
    if (voiceName && voiceName !== currentVoiceName) {
      if (predefinedVoices[voiceName]) {
        voiceEmb = predefinedVoices[voiceName];
        currentVoiceEmbedding = voiceEmb;
        currentVoiceName = voiceName;
      } else {
        // Keep using the current embedding, but make the fallback visible.
        console.warn(`Unknown voice "${voiceName}", keeping "${currentVoiceName}"`);
      }
    }

    if (!voiceEmb) {
      throw new Error('No voice embedding available. Please select a voice or upload custom audio.');
    }

    // Run generation pipeline with chunks
    await runGenerationPipeline(voiceEmb, chunks);
  } catch (err) {
    failed = true;
    console.error('Generation error:', err);
    postMessage({ type: 'error', error: err.toString() });
  } finally {
    // isGenerating is cleared externally when the user stops generation; in
    // that case nothing more is reported from here.
    if (isGenerating) {
      postMessage({ type: 'stream_ended' });
      // Do not follow an error with 'Finished': it would overwrite the error
      // status the main thread has just displayed.
      if (!failed) {
        postMessage({ type: 'status', status: 'Finished', state: 'idle' });
      }
    }
    isGenerating = false;
  }
}
|
| 789 |
+
|
| 790 |
+
/**
 * Copy every `out_state_N` output of a flow LM run into `state_N` of the state
 * dictionary, so the next run continues from it. The first two session
 * outputs are not state and are skipped.
 */
function carryFlowLmState(flowLmState, runResult) {
  for (const outputName of flowLmMainSession.outputNames.slice(2)) {
    if (outputName.startsWith('out_state_')) {
      const stateIdx = Number.parseInt(outputName.slice('out_state_'.length), 10);
      flowLmState[`state_${stateIdx}`] = runResult[outputName];
    }
  }
}

/**
 * Draw `length` independent Gaussian samples with standard deviation `std`
 * using the Box-Muller transform.
 */
function sampleLatentNoise(length, std) {
  const noise = new Float32Array(length);
  for (let i = 0; i < length; i++) {
    let u = 0;
    let v = 0;
    // Math.random() may return 0, which would make log(u) infinite.
    while (u === 0) u = Math.random();
    while (v === 0) v = Math.random();
    noise[i] = Math.sqrt(-2.0 * Math.log(u)) * Math.cos(2.0 * Math.PI * v) * std;
  }
  return noise;
}

/**
 * Generate audio for all text chunks and stream it out as `audio_chunk`
 * messages, then post a final status message with throughput metrics.
 *
 * Flow LM and decoder state are created once and persist across chunks. The
 * voice embedding is fed once up front; each chunk is then tokenized, fed as
 * text conditioning, and followed by an autoregressive loop that produces one
 * 32-value latent per step until EOS or MAX_FRAMES. Latents are decoded in
 * batches: 3 frames for the very first audio chunk, 12 afterwards, and
 * whatever is pending when the chunk ends.
 *
 * Generation stops early (dropping undecoded frames) when `isGenerating` is
 * cleared.
 *
 * @param {{data: Float32Array, shape: number[]}} voiceEmb - Voice embedding.
 * @param {string[]} chunks - Text chunks to synthesize in order.
 * @returns {Promise<void>}
 */
async function runGenerationPipeline(voiceEmb, chunks) {
  const LATENT_DIM = 32;
  const EOS_LOGIT_THRESHOLD = -4.0;
  const TEMP = 0.7;
  const NOISE_STD = Math.sqrt(TEMP);

  // Streaming parameters
  const FIRST_CHUNK_FRAMES = 3;
  const NORMAL_CHUNK_FRAMES = 12;

  // Initialize state - persists across all chunks
  const flowLmState = initState(flowLmMainSession, FLOW_LM_STATE_SHAPES);
  const mimiState = initState(mimiDecoderSession, MIMI_DECODER_STATE_SHAPES);
  const emptySeq = new ort.Tensor('float32', new Float32Array(0), [1, 0, 32]);
  const emptyTextEmb = new ort.Tensor('float32', new Float32Array(0), [1, 0, 1024]);

  // Voice conditioning (once for all chunks)
  const voiceTensor = new ort.Tensor('float32', voiceEmb.data, voiceEmb.shape);
  console.log('Voice embeddings shape:', voiceEmb.shape);
  console.log('Running voice conditioning...');
  const voiceCondResult = await flowLmMainSession.run({
    sequence: emptySeq,
    text_embeddings: voiceTensor,
    ...flowLmState,
  });
  carryFlowLmState(flowLmState, voiceCondResult);

  // Tracking across all chunks
  let totalGeneratedFrames = 0;
  let isFirstAudioChunk = true;
  let totalFlowLmTime = 0;
  let totalDecodeTime = 0;
  const arStartTime = performance.now();

  for (let chunkIdx = 0; chunkIdx < chunks.length; chunkIdx++) {
    if (!isGenerating) break;

    const chunkText = chunks[chunkIdx];
    console.log(`Processing chunk ${chunkIdx + 1}/${chunks.length}: "${chunkText}"`);

    // Tokenize this chunk
    const tokenIds = tokenizerProcessor.encodeIds(chunkText);
    console.log(`Chunk ${chunkIdx + 1} tokens:`, tokenIds.length);

    // Text conditioning for this chunk. The mapping form of from() works for
    // both plain arrays and typed arrays of token ids.
    const textInput = new ort.Tensor(
      'int64',
      BigInt64Array.from(tokenIds, (id) => BigInt(id)),
      [1, tokenIds.length],
    );
    const textCondResult = await textConditionerSession.run({ token_ids: textInput });
    let textEmb = textCondResult[textConditionerSession.outputNames[0]];

    // Add a leading batch dimension when the conditioner returns a 2-D tensor.
    if (textEmb.dims.length === 2) {
      textEmb = new ort.Tensor('float32', textEmb.data, [1, textEmb.dims[0], textEmb.dims[1]]);
    }

    const textCondOutputs = await flowLmMainSession.run({
      sequence: emptySeq,
      text_embeddings: textEmb,
      ...flowLmState,
    });
    carryFlowLmState(flowLmState, textCondOutputs);

    // AR generation for this chunk. The first step is fed an all-NaN latent.
    const chunkLatents = [];
    let currentLatent = new ort.Tensor('float32', new Float32Array(LATENT_DIM).fill(NaN), [1, 1, LATENT_DIM]);
    let chunkDecodedFrames = 0;

    for (let step = 0; step < MAX_FRAMES; step++) {
      if (!isGenerating) break;

      // Yield every 4 steps to allow message processing (e.g., set_lsd)
      if (step > 0 && step % 4 === 0) {
        await new Promise((resolve) => setTimeout(resolve, 0));
      }

      const arInputs = {
        sequence: currentLatent,
        text_embeddings: emptyTextEmb,
        ...flowLmState,
      };

      const stepStart = performance.now();
      const arResult = await flowLmMainSession.run(arInputs);

      const conditioning = arResult['conditioning'];
      const isEos = arResult['eos_logit'].data[0] > EOS_LOGIT_THRESHOLD;
      // Hitting the frame cap ends the chunk just like EOS does; otherwise the
      // frames still pending at the cap would never be decoded or flagged.
      const isChunkEnd = isEos || step === MAX_FRAMES - 1;

      // Flow matching (LSD loop) - uses currentLSD which can be adjusted dynamically
      const latent = sampleLatentNoise(LATENT_DIM, NOISE_STD);
      const lsdSteps = currentLSD;
      const dt = 1.0 / lsdSteps;

      for (let j = 0; j < lsdSteps; j++) {
        const flowResult = await flowLmFlowSession.run({
          c: conditioning,
          s: stTensors[lsdSteps][j].s,
          t: stTensors[lsdSteps][j].t,
          x: new ort.Tensor('float32', latent, [1, LATENT_DIM]),
        });
        const flowDir = flowResult['flow_dir'].data;

        // Euler step along the predicted flow direction.
        for (let k = 0; k < LATENT_DIM; k++) {
          latent[k] += flowDir[k] * dt;
        }
      }

      totalFlowLmTime += performance.now() - stepStart;

      chunkLatents.push(new Float32Array(latent));
      totalGeneratedFrames += 1;

      // Update state
      currentLatent = new ort.Tensor('float32', latent, [1, 1, LATENT_DIM]);
      carryFlowLmState(flowLmState, arResult);

      // Decide how many pending frames to decode now
      const pending = chunkLatents.length - chunkDecodedFrames;
      let decodeSize = 0;
      if (isChunkEnd) {
        decodeSize = pending;
      } else if (isFirstAudioChunk && pending >= FIRST_CHUNK_FRAMES) {
        decodeSize = FIRST_CHUNK_FRAMES;
      } else if (pending >= NORMAL_CHUNK_FRAMES) {
        decodeSize = NORMAL_CHUNK_FRAMES;
      }

      if (decodeSize > 0) {
        const decodeLatents = new Float32Array(decodeSize * LATENT_DIM);
        for (let i = 0; i < decodeSize; i++) {
          decodeLatents.set(chunkLatents[chunkDecodedFrames + i], i * LATENT_DIM);
        }

        const latentTensor = new ort.Tensor('float32', decodeLatents, [1, decodeSize, LATENT_DIM]);
        const decodeInputs = { latent: latentTensor, ...mimiState };

        const decStart = performance.now();
        const decodeResult = await mimiDecoderSession.run(decodeInputs);
        totalDecodeTime += performance.now() - decStart;
        const audioChunk = decodeResult[mimiDecoderSession.outputNames[0]].data;

        // Update MIMI state: decoder output i (i >= 1) feeds input state_{i-1}.
        for (let i = 1; i < mimiDecoderSession.outputNames.length; i++) {
          mimiState[`state_${i - 1}`] = decodeResult[mimiDecoderSession.outputNames[i]];
        }

        chunkDecodedFrames += decodeSize;

        // Copy into a buffer we own so it can be transferred to the main thread.
        const audioFloat32 = new Float32Array(audioChunk);
        const isLastChunk = isChunkEnd && chunkIdx === chunks.length - 1;
        postMessage({
          type: 'audio_chunk',
          data: audioFloat32,
          metrics: {
            bbTime: 0,
            decTime: 0,
            chunkDuration: audioFloat32.length / SAMPLE_RATE,
            isFirst: isFirstAudioChunk,
            isLast: isLastChunk,
          },
        }, [audioFloat32.buffer]);

        isFirstAudioChunk = false;
      }

      if (isEos) {
        console.log(`Chunk ${chunkIdx + 1} EOS at step ${step}, ${chunkLatents.length} frames`);
        break;
      }
      if (isChunkEnd) {
        console.warn(`Chunk ${chunkIdx + 1} reached the ${MAX_FRAMES}-frame limit without EOS`);
      }
    }
  }

  const totalTime = (performance.now() - arStartTime) / 1000;
  const audioSeconds = totalGeneratedFrames * SAMPLES_PER_FRAME / SAMPLE_RATE;

  // RTFx based on actual generation time (flow LM + decoder), not including conditioning.
  // Guard the divisions: nothing is generated when the run is stopped immediately.
  const genTime = (totalFlowLmTime + totalDecodeTime) / 1000;
  const rtfx = genTime > 0 ? audioSeconds / genTime : 0;
  const flowMsPerStep = totalGeneratedFrames > 0 ? totalFlowLmTime / totalGeneratedFrames : 0;

  console.log(`Generation complete: ${totalGeneratedFrames} frames (${audioSeconds.toFixed(2)}s audio)`);
  console.log(`  Total time: ${totalTime.toFixed(2)}s`);
  console.log(`  Gen time: ${genTime.toFixed(2)}s, RTFx: ${rtfx.toFixed(2)}x`);
  console.log(`  Flow LM: ${(totalFlowLmTime / 1000).toFixed(2)}s (${flowMsPerStep.toFixed(1)}ms/step)`);
  console.log(`  Decoder: ${(totalDecodeTime / 1000).toFixed(2)}s`);

  postMessage({
    type: 'status',
    status: `Finished (RTFx: ${rtfx.toFixed(2)}x)`,
    state: 'idle',
    metrics: { rtfx, genTime, totalTime, audioDuration: audioSeconds },
  });
}
|
| 1021 |
+
|
| 1022 |
+
// Reusable scratch buffers for step-counter updates (avoids allocating in the
// hot loop). Buffers are pooled per state dictionary and then per key, so two
// dictionaries that use the same key name never write into the same buffer.
// A WeakMap lets a dictionary's buffers be collected together with it.
const stepBuffers = new WeakMap();

/**
 * Add `increment` to every int64 step-counter tensor in a state dictionary.
 *
 * A key is treated as a step counter when its name contains 'step' and its
 * tensor data is a BigInt64Array. Each matching entry is replaced by a new
 * int64 tensor with the same dims, backed by a pooled buffer.
 *
 * @param {Object<string, ort.Tensor>} state - State dictionary, updated in place.
 * @param {number|bigint} increment - Amount added to each counter element.
 */
function updateStateSteps(state, increment) {
  if (state == null) return;

  const incBigInt = BigInt(increment);

  for (const key of Object.keys(state)) {
    if (!key.includes('step')) continue;

    const tensor = state[key];
    if (!tensor || !(tensor.data instanceof BigInt64Array)) continue;

    let pool = stepBuffers.get(state);
    if (!pool) {
      pool = new Map();
      stepBuffers.set(state, pool);
    }

    // Reuse buffer if same size, otherwise create new one
    let buf = pool.get(key);
    if (!buf || buf.length !== tensor.data.length) {
      buf = new BigInt64Array(tensor.data.length);
      pool.set(key, buf);
    }

    // Safe even when tensor.data is this same buffer: each element is read
    // before it is overwritten.
    for (let i = 0; i < tensor.data.length; i++) {
      buf[i] = tensor.data[i] + incBigInt;
    }
    state[key] = new ort.Tensor('int64', buf, tensor.dims);
  }
}
|
onnx-streaming.js
ADDED
|
@@ -0,0 +1,638 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import { PCMPlayerWorklet as PCMPlayer } from './PCMPlayerWorklet.js';
|
| 3 |
+
|
| 4 |
+
// Audio configuration. SAMPLE_RATE (Hz) is the rate the AudioContext is created with and the rate uploaded voice audio is resampled to.
|
| 5 |
+
const SAMPLE_RATE = 24000;
|
| 6 |
+
const FADE_SAMPLES = 480; // 480 samples = 20 ms fade at 24 kHz
|
| 7 |
+
|
| 8 |
+
export class PocketTTSStreaming {
|
| 9 |
+
constructor() {
|
| 10 |
+
this.worker = null;
|
| 11 |
+
this.player = null;
|
| 12 |
+
this.audioContext = null;
|
| 13 |
+
this.isGenerating = false;
|
| 14 |
+
this.isWorkerReady = false;
|
| 15 |
+
this.pendingGeneration = false;
|
| 16 |
+
|
| 17 |
+
// Voice state
|
| 18 |
+
this.availableVoices = [];
|
| 19 |
+
this.currentVoice = null;
|
| 20 |
+
this.customVoiceAudio = null;
|
| 21 |
+
|
| 22 |
+
// Metrics State
|
| 23 |
+
this.generationStartTime = 0;
|
| 24 |
+
this.lastChunkFinishTime = 0;
|
| 25 |
+
this.rtfMovingAverage = 0;
|
| 26 |
+
|
| 27 |
+
// Edge optimization state (dynamic LSD)
|
| 28 |
+
this.edgeOptimizationApplied = false;
|
| 29 |
+
|
| 30 |
+
this.elements = {
|
| 31 |
+
textInput: document.getElementById('text-input'),
|
| 32 |
+
generateBtn: document.getElementById('generate-btn'),
|
| 33 |
+
stopBtn: document.getElementById('stop-btn'),
|
| 34 |
+
statusText: document.getElementById('stat-status'),
|
| 35 |
+
statusIndicator: document.getElementById('status-indicator'),
|
| 36 |
+
modelStatusIcon: document.querySelector('#model-status .model-status__dot'),
|
| 37 |
+
modelStatusText: document.querySelector('#model-status .model-status__text'),
|
| 38 |
+
btnLoader: document.getElementById('btn-loader'),
|
| 39 |
+
statTTFB: document.getElementById('stat-ttfb'),
|
| 40 |
+
statRTFx: document.getElementById('stat-rtfx'),
|
| 41 |
+
ttfbBar: document.getElementById('ttfb-bar'),
|
| 42 |
+
rtfxContext: document.getElementById('rtfx-context'),
|
| 43 |
+
edgeOptNote: document.getElementById('edge-opt-note'),
|
| 44 |
+
voiceSelect: document.getElementById('voice-select'),
|
| 45 |
+
voiceUpload: document.getElementById('voice-upload'),
|
| 46 |
+
voiceUploadBtn: document.getElementById('voice-upload-btn'),
|
| 47 |
+
voiceUploadStatus: document.getElementById('voice-upload-status')
|
| 48 |
+
};
|
| 49 |
+
|
| 50 |
+
this.attachEventListeners();
|
| 51 |
+
this.init();
|
| 52 |
+
this.setupVisualization();
|
| 53 |
+
}
|
| 54 |
+
|
| 55 |
+
async init() {
|
| 56 |
+
console.log('Pocket TTS v1.0 - Web Demo');
|
| 57 |
+
console.log('Secure context:', window.isSecureContext);
|
| 58 |
+
console.log('Location:', window.location.href);
|
| 59 |
+
this.updateStatus('Initializing...', 'running');
|
| 60 |
+
|
| 61 |
+
// Initial button state
|
| 62 |
+
this.elements.generateBtn.disabled = true;
|
| 63 |
+
if (this.elements.voiceUploadBtn) this.elements.voiceUploadBtn.disabled = true;
|
| 64 |
+
const btnText = this.elements.generateBtn.querySelector('.btn__text');
|
| 65 |
+
if (btnText) btnText.textContent = 'Loading Models...';
|
| 66 |
+
this.elements.btnLoader.style.display = 'block';
|
| 67 |
+
|
| 68 |
+
// Check secure context
|
| 69 |
+
if (!window.isSecureContext) {
|
| 70 |
+
const msg = 'AudioWorklet requires HTTPS or localhost. Current: ' + window.location.hostname;
|
| 71 |
+
console.error(msg);
|
| 72 |
+
this.updateStatus(msg, 'error');
|
| 73 |
+
this.elements.btnLoader.style.display = 'none';
|
| 74 |
+
if (btnText) btnText.textContent = 'Secure Context Required';
|
| 75 |
+
return;
|
| 76 |
+
}
|
| 77 |
+
|
| 78 |
+
try {
|
| 79 |
+
// Initialize Audio Context and Player
|
| 80 |
+
this.audioContext = new (window.AudioContext || window.webkitAudioContext)({
|
| 81 |
+
sampleRate: SAMPLE_RATE,
|
| 82 |
+
latencyHint: 'interactive'
|
| 83 |
+
});
|
| 84 |
+
|
| 85 |
+
// Check if AudioWorklet is supported
|
| 86 |
+
if (!this.audioContext.audioWorklet) {
|
| 87 |
+
throw new Error('AudioWorklet not supported in this browser.');
|
| 88 |
+
}
|
| 89 |
+
|
| 90 |
+
await this.audioContext.audioWorklet.addModule('PCMPlayerWorklet.js');
|
| 91 |
+
this.player = new PCMPlayer(this.audioContext);
|
| 92 |
+
} catch (err) {
|
| 93 |
+
console.error('Audio initialization failed:', err);
|
| 94 |
+
this.updateStatus('Audio init failed: ' + err.message, 'error');
|
| 95 |
+
this.elements.btnLoader.style.display = 'none';
|
| 96 |
+
if (btnText) btnText.textContent = 'Audio Error';
|
| 97 |
+
return;
|
| 98 |
+
}
|
| 99 |
+
|
| 100 |
+
// Initialize Worker (as ES module)
|
| 101 |
+
console.log('Spawning Inference Worker...');
|
| 102 |
+
this.worker = new Worker('./inference-worker.js?v=9', { type: 'module' });
|
| 103 |
+
|
| 104 |
+
this.worker.onmessage = (e) => {
|
| 105 |
+
const { type, data, error, status, state, metrics, text, voices, defaultVoice, voiceName } = e.data;
|
| 106 |
+
|
| 107 |
+
switch (type) {
|
| 108 |
+
case 'status':
|
| 109 |
+
this.updateStatus(status, state);
|
| 110 |
+
break;
|
| 111 |
+
case 'model_status':
|
| 112 |
+
this.updateModelStatus(status, text);
|
| 113 |
+
break;
|
| 114 |
+
case 'voices_loaded':
|
| 115 |
+
this.handleVoicesLoaded(voices, defaultVoice);
|
| 116 |
+
break;
|
| 117 |
+
case 'voice_encoded':
|
| 118 |
+
this.handleVoiceEncoded(voiceName);
|
| 119 |
+
break;
|
| 120 |
+
case 'voice_set':
|
| 121 |
+
this.currentVoice = voiceName;
|
| 122 |
+
break;
|
| 123 |
+
case 'loaded':
|
| 124 |
+
console.log('Worker confirmed models loaded.');
|
| 125 |
+
this.isWorkerReady = true;
|
| 126 |
+
this.elements.generateBtn.disabled = false;
|
| 127 |
+
if (this.elements.voiceUploadBtn) this.elements.voiceUploadBtn.disabled = false;
|
| 128 |
+
this.elements.btnLoader.style.display = 'none';
|
| 129 |
+
const loadedBtnText = this.elements.generateBtn.querySelector('.btn__text');
|
| 130 |
+
if (loadedBtnText) loadedBtnText.textContent = 'Generate Audio';
|
| 131 |
+
|
| 132 |
+
if (this.pendingGeneration) {
|
| 133 |
+
this.pendingGeneration = false;
|
| 134 |
+
this.startGeneration();
|
| 135 |
+
}
|
| 136 |
+
break;
|
| 137 |
+
case 'generation_started':
|
| 138 |
+
// The main thread already sets this in startGeneration for better precision
|
| 139 |
+
break;
|
| 140 |
+
case 'audio_chunk':
|
| 141 |
+
this.handleAudioChunk(data, metrics);
|
| 142 |
+
break;
|
| 143 |
+
case 'stream_ended':
|
| 144 |
+
this.handleStreamEnd();
|
| 145 |
+
break;
|
| 146 |
+
case 'error':
|
| 147 |
+
console.error('Worker Error:', error);
|
| 148 |
+
this.updateStatus(`Error: ${error}`, 'error');
|
| 149 |
+
this.resetUI();
|
| 150 |
+
break;
|
| 151 |
+
}
|
| 152 |
+
};
|
| 153 |
+
|
| 154 |
+
// Trigger Model Load in Worker
|
| 155 |
+
this.worker.postMessage({ type: 'load' });
|
| 156 |
+
}
|
| 157 |
+
|
| 158 |
+
handleVoicesLoaded(voices, defaultVoice) {
|
| 159 |
+
this.availableVoices = voices;
|
| 160 |
+
this.currentVoice = defaultVoice;
|
| 161 |
+
|
| 162 |
+
// Populate voice selector
|
| 163 |
+
if (this.elements.voiceSelect) {
|
| 164 |
+
this.elements.voiceSelect.innerHTML = '';
|
| 165 |
+
|
| 166 |
+
// Add predefined voices
|
| 167 |
+
for (const voice of voices) {
|
| 168 |
+
const option = document.createElement('option');
|
| 169 |
+
option.value = voice;
|
| 170 |
+
option.textContent = voice.charAt(0).toUpperCase() + voice.slice(1);
|
| 171 |
+
if (voice === defaultVoice) {
|
| 172 |
+
option.selected = true;
|
| 173 |
+
}
|
| 174 |
+
this.elements.voiceSelect.appendChild(option);
|
| 175 |
+
}
|
| 176 |
+
|
| 177 |
+
// Add custom voice option
|
| 178 |
+
const customOption = document.createElement('option');
|
| 179 |
+
customOption.value = 'custom';
|
| 180 |
+
customOption.textContent = 'Custom (Upload)';
|
| 181 |
+
this.elements.voiceSelect.appendChild(customOption);
|
| 182 |
+
}
|
| 183 |
+
|
| 184 |
+
console.log('Available voices:', voices, 'Default:', defaultVoice);
|
| 185 |
+
}
|
| 186 |
+
|
| 187 |
+
handleVoiceEncoded(voiceName) {
|
| 188 |
+
this.currentVoice = voiceName;
|
| 189 |
+
if (this.elements.voiceUploadStatus) {
|
| 190 |
+
this.elements.voiceUploadStatus.textContent = 'Voice encoded successfully!';
|
| 191 |
+
this.elements.voiceUploadStatus.className = 'voice-upload-status success';
|
| 192 |
+
}
|
| 193 |
+
// Set the select to custom
|
| 194 |
+
if (this.elements.voiceSelect) {
|
| 195 |
+
this.elements.voiceSelect.value = 'custom';
|
| 196 |
+
}
|
| 197 |
+
}
|
| 198 |
+
|
| 199 |
+
async handleVoiceUpload(file) {
|
| 200 |
+
if (!file) return;
|
| 201 |
+
|
| 202 |
+
if (this.elements.voiceUploadStatus) {
|
| 203 |
+
this.elements.voiceUploadStatus.textContent = 'Processing audio...';
|
| 204 |
+
this.elements.voiceUploadStatus.className = 'voice-upload-status';
|
| 205 |
+
}
|
| 206 |
+
|
| 207 |
+
try {
|
| 208 |
+
// Decode audio file
|
| 209 |
+
const arrayBuffer = await file.arrayBuffer();
|
| 210 |
+
const audioBuffer = await this.audioContext.decodeAudioData(arrayBuffer);
|
| 211 |
+
|
| 212 |
+
// Resample to 24kHz if needed
|
| 213 |
+
let audioData;
|
| 214 |
+
if (audioBuffer.sampleRate !== SAMPLE_RATE) {
|
| 215 |
+
audioData = this.resampleAudio(audioBuffer, SAMPLE_RATE);
|
| 216 |
+
} else {
|
| 217 |
+
audioData = audioBuffer.getChannelData(0);
|
| 218 |
+
}
|
| 219 |
+
|
| 220 |
+
// Convert to mono if stereo
|
| 221 |
+
if (audioBuffer.numberOfChannels > 1 && audioBuffer.sampleRate === SAMPLE_RATE) {
|
| 222 |
+
const left = audioBuffer.getChannelData(0);
|
| 223 |
+
const right = audioBuffer.getChannelData(1);
|
| 224 |
+
audioData = new Float32Array(left.length);
|
| 225 |
+
for (let i = 0; i < left.length; i++) {
|
| 226 |
+
audioData[i] = (left[i] + right[i]) / 2;
|
| 227 |
+
}
|
| 228 |
+
}
|
| 229 |
+
|
| 230 |
+
// Limit to 10 seconds max
|
| 231 |
+
const maxSamples = SAMPLE_RATE * 10;
|
| 232 |
+
if (audioData.length > maxSamples) {
|
| 233 |
+
audioData = audioData.slice(0, maxSamples);
|
| 234 |
+
}
|
| 235 |
+
|
| 236 |
+
// Send to worker for encoding
|
| 237 |
+
this.worker.postMessage({
|
| 238 |
+
type: 'encode_voice',
|
| 239 |
+
data: { audio: audioData }
|
| 240 |
+
});
|
| 241 |
+
|
| 242 |
+
} catch (err) {
|
| 243 |
+
console.error('Voice upload error:', err);
|
| 244 |
+
if (this.elements.voiceUploadStatus) {
|
| 245 |
+
this.elements.voiceUploadStatus.textContent = `Error: ${err.message}`;
|
| 246 |
+
this.elements.voiceUploadStatus.className = 'voice-upload-status error';
|
| 247 |
+
}
|
| 248 |
+
}
|
| 249 |
+
}
|
| 250 |
+
|
| 251 |
+
resampleAudio(audioBuffer, targetRate) {
|
| 252 |
+
const sourceRate = audioBuffer.sampleRate;
|
| 253 |
+
const sourceData = audioBuffer.getChannelData(0);
|
| 254 |
+
|
| 255 |
+
// If stereo, mix to mono
|
| 256 |
+
let monoData = sourceData;
|
| 257 |
+
if (audioBuffer.numberOfChannels > 1) {
|
| 258 |
+
const right = audioBuffer.getChannelData(1);
|
| 259 |
+
monoData = new Float32Array(sourceData.length);
|
| 260 |
+
for (let i = 0; i < sourceData.length; i++) {
|
| 261 |
+
monoData[i] = (sourceData[i] + right[i]) / 2;
|
| 262 |
+
}
|
| 263 |
+
}
|
| 264 |
+
|
| 265 |
+
// Linear interpolation resampling
|
| 266 |
+
const ratio = sourceRate / targetRate;
|
| 267 |
+
const outputLength = Math.floor(monoData.length / ratio);
|
| 268 |
+
const output = new Float32Array(outputLength);
|
| 269 |
+
|
| 270 |
+
for (let i = 0; i < outputLength; i++) {
|
| 271 |
+
const srcIndex = i * ratio;
|
| 272 |
+
const srcIndexFloor = Math.floor(srcIndex);
|
| 273 |
+
const srcIndexCeil = Math.min(srcIndexFloor + 1, monoData.length - 1);
|
| 274 |
+
const t = srcIndex - srcIndexFloor;
|
| 275 |
+
output[i] = monoData[srcIndexFloor] * (1 - t) + monoData[srcIndexCeil] * t;
|
| 276 |
+
}
|
| 277 |
+
|
| 278 |
+
return output;
|
| 279 |
+
}
|
| 280 |
+
|
| 281 |
+
attachEventListeners() {
|
| 282 |
+
this.elements.generateBtn.addEventListener('click', () => this.startGeneration());
|
| 283 |
+
this.elements.stopBtn.addEventListener('click', () => this.stopGeneration());
|
| 284 |
+
|
| 285 |
+
// Voice selector
|
| 286 |
+
if (this.elements.voiceSelect) {
|
| 287 |
+
this.elements.voiceSelect.addEventListener('change', (e) => {
|
| 288 |
+
const voice = e.target.value;
|
| 289 |
+
if (voice === 'custom') {
|
| 290 |
+
// Trigger file upload
|
| 291 |
+
if (this.elements.voiceUpload) {
|
| 292 |
+
this.elements.voiceUpload.click();
|
| 293 |
+
}
|
| 294 |
+
} else {
|
| 295 |
+
this.worker.postMessage({
|
| 296 |
+
type: 'set_voice',
|
| 297 |
+
data: { voiceName: voice }
|
| 298 |
+
});
|
| 299 |
+
}
|
| 300 |
+
});
|
| 301 |
+
}
|
| 302 |
+
|
| 303 |
+
// Voice file upload
|
| 304 |
+
if (this.elements.voiceUpload) {
|
| 305 |
+
this.elements.voiceUpload.addEventListener('change', (e) => {
|
| 306 |
+
const file = e.target.files[0];
|
| 307 |
+
if (file) {
|
| 308 |
+
this.handleVoiceUpload(file);
|
| 309 |
+
}
|
| 310 |
+
});
|
| 311 |
+
}
|
| 312 |
+
|
| 313 |
+
// Voice upload button
|
| 314 |
+
if (this.elements.voiceUploadBtn) {
|
| 315 |
+
this.elements.voiceUploadBtn.addEventListener('click', () => {
|
| 316 |
+
if (this.elements.voiceUpload) {
|
| 317 |
+
this.elements.voiceUpload.click();
|
| 318 |
+
}
|
| 319 |
+
});
|
| 320 |
+
}
|
| 321 |
+
|
| 322 |
+
// Sample buttons
|
| 323 |
+
document.querySelectorAll('.sample-btn').forEach(btn => {
|
| 324 |
+
btn.addEventListener('click', () => {
|
| 325 |
+
this.elements.textInput.value = btn.getAttribute('data-text');
|
| 326 |
+
// Trigger input event to update character count
|
| 327 |
+
this.elements.textInput.dispatchEvent(new Event('input'));
|
| 328 |
+
});
|
| 329 |
+
});
|
| 330 |
+
|
| 331 |
+
// Character count
|
| 332 |
+
this.elements.textInput.addEventListener('input', () => {
|
| 333 |
+
const count = this.elements.textInput.value.length;
|
| 334 |
+
const countEl = document.getElementById('char-count');
|
| 335 |
+
if (countEl) countEl.textContent = count;
|
| 336 |
+
});
|
| 337 |
+
|
| 338 |
+
this.elements.textInput.addEventListener('keydown', (e) => {
|
| 339 |
+
if ((e.ctrlKey || e.metaKey) && e.key === 'Enter') {
|
| 340 |
+
this.startGeneration();
|
| 341 |
+
}
|
| 342 |
+
});
|
| 343 |
+
}
|
| 344 |
+
|
| 345 |
+
async startGeneration() {
|
| 346 |
+
this.generationStartTime = performance.now();
|
| 347 |
+
try {
|
| 348 |
+
if (!this.isWorkerReady) {
|
| 349 |
+
this.pendingGeneration = true;
|
| 350 |
+
const btnText = this.elements.generateBtn.querySelector('.btn__text');
|
| 351 |
+
if (btnText) btnText.textContent = 'Starting soon...';
|
| 352 |
+
return;
|
| 353 |
+
}
|
| 354 |
+
|
| 355 |
+
if (this.isGenerating) return;
|
| 356 |
+
|
| 357 |
+
if (this.audioContext && this.audioContext.state === 'suspended') {
|
| 358 |
+
await this.audioContext.resume();
|
| 359 |
+
}
|
| 360 |
+
|
| 361 |
+
const text = this.elements.textInput.value.trim();
|
| 362 |
+
if (!text) return;
|
| 363 |
+
|
| 364 |
+
this.isGenerating = true;
|
| 365 |
+
this.elements.generateBtn.disabled = true;
|
| 366 |
+
this.elements.generateBtn.classList.add('btn--generating');
|
| 367 |
+
this.elements.stopBtn.disabled = false;
|
| 368 |
+
|
| 369 |
+
if (this.player) this.player.reset();
|
| 370 |
+
|
| 371 |
+
// Reset metrics
|
| 372 |
+
this.elements.statTTFB.textContent = '--';
|
| 373 |
+
this.elements.statRTFx.textContent = '--';
|
| 374 |
+
if (this.elements.ttfbBar) this.elements.ttfbBar.style.width = '0%';
|
| 375 |
+
if (this.elements.edgeOptNote) this.elements.edgeOptNote.style.display = 'none';
|
| 376 |
+
|
| 377 |
+
this.rtfMovingAverage = 0;
|
| 378 |
+
this.edgeOptimizationApplied = false;
|
| 379 |
+
|
| 380 |
+
// Get current voice from selector
|
| 381 |
+
const voice = this.elements.voiceSelect ? this.elements.voiceSelect.value : this.currentVoice;
|
| 382 |
+
|
| 383 |
+
this.worker.postMessage({
|
| 384 |
+
type: 'generate',
|
| 385 |
+
data: { text, voice }
|
| 386 |
+
});
|
| 387 |
+
} catch (err) {
|
| 388 |
+
console.error('Error in startGeneration:', err);
|
| 389 |
+
this.updateStatus(`Error: ${err.message}`, 'error');
|
| 390 |
+
this.isGenerating = false;
|
| 391 |
+
this.resetUI();
|
| 392 |
+
}
|
| 393 |
+
}
|
| 394 |
+
|
| 395 |
+
stopGeneration() {
|
| 396 |
+
if (!this.isGenerating) return;
|
| 397 |
+
this.worker.postMessage({ type: 'stop' });
|
| 398 |
+
// Handle stop immediately in UI
|
| 399 |
+
this.handleStreamEnd();
|
| 400 |
+
}
|
| 401 |
+
|
| 402 |
+
applyFadeIn(audioData) {
|
| 403 |
+
const fadeLen = Math.min(FADE_SAMPLES, audioData.length);
|
| 404 |
+
for (let i = 0; i < fadeLen; i++) {
|
| 405 |
+
audioData[i] *= i / fadeLen;
|
| 406 |
+
}
|
| 407 |
+
}
|
| 408 |
+
|
| 409 |
+
applyFadeOut(audioData) {
|
| 410 |
+
const fadeLen = Math.min(FADE_SAMPLES, audioData.length);
|
| 411 |
+
const startIdx = audioData.length - fadeLen;
|
| 412 |
+
for (let i = 0; i < fadeLen; i++) {
|
| 413 |
+
audioData[startIdx + i] *= 1 - (i / fadeLen);
|
| 414 |
+
}
|
| 415 |
+
}
|
| 416 |
+
|
| 417 |
+
handleAudioChunk(audioData, metrics) {
|
| 418 |
+
if (!this.isGenerating) return;
|
| 419 |
+
|
| 420 |
+
// Apply fades to prevent pops
|
| 421 |
+
if (metrics.isFirst) {
|
| 422 |
+
this.applyFadeIn(audioData);
|
| 423 |
+
}
|
| 424 |
+
if (metrics.isLast) {
|
| 425 |
+
this.applyFadeOut(audioData);
|
| 426 |
+
}
|
| 427 |
+
|
| 428 |
+
// Play audio
|
| 429 |
+
this.player.playAudio(audioData);
|
| 430 |
+
|
| 431 |
+
// Calculate RTFx immediately (not in RAF) so edge optimization triggers fast
|
| 432 |
+
const now = performance.now();
|
| 433 |
+
let ttfb = 0;
|
| 434 |
+
|
| 435 |
+
if (metrics.isFirst) {
|
| 436 |
+
ttfb = now - this.generationStartTime;
|
| 437 |
+
this.lastChunkFinishTime = now;
|
| 438 |
+
} else if (this.lastChunkFinishTime > 0) {
|
| 439 |
+
const timeSinceLastChunk = (now - this.lastChunkFinishTime) / 1000;
|
| 440 |
+
this.lastChunkFinishTime = now;
|
| 441 |
+
|
| 442 |
+
if (timeSinceLastChunk > 0) {
|
| 443 |
+
const chunkDurationSec = metrics.chunkDuration;
|
| 444 |
+
const instantaneousRTF = chunkDurationSec / timeSinceLastChunk;
|
| 445 |
+
|
| 446 |
+
if (this.rtfMovingAverage === 0) {
|
| 447 |
+
this.rtfMovingAverage = instantaneousRTF;
|
| 448 |
+
} else {
|
| 449 |
+
this.rtfMovingAverage = this.rtfMovingAverage * 0.8 + instantaneousRTF * 0.2;
|
| 450 |
+
}
|
| 451 |
+
|
| 452 |
+
// Edge optimization: if RTFx drops below 1.0, switch to LSD=1 immediately
|
| 453 |
+
if (!this.edgeOptimizationApplied && this.rtfMovingAverage < 1.0) {
|
| 454 |
+
this.edgeOptimizationApplied = true;
|
| 455 |
+
this.worker.postMessage({ type: 'set_lsd', data: { lsd: 1 } });
|
| 456 |
+
console.log('Edge optimization applied: LSD reduced to 1');
|
| 457 |
+
}
|
| 458 |
+
}
|
| 459 |
+
}
|
| 460 |
+
|
| 461 |
+
// Update UI in RAF (non-blocking)
|
| 462 |
+
const rtfxToDisplay = this.rtfMovingAverage;
|
| 463 |
+
const showEdgeOpt = this.edgeOptimizationApplied;
|
| 464 |
+
requestAnimationFrame(() => {
|
| 465 |
+
if (metrics.isFirst) {
|
| 466 |
+
this.updateTTFB(ttfb);
|
| 467 |
+
}
|
| 468 |
+
if (rtfxToDisplay > 0) {
|
| 469 |
+
this.updateRTFx(rtfxToDisplay);
|
| 470 |
+
}
|
| 471 |
+
if (showEdgeOpt && this.elements.edgeOptNote) {
|
| 472 |
+
this.elements.edgeOptNote.style.display = 'block';
|
| 473 |
+
}
|
| 474 |
+
});
|
| 475 |
+
}
|
| 476 |
+
|
| 477 |
+
handleStreamEnd() {
|
| 478 |
+
if (this.player.notifyStreamEnded) this.player.notifyStreamEnded();
|
| 479 |
+
this.resetUI();
|
| 480 |
+
this.isGenerating = false;
|
| 481 |
+
}
|
| 482 |
+
|
| 483 |
+
resetUI() {
|
| 484 |
+
this.elements.generateBtn.disabled = false;
|
| 485 |
+
this.elements.generateBtn.classList.remove('btn--generating');
|
| 486 |
+
const btnText = this.elements.generateBtn.querySelector('.btn__text');
|
| 487 |
+
if (btnText) btnText.textContent = 'Generate Audio';
|
| 488 |
+
this.elements.stopBtn.disabled = true;
|
| 489 |
+
}
|
| 490 |
+
|
| 491 |
+
updateStatus(text, state) {
|
| 492 |
+
this.elements.statusText.textContent = text;
|
| 493 |
+
this.elements.statusIndicator.className = `status-indicator status-${state}`;
|
| 494 |
+
}
|
| 495 |
+
|
| 496 |
+
updateModelStatus(state, text) {
|
| 497 |
+
this.elements.modelStatusText.textContent = text;
|
| 498 |
+
this.elements.modelStatusIcon.className = `status-icon status-${state}`;
|
| 499 |
+
}
|
| 500 |
+
|
| 501 |
+
updateTTFB(ms) {
|
| 502 |
+
this.elements.statTTFB.textContent = Math.round(ms);
|
| 503 |
+
const percentage = Math.min((ms / 2000) * 100, 100);
|
| 504 |
+
this.elements.ttfbBar.style.width = `${percentage}%`;
|
| 505 |
+
this.elements.ttfbBar.style.background = ms < 500 ? '#00d4aa' : ms < 1000 ? '#ffd93d' : '#ff6b6b';
|
| 506 |
+
}
|
| 507 |
+
|
| 508 |
+
updateRTFx(val) {
|
| 509 |
+
this.elements.statRTFx.textContent = `${val.toFixed(2)}x`;
|
| 510 |
+
this.elements.rtfxContext.style.color = val >= 1.0 ? '#00d4aa' : '#ff6b6b';
|
| 511 |
+
}
|
| 512 |
+
|
| 513 |
+
// -------------------------------------------------------------------------
|
| 514 |
+
// Visualization
|
| 515 |
+
// -------------------------------------------------------------------------
|
| 516 |
+
setupVisualization() {
|
| 517 |
+
this.waveformCanvas = document.getElementById('visualizer-waveform');
|
| 518 |
+
this.barsCanvas = document.getElementById('visualizer-bars');
|
| 519 |
+
if (!this.waveformCanvas || !this.barsCanvas) return;
|
| 520 |
+
|
| 521 |
+
this.waveformCtx = this.waveformCanvas.getContext('2d');
|
| 522 |
+
this.barsCtx = this.barsCanvas.getContext('2d');
|
| 523 |
+
|
| 524 |
+
// Initial resize
|
| 525 |
+
this.resizeCanvases();
|
| 526 |
+
window.addEventListener('resize', () => this.resizeCanvases());
|
| 527 |
+
|
| 528 |
+
// Start animation loop
|
| 529 |
+
requestAnimationFrame(() => this.draw());
|
| 530 |
+
}
|
| 531 |
+
|
| 532 |
+
resizeCanvases() {
|
| 533 |
+
if (!this.waveformCanvas || !this.barsCanvas) return;
|
| 534 |
+
|
| 535 |
+
const parent = this.waveformCanvas.parentElement;
|
| 536 |
+
const width = parent.clientWidth;
|
| 537 |
+
const height = parent.clientHeight;
|
| 538 |
+
|
| 539 |
+
const dpr = window.devicePixelRatio || 1;
|
| 540 |
+
|
| 541 |
+
[this.waveformCanvas, this.barsCanvas].forEach(canvas => {
|
| 542 |
+
canvas.width = width * dpr;
|
| 543 |
+
canvas.height = height * dpr;
|
| 544 |
+
canvas.style.width = `${width}px`;
|
| 545 |
+
canvas.style.height = `${height}px`;
|
| 546 |
+
const ctx = canvas.getContext('2d');
|
| 547 |
+
ctx.scale(dpr, dpr);
|
| 548 |
+
});
|
| 549 |
+
}
|
| 550 |
+
|
| 551 |
+
draw() {
|
| 552 |
+
requestAnimationFrame(() => this.draw());
|
| 553 |
+
|
| 554 |
+
if (!this.player || !this.player.analyser) return;
|
| 555 |
+
|
| 556 |
+
const bufferLength = this.player.analyser.frequencyBinCount;
|
| 557 |
+
const dataArray = new Uint8Array(bufferLength);
|
| 558 |
+
|
| 559 |
+
// Draw Bars (Frequency)
|
| 560 |
+
this.player.analyser.getByteFrequencyData(dataArray);
|
| 561 |
+
this.drawBars(dataArray);
|
| 562 |
+
|
| 563 |
+
// Draw Waveform (Time Domain)
|
| 564 |
+
this.player.analyser.getByteTimeDomainData(dataArray);
|
| 565 |
+
this.drawWaveform(dataArray);
|
| 566 |
+
}
|
| 567 |
+
|
| 568 |
+
drawWaveform(dataArray) {
|
| 569 |
+
const ctx = this.waveformCtx;
|
| 570 |
+
const canvas = this.waveformCanvas;
|
| 571 |
+
const width = canvas.width / (window.devicePixelRatio || 1);
|
| 572 |
+
const height = canvas.height / (window.devicePixelRatio || 1);
|
| 573 |
+
|
| 574 |
+
ctx.clearRect(0, 0, width, height);
|
| 575 |
+
ctx.lineWidth = 2;
|
| 576 |
+
ctx.strokeStyle = '#00d4aa'; // Mint primary
|
| 577 |
+
ctx.beginPath();
|
| 578 |
+
|
| 579 |
+
const sliceWidth = width / dataArray.length;
|
| 580 |
+
let x = 0;
|
| 581 |
+
|
| 582 |
+
for (let i = 0; i < dataArray.length; i++) {
|
| 583 |
+
const v = dataArray[i] / 128.0;
|
| 584 |
+
const y = (v * height) / 2;
|
| 585 |
+
|
| 586 |
+
if (i === 0) ctx.moveTo(x, y);
|
| 587 |
+
else ctx.lineTo(x, y);
|
| 588 |
+
|
| 589 |
+
x += sliceWidth;
|
| 590 |
+
}
|
| 591 |
+
|
| 592 |
+
ctx.lineTo(width, height / 2);
|
| 593 |
+
ctx.stroke();
|
| 594 |
+
}
|
| 595 |
+
|
| 596 |
+
drawBars(dataArray) {
|
| 597 |
+
const ctx = this.barsCtx;
|
| 598 |
+
const canvas = this.barsCanvas;
|
| 599 |
+
const width = canvas.width / (window.devicePixelRatio || 1);
|
| 600 |
+
const height = canvas.height / (window.devicePixelRatio || 1);
|
| 601 |
+
|
| 602 |
+
ctx.clearRect(0, 0, width, height);
|
| 603 |
+
|
| 604 |
+
const barCount = 120; // Number of bars to display
|
| 605 |
+
const barWidth = (width / barCount);
|
| 606 |
+
const samplesPerBar = Math.floor(dataArray.length / barCount);
|
| 607 |
+
|
| 608 |
+
for (let i = 0; i < barCount; i++) {
|
| 609 |
+
let sum = 0;
|
| 610 |
+
for (let j = 0; j < samplesPerBar; j++) {
|
| 611 |
+
sum += dataArray[i * samplesPerBar + j];
|
| 612 |
+
}
|
| 613 |
+
const average = sum / samplesPerBar;
|
| 614 |
+
const barHeight = (average / 255) * height * 0.8;
|
| 615 |
+
|
| 616 |
+
// Gradient for bar - Mint spectrum
|
| 617 |
+
const gradient = ctx.createLinearGradient(0, height, 0, height - barHeight);
|
| 618 |
+
gradient.addColorStop(0, '#3eb48944');
|
| 619 |
+
gradient.addColorStop(1, '#7fffd4cc');
|
| 620 |
+
|
| 621 |
+
ctx.fillStyle = gradient;
|
| 622 |
+
|
| 623 |
+
// Rounded bars
|
| 624 |
+
const x = i * barWidth;
|
| 625 |
+
const y = height - barHeight;
|
| 626 |
+
const radius = barWidth / 2;
|
| 627 |
+
|
| 628 |
+
ctx.beginPath();
|
| 629 |
+
ctx.roundRect(x + 1, y, barWidth - 2, barHeight, [2, 2, 0, 0]);
|
| 630 |
+
ctx.fill();
|
| 631 |
+
}
|
| 632 |
+
}
|
| 633 |
+
}
|
| 634 |
+
|
| 635 |
+
// Boot the app once the DOM has been parsed; the instance is exposed as window.app.
document.addEventListener('DOMContentLoaded', function startApp() {
    window.app = new PocketTTSStreaming();
});
|
onnx/ONNX-LICENSE
ADDED
|
@@ -0,0 +1,408 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Attribution-NonCommercial 4.0 International
|
| 2 |
+
|
| 3 |
+
=======================================================================
|
| 4 |
+
|
| 5 |
+
Creative Commons Corporation ("Creative Commons") is not a law firm and
|
| 6 |
+
does not provide legal services or legal advice. Distribution of
|
| 7 |
+
Creative Commons public licenses does not create a lawyer-client or
|
| 8 |
+
other relationship. Creative Commons makes its licenses and related
|
| 9 |
+
information available on an "as-is" basis. Creative Commons gives no
|
| 10 |
+
warranties regarding its licenses, any material licensed under their
|
| 11 |
+
terms and conditions, or any related information. Creative Commons
|
| 12 |
+
disclaims all liability for damages resulting from their use to the
|
| 13 |
+
fullest extent possible.
|
| 14 |
+
|
| 15 |
+
Using Creative Commons Public Licenses
|
| 16 |
+
|
| 17 |
+
Creative Commons public licenses provide a standard set of terms and
|
| 18 |
+
conditions that creators and other rights holders may use to share
|
| 19 |
+
original works of authorship and other material subject to copyright
|
| 20 |
+
and certain other rights specified in the public license below. The
|
| 21 |
+
following considerations are for informational purposes only, are not
|
| 22 |
+
exhaustive, and do not form part of our licenses.
|
| 23 |
+
|
| 24 |
+
Considerations for licensors: Our public licenses are
|
| 25 |
+
intended for use by those authorized to give the public
|
| 26 |
+
permission to use material in ways otherwise restricted by
|
| 27 |
+
copyright and certain other rights. Our licenses are
|
| 28 |
+
irrevocable. Licensors should read and understand the terms
|
| 29 |
+
and conditions of the license they choose before applying it.
|
| 30 |
+
Licensors should also secure all rights necessary before
|
| 31 |
+
applying our licenses so that the public can reuse the
|
| 32 |
+
material as expected. Licensors should clearly mark any
|
| 33 |
+
material not subject to the license. This includes other CC-
|
| 34 |
+
licensed material, or material used under an exception or
|
| 35 |
+
limitation to copyright. More considerations for licensors:
|
| 36 |
+
wiki.creativecommons.org/Considerations_for_licensors
|
| 37 |
+
|
| 38 |
+
Considerations for the public: By using one of our public
|
| 39 |
+
licenses, a licensor grants the public permission to use the
|
| 40 |
+
licensed material under specified terms and conditions. If
|
| 41 |
+
the licensor's permission is not necessary for any reason--for
|
| 42 |
+
example, because of any applicable exception or limitation to
|
| 43 |
+
copyright--then that use is not regulated by the license. Our
|
| 44 |
+
licenses grant only permissions under copyright and certain
|
| 45 |
+
other rights that a licensor has authority to grant. Use of
|
| 46 |
+
the licensed material may still be restricted for other
|
| 47 |
+
reasons, including because others have copyright or other
|
| 48 |
+
rights in the material. A licensor may make special requests,
|
| 49 |
+
such as asking that all changes be marked or described.
|
| 50 |
+
Although not required by our licenses, you are encouraged to
|
| 51 |
+
respect those requests where reasonable. More considerations
|
| 52 |
+
for the public:
|
| 53 |
+
wiki.creativecommons.org/Considerations_for_licensees
|
| 54 |
+
|
| 55 |
+
=======================================================================
|
| 56 |
+
|
| 57 |
+
Creative Commons Attribution-NonCommercial 4.0 International Public
|
| 58 |
+
License
|
| 59 |
+
|
| 60 |
+
By exercising the Licensed Rights (defined below), You accept and agree
|
| 61 |
+
to be bound by the terms and conditions of this Creative Commons
|
| 62 |
+
Attribution-NonCommercial 4.0 International Public License ("Public
|
| 63 |
+
License"). To the extent this Public License may be interpreted as a
|
| 64 |
+
contract, You are granted the Licensed Rights in consideration of Your
|
| 65 |
+
acceptance of these terms and conditions, and the Licensor grants You
|
| 66 |
+
such rights in consideration of benefits the Licensor receives from
|
| 67 |
+
making the Licensed Material available under these terms and
|
| 68 |
+
conditions.
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
Section 1 -- Definitions.
|
| 72 |
+
|
| 73 |
+
a. Adapted Material means material subject to Copyright and Similar
|
| 74 |
+
Rights that is derived from or based upon the Licensed Material
|
| 75 |
+
and in which the Licensed Material is translated, altered,
|
| 76 |
+
arranged, transformed, or otherwise modified in a manner requiring
|
| 77 |
+
permission under the Copyright and Similar Rights held by the
|
| 78 |
+
Licensor. For purposes of this Public License, where the Licensed
|
| 79 |
+
Material is a musical work, performance, or sound recording,
|
| 80 |
+
Adapted Material is always produced where the Licensed Material is
|
| 81 |
+
synched in timed relation with a moving image.
|
| 82 |
+
|
| 83 |
+
b. Adapter's License means the license You apply to Your Copyright
|
| 84 |
+
and Similar Rights in Your contributions to Adapted Material in
|
| 85 |
+
accordance with the terms and conditions of this Public License.
|
| 86 |
+
|
| 87 |
+
c. Copyright and Similar Rights means copyright and/or similar rights
|
| 88 |
+
closely related to copyright including, without limitation,
|
| 89 |
+
performance, broadcast, sound recording, and Sui Generis Database
|
| 90 |
+
Rights, without regard to how the rights are labeled or
|
| 91 |
+
categorized. For purposes of this Public License, the rights
|
| 92 |
+
specified in Section 2(b)(1)-(2) are not Copyright and Similar
|
| 93 |
+
Rights.
|
| 94 |
+
d. Effective Technological Measures means those measures that, in the
|
| 95 |
+
absence of proper authority, may not be circumvented under laws
|
| 96 |
+
fulfilling obligations under Article 11 of the WIPO Copyright
|
| 97 |
+
Treaty adopted on December 20, 1996, and/or similar international
|
| 98 |
+
agreements.
|
| 99 |
+
|
| 100 |
+
e. Exceptions and Limitations means fair use, fair dealing, and/or
|
| 101 |
+
any other exception or limitation to Copyright and Similar Rights
|
| 102 |
+
that applies to Your use of the Licensed Material.
|
| 103 |
+
|
| 104 |
+
f. Licensed Material means the artistic or literary work, database,
|
| 105 |
+
or other material to which the Licensor applied this Public
|
| 106 |
+
License.
|
| 107 |
+
|
| 108 |
+
g. Licensed Rights means the rights granted to You subject to the
|
| 109 |
+
terms and conditions of this Public License, which are limited to
|
| 110 |
+
all Copyright and Similar Rights that apply to Your use of the
|
| 111 |
+
Licensed Material and that the Licensor has authority to license.
|
| 112 |
+
|
| 113 |
+
h. Licensor means the individual(s) or entity(ies) granting rights
|
| 114 |
+
under this Public License.
|
| 115 |
+
|
| 116 |
+
i. NonCommercial means not primarily intended for or directed towards
|
| 117 |
+
commercial advantage or monetary compensation. For purposes of
|
| 118 |
+
this Public License, the exchange of the Licensed Material for
|
| 119 |
+
other material subject to Copyright and Similar Rights by digital
|
| 120 |
+
file-sharing or similar means is NonCommercial provided there is
|
| 121 |
+
no payment of monetary compensation in connection with the
|
| 122 |
+
exchange.
|
| 123 |
+
|
| 124 |
+
j. Share means to provide material to the public by any means or
|
| 125 |
+
process that requires permission under the Licensed Rights, such
|
| 126 |
+
as reproduction, public display, public performance, distribution,
|
| 127 |
+
dissemination, communication, or importation, and to make material
|
| 128 |
+
available to the public including in ways that members of the
|
| 129 |
+
public may access the material from a place and at a time
|
| 130 |
+
individually chosen by them.
|
| 131 |
+
|
| 132 |
+
k. Sui Generis Database Rights means rights other than copyright
|
| 133 |
+
resulting from Directive 96/9/EC of the European Parliament and of
|
| 134 |
+
the Council of 11 March 1996 on the legal protection of databases,
|
| 135 |
+
as amended and/or succeeded, as well as other essentially
|
| 136 |
+
equivalent rights anywhere in the world.
|
| 137 |
+
|
| 138 |
+
l. You means the individual or entity exercising the Licensed Rights
|
| 139 |
+
under this Public License. Your has a corresponding meaning.
|
| 140 |
+
|
| 141 |
+
|
| 142 |
+
Section 2 -- Scope.
|
| 143 |
+
|
| 144 |
+
a. License grant.
|
| 145 |
+
|
| 146 |
+
1. Subject to the terms and conditions of this Public License,
|
| 147 |
+
the Licensor hereby grants You a worldwide, royalty-free,
|
| 148 |
+
non-sublicensable, non-exclusive, irrevocable license to
|
| 149 |
+
exercise the Licensed Rights in the Licensed Material to:
|
| 150 |
+
|
| 151 |
+
a. reproduce and Share the Licensed Material, in whole or
|
| 152 |
+
in part, for NonCommercial purposes only; and
|
| 153 |
+
|
| 154 |
+
b. produce, reproduce, and Share Adapted Material for
|
| 155 |
+
NonCommercial purposes only.
|
| 156 |
+
|
| 157 |
+
2. Exceptions and Limitations. For the avoidance of doubt, where
|
| 158 |
+
Exceptions and Limitations apply to Your use, this Public
|
| 159 |
+
License does not apply, and You do not need to comply with
|
| 160 |
+
its terms and conditions.
|
| 161 |
+
|
| 162 |
+
3. Term. The term of this Public License is specified in Section
|
| 163 |
+
6(a).
|
| 164 |
+
|
| 165 |
+
4. Media and formats; technical modifications allowed. The
|
| 166 |
+
Licensor authorizes You to exercise the Licensed Rights in
|
| 167 |
+
all media and formats whether now known or hereafter created,
|
| 168 |
+
and to make technical modifications necessary to do so. The
|
| 169 |
+
Licensor waives and/or agrees not to assert any right or
|
| 170 |
+
authority to forbid You from making technical modifications
|
| 171 |
+
necessary to exercise the Licensed Rights, including
|
| 172 |
+
technical modifications necessary to circumvent Effective
|
| 173 |
+
Technological Measures. For purposes of this Public License,
|
| 174 |
+
simply making modifications authorized by this Section 2(a)
|
| 175 |
+
(4) never produces Adapted Material.
|
| 176 |
+
|
| 177 |
+
5. Downstream recipients.
|
| 178 |
+
|
| 179 |
+
a. Offer from the Licensor -- Licensed Material. Every
|
| 180 |
+
recipient of the Licensed Material automatically
|
| 181 |
+
receives an offer from the Licensor to exercise the
|
| 182 |
+
Licensed Rights under the terms and conditions of this
|
| 183 |
+
Public License.
|
| 184 |
+
|
| 185 |
+
b. No downstream restrictions. You may not offer or impose
|
| 186 |
+
any additional or different terms or conditions on, or
|
| 187 |
+
apply any Effective Technological Measures to, the
|
| 188 |
+
Licensed Material if doing so restricts exercise of the
|
| 189 |
+
Licensed Rights by any recipient of the Licensed
|
| 190 |
+
Material.
|
| 191 |
+
|
| 192 |
+
6. No endorsement. Nothing in this Public License constitutes or
|
| 193 |
+
may be construed as permission to assert or imply that You
|
| 194 |
+
are, or that Your use of the Licensed Material is, connected
|
| 195 |
+
with, or sponsored, endorsed, or granted official status by,
|
| 196 |
+
the Licensor or others designated to receive attribution as
|
| 197 |
+
provided in Section 3(a)(1)(A)(i).
|
| 198 |
+
|
| 199 |
+
b. Other rights.
|
| 200 |
+
|
| 201 |
+
1. Moral rights, such as the right of integrity, are not
|
| 202 |
+
licensed under this Public License, nor are publicity,
|
| 203 |
+
privacy, and/or other similar personality rights; however, to
|
| 204 |
+
the extent possible, the Licensor waives and/or agrees not to
|
| 205 |
+
assert any such rights held by the Licensor to the limited
|
| 206 |
+
extent necessary to allow You to exercise the Licensed
|
| 207 |
+
Rights, but not otherwise.
|
| 208 |
+
|
| 209 |
+
2. Patent and trademark rights are not licensed under this
|
| 210 |
+
Public License.
|
| 211 |
+
|
| 212 |
+
3. To the extent possible, the Licensor waives any right to
|
| 213 |
+
collect royalties from You for the exercise of the Licensed
|
| 214 |
+
Rights, whether directly or through a collecting society
|
| 215 |
+
under any voluntary or waivable statutory or compulsory
|
| 216 |
+
licensing scheme. In all other cases the Licensor expressly
|
| 217 |
+
reserves any right to collect such royalties, including when
|
| 218 |
+
the Licensed Material is used other than for NonCommercial
|
| 219 |
+
purposes.
|
| 220 |
+
|
| 221 |
+
|
| 222 |
+
Section 3 -- License Conditions.
|
| 223 |
+
|
| 224 |
+
Your exercise of the Licensed Rights is expressly made subject to the
|
| 225 |
+
following conditions.
|
| 226 |
+
|
| 227 |
+
a. Attribution.
|
| 228 |
+
|
| 229 |
+
1. If You Share the Licensed Material (including in modified
|
| 230 |
+
form), You must:
|
| 231 |
+
|
| 232 |
+
a. retain the following if it is supplied by the Licensor
|
| 233 |
+
with the Licensed Material:
|
| 234 |
+
|
| 235 |
+
i. identification of the creator(s) of the Licensed
|
| 236 |
+
Material and any others designated to receive
|
| 237 |
+
attribution, in any reasonable manner requested by
|
| 238 |
+
the Licensor (including by pseudonym if
|
| 239 |
+
designated);
|
| 240 |
+
|
| 241 |
+
ii. a copyright notice;
|
| 242 |
+
|
| 243 |
+
iii. a notice that refers to this Public License;
|
| 244 |
+
|
| 245 |
+
iv. a notice that refers to the disclaimer of
|
| 246 |
+
warranties;
|
| 247 |
+
|
| 248 |
+
v. a URI or hyperlink to the Licensed Material to the
|
| 249 |
+
extent reasonably practicable;
|
| 250 |
+
|
| 251 |
+
b. indicate if You modified the Licensed Material and
|
| 252 |
+
retain an indication of any previous modifications; and
|
| 253 |
+
|
| 254 |
+
c. indicate the Licensed Material is licensed under this
|
| 255 |
+
Public License, and include the text of, or the URI or
|
| 256 |
+
hyperlink to, this Public License.
|
| 257 |
+
|
| 258 |
+
2. You may satisfy the conditions in Section 3(a)(1) in any
|
| 259 |
+
reasonable manner based on the medium, means, and context in
|
| 260 |
+
which You Share the Licensed Material. For example, it may be
|
| 261 |
+
reasonable to satisfy the conditions by providing a URI or
|
| 262 |
+
hyperlink to a resource that includes the required
|
| 263 |
+
information.
|
| 264 |
+
|
| 265 |
+
3. If requested by the Licensor, You must remove any of the
|
| 266 |
+
information required by Section 3(a)(1)(A) to the extent
|
| 267 |
+
reasonably practicable.
|
| 268 |
+
|
| 269 |
+
4. If You Share Adapted Material You produce, the Adapter's
|
| 270 |
+
License You apply must not prevent recipients of the Adapted
|
| 271 |
+
Material from complying with this Public License.
|
| 272 |
+
|
| 273 |
+
|
| 274 |
+
Section 4 -- Sui Generis Database Rights.
|
| 275 |
+
|
| 276 |
+
Where the Licensed Rights include Sui Generis Database Rights that
|
| 277 |
+
apply to Your use of the Licensed Material:
|
| 278 |
+
|
| 279 |
+
a. for the avoidance of doubt, Section 2(a)(1) grants You the right
|
| 280 |
+
to extract, reuse, reproduce, and Share all or a substantial
|
| 281 |
+
portion of the contents of the database for NonCommercial purposes
|
| 282 |
+
only;
|
| 283 |
+
|
| 284 |
+
b. if You include all or a substantial portion of the database
|
| 285 |
+
contents in a database in which You have Sui Generis Database
|
| 286 |
+
Rights, then the database in which You have Sui Generis Database
|
| 287 |
+
Rights (but not its individual contents) is Adapted Material; and
|
| 288 |
+
|
| 289 |
+
c. You must comply with the conditions in Section 3(a) if You Share
|
| 290 |
+
all or a substantial portion of the contents of the database.
|
| 291 |
+
|
| 292 |
+
For the avoidance of doubt, this Section 4 supplements and does not
|
| 293 |
+
replace Your obligations under this Public License where the Licensed
|
| 294 |
+
Rights include other Copyright and Similar Rights.
|
| 295 |
+
|
| 296 |
+
|
| 297 |
+
Section 5 -- Disclaimer of Warranties and Limitation of Liability.
|
| 298 |
+
|
| 299 |
+
a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE
|
| 300 |
+
EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS
|
| 301 |
+
AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF
|
| 302 |
+
ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS,
|
| 303 |
+
IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION,
|
| 304 |
+
WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR
|
| 305 |
+
PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS,
|
| 306 |
+
ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT
|
| 307 |
+
KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT
|
| 308 |
+
ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU.
|
| 309 |
+
|
| 310 |
+
b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE
|
| 311 |
+
TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION,
|
| 312 |
+
NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT,
|
| 313 |
+
INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES,
|
| 314 |
+
COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR
|
| 315 |
+
USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN
|
| 316 |
+
ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR
|
| 317 |
+
DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR
|
| 318 |
+
IN PART, THIS LIMITATION MAY NOT APPLY TO YOU.
|
| 319 |
+
|
| 320 |
+
c. The disclaimer of warranties and limitation of liability provided
|
| 321 |
+
above shall be interpreted in a manner that, to the extent
|
| 322 |
+
possible, most closely approximates an absolute disclaimer and
|
| 323 |
+
waiver of all liability.
|
| 324 |
+
|
| 325 |
+
|
| 326 |
+
Section 6 -- Term and Termination.
|
| 327 |
+
|
| 328 |
+
a. This Public License applies for the term of the Copyright and
|
| 329 |
+
Similar Rights licensed here. However, if You fail to comply with
|
| 330 |
+
this Public License, then Your rights under this Public License
|
| 331 |
+
terminate automatically.
|
| 332 |
+
|
| 333 |
+
b. Where Your right to use the Licensed Material has terminated under
|
| 334 |
+
Section 6(a), it reinstates:
|
| 335 |
+
|
| 336 |
+
1. automatically as of the date the violation is cured, provided
|
| 337 |
+
it is cured within 30 days of Your discovery of the
|
| 338 |
+
violation; or
|
| 339 |
+
|
| 340 |
+
2. upon express reinstatement by the Licensor.
|
| 341 |
+
|
| 342 |
+
For the avoidance of doubt, this Section 6(b) does not affect any
|
| 343 |
+
right the Licensor may have to seek remedies for Your violations
|
| 344 |
+
of this Public License.
|
| 345 |
+
|
| 346 |
+
c. For the avoidance of doubt, the Licensor may also offer the
|
| 347 |
+
Licensed Material under separate terms or conditions or stop
|
| 348 |
+
distributing the Licensed Material at any time; however, doing so
|
| 349 |
+
will not terminate this Public License.
|
| 350 |
+
|
| 351 |
+
d. Sections 1, 5, 6, 7, and 8 survive termination of this Public
|
| 352 |
+
License.
|
| 353 |
+
|
| 354 |
+
|
| 355 |
+
Section 7 -- Other Terms and Conditions.
|
| 356 |
+
|
| 357 |
+
a. The Licensor shall not be bound by any additional or different
|
| 358 |
+
terms or conditions communicated by You unless expressly agreed.
|
| 359 |
+
|
| 360 |
+
b. Any arrangements, understandings, or agreements regarding the
|
| 361 |
+
Licensed Material not stated herein are separate from and
|
| 362 |
+
independent of the terms and conditions of this Public License.
|
| 363 |
+
|
| 364 |
+
|
| 365 |
+
Section 8 -- Interpretation.
|
| 366 |
+
|
| 367 |
+
a. For the avoidance of doubt, this Public License does not, and
|
| 368 |
+
shall not be interpreted to, reduce, limit, restrict, or impose
|
| 369 |
+
conditions on any use of the Licensed Material that could lawfully
|
| 370 |
+
be made without permission under this Public License.
|
| 371 |
+
|
| 372 |
+
b. To the extent possible, if any provision of this Public License is
|
| 373 |
+
deemed unenforceable, it shall be automatically reformed to the
|
| 374 |
+
minimum extent necessary to make it enforceable. If the provision
|
| 375 |
+
cannot be reformed, it shall be severed from this Public License
|
| 376 |
+
without affecting the enforceability of the remaining terms and
|
| 377 |
+
conditions.
|
| 378 |
+
|
| 379 |
+
c. No term or condition of this Public License will be waived and no
|
| 380 |
+
failure to comply consented to unless expressly agreed to by the
|
| 381 |
+
Licensor.
|
| 382 |
+
|
| 383 |
+
d. Nothing in this Public License constitutes or may be interpreted
|
| 384 |
+
as a limitation upon, or waiver of, any privileges and immunities
|
| 385 |
+
that apply to the Licensor or You, including from the legal
|
| 386 |
+
processes of any jurisdiction or authority.
|
| 387 |
+
|
| 388 |
+
=======================================================================
|
| 389 |
+
|
| 390 |
+
Creative Commons is not a party to its public
|
| 391 |
+
licenses. Notwithstanding, Creative Commons may elect to apply one of
|
| 392 |
+
its public licenses to material it publishes and in those instances
|
| 393 |
+
will be considered the “Licensor.” The text of the Creative Commons
|
| 394 |
+
public licenses is dedicated to the public domain under the CC0 Public
|
| 395 |
+
Domain Dedication. Except for the limited purpose of indicating that
|
| 396 |
+
material is shared under a Creative Commons public license or as
|
| 397 |
+
otherwise permitted by the Creative Commons policies published at
|
| 398 |
+
creativecommons.org/policies, Creative Commons does not authorize the
|
| 399 |
+
use of the trademark "Creative Commons" or any other trademark or logo
|
| 400 |
+
of Creative Commons without its prior written consent including,
|
| 401 |
+
without limitation, in connection with any unauthorized modifications
|
| 402 |
+
to any of its public licenses or any other arrangements,
|
| 403 |
+
understandings, or agreements concerning use of licensed material. For
|
| 404 |
+
the avoidance of doubt, this paragraph does not form part of the
|
| 405 |
+
public licenses.
|
| 406 |
+
|
| 407 |
+
Creative Commons may be contacted at creativecommons.org.
|
| 408 |
+
|
onnx/flow_lm_flow_int8.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8d627d235c44a597da908e1085ebe241cbbe358964c502c5a5063d18851a5529
|
| 3 |
+
size 9962530
|
onnx/flow_lm_main_int8.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fd5cdd7f7ab05f638af0011b9561fe95f3d86bae7be7504e921ae3d2874b5da5
|
| 3 |
+
size 76341627
|
onnx/mimi_decoder_int8.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:501e16f51cf3fb91bd2928ee2a10c96d3461544eff329aafca10489e990b450c
|
| 3 |
+
size 22684077
|
onnx/mimi_encoder.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:360f050cd0b1e1c9e92e25584391a2214cb7574ba16c67ac5f471f9dce8588e4
|
| 3 |
+
size 73165554
|
onnx/text_conditioner.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:80ea69f46d8153a9bd42373723cedae4c88ccbde0e052c2a96e9e8f19445adf1
|
| 3 |
+
size 16388363
|
sentencepiece-browser.js
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// Browser shims for Node.js modules used by sentencepiece.js
// These shims allow the sentencepiece module to load in the browser.
// The actual fs operations are not used when loading from base64.

// Shim for 'fs' module

/**
 * Stand-in for `fs.readFileSync`; there is no filesystem to read from here.
 *
 * @param {string} path - Path the caller tried to read (echoed in the error).
 * @throws {Error} Always.
 */
export function readFileSync(path) {
  throw new Error(`fs.readFileSync not available in browser. Tried to read: ${path}`);
}

/**
 * Stand-in for `fs.existsSync`.
 *
 * @param {string} path - Ignored.
 * @returns {boolean} Always `false`.
 */
export function existsSync(path) {
  return false;
}

/**
 * Stand-in for `fs.promises`. `readFile` always rejects; the message names the
 * requested path, matching what `readFileSync` reports.
 */
export const promises = {
  readFile: (path) =>
    Promise.reject(
      new Error(`fs.promises.readFile not available in browser. Tried to read: ${path}`),
    ),
};

// Default export so that `import fs from 'fs'` resolves; a namespace import
// (`import * as fs from 'fs'`) is served by the named exports above.
export default {
  readFileSync,
  existsSync,
  promises,
};

/**
 * Uint8Array-backed replacement for the handful of `Buffer` static methods
 * implemented below. Used only when no global `Buffer` exists.
 */
const browserBufferShim = {
  /**
   * @param {string|ArrayLike<number>|ArrayBuffer} data - Source data.
   * @param {string} [encoding] - Only 'base64' is treated specially; any other
   *   string input is UTF-8 encoded.
   * @returns {Uint8Array} Decoded/encoded bytes.
   */
  from: (data, encoding) => {
    if (encoding === 'base64') {
      const binary = atob(data);
      const bytes = new Uint8Array(binary.length);
      for (let i = 0; i < binary.length; i++) {
        bytes[i] = binary.charCodeAt(i);
      }
      return bytes;
    }
    if (typeof data === 'string') {
      return new TextEncoder().encode(data);
    }
    return new Uint8Array(data);
  },
  isBuffer: (obj) => obj instanceof Uint8Array,
  // Typed arrays are zero-initialised, so both allocators behave the same.
  alloc: (size) => new Uint8Array(size),
  allocUnsafe: (size) => new Uint8Array(size),
  /**
   * Concatenate byte arrays.
   *
   * @param {Array<Uint8Array|number[]>} buffers - Chunks to join, in order.
   * @param {number} [totalLength] - Length of the result. Defaults to the sum
   *   of the chunk lengths. A smaller value truncates the output, a larger
   *   value leaves the tail zero-filled.
   * @returns {Uint8Array} The joined bytes.
   */
  concat: (buffers, totalLength) => {
    const length =
      totalLength === undefined
        ? buffers.reduce((sum, buf) => sum + buf.length, 0)
        : totalLength;
    const result = new Uint8Array(length);
    let offset = 0;
    for (const buf of buffers) {
      const room = length - offset;
      if (room <= 0) {
        break;
      }
      // Clip the chunk that crosses the end; TypedArray#set would otherwise
      // throw a RangeError instead of truncating.
      const chunk = buf.length > room ? buf.slice(0, room) : buf;
      result.set(chunk, offset);
      offset += chunk.length;
    }
    return result;
  },
};

// Buffer shim - re-export what sentencepiece needs
// `import { Buffer as Buffer$1 } from 'buffer'`
export const Buffer = globalThis.Buffer || browserBufferShim;

// Named export for Buffer$1
export { Buffer as Buffer$1 };
|
sentencepiece.js
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
server.py
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Simple HTTP server with Cross-Origin Isolation headers for SharedArrayBuffer.
|
| 4 |
+
This enables multi-threading in ONNX Runtime Web for much better performance.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import http.server
|
| 8 |
+
import socketserver
|
| 9 |
+
import sys
|
| 10 |
+
|
| 11 |
+
PORT = int(sys.argv[1]) if len(sys.argv) > 1 else 8080
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
class CORSRequestHandler(http.server.SimpleHTTPRequestHandler):
    """Static-file handler that adds cross-origin headers to every response."""

    def end_headers(self):
        """Append the extra headers, then let the base class finish the header block."""
        extra_headers = (
            # Required for SharedArrayBuffer (enables multi-threading in WASM)
            ('Cross-Origin-Opener-Policy', 'same-origin'),
            ('Cross-Origin-Embedder-Policy', 'require-corp'),
            # Original note: "Allow loading from CDN".
            # NOTE(review): this header controls who may read *this* server's
            # responses; confirm it is what the CDN use case actually needs.
            ('Access-Control-Allow-Origin', '*'),
        )
        for header_name, header_value in extra_headers:
            self.send_header(header_name, header_value)
        super().end_headers()

    def do_OPTIONS(self):
        """Answer OPTIONS requests with a bare 200 carrying the headers above."""
        self.send_response(200)
        self.end_headers()
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
class _ThreadedReusableServer(socketserver.ThreadingTCPServer):
    """TCP server that can be restarted immediately and serves requests in threads."""

    # Without this, restarting right after a shutdown can fail with
    # "Address already in use" while the old socket lingers in TIME_WAIT.
    allow_reuse_address = True
    # Do not let in-flight request threads keep the process alive after Ctrl+C.
    daemon_threads = True


# Script entry point: serve the current directory on PORT until interrupted.
if __name__ == '__main__':
    with _ThreadedReusableServer(("", PORT), CORSRequestHandler) as httpd:
        print(f"Serving at http://localhost:{PORT}")
        print("Cross-Origin Isolation headers enabled for multi-threading")
        print("Press Ctrl+C to stop")
        try:
            httpd.serve_forever()
        except KeyboardInterrupt:
            # Ctrl+C is the documented way to stop; exit quietly without a traceback.
            print("\nShutting down...")
|
style.css
CHANGED
|
@@ -1,28 +1,958 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
body {
|
| 2 |
-
|
| 3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
}
|
| 5 |
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
}
|
| 10 |
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
}
|
| 17 |
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
|
|
|
|
|
|
|
|
|
| 24 |
}
|
| 25 |
|
| 26 |
-
|
| 27 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
}
|
|
|
|
| 1 |
+
/* ============================================
|
| 2 |
+
POCKET TTS - DIGITAL MINT & NEO-EMERALD THEME
|
| 3 |
+
Tech-Kawaii • Clean Code Aesthetic
|
| 4 |
+
============================================ */
|
| 5 |
+
|
| 6 |
+
/* Design Tokens
   Custom properties declared once on :root and consumed through var()
   by the rules that follow. */
:root {
  /* Colors - Digital Mint Monochromatic */
  --color-void: #0a1612;
  --color-deep: #0d1f1a;
  --color-surface: rgba(20, 45, 38, 0.85);
  --color-surface-elevated: rgba(30, 65, 55, 0.9);
  --color-glass-border: rgba(144, 238, 200, 0.12);

  /* Mint Gradient Spectrum */
  --gradient-primary: linear-gradient(135deg, #3eb489 0%, #00d4aa 50%, #7fffd4 100%);
  --gradient-primary-vivid: linear-gradient(135deg, #5eedb8 0%, #00ffcc 100%);
  --gradient-glow: linear-gradient(135deg, rgba(62, 180, 137, 0.4) 0%, rgba(0, 212, 170, 0.4) 100%);
  --gradient-soft: linear-gradient(135deg, rgba(127, 255, 212, 0.08) 0%, rgba(62, 180, 137, 0.08) 100%);

  /* Accent Colors - Mint Spectrum */
  --color-primary: #3eb489;
  --color-mint: #00d4aa;
  --color-aquamarine: #7fffd4;
  --color-seafoam: #98ffc8;
  --color-glint: #e8fff4;
  --color-forest: #1a5d4a;
  --color-teal-dark: #0f3d32;

  /* Semantic Colors */
  --color-success: #00d4aa;
  --color-warning: #ffd93d;
  --color-error: #ff6b6b;

  /* Text */
  --color-text-primary: #e8fff4;
  --color-text-secondary: #98c9b8;
  --color-text-muted: #5a8a7a;

  /* Typography - Soft Geometric */
  --font-display: 'Quicksand', 'Nunito', sans-serif;
  --font-body: 'Nunito', 'Quicksand', sans-serif;
  --font-mono: 'JetBrains Mono', 'Fira Code', monospace;

  /* Spacing (rem-based scale; the suffix is the value in quarter-rem steps) */
  --space-1: 0.25rem;
  --space-2: 0.5rem;
  --space-3: 0.75rem;
  --space-4: 1rem;
  --space-5: 1.25rem;
  --space-6: 1.5rem;
  --space-8: 2rem;
  --space-10: 2.5rem;
  --space-12: 3rem;
  --space-16: 4rem;

  /* Radii - Extra Rounded for Kawaii Feel */
  --radius-sm: 10px;
  --radius-md: 14px;
  --radius-lg: 20px;
  --radius-xl: 28px;
  --radius-full: 9999px;

  /* Shadows - Mint Glow */
  --shadow-sm: 0 2px 8px rgba(0, 20, 15, 0.4);
  --shadow-md: 0 4px 20px rgba(0, 20, 15, 0.5);
  --shadow-lg: 0 8px 40px rgba(0, 20, 15, 0.6);
  --shadow-glow: 0 0 40px rgba(62, 180, 137, 0.3);
  --shadow-glow-strong: 0 0 60px rgba(0, 212, 170, 0.4);

  /* Transitions (easing curves and durations) */
  --ease-out: cubic-bezier(0.16, 1, 0.3, 1);
  --ease-bounce: cubic-bezier(0.34, 1.56, 0.64, 1);
  --duration-fast: 150ms;
  --duration-base: 250ms;
  --duration-slow: 400ms;
}
|
| 78 |
+
|
| 79 |
+
/* Reset: border-box sizing and zeroed margin/padding for every element and
   its ::before/::after pseudo-elements. */
*, *::before, *::after {
  box-sizing: border-box;
  margin: 0;
  padding: 0;
}

/* Root font size (the base for the rem-valued tokens) and font smoothing. */
html {
  font-size: 16px;
  -webkit-font-smoothing: antialiased;
  -moz-osx-font-smoothing: grayscale;
}

/* Page defaults: body font, darkest background token, primary text color. */
body {
  font-family: var(--font-body);
  background: var(--color-void);
  color: var(--color-text-primary);
  min-height: 100vh;
  overflow-x: hidden;
  line-height: 1.6;
}

/* Screen reader only: shrinks the element to a clipped 1px box so it takes
   no visible space. */
.sr-only {
  position: absolute;
  width: 1px;
  height: 1px;
  padding: 0;
  margin: -1px;
  overflow: hidden;
  clip: rect(0, 0, 0, 0);
  white-space: nowrap;
  border: 0;
}
|
| 113 |
+
|
| 114 |
+
/* ============================================
   AMBIENT BACKGROUND - Mint Glow
   ============================================ */

/* Fixed, non-interactive full-viewport layer at z-index 0. */
.ambient-layer {
  position: fixed;
  inset: 0;
  pointer-events: none;
  z-index: 0;
  overflow: hidden;
  background: radial-gradient(ellipse at 50% 0%, rgba(62, 180, 137, 0.08) 0%, transparent 50%);
}

/* Shared look for the blurred glow circles; all run the orb-float animation. */
.orb {
  position: absolute;
  border-radius: 50%;
  filter: blur(100px);
  opacity: 0.5;
  animation: orb-float 25s ease-in-out infinite;
}

.orb--primary {
  width: 600px;
  height: 600px;
  background: radial-gradient(circle, rgba(62, 180, 137, 0.35) 0%, transparent 70%);
  top: -20%;
  right: -15%;
  animation-delay: 0s;
}

.orb--secondary {
  width: 450px;
  height: 450px;
  background: radial-gradient(circle, rgba(0, 212, 170, 0.25) 0%, transparent 70%);
  bottom: -15%;
  left: -15%;
  /* Negative delay starts this orb part-way through the cycle. */
  animation-delay: -8s;
}

.orb--tertiary {
  width: 350px;
  height: 350px;
  background: radial-gradient(circle, rgba(127, 255, 212, 0.15) 0%, transparent 70%);
  top: 50%;
  left: 50%;
  /* Horizontal centering uses the individual `translate` property rather
     than `transform`: orb-float animates `transform`, and an animated value
     replaces a static `transform` declaration, which would discard the
     -50% offset. `translate` composes with the animated transform. */
  translate: -50% 0;
  animation-delay: -16s;
}

/* Slow drift and gentle scale change; 0% and 100% match so the loop is seamless. */
@keyframes orb-float {
  0%, 100% {
    transform: translate(0, 0) scale(1);
  }
  25% {
    transform: translate(40px, -50px) scale(1.08);
  }
  50% {
    transform: translate(-30px, 30px) scale(0.95);
  }
  75% {
    transform: translate(50px, 40px) scale(1.03);
  }
}

/* Faint 50px grid drawn with two 1px line gradients, faded out toward the
   edges by a radial mask. */
.grid-overlay {
  position: absolute;
  inset: 0;
  background-image:
    linear-gradient(rgba(62, 180, 137, 0.04) 1px, transparent 1px),
    linear-gradient(90deg, rgba(62, 180, 137, 0.04) 1px, transparent 1px);
  background-size: 50px 50px;
  mask-image: radial-gradient(ellipse at center, black 0%, transparent 70%);
  -webkit-mask-image: radial-gradient(ellipse at center, black 0%, transparent 70%);
}
|
| 187 |
|
| 188 |
+
/* ============================================
   APP SHELL
   ============================================ */

/* Centered column, max 960px wide, stacked above the ambient layer
   (z-index 1 vs 0); plays the shell-enter animation once. */
.app-shell {
  position: relative;
  z-index: 1;
  max-width: 960px;
  margin: 0 auto;
  padding: var(--space-8) var(--space-6);
  min-height: 100vh;
  display: flex;
  flex-direction: column;
  animation: shell-enter 0.8s var(--ease-out) both;
}

/* Entrance: fade in while sliding up 30px. */
@keyframes shell-enter {
  from {
    opacity: 0;
    transform: translateY(30px);
  }
  to {
    opacity: 1;
    transform: translateY(0);
  }
}
|
| 213 |
|
| 214 |
+
/* ============================================
   HERO HEADER
   ============================================ */
.hero {
  text-align: center;
  margin-bottom: var(--space-10);
}

/* Centered row that may wrap onto multiple lines. */
.hero__brand {
  display: flex;
  align-items: center;
  justify-content: center;
  gap: var(--space-4);
  margin-bottom: var(--space-4);
  flex-wrap: wrap;
}

.logo {
  display: flex;
  align-items: center;
  gap: var(--space-3);
}

.logo__icon {
  width: 44px;
  height: 44px;
  filter: drop-shadow(0 0 16px rgba(62, 180, 137, 0.6));
}

/* Gradient text: the background is clipped to the glyphs and the text fill
   is made transparent so the gradient shows through. */
.logo__text {
  font-family: var(--font-display);
  font-size: 2.6rem;
  font-weight: 700;
  background: var(--gradient-primary);
  -webkit-background-clip: text;
  -webkit-text-fill-color: transparent;
  background-clip: text;
  letter-spacing: -0.02em;
  text-shadow: 0 0 40px rgba(62, 180, 137, 0.3);
}

/* Small uppercase monospace pill. */
.badge {
  display: inline-flex;
  align-items: center;
  padding: var(--space-1) var(--space-3);
  background: var(--color-surface);
  border: 1px solid var(--color-glass-border);
  border-radius: var(--radius-full);
  font-family: var(--font-mono);
  font-size: 0.7rem;
  font-weight: 600;
  color: var(--color-mint);
  letter-spacing: 0.06em;
  text-transform: uppercase;
}

.hero__tagline {
  font-size: 1.1rem;
  color: var(--color-text-secondary);
  max-width: 520px;
  margin: 0 auto;
  font-weight: 500;
}
|
| 277 |
+
|
| 278 |
+
/* ============================================
   VOICE SECTION
   ============================================ */

/* Wrapping row holding the voice picker and the upload controls. */
.voice-section {
  display: flex;
  align-items: center;
  gap: var(--space-4);
  margin-bottom: var(--space-4);
  flex-wrap: wrap;
}

.voice-selector {
  display: flex;
  align-items: center;
  gap: var(--space-3);
}

.voice-selector__label {
  font-size: 0.9rem;
  font-weight: 600;
  color: var(--color-text-secondary);
}

/* Custom-styled select: the native arrow is removed and replaced by an inline
   SVG chevron placed in the extra right padding. */
.voice-selector__dropdown {
  padding: var(--space-2) var(--space-4);
  padding-right: var(--space-8);
  background: var(--color-surface);
  border: 1px solid var(--color-glass-border);
  border-radius: var(--radius-md);
  color: var(--color-text-primary);
  font-family: var(--font-body);
  font-size: 0.9rem;
  font-weight: 500;
  cursor: pointer;
  /* Prefixed form first, for WebKit versions that lack the unprefixed
     property; otherwise the native arrow would render next to the chevron. */
  -webkit-appearance: none;
  appearance: none;
  background-image: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='24' height='24' viewBox='0 0 24 24' fill='none' stroke='%2398c9b8' stroke-width='2' stroke-linecap='round' stroke-linejoin='round'%3E%3Cpolyline points='6 9 12 15 18 9'%3E%3C/polyline%3E%3C/svg%3E");
  background-repeat: no-repeat;
  background-position: right var(--space-2) center;
  background-size: 18px;
  transition: all var(--duration-fast) var(--ease-out);
}

.voice-selector__dropdown:hover {
  border-color: rgba(62, 180, 137, 0.4);
  /* background-color only, so the chevron background-image is preserved. */
  background-color: var(--color-surface-elevated);
}

.voice-selector__dropdown:focus {
  /* Transparent outline instead of `outline: none`: invisible in normal
     rendering, but forced-colors (high contrast) modes repaint it, where the
     box-shadow ring below is dropped. */
  outline: 2px solid transparent;
  border-color: var(--color-mint);
  box-shadow: 0 0 0 3px rgba(0, 212, 170, 0.2);
}

.voice-selector__dropdown option {
  background: var(--color-deep);
  color: var(--color-text-primary);
}

.voice-upload {
  display: flex;
  align-items: center;
  gap: var(--space-3);
}

/* Status text; the .success / .error modifiers only swap the color. */
.voice-upload-status {
  font-size: 0.8rem;
  color: var(--color-text-muted);
  font-weight: 500;
}

.voice-upload-status.success {
  color: var(--color-success);
}

.voice-upload-status.error {
  color: var(--color-error);
}
|
| 355 |
+
|
| 356 |
+
/* ============================================
   INPUT SECTION
   ============================================ */
.input-section {
  margin-bottom: var(--space-8);
}

/* Positioning context for the absolutely placed .textarea-meta overlay. */
.textarea-wrap {
  position: relative;
  margin-bottom: var(--space-4);
}

textarea {
  width: 100%;
  min-height: 130px;
  padding: var(--space-4);
  /* Larger bottom padding leaves room for the .textarea-meta overlay. */
  padding-bottom: var(--space-8);
  background: var(--color-surface);
  border: 2px solid var(--color-glass-border);
  border-radius: var(--radius-lg);
  color: var(--color-text-primary);
  font-family: var(--font-body);
  font-size: 1rem;
  font-weight: 500;
  line-height: 1.7;
  resize: vertical;
  transition: all var(--duration-base) var(--ease-out);
}

textarea::placeholder {
  color: var(--color-text-muted);
}

textarea:focus {
  /* Transparent outline instead of `outline: none`: invisible in normal
     rendering, but forced-colors (high contrast) modes repaint it, where the
     box-shadow ring below is dropped. */
  outline: 2px solid transparent;
  border-color: var(--color-primary);
  box-shadow: 0 0 0 4px rgba(62, 180, 137, 0.15), var(--shadow-glow);
}

/* Overlay pinned to the bottom-right corner of .textarea-wrap. */
.textarea-meta {
  position: absolute;
  bottom: var(--space-3);
  right: var(--space-4);
  display: flex;
  align-items: center;
  gap: var(--space-4);
}

.char-count {
  font-family: var(--font-mono);
  font-size: 0.75rem;
  color: var(--color-text-muted);
}

/* Sample Texts */
.sample-texts {
  display: flex;
  align-items: center;
  flex-wrap: wrap;
  gap: var(--space-2);
  margin-bottom: var(--space-4);
}

.sample-texts__label {
  font-size: 0.85rem;
  color: var(--color-text-muted);
  font-weight: 600;
}

/* Pill-shaped button (fully rounded corners). */
.sample-btn {
  padding: var(--space-1) var(--space-3);
  background: var(--color-surface);
  border: 1px solid var(--color-glass-border);
  border-radius: var(--radius-full);
  color: var(--color-text-secondary);
  font-family: var(--font-body);
  font-size: 0.8rem;
  font-weight: 600;
  cursor: pointer;
  transition: all var(--duration-fast) var(--ease-out);
}
|
| 437 |
+
|
| 438 |
+
.sample-btn:hover {
|
| 439 |
+
background: var(--color-surface-elevated);
|
| 440 |
+
color: var(--color-aquamarine);
|
| 441 |
+
border-color: rgba(62, 180, 137, 0.3);
|
| 442 |
+
transform: translateY(-1px);
|
| 443 |
+
}
|
| 444 |
+
|
| 445 |
+
.sample-btn:active {
|
| 446 |
+
transform: scale(0.97) translateY(0);
|
| 447 |
+
}
|
| 448 |
+
|
| 449 |
+
/* Controls */
|
| 450 |
+
.controls {
|
| 451 |
+
display: flex;
|
| 452 |
+
gap: var(--space-3);
|
| 453 |
+
}
|
| 454 |
+
|
| 455 |
+
.btn {
|
| 456 |
+
display: inline-flex;
|
| 457 |
+
align-items: center;
|
| 458 |
+
justify-content: center;
|
| 459 |
+
gap: var(--space-2);
|
| 460 |
+
padding: var(--space-4) var(--space-6);
|
| 461 |
+
border-radius: var(--radius-md);
|
| 462 |
+
font-family: var(--font-body);
|
| 463 |
+
font-size: 1rem;
|
| 464 |
+
font-weight: 700;
|
| 465 |
+
cursor: pointer;
|
| 466 |
+
border: none;
|
| 467 |
+
transition: all var(--duration-base) var(--ease-out);
|
| 468 |
+
}
|
| 469 |
+
|
| 470 |
+
.btn__icon {
|
| 471 |
+
width: 18px;
|
| 472 |
+
height: 18px;
|
| 473 |
+
}
|
| 474 |
+
|
| 475 |
+
.btn--primary {
|
| 476 |
+
flex: 1;
|
| 477 |
+
position: relative;
|
| 478 |
+
background: var(--gradient-primary);
|
| 479 |
+
color: var(--color-void);
|
| 480 |
+
overflow: hidden;
|
| 481 |
+
}
|
| 482 |
+
|
| 483 |
+
.btn--primary::before {
|
| 484 |
+
content: '';
|
| 485 |
+
position: absolute;
|
| 486 |
+
inset: 0;
|
| 487 |
+
background: linear-gradient(135deg, rgba(255,255,255,0.25) 0%, transparent 50%);
|
| 488 |
+
opacity: 0;
|
| 489 |
+
transition: opacity var(--duration-fast);
|
| 490 |
+
}
|
| 491 |
+
|
| 492 |
+
.btn--primary:hover:not(:disabled)::before {
|
| 493 |
+
opacity: 1;
|
| 494 |
+
}
|
| 495 |
+
|
| 496 |
+
.btn--primary:hover:not(:disabled) {
|
| 497 |
+
transform: translateY(-3px);
|
| 498 |
+
box-shadow: var(--shadow-glow-strong);
|
| 499 |
+
}
|
| 500 |
+
|
| 501 |
+
.btn--primary:active:not(:disabled) {
|
| 502 |
+
transform: translateY(-1px);
|
| 503 |
+
}
|
| 504 |
+
|
| 505 |
+
.btn--primary.btn--generating {
|
| 506 |
+
animation: pulse-glow 2s ease-in-out infinite;
|
| 507 |
+
}
|
| 508 |
+
|
| 509 |
+
@keyframes pulse-glow {
|
| 510 |
+
0%, 100% {
|
| 511 |
+
box-shadow: 0 0 0 0 rgba(62, 180, 137, 0.5);
|
| 512 |
+
}
|
| 513 |
+
50% {
|
| 514 |
+
box-shadow: 0 0 0 20px rgba(62, 180, 137, 0);
|
| 515 |
+
}
|
| 516 |
+
}
|
| 517 |
+
|
| 518 |
+
.btn--secondary {
|
| 519 |
+
background: var(--color-surface);
|
| 520 |
+
color: var(--color-text-primary);
|
| 521 |
+
border: 2px solid var(--color-glass-border);
|
| 522 |
+
}
|
| 523 |
+
|
| 524 |
+
.btn--secondary:hover:not(:disabled) {
|
| 525 |
+
background: var(--color-surface-elevated);
|
| 526 |
+
border-color: rgba(62, 180, 137, 0.3);
|
| 527 |
+
}
|
| 528 |
+
|
| 529 |
+
.btn--outline {
|
| 530 |
+
background: transparent;
|
| 531 |
+
border: 2px solid var(--color-glass-border);
|
| 532 |
+
color: var(--color-text-secondary);
|
| 533 |
+
}
|
| 534 |
+
|
| 535 |
+
.btn--outline:hover:not(:disabled) {
|
| 536 |
+
background: var(--color-surface);
|
| 537 |
+
border-color: rgba(62, 180, 137, 0.4);
|
| 538 |
+
color: var(--color-aquamarine);
|
| 539 |
+
}
|
| 540 |
+
|
| 541 |
+
.btn--small {
|
| 542 |
+
padding: var(--space-2) var(--space-4);
|
| 543 |
+
font-size: 0.85rem;
|
| 544 |
+
}
|
| 545 |
+
|
| 546 |
+
.btn--small .btn__icon {
|
| 547 |
+
width: 16px;
|
| 548 |
+
height: 16px;
|
| 549 |
+
}
|
| 550 |
+
|
| 551 |
+
.btn:disabled {
|
| 552 |
+
opacity: 0.4;
|
| 553 |
+
cursor: not-allowed;
|
| 554 |
+
}
|
| 555 |
+
|
| 556 |
+
.btn__loader {
|
| 557 |
+
width: 18px;
|
| 558 |
+
height: 18px;
|
| 559 |
+
border: 2px solid rgba(10, 22, 18, 0.3);
|
| 560 |
+
border-top-color: var(--color-void);
|
| 561 |
+
border-radius: 50%;
|
| 562 |
+
animation: spin 0.8s linear infinite;
|
| 563 |
+
display: none;
|
| 564 |
+
}
|
| 565 |
+
|
| 566 |
+
@keyframes spin {
|
| 567 |
+
to { transform: rotate(360deg); }
|
| 568 |
+
}
|
| 569 |
+
|
| 570 |
+
/* ============================================
|
| 571 |
+
OUTPUT SECTION
|
| 572 |
+
============================================ */
|
| 573 |
+
.output-section {
|
| 574 |
+
display: grid;
|
| 575 |
+
grid-template-columns: 1fr 280px;
|
| 576 |
+
gap: var(--space-6);
|
| 577 |
+
margin-bottom: var(--space-8);
|
| 578 |
+
}
|
| 579 |
+
|
| 580 |
+
/* Visualizer Panel */
|
| 581 |
+
.visualizer-panel {
|
| 582 |
+
background: var(--color-surface);
|
| 583 |
+
border: 2px solid var(--color-glass-border);
|
| 584 |
+
border-radius: var(--radius-xl);
|
| 585 |
+
overflow: hidden;
|
| 586 |
+
}
|
| 587 |
+
|
| 588 |
+
.visualizer-panel__header {
|
| 589 |
+
display: flex;
|
| 590 |
+
align-items: center;
|
| 591 |
+
justify-content: space-between;
|
| 592 |
+
padding: var(--space-4) var(--space-5);
|
| 593 |
+
border-bottom: 1px solid var(--color-glass-border);
|
| 594 |
+
}
|
| 595 |
+
|
| 596 |
+
.visualizer-panel__title {
|
| 597 |
+
font-family: var(--font-display);
|
| 598 |
+
font-size: 0.9rem;
|
| 599 |
+
font-weight: 700;
|
| 600 |
+
color: var(--color-text-secondary);
|
| 601 |
+
text-transform: uppercase;
|
| 602 |
+
letter-spacing: 0.06em;
|
| 603 |
+
}
|
| 604 |
+
|
| 605 |
+
.status-indicator {
|
| 606 |
+
display: flex;
|
| 607 |
+
align-items: center;
|
| 608 |
+
gap: var(--space-2);
|
| 609 |
+
}
|
| 610 |
+
|
| 611 |
+
.status-dot {
|
| 612 |
+
width: 10px;
|
| 613 |
+
height: 10px;
|
| 614 |
+
border-radius: 50%;
|
| 615 |
+
background: var(--color-text-muted);
|
| 616 |
+
}
|
| 617 |
+
|
| 618 |
+
.status-indicator--idle .status-dot,
|
| 619 |
+
.status-idle .status-dot {
|
| 620 |
+
background: var(--color-text-muted);
|
| 621 |
+
}
|
| 622 |
+
|
| 623 |
+
.status-indicator--running .status-dot,
|
| 624 |
+
.status-running .status-dot {
|
| 625 |
+
background: var(--color-mint);
|
| 626 |
+
box-shadow: 0 0 12px rgba(0, 212, 170, 0.6);
|
| 627 |
+
animation: dot-pulse 1.5s ease-in-out infinite;
|
| 628 |
+
}
|
| 629 |
+
|
| 630 |
+
.status-indicator--error .status-dot,
|
| 631 |
+
.status-error .status-dot {
|
| 632 |
+
background: var(--color-error);
|
| 633 |
+
}
|
| 634 |
+
|
| 635 |
+
@keyframes dot-pulse {
|
| 636 |
+
0%, 100% { opacity: 1; transform: scale(1); }
|
| 637 |
+
50% { opacity: 0.5; transform: scale(0.85); }
|
| 638 |
+
}
|
| 639 |
+
|
| 640 |
+
.status-text {
|
| 641 |
+
font-size: 0.85rem;
|
| 642 |
+
font-weight: 600;
|
| 643 |
+
color: var(--color-text-secondary);
|
| 644 |
+
}
|
| 645 |
+
|
| 646 |
+
.visualizer-container {
|
| 647 |
+
position: relative;
|
| 648 |
+
height: 160px;
|
| 649 |
+
background: linear-gradient(180deg, transparent 0%, rgba(62, 180, 137, 0.05) 100%);
|
| 650 |
+
}
|
| 651 |
+
|
| 652 |
+
.visualizer-container canvas {
|
| 653 |
+
position: absolute;
|
| 654 |
+
inset: 0;
|
| 655 |
+
width: 100%;
|
| 656 |
+
height: 100%;
|
| 657 |
+
}
|
| 658 |
+
|
| 659 |
+
.visualizer-bars {
|
| 660 |
+
opacity: 0.8;
|
| 661 |
+
mix-blend-mode: screen;
|
| 662 |
+
}
|
| 663 |
+
|
| 664 |
+
/* Metrics Panel */
|
| 665 |
+
.metrics-panel {
|
| 666 |
+
display: flex;
|
| 667 |
+
flex-direction: column;
|
| 668 |
+
gap: var(--space-4);
|
| 669 |
+
}
|
| 670 |
+
|
| 671 |
+
.metrics-panel__title {
|
| 672 |
+
font-family: var(--font-display);
|
| 673 |
+
font-size: 0.8rem;
|
| 674 |
+
font-weight: 700;
|
| 675 |
+
color: var(--color-text-muted);
|
| 676 |
+
text-transform: uppercase;
|
| 677 |
+
letter-spacing: 0.1em;
|
| 678 |
+
}
|
| 679 |
+
|
| 680 |
+
.metric {
|
| 681 |
+
padding: var(--space-4);
|
| 682 |
+
background: var(--color-surface);
|
| 683 |
+
border: 2px solid var(--color-glass-border);
|
| 684 |
+
border-radius: var(--radius-md);
|
| 685 |
+
}
|
| 686 |
+
|
| 687 |
+
.metric--highlight {
|
| 688 |
+
background: var(--gradient-soft);
|
| 689 |
+
border-color: rgba(62, 180, 137, 0.25);
|
| 690 |
+
}
|
| 691 |
+
|
| 692 |
+
.metric__header {
|
| 693 |
+
display: flex;
|
| 694 |
+
align-items: center;
|
| 695 |
+
justify-content: space-between;
|
| 696 |
+
margin-bottom: var(--space-2);
|
| 697 |
+
}
|
| 698 |
+
|
| 699 |
+
.metric__label {
|
| 700 |
+
font-size: 0.8rem;
|
| 701 |
+
font-weight: 600;
|
| 702 |
+
color: var(--color-text-secondary);
|
| 703 |
+
}
|
| 704 |
+
|
| 705 |
+
.metric__info {
|
| 706 |
+
width: 16px;
|
| 707 |
+
height: 16px;
|
| 708 |
+
padding: 0;
|
| 709 |
+
background: none;
|
| 710 |
+
border: none;
|
| 711 |
+
color: var(--color-text-muted);
|
| 712 |
+
cursor: help;
|
| 713 |
+
position: relative;
|
| 714 |
+
}
|
| 715 |
+
|
| 716 |
+
.metric__info svg {
|
| 717 |
+
width: 100%;
|
| 718 |
+
height: 100%;
|
| 719 |
+
}
|
| 720 |
+
|
| 721 |
+
.metric__info:hover {
|
| 722 |
+
color: var(--color-mint);
|
| 723 |
+
}
|
| 724 |
+
|
| 725 |
+
.metric__info::after {
|
| 726 |
+
content: attr(data-tooltip);
|
| 727 |
+
position: absolute;
|
| 728 |
+
bottom: calc(100% + 8px);
|
| 729 |
+
right: 0;
|
| 730 |
+
width: 200px;
|
| 731 |
+
padding: var(--space-2) var(--space-3);
|
| 732 |
+
background: var(--color-surface-elevated);
|
| 733 |
+
border: 1px solid var(--color-glass-border);
|
| 734 |
+
border-radius: var(--radius-sm);
|
| 735 |
+
font-size: 0.75rem;
|
| 736 |
+
font-weight: 500;
|
| 737 |
+
color: var(--color-text-secondary);
|
| 738 |
+
text-align: left;
|
| 739 |
+
line-height: 1.4;
|
| 740 |
+
opacity: 0;
|
| 741 |
+
visibility: hidden;
|
| 742 |
+
transform: translateY(4px);
|
| 743 |
+
transition: all var(--duration-fast) var(--ease-out);
|
| 744 |
+
z-index: 10;
|
| 745 |
+
pointer-events: none;
|
| 746 |
+
}
|
| 747 |
+
|
| 748 |
+
.metric__info:hover::after {
|
| 749 |
+
opacity: 1;
|
| 750 |
+
visibility: visible;
|
| 751 |
+
transform: translateY(0);
|
| 752 |
+
}
|
| 753 |
+
|
| 754 |
+
.metric__value {
|
| 755 |
+
display: flex;
|
| 756 |
+
align-items: baseline;
|
| 757 |
+
gap: var(--space-1);
|
| 758 |
+
}
|
| 759 |
+
|
| 760 |
+
.metric__number {
|
| 761 |
+
font-family: var(--font-mono);
|
| 762 |
+
font-size: 1.75rem;
|
| 763 |
+
font-weight: 700;
|
| 764 |
+
background: var(--gradient-primary);
|
| 765 |
+
-webkit-background-clip: text;
|
| 766 |
+
-webkit-text-fill-color: transparent;
|
| 767 |
+
background-clip: text;
|
| 768 |
+
}
|
| 769 |
+
|
| 770 |
+
.metric__number--large {
|
| 771 |
+
font-size: 2.25rem;
|
| 772 |
+
}
|
| 773 |
+
|
| 774 |
+
.metric__unit {
|
| 775 |
+
font-family: var(--font-mono);
|
| 776 |
+
font-size: 0.9rem;
|
| 777 |
+
font-weight: 600;
|
| 778 |
+
color: var(--color-text-muted);
|
| 779 |
+
}
|
| 780 |
+
|
| 781 |
+
.metric__bar {
|
| 782 |
+
height: 5px;
|
| 783 |
+
background: rgba(62, 180, 137, 0.15);
|
| 784 |
+
border-radius: var(--radius-full);
|
| 785 |
+
margin-top: var(--space-3);
|
| 786 |
+
overflow: hidden;
|
| 787 |
+
}
|
| 788 |
+
|
| 789 |
+
.metric__bar-fill {
|
| 790 |
+
height: 100%;
|
| 791 |
+
background: var(--gradient-primary);
|
| 792 |
+
border-radius: var(--radius-full);
|
| 793 |
+
transition: width var(--duration-slow) var(--ease-out);
|
| 794 |
+
width: 0%;
|
| 795 |
+
}
|
| 796 |
+
|
| 797 |
+
.metric__context {
|
| 798 |
+
font-size: 0.75rem;
|
| 799 |
+
font-weight: 600;
|
| 800 |
+
color: var(--color-text-muted);
|
| 801 |
+
margin-top: var(--space-2);
|
| 802 |
+
}
|
| 803 |
+
|
| 804 |
+
.metric__note {
|
| 805 |
+
font-size: 0.7rem;
|
| 806 |
+
color: var(--color-warning);
|
| 807 |
+
margin-top: var(--space-1);
|
| 808 |
+
font-style: italic;
|
| 809 |
+
font-weight: 600;
|
| 810 |
+
}
|
| 811 |
+
|
| 812 |
+
.metric--status {
|
| 813 |
+
display: flex;
|
| 814 |
+
align-items: center;
|
| 815 |
+
justify-content: space-between;
|
| 816 |
+
}
|
| 817 |
+
|
| 818 |
+
.model-status {
|
| 819 |
+
display: flex;
|
| 820 |
+
align-items: center;
|
| 821 |
+
gap: var(--space-2);
|
| 822 |
+
}
|
| 823 |
+
|
| 824 |
+
.model-status__dot {
|
| 825 |
+
width: 10px;
|
| 826 |
+
height: 10px;
|
| 827 |
+
border-radius: 50%;
|
| 828 |
+
background: var(--color-text-muted);
|
| 829 |
+
}
|
| 830 |
+
|
| 831 |
+
.status-loading .model-status__dot {
|
| 832 |
+
background: var(--color-warning);
|
| 833 |
+
animation: dot-pulse 1s ease-in-out infinite;
|
| 834 |
+
}
|
| 835 |
+
|
| 836 |
+
.status-ready .model-status__dot {
|
| 837 |
+
background: var(--color-success);
|
| 838 |
+
box-shadow: 0 0 10px rgba(0, 212, 170, 0.5);
|
| 839 |
+
}
|
| 840 |
+
|
| 841 |
+
.model-status__text {
|
| 842 |
+
font-size: 0.85rem;
|
| 843 |
+
font-weight: 600;
|
| 844 |
+
color: var(--color-text-secondary);
|
| 845 |
+
}
|
| 846 |
+
|
| 847 |
+
/* ============================================
|
| 848 |
+
FOOTER
|
| 849 |
+
============================================ */
|
| 850 |
+
.footer {
|
| 851 |
+
margin-top: auto;
|
| 852 |
+
padding-top: var(--space-8);
|
| 853 |
+
text-align: center;
|
| 854 |
+
}
|
| 855 |
+
|
| 856 |
+
.footer p {
|
| 857 |
+
font-size: 0.8rem;
|
| 858 |
+
font-weight: 500;
|
| 859 |
+
color: var(--color-text-muted);
|
| 860 |
+
}
|
| 861 |
+
|
| 862 |
+
.footer a {
|
| 863 |
+
color: var(--color-mint);
|
| 864 |
+
text-decoration: none;
|
| 865 |
+
font-weight: 600;
|
| 866 |
+
transition: color var(--duration-fast);
|
| 867 |
+
}
|
| 868 |
+
|
| 869 |
+
.footer a:hover {
|
| 870 |
+
color: var(--color-aquamarine);
|
| 871 |
+
text-decoration: underline;
|
| 872 |
+
}
|
| 873 |
+
|
| 874 |
+
.footer__disclaimer {
|
| 875 |
+
font-size: 0.7rem;
|
| 876 |
+
margin-top: var(--space-2);
|
| 877 |
+
opacity: 0.7;
|
| 878 |
+
}
|
| 879 |
+
|
| 880 |
+
/* ============================================
|
| 881 |
+
RESPONSIVE
|
| 882 |
+
============================================ */
|
| 883 |
+
@media (max-width: 768px) {
|
| 884 |
+
.app-shell {
|
| 885 |
+
padding: var(--space-6) var(--space-4);
|
| 886 |
+
}
|
| 887 |
+
|
| 888 |
+
.logo__text {
|
| 889 |
+
font-size: 2rem;
|
| 890 |
+
}
|
| 891 |
+
|
| 892 |
+
.voice-section {
|
| 893 |
+
flex-direction: column;
|
| 894 |
+
align-items: flex-start;
|
| 895 |
+
}
|
| 896 |
+
|
| 897 |
+
.voice-selector {
|
| 898 |
+
width: 100%;
|
| 899 |
+
}
|
| 900 |
+
|
| 901 |
+
.voice-selector__dropdown {
|
| 902 |
+
flex: 1;
|
| 903 |
+
}
|
| 904 |
+
|
| 905 |
+
.output-section {
|
| 906 |
+
grid-template-columns: 1fr;
|
| 907 |
+
}
|
| 908 |
+
|
| 909 |
+
.metrics-panel {
|
| 910 |
+
display: grid;
|
| 911 |
+
grid-template-columns: repeat(2, 1fr);
|
| 912 |
+
}
|
| 913 |
+
|
| 914 |
+
.metric:last-child {
|
| 915 |
+
grid-column: span 2;
|
| 916 |
+
}
|
| 917 |
+
}
|
| 918 |
+
|
| 919 |
+
@media (max-width: 480px) {
|
| 920 |
+
.hero__brand {
|
| 921 |
+
flex-direction: column;
|
| 922 |
+
gap: var(--space-3);
|
| 923 |
+
}
|
| 924 |
+
|
| 925 |
+
.controls {
|
| 926 |
+
flex-direction: column;
|
| 927 |
+
}
|
| 928 |
+
|
| 929 |
+
.btn--primary {
|
| 930 |
+
width: 100%;
|
| 931 |
+
}
|
| 932 |
+
|
| 933 |
+
.sample-texts {
|
| 934 |
+
flex-direction: column;
|
| 935 |
+
align-items: flex-start;
|
| 936 |
+
}
|
| 937 |
+
|
| 938 |
+
.sample-btn {
|
| 939 |
+
width: 100%;
|
| 940 |
+
text-align: center;
|
| 941 |
+
}
|
| 942 |
+
|
| 943 |
+
.metrics-panel {
|
| 944 |
+
grid-template-columns: 1fr;
|
| 945 |
+
}
|
| 946 |
+
|
| 947 |
+
.metric:last-child {
|
| 948 |
+
grid-column: span 1;
|
| 949 |
+
}
|
| 950 |
+
|
| 951 |
+
.metric__number {
|
| 952 |
+
font-size: 1.5rem;
|
| 953 |
+
}
|
| 954 |
+
|
| 955 |
+
.metric__number--large {
|
| 956 |
+
font-size: 1.75rem;
|
| 957 |
+
}
|
| 958 |
}
|
tokenizer.model
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d461765ae179566678c93091c5fa6f2984c31bbe990bf1aa62d92c64d91bc3f6
|
| 3 |
+
size 59339
|
voices.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a499039e88043ff86cb39705f487cc761406d55f1e73ad19a3c055b16e86b062
|
| 3 |
+
size 1564796
|