support >5h audio exports
Changed files: src/tts.ts (+32 -10)
src/tts.ts
CHANGED
|
@@ -1,5 +1,6 @@
|
|
| 1 |
-
import { pipeline,
|
| 2 |
import { split } from "./splitter";
|
|
|
|
| 3 |
|
| 4 |
const MODEL_ID = "onnx-community/Supertonic-TTS-ONNX";
|
| 5 |
const VOICES_URL = `https://huggingface.co/${MODEL_ID}/resolve/main/voices/`;
|
|
@@ -119,14 +120,35 @@ export async function* streamTTS(
|
|
| 119 |
|
| 120 |
export function createAudioBlob(chunks: Float32Array[], sampling_rate: number): Blob {
|
| 121 |
const totalLength = chunks.reduce((acc, chunk) => acc + chunk.length, 0);
|
| 122 |
-
const result = new Float32Array(totalLength);
|
| 123 |
-
let offset = 0;
|
| 124 |
-
for (const chunk of chunks) {
|
| 125 |
-
result.set(chunk, offset);
|
| 126 |
-
offset += chunk.length;
|
| 127 |
-
}
|
| 128 |
|
| 129 |
-
|
| 130 |
-
const
|
| 131 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 132 |
}
|
|
|
|
| 1 |
+
import { pipeline, TextToAudioPipeline } from "@huggingface/transformers";
|
| 2 |
import { split } from "./splitter";
|
| 3 |
+
import type { RawAudio } from "@huggingface/transformers";
|
| 4 |
|
| 5 |
// Hugging Face repository id of the TTS model; it is also interpolated into VOICES_URL below.
const MODEL_ID = "onnx-community/Supertonic-TTS-ONNX";
|
| 6 |
// Base URL of the `voices/` directory on the `main` revision of the MODEL_ID repository.
const VOICES_URL = `https://huggingface.co/${MODEL_ID}/resolve/main/voices/`;
|
|
|
|
| 120 |
|
| 121 |
/**
 * Packs mono 32-bit float audio chunks into a WAV (RIFF/WAVE) `Blob`.
 *
 * Only the 44-byte header is allocated here. The chunks themselves are handed
 * to the `Blob` constructor as individual parts, so no single contiguous
 * buffer holding the whole recording is created by this function.
 *
 * The RIFF and `data` size fields are 32 bits wide. When the payload does not
 * fit, both fields are clamped to `0xFFFFFFFF` instead of being allowed to
 * wrap around to a small, wrong value.
 *
 * @param chunks - Sample buffers in playback order; their bytes are appended
 *   unchanged after the header (WAV expects little-endian samples, which is
 *   what typed arrays hold on little-endian platforms).
 * @param sampling_rate - Sample rate in Hz written to the `fmt ` chunk.
 * @returns A `Blob` of type `audio/wav` containing header + samples.
 */
export function createAudioBlob(chunks: Float32Array[], sampling_rate: number): Blob {
  const HEADER_BYTES = 44;
  const BYTES_PER_SAMPLE = Float32Array.BYTES_PER_ELEMENT; // 4 bytes per float32 sample
  const NUM_CHANNELS = 1; // Mono
  const UINT32_MAX = 0xffffffff;

  const totalLength = chunks.reduce((acc, chunk) => acc + chunk.length, 0);
  const dataBytes = totalLength * BYTES_PER_SAMPLE;

  // Create WAV header
  const buffer = new ArrayBuffer(HEADER_BYTES);
  const view = new DataView(buffer);

  // RIFF chunk descriptor
  writeString(view, 0, "RIFF");
  // ChunkSize = file size minus the 8 bytes of "RIFF" + this field.
  // setUint32 stores its argument modulo 2^32, so clamp explicitly: a saturated
  // size is commonly read by decoders as "unknown length", whereas a wrapped
  // size would describe only a fraction of the audio.
  view.setUint32(4, Math.min(HEADER_BYTES - 8 + dataBytes, UINT32_MAX), true);
  writeString(view, 8, "WAVE");

  // fmt sub-chunk
  writeString(view, 12, "fmt ");
  view.setUint32(16, 16, true); // Subchunk1Size
  view.setUint16(20, 3, true); // AudioFormat (3 = IEEE Float)
  view.setUint16(22, NUM_CHANNELS, true); // NumChannels
  view.setUint32(24, sampling_rate, true); // SampleRate
  view.setUint32(28, sampling_rate * NUM_CHANNELS * BYTES_PER_SAMPLE, true); // ByteRate
  view.setUint16(32, NUM_CHANNELS * BYTES_PER_SAMPLE, true); // BlockAlign
  view.setUint16(34, BYTES_PER_SAMPLE * 8, true); // BitsPerSample

  // data sub-chunk
  writeString(view, 36, "data");
  view.setUint32(40, Math.min(dataBytes, UINT32_MAX), true); // Subchunk2Size (clamped, see above)

  // NOTE(review): the `any` cast is kept from the original; presumably it works
  // around Float32Array not being accepted as a Blob part by some lib typings — confirm.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  return new Blob([buffer, ...(chunks as any)], { type: "audio/wav" });
}

/**
 * Writes `string` into `view` starting at `offset`, one byte per UTF-16 code unit.
 *
 * `setUint8` keeps only the low 8 bits of each code unit, so this is intended
 * for ASCII tags such as the four-character RIFF chunk ids used above.
 *
 * @param view - Destination view; must have room for `string.length` bytes at `offset`.
 * @param offset - Byte offset of the first character.
 * @param string - Text to write.
 */
function writeString(view: DataView, offset: number, string: string): void {
  for (let i = 0; i < string.length; i++) {
    view.setUint8(offset + i, string.charCodeAt(i));
  }
}
|