Xenova HF Staff committed on
Commit
a0d4ab9
·
verified ·
1 Parent(s): fd160f2

support >5h audio exports

Browse files
Files changed (1) hide show
  1. src/tts.ts +32 -10
src/tts.ts CHANGED
@@ -1,5 +1,6 @@
1
- import { pipeline, RawAudio, TextToAudioPipeline } from "@huggingface/transformers";
2
  import { split } from "./splitter";
 
3
 
4
  // Identifier of the model repository; also interpolated into VOICES_URL below.
  const MODEL_ID = "onnx-community/Supertonic-TTS-ONNX";
5
  // URL prefix of the `voices/` directory on the `main` revision of that repository on huggingface.co.
  const VOICES_URL = `https://huggingface.co/${MODEL_ID}/resolve/main/voices/`;
@@ -119,14 +120,35 @@ export async function* streamTTS(
119
 
120
  // Pre-change version of createAudioBlob; the lines marked "-" are the ones this commit removes.
  // It sums the chunk lengths, copies every chunk into one Float32Array of that total
  // length, wraps the result in a RawAudio with the given sampling rate, and returns
  // the Blob produced by RawAudio.toBlob().
  // NOTE(review): presumably the single contiguous allocation is what the commit title
  // ("support >5h audio exports") works around — confirm.
  export function createAudioBlob(chunks: Float32Array[], sampling_rate: number): Blob {
121
  const totalLength = chunks.reduce((acc, chunk) => acc + chunk.length, 0);
122
  // One buffer sized to hold the samples of all chunks.
- const result = new Float32Array(totalLength);
123
- let offset = 0;
124
  // Copy each chunk in order, advancing the write position by the chunk's length.
- for (const chunk of chunks) {
125
- result.set(chunk, offset);
126
- offset += chunk.length;
127
- }
128
 
129
  // WAV encoding is delegated to RawAudio (imported from @huggingface/transformers).
- const audio = new RawAudio(result, sampling_rate);
130
- const blob = audio.toBlob();
131
- return blob;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
132
  }
 
1
+ import { pipeline, TextToAudioPipeline } from "@huggingface/transformers";
2
  import { split } from "./splitter";
3
+ import type { RawAudio } from "@huggingface/transformers";
4
 
5
  // Identifier of the model repository; also interpolated into VOICES_URL below.
  const MODEL_ID = "onnx-community/Supertonic-TTS-ONNX";
6
  // URL prefix of the `voices/` directory on the `main` revision of that repository on huggingface.co.
  const VOICES_URL = `https://huggingface.co/${MODEL_ID}/resolve/main/voices/`;
 
120
 
121
  export function createAudioBlob(chunks: Float32Array[], sampling_rate: number): Blob {
122
  const totalLength = chunks.reduce((acc, chunk) => acc + chunk.length, 0);
 
 
 
 
 
 
123
 
124
+ // Create WAV header
125
+ const buffer = new ArrayBuffer(44);
126
+ const view = new DataView(buffer);
127
+
128
+ // RIFF chunk descriptor
129
+ writeString(view, 0, "RIFF");
130
+ view.setUint32(4, 36 + totalLength * 4, true); // ChunkSize
131
+ writeString(view, 8, "WAVE");
132
+
133
+ // fmt sub-chunk
134
+ writeString(view, 12, "fmt ");
135
+ view.setUint32(16, 16, true); // Subchunk1Size
136
+ view.setUint16(20, 3, true); // AudioFormat (3 = IEEE Float)
137
+ view.setUint16(22, 1, true); // NumChannels (Mono)
138
+ view.setUint32(24, sampling_rate, true); // SampleRate
139
+ view.setUint32(28, sampling_rate * 4, true); // ByteRate
140
+ view.setUint16(32, 4, true); // BlockAlign
141
+ view.setUint16(34, 32, true); // BitsPerSample
142
+
143
+ // data sub-chunk
144
+ writeString(view, 36, "data");
145
+ view.setUint32(40, totalLength * 4, true); // Subchunk2Size
146
+
147
+ return new Blob([buffer, ...chunks as any], { type: "audio/wav" });
148
+ }
149
+
150
+ function writeString(view: DataView, offset: number, string: string) {
151
+ for (let i = 0; i < string.length; i++) {
152
+ view.setUint8(offset + i, string.charCodeAt(i));
153
+ }
154
  }