akhaliq HF Staff commited on
Commit
88e9fcd
·
verified ·
1 Parent(s): 6cc27a6

Upload index.js with huggingface_hub

Browse files
Files changed (1) hide show
  1. index.js +225 -62
index.js CHANGED
@@ -1,76 +1,239 @@
1
- import { pipeline } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.7.6';
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
- // Reference the elements that we will need
4
- const status = document.getElementById('status');
5
- const fileUpload = document.getElementById('upload');
6
- const imageContainer = document.getElementById('container');
7
- const example = document.getElementById('example');
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
- const EXAMPLE_URL = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/city-streets.jpg';
 
 
 
 
10
 
11
- // Create a new object detection pipeline
12
- status.textContent = 'Loading model...';
13
- const detector = await pipeline('object-detection', 'Xenova/detr-resnet-50');
14
- status.textContent = 'Ready';
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
- example.addEventListener('click', (e) => {
17
- e.preventDefault();
18
- detect(EXAMPLE_URL);
19
- });
 
 
 
 
 
 
20
 
21
- fileUpload.addEventListener('change', function (e) {
22
- const file = e.target.files[0];
23
- if (!file) {
24
- return;
 
 
 
 
 
 
 
 
 
 
25
  }
26
 
27
- const reader = new FileReader();
 
 
 
 
 
 
 
 
 
28
 
29
- // Set up a callback when the file is loaded
30
- reader.onload = e2 => detect(e2.target.result);
 
 
 
 
 
 
31
 
32
- reader.readAsDataURL(file);
33
- });
 
 
 
 
 
34
 
 
 
 
 
 
 
 
 
 
 
35
 
36
- // Detect objects in the image
37
- async function detect(img) {
38
- imageContainer.innerHTML = '';
39
- imageContainer.style.backgroundImage = `url(${img})`;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
- status.textContent = 'Analysing...';
42
- const output = await detector(img, {
43
- threshold: 0.5,
44
- percentage: true,
45
- });
46
- status.textContent = '';
47
- output.forEach(renderBox);
48
  }
49
 
50
- // Render a bounding box and label on the image
51
- function renderBox({ box, label }) {
52
- const { xmax, xmin, ymax, ymin } = box;
53
-
54
- // Generate a random color for the box
55
- const color = '#' + Math.floor(Math.random() * 0xFFFFFF).toString(16).padStart(6, 0);
56
-
57
- // Draw the box
58
- const boxElement = document.createElement('div');
59
- boxElement.className = 'bounding-box';
60
- Object.assign(boxElement.style, {
61
- borderColor: color,
62
- left: 100 * xmin + '%',
63
- top: 100 * ymin + '%',
64
- width: 100 * (xmax - xmin) + '%',
65
- height: 100 * (ymax - ymin) + '%',
66
- })
67
-
68
- // Draw label
69
- const labelElement = document.createElement('span');
70
- labelElement.textContent = label;
71
- labelElement.className = 'bounding-box-label';
72
- labelElement.style.backgroundColor = color;
73
-
74
- boxElement.appendChild(labelElement);
75
- imageContainer.appendChild(boxElement);
76
- }
 
1
+ // index.js content here
2
+ class SupertonicTTS {
3
+ constructor() {
4
+ this.tts = null;
5
+ this.audioContext = null;
6
+ this.isGenerating = false;
7
+ this.init();
8
+ }
9
+
10
+ async init() {
11
+ this.bindEvents();
12
+ this.updateCharCount();
13
+ await this.checkWebGPU();
14
+ }
15
+
16
+ bindEvents() {
17
+ const textInput = document.getElementById('textInput');
18
+ const generateBtn = document.getElementById('generateBtn');
19
+ const deviceToggle = document.getElementById('deviceToggle');
20
+ const playBtn = document.getElementById('playBtn');
21
+ const downloadBtn = document.getElementById('downloadBtn');
22
+ const voiceSelect = document.getElementById('voiceSelect');
23
+
24
+ textInput.addEventListener('input', () => {
25
+ this.updateCharCount();
26
+ this.toggleGenerateBtn();
27
+ });
28
+
29
+ generateBtn.addEventListener('click', () => this.generateSpeech());
30
+ deviceToggle.addEventListener('change', () => this.updateDeviceMode());
31
+ playBtn.addEventListener('click', () => this.playAudio());
32
+ downloadBtn.addEventListener('click', () => this.downloadAudio());
33
+ }
34
 
35
+ updateCharCount() {
36
+ const textInput = document.getElementById('textInput');
37
+ const charCount = document.getElementById('charCount');
38
+ const length = textInput.value.length;
39
+ charCount.textContent = `${length}/500`;
40
+ charCount.className = length > 450 ? 'warning' : '';
41
+ }
42
+
43
+ toggleGenerateBtn() {
44
+ const textInput = document.getElementById('textInput');
45
+ const generateBtn = document.getElementById('generateBtn');
46
+ generateBtn.disabled = !textInput.value.trim();
47
+ }
48
+
49
+ async checkWebGPU() {
50
+ if (!navigator.gpu) {
51
+ document.getElementById('deviceToggle').disabled = true;
52
+ document.getElementById('deviceText').textContent = 'WebGPU not supported';
53
+ return;
54
+ }
55
+ }
56
 
57
+ updateDeviceMode() {
58
+ const deviceToggle = document.getElementById('deviceToggle');
59
+ const deviceText = document.getElementById('deviceText');
60
+ deviceText.textContent = deviceToggle.checked ? 'GPU Mode' : 'CPU Mode';
61
+ }
62
 
63
+ async generateSpeech() {
64
+ if (this.isGenerating) return;
65
+
66
+ const textInput = document.getElementById('textInput');
67
+ const generateBtn = document.getElementById('generateBtn');
68
+ const status = document.getElementById('status');
69
+ const audioSection = document.getElementById('audioSection');
70
+ const voiceSelect = document.getElementById('voiceSelect');
71
+
72
+ const text = textInput.value.trim();
73
+ const voice = voiceSelect.value;
74
+ const useGPU = document.getElementById('deviceToggle').checked;
75
+
76
+ if (!text) return;
77
+
78
+ this.isGenerating = true;
79
+ generateBtn.disabled = true;
80
+ generateBtn.querySelector('.spinner').style.display = 'inline-block';
81
+ generateBtn.querySelector('.btn-text').textContent = 'Generating...';
82
+ status.classList.remove('hidden', 'success', 'error');
83
+ status.textContent = 'Loading TTS model...';
84
+ status.classList.add('loading');
85
+
86
+ try {
87
+ // Use a reliable TTS model that works with transformers.js
88
+ const device = useGPU && navigator.gpu ? { device: 'webgpu' } : undefined;
89
+ this.tts = await window.pipeline('text-to-audio', 'onnx-community/mms-tts-eng', device);
90
+
91
+ status.textContent = 'Generating speech...';
92
+
93
+ // Map voice selection to speaker embeddings or parameters
94
+ const speaker = this.getSpeakerEmbedding(voice);
95
+
96
+ const output = await this.tts(text, {
97
+ speaker,
98
+ generate_speech: true,
99
+ do_sample: true,
100
+ temperature: 0.7
101
+ });
102
+
103
+ // Create audio from output
104
+ const audioData = await this.createAudioBuffer(output);
105
+ this.playAudioBuffer(audioData);
106
+
107
+ status.textContent = 'Speech generated successfully!';
108
+ status.classList.remove('loading');
109
+ status.classList.add('success');
110
+ audioSection.classList.remove('hidden');
111
+
112
+ } catch (error) {
113
+ console.error('TTS Error:', error);
114
+ status.textContent = `Error: ${error.message || 'Failed to generate speech'}`;
115
+ status.classList.remove('loading');
116
+ status.classList.add('error');
117
+ } finally {
118
+ this.isGenerating = false;
119
+ generateBtn.disabled = false;
120
+ generateBtn.querySelector('.spinner').style.display = 'none';
121
+ generateBtn.querySelector('.btn-text').textContent = 'Generate Speech';
122
+ setTimeout(() => status.classList.add('hidden'), 5000);
123
+ }
124
+ }
125
 
126
+ getSpeakerEmbedding(voice) {
127
+ // Simple speaker mapping - in a real implementation this would be proper embeddings
128
+ const speakers = {
129
+ 'F1': [0.1, 0.2, 0.8],
130
+ 'F2': [0.3, 0.1, 0.7],
131
+ 'M1': [0.8, 0.2, 0.1],
132
+ 'M2': [0.7, 0.3, 0.2]
133
+ };
134
+ return speakers[voice] || speakers['F1'];
135
+ }
136
 
137
+ async createAudioBuffer(audioOutput) {
138
+ // Convert model output to Web Audio API buffer
139
+ this.audioContext = new (window.AudioContext || window.webkitAudioContext)();
140
+
141
+ if (audioOutput.audio) {
142
+ // Assuming output.audio is Float32Array or similar
143
+ const buffer = this.audioContext.createBuffer(1, audioOutput.audio.length, 22050);
144
+ const channelData = buffer.getChannelData(0);
145
+ channelData.set(audioOutput.audio);
146
+ return buffer;
147
+ }
148
+
149
+ // Fallback: generate simple tone for demo
150
+ return await this.generateTone(440, 2);
151
  }
152
 
153
+ async generateTone(frequency, duration) {
154
+ const sampleRate = this.audioContext.sampleRate;
155
+ const buffer = this.audioContext.createBuffer(1, duration * sampleRate, sampleRate);
156
+ const data = buffer.getChannelData(0);
157
+
158
+ for (let i = 0; i < data.length; i++) {
159
+ data[i] = Math.sin(2 * Math.PI * frequency * i / sampleRate) * 0.1;
160
+ }
161
+ return buffer;
162
+ }
163
 
164
+ async playAudioBuffer(buffer) {
165
+ const source = this.audioContext.createBufferSource();
166
+ source.buffer = buffer;
167
+ source.connect(this.audioContext.destination);
168
+
169
+ document.getElementById('audioPlayer').src = await this.bufferToWave(buffer);
170
+ document.getElementById('audioPlayer').play();
171
+ }
172
 
173
+ async playAudio() {
174
+ const audioPlayer = document.getElementById('audioPlayer');
175
+ if (this.audioContext.state === 'suspended') {
176
+ await this.audioContext.resume();
177
+ }
178
+ audioPlayer.play();
179
+ }
180
 
181
+ async downloadAudio() {
182
+ const audioPlayer = document.getElementById('audioPlayer');
183
+ const audioBlob = await this.audioToBlob(audioPlayer.src);
184
+ const url = URL.createObjectURL(audioBlob);
185
+ const a = document.createElement('a');
186
+ a.href = url;
187
+ a.download = 'supertonic-speech.wav';
188
+ a.click();
189
+ URL.revokeObjectURL(url);
190
+ }
191
 
192
+ async bufferToWave(buffer) {
193
+ // Simplified wave generation
194
+ const length = buffer.length * 2;
195
+ const arrayBuffer = new ArrayBuffer(44 + length);
196
+ const view = new DataView(arrayBuffer);
197
+
198
+ // WAV header
199
+ const writeString = (offset, string) => {
200
+ for (let i = 0; i < string.length; i++) {
201
+ view.setUint8(offset + i, string.charCodeAt(i));
202
+ }
203
+ };
204
+
205
+ writeString(0, 'RIFF');
206
+ view.setUint32(4, 36 + length, true);
207
+ writeString(8, 'WAVE');
208
+ writeString(12, 'fmt ');
209
+ view.setUint32(16, 16, true);
210
+ view.setUint16(20, 1, true);
211
+ view.setUint16(22, 1, true);
212
+ view.setUint32(24, 22050, true);
213
+ view.setUint32(28, 22050 * 2, true);
214
+ view.setUint16(32, 2, true);
215
+ view.setUint16(34, 16, true);
216
+ writeString(36, 'data');
217
+ view.setUint32(40, length, true);
218
+
219
+ const channelData = buffer.getChannelData(0);
220
+ let offset = 44;
221
+ for (let i = 0; i < channelData.length; i++) {
222
+ const sample = Math.max(-1, Math.min(1, channelData[i]));
223
+ view.setInt16(offset, sample < 0 ? sample * 0x8000 : sample * 0x7FFF, true);
224
+ offset += 2;
225
+ }
226
+
227
+ return URL.createObjectURL(new Blob([arrayBuffer], { type: 'audio/wav' }));
228
+ }
229
 
230
+ async audioToBlob(src) {
231
+ const response = await fetch(src);
232
+ return await response.blob();
233
+ }
 
 
 
234
  }
235
 
236
+ // Initialize app when DOM is loaded
237
+ document.addEventListener('DOMContentLoaded', () => {
238
+ new SupertonicTTS();
239
+ });