Spaces:
Running
Running
| // index.js content here | |
| class SupertonicTTS { | |
| constructor() { | |
| this.tts = null; | |
| this.audioContext = null; | |
| this.isGenerating = false; | |
| this.init(); | |
| } | |
| async init() { | |
| this.bindEvents(); | |
| this.updateCharCount(); | |
| await this.checkWebGPU(); | |
| } | |
| bindEvents() { | |
| const textInput = document.getElementById('textInput'); | |
| const generateBtn = document.getElementById('generateBtn'); | |
| const deviceToggle = document.getElementById('deviceToggle'); | |
| const playBtn = document.getElementById('playBtn'); | |
| const downloadBtn = document.getElementById('downloadBtn'); | |
| const voiceSelect = document.getElementById('voiceSelect'); | |
| textInput.addEventListener('input', () => { | |
| this.updateCharCount(); | |
| this.toggleGenerateBtn(); | |
| }); | |
| generateBtn.addEventListener('click', () => this.generateSpeech()); | |
| deviceToggle.addEventListener('change', () => this.updateDeviceMode()); | |
| playBtn.addEventListener('click', () => this.playAudio()); | |
| downloadBtn.addEventListener('click', () => this.downloadAudio()); | |
| } | |
| updateCharCount() { | |
| const textInput = document.getElementById('textInput'); | |
| const charCount = document.getElementById('charCount'); | |
| const length = textInput.value.length; | |
| charCount.textContent = `${length}/500`; | |
| charCount.className = length > 450 ? 'warning' : ''; | |
| } | |
| toggleGenerateBtn() { | |
| const textInput = document.getElementById('textInput'); | |
| const generateBtn = document.getElementById('generateBtn'); | |
| generateBtn.disabled = !textInput.value.trim(); | |
| } | |
| async checkWebGPU() { | |
| if (!navigator.gpu) { | |
| document.getElementById('deviceToggle').disabled = true; | |
| document.getElementById('deviceText').textContent = 'WebGPU not supported'; | |
| return; | |
| } | |
| } | |
| updateDeviceMode() { | |
| const deviceToggle = document.getElementById('deviceToggle'); | |
| const deviceText = document.getElementById('deviceText'); | |
| deviceText.textContent = deviceToggle.checked ? 'GPU Mode' : 'CPU Mode'; | |
| } | |
| async generateSpeech() { | |
| if (this.isGenerating) return; | |
| const textInput = document.getElementById('textInput'); | |
| const generateBtn = document.getElementById('generateBtn'); | |
| const status = document.getElementById('status'); | |
| const audioSection = document.getElementById('audioSection'); | |
| const voiceSelect = document.getElementById('voiceSelect'); | |
| const text = textInput.value.trim(); | |
| const voice = voiceSelect.value; | |
| const useGPU = document.getElementById('deviceToggle').checked; | |
| if (!text) return; | |
| this.isGenerating = true; | |
| generateBtn.disabled = true; | |
| generateBtn.querySelector('.spinner').style.display = 'inline-block'; | |
| generateBtn.querySelector('.btn-text').textContent = 'Generating...'; | |
| status.classList.remove('hidden', 'success', 'error'); | |
| status.textContent = 'Loading TTS model...'; | |
| status.classList.add('loading'); | |
| try { | |
| // Use a reliable TTS model that works with transformers.js | |
| const device = useGPU && navigator.gpu ? { device: 'webgpu' } : undefined; | |
| this.tts = await window.pipeline('text-to-audio', 'onnx-community/mms-tts-eng', device); | |
| status.textContent = 'Generating speech...'; | |
| // Map voice selection to speaker embeddings or parameters | |
| const speaker = this.getSpeakerEmbedding(voice); | |
| const output = await this.tts(text, { | |
| speaker, | |
| generate_speech: true, | |
| do_sample: true, | |
| temperature: 0.7 | |
| }); | |
| // Create audio from output | |
| const audioData = await this.createAudioBuffer(output); | |
| this.playAudioBuffer(audioData); | |
| status.textContent = 'Speech generated successfully!'; | |
| status.classList.remove('loading'); | |
| status.classList.add('success'); | |
| audioSection.classList.remove('hidden'); | |
| } catch (error) { | |
| console.error('TTS Error:', error); | |
| status.textContent = `Error: ${error.message || 'Failed to generate speech'}`; | |
| status.classList.remove('loading'); | |
| status.classList.add('error'); | |
| } finally { | |
| this.isGenerating = false; | |
| generateBtn.disabled = false; | |
| generateBtn.querySelector('.spinner').style.display = 'none'; | |
| generateBtn.querySelector('.btn-text').textContent = 'Generate Speech'; | |
| setTimeout(() => status.classList.add('hidden'), 5000); | |
| } | |
| } | |
| getSpeakerEmbedding(voice) { | |
| // Simple speaker mapping - in a real implementation this would be proper embeddings | |
| const speakers = { | |
| 'F1': [0.1, 0.2, 0.8], | |
| 'F2': [0.3, 0.1, 0.7], | |
| 'M1': [0.8, 0.2, 0.1], | |
| 'M2': [0.7, 0.3, 0.2] | |
| }; | |
| return speakers[voice] || speakers['F1']; | |
| } | |
| async createAudioBuffer(audioOutput) { | |
| // Convert model output to Web Audio API buffer | |
| this.audioContext = new (window.AudioContext || window.webkitAudioContext)(); | |
| if (audioOutput.audio) { | |
| // Assuming output.audio is Float32Array or similar | |
| const buffer = this.audioContext.createBuffer(1, audioOutput.audio.length, 22050); | |
| const channelData = buffer.getChannelData(0); | |
| channelData.set(audioOutput.audio); | |
| return buffer; | |
| } | |
| // Fallback: generate simple tone for demo | |
| return await this.generateTone(440, 2); | |
| } | |
| async generateTone(frequency, duration) { | |
| const sampleRate = this.audioContext.sampleRate; | |
| const buffer = this.audioContext.createBuffer(1, duration * sampleRate, sampleRate); | |
| const data = buffer.getChannelData(0); | |
| for (let i = 0; i < data.length; i++) { | |
| data[i] = Math.sin(2 * Math.PI * frequency * i / sampleRate) * 0.1; | |
| } | |
| return buffer; | |
| } | |
| async playAudioBuffer(buffer) { | |
| const source = this.audioContext.createBufferSource(); | |
| source.buffer = buffer; | |
| source.connect(this.audioContext.destination); | |
| document.getElementById('audioPlayer').src = await this.bufferToWave(buffer); | |
| document.getElementById('audioPlayer').play(); | |
| } | |
| async playAudio() { | |
| const audioPlayer = document.getElementById('audioPlayer'); | |
| if (this.audioContext.state === 'suspended') { | |
| await this.audioContext.resume(); | |
| } | |
| audioPlayer.play(); | |
| } | |
| async downloadAudio() { | |
| const audioPlayer = document.getElementById('audioPlayer'); | |
| const audioBlob = await this.audioToBlob(audioPlayer.src); | |
| const url = URL.createObjectURL(audioBlob); | |
| const a = document.createElement('a'); | |
| a.href = url; | |
| a.download = 'supertonic-speech.wav'; | |
| a.click(); | |
| URL.revokeObjectURL(url); | |
| } | |
| async bufferToWave(buffer) { | |
| // Simplified wave generation | |
| const length = buffer.length * 2; | |
| const arrayBuffer = new ArrayBuffer(44 + length); | |
| const view = new DataView(arrayBuffer); | |
| // WAV header | |
| const writeString = (offset, string) => { | |
| for (let i = 0; i < string.length; i++) { | |
| view.setUint8(offset + i, string.charCodeAt(i)); | |
| } | |
| }; | |
| writeString(0, 'RIFF'); | |
| view.setUint32(4, 36 + length, true); | |
| writeString(8, 'WAVE'); | |
| writeString(12, 'fmt '); | |
| view.setUint32(16, 16, true); | |
| view.setUint16(20, 1, true); | |
| view.setUint16(22, 1, true); | |
| view.setUint32(24, 22050, true); | |
| view.setUint32(28, 22050 * 2, true); | |
| view.setUint16(32, 2, true); | |
| view.setUint16(34, 16, true); | |
| writeString(36, 'data'); | |
| view.setUint32(40, length, true); | |
| const channelData = buffer.getChannelData(0); | |
| let offset = 44; | |
| for (let i = 0; i < channelData.length; i++) { | |
| const sample = Math.max(-1, Math.min(1, channelData[i])); | |
| view.setInt16(offset, sample < 0 ? sample * 0x8000 : sample * 0x7FFF, true); | |
| offset += 2; | |
| } | |
| return URL.createObjectURL(new Blob([arrayBuffer], { type: 'audio/wav' })); | |
| } | |
| async audioToBlob(src) { | |
| const response = await fetch(src); | |
| return await response.blob(); | |
| } | |
| } | |
// Boot the app once the document has finished parsing.
const startApp = () => {
  new SupertonicTTS();
};
document.addEventListener('DOMContentLoaded', startApp);