// index.js — browser UI controller for Supertonic TTS.

/**
 * Text-to-speech demo app.
 *
 * Wires the DOM controls (text input, generate button, device toggle,
 * play/download buttons) to a transformers.js `text-to-audio` pipeline and
 * renders the result through the Web Audio API and an <audio> element.
 *
 * NOTE(review): relies on `window.pipeline` being provided by a
 * transformers.js script loaded elsewhere — confirm against the host HTML.
 */
class SupertonicTTS {
  constructor() {
    this.tts = null;           // lazily-created transformers.js pipeline
    this.audioContext = null;  // created once on first generation, then reused
    this.isGenerating = false; // re-entrancy guard for generateSpeech()
    this.init();
  }

  /** One-time setup: event wiring, initial counter state, capability probe. */
  async init() {
    this.bindEvents();
    this.updateCharCount();
    await this.checkWebGPU();
  }

  /** Attach listeners to every interactive control. */
  bindEvents() {
    const textInput = document.getElementById('textInput');
    const generateBtn = document.getElementById('generateBtn');
    const deviceToggle = document.getElementById('deviceToggle');
    const playBtn = document.getElementById('playBtn');
    const downloadBtn = document.getElementById('downloadBtn');

    textInput.addEventListener('input', () => {
      this.updateCharCount();
      this.toggleGenerateBtn();
    });
    generateBtn.addEventListener('click', () => this.generateSpeech());
    deviceToggle.addEventListener('change', () => this.updateDeviceMode());
    playBtn.addEventListener('click', () => this.playAudio());
    downloadBtn.addEventListener('click', () => this.downloadAudio());
  }

  /** Refresh the "n/500" counter and flag when close to the limit. */
  updateCharCount() {
    const textInput = document.getElementById('textInput');
    const charCount = document.getElementById('charCount');
    const length = textInput.value.length;
    charCount.textContent = `${length}/500`;
    charCount.className = length > 450 ? 'warning' : '';
  }

  /** Enable the generate button only when there is non-blank text. */
  toggleGenerateBtn() {
    const textInput = document.getElementById('textInput');
    const generateBtn = document.getElementById('generateBtn');
    generateBtn.disabled = !textInput.value.trim();
  }

  /** Disable the GPU toggle and update the label when WebGPU is missing. */
  async checkWebGPU() {
    if (!navigator.gpu) {
      document.getElementById('deviceToggle').disabled = true;
      document.getElementById('deviceText').textContent = 'WebGPU not supported';
    }
  }

  /** Reflect the toggle state in the device label. */
  updateDeviceMode() {
    const deviceToggle = document.getElementById('deviceToggle');
    const deviceText = document.getElementById('deviceText');
    deviceText.textContent = deviceToggle.checked ? 'GPU Mode' : 'CPU Mode';
  }

  /**
   * Full generate flow: load the pipeline, synthesize speech for the current
   * text/voice, surface status, and reveal the playback controls.
   * Silently ignored while a previous run is still in flight.
   */
  async generateSpeech() {
    if (this.isGenerating) return;

    const textInput = document.getElementById('textInput');
    const generateBtn = document.getElementById('generateBtn');
    const status = document.getElementById('status');
    const audioSection = document.getElementById('audioSection');
    const voiceSelect = document.getElementById('voiceSelect');

    const text = textInput.value.trim();
    const voice = voiceSelect.value;
    const useGPU = document.getElementById('deviceToggle').checked;
    if (!text) return;

    this.isGenerating = true;
    generateBtn.disabled = true;
    generateBtn.querySelector('.spinner').style.display = 'inline-block';
    generateBtn.querySelector('.btn-text').textContent = 'Generating...';
    status.classList.remove('hidden', 'success', 'error');
    status.textContent = 'Loading TTS model...';
    status.classList.add('loading');

    try {
      // Only request WebGPU when both the toggle and the browser allow it.
      const device = useGPU && navigator.gpu ? { device: 'webgpu' } : undefined;
      this.tts = await window.pipeline('text-to-audio', 'onnx-community/mms-tts-eng', device);

      status.textContent = 'Generating speech...';
      const speaker = this.getSpeakerEmbedding(voice);
      const output = await this.tts(text, {
        speaker,
        generate_speech: true,
        do_sample: true,
        temperature: 0.7,
      });

      const audioData = await this.createAudioBuffer(output);
      await this.playAudioBuffer(audioData);

      status.textContent = 'Speech generated successfully!';
      status.classList.remove('loading');
      status.classList.add('success');
      audioSection.classList.remove('hidden');
    } catch (error) {
      console.error('TTS Error:', error);
      status.textContent = `Error: ${error.message || 'Failed to generate speech'}`;
      status.classList.remove('loading');
      status.classList.add('error');
    } finally {
      // Always restore the button/spinner, even on failure.
      this.isGenerating = false;
      generateBtn.disabled = false;
      generateBtn.querySelector('.spinner').style.display = 'none';
      generateBtn.querySelector('.btn-text').textContent = 'Generate Speech';
      setTimeout(() => status.classList.add('hidden'), 5000);
    }
  }

  /**
   * Map a voice id to a placeholder speaker vector.
   * @param {string} voice - One of 'F1', 'F2', 'M1', 'M2'.
   * @returns {number[]} Pseudo speaker embedding; unknown ids fall back to 'F1'.
   */
  getSpeakerEmbedding(voice) {
    // Simple speaker mapping - in a real implementation this would be proper embeddings
    const speakers = {
      'F1': [0.1, 0.2, 0.8],
      'F2': [0.3, 0.1, 0.7],
      'M1': [0.8, 0.2, 0.1],
      'M2': [0.7, 0.3, 0.2]
    };
    return speakers[voice] || speakers['F1'];
  }

  /**
   * Convert pipeline output into a Web Audio AudioBuffer.
   * FIX: the original constructed a new AudioContext per generation
   * (browsers cap live contexts); we now create one and reuse it.
   * @param {{audio?: Float32Array, sampling_rate?: number}} audioOutput
   * @returns {Promise<AudioBuffer>} model audio, or a demo tone fallback.
   */
  async createAudioBuffer(audioOutput) {
    if (!this.audioContext) {
      this.audioContext = new (window.AudioContext || window.webkitAudioContext)();
    }
    if (audioOutput && audioOutput.audio) {
      // FIX: honor the model's reported rate instead of always assuming
      // 22050 Hz. NOTE(review): 22050 kept as fallback — confirm the
      // pipeline output always carries `sampling_rate`.
      const sampleRate = audioOutput.sampling_rate || 22050;
      const buffer = this.audioContext.createBuffer(1, audioOutput.audio.length, sampleRate);
      buffer.getChannelData(0).set(audioOutput.audio);
      return buffer;
    }
    // Fallback: generate simple tone for demo
    return await this.generateTone(440, 2);
  }

  /**
   * Build a quiet sine-wave AudioBuffer (demo fallback).
   * @param {number} frequency - Tone frequency in Hz.
   * @param {number} duration - Length in seconds.
   */
  async generateTone(frequency, duration) {
    const sampleRate = this.audioContext.sampleRate;
    const buffer = this.audioContext.createBuffer(1, duration * sampleRate, sampleRate);
    const data = buffer.getChannelData(0);
    for (let i = 0; i < data.length; i++) {
      data[i] = Math.sin((2 * Math.PI * frequency * i) / sampleRate) * 0.1;
    }
    return buffer;
  }

  /**
   * Encode the buffer to WAV and play it through the <audio> element.
   * FIX: the original also built an AudioBufferSourceNode it never start()ed
   * (a dead node); playback goes solely through the <audio> element.
   */
  async playAudioBuffer(buffer) {
    const audioPlayer = document.getElementById('audioPlayer');
    audioPlayer.src = await this.bufferToWave(buffer);
    audioPlayer.play();
  }

  /** Resume a suspended context (autoplay policy) and (re)play the clip. */
  async playAudio() {
    const audioPlayer = document.getElementById('audioPlayer');
    // FIX: audioContext is null until the first generation; guard so an
    // early Play click does not throw.
    if (this.audioContext && this.audioContext.state === 'suspended') {
      await this.audioContext.resume();
    }
    audioPlayer.play();
  }

  /** Download the current clip as a WAV file via a temporary object URL. */
  async downloadAudio() {
    const audioPlayer = document.getElementById('audioPlayer');
    const audioBlob = await this.audioToBlob(audioPlayer.src);
    const url = URL.createObjectURL(audioBlob);
    const a = document.createElement('a');
    a.href = url;
    a.download = 'supertonic-speech.wav';
    a.click();
    URL.revokeObjectURL(url); // free the temporary URL immediately
  }

  /**
   * Encode a mono AudioBuffer as a 16-bit PCM WAV and return a blob URL.
   * FIX: the header now uses `buffer.sampleRate`; the original hardcoded
   * 22050 Hz, so audio rendered at the context rate (44.1/48 kHz) played
   * at the wrong speed and pitch.
   * @param {AudioBuffer} buffer - Mono source buffer.
   * @returns {Promise<string>} `blob:` URL of the encoded WAV.
   */
  async bufferToWave(buffer) {
    const sampleRate = buffer.sampleRate;
    const length = buffer.length * 2; // payload bytes: 16-bit mono PCM
    const arrayBuffer = new ArrayBuffer(44 + length);
    const view = new DataView(arrayBuffer);

    const writeString = (offset, string) => {
      for (let i = 0; i < string.length; i++) {
        view.setUint8(offset + i, string.charCodeAt(i));
      }
    };

    // RIFF/WAVE header (little-endian).
    writeString(0, 'RIFF');
    view.setUint32(4, 36 + length, true);
    writeString(8, 'WAVE');
    writeString(12, 'fmt ');
    view.setUint32(16, 16, true);             // fmt chunk size
    view.setUint16(20, 1, true);              // audio format: PCM
    view.setUint16(22, 1, true);              // channels: mono
    view.setUint32(24, sampleRate, true);     // sample rate
    view.setUint32(28, sampleRate * 2, true); // byte rate = rate * block align
    view.setUint16(32, 2, true);              // block align (1 ch * 2 bytes)
    view.setUint16(34, 16, true);             // bits per sample
    writeString(36, 'data');
    view.setUint32(40, length, true);

    // Clamp samples to [-1, 1] and scale to signed 16-bit.
    const channelData = buffer.getChannelData(0);
    let offset = 44;
    for (let i = 0; i < channelData.length; i++) {
      const sample = Math.max(-1, Math.min(1, channelData[i]));
      view.setInt16(offset, sample < 0 ? sample * 0x8000 : sample * 0x7FFF, true);
      offset += 2;
    }

    return URL.createObjectURL(new Blob([arrayBuffer], { type: 'audio/wav' }));
  }

  /** Fetch a (blob) URL back into a Blob so it can be saved. */
  async audioToBlob(src) {
    const response = await fetch(src);
    return await response.blob();
  }
}

// Initialize app when DOM is loaded. Guarded so the module can also be
// loaded by non-browser tooling (tests) without throwing on `document`.
if (typeof document !== 'undefined') {
  document.addEventListener('DOMContentLoaded', () => {
    new SupertonicTTS();
  });
}