// Source: Hugging Face Space akhaliq/anycoder-8ba491a7 (status: Running)
// File: anycoder-8ba491a7 / index.js (8.85 kB)
// Commit 88e9fcd (verified, 5 months old at capture time) by akhaliq (HF Staff):
//   "Upload index.js with huggingface_hub"

	// index.js content here
	/**
	 * UI controller for the text-to-speech page.
	 *
	 * Wires the page controls (text input, generate / play / download buttons,
	 * device toggle, voice select) to a text-to-audio pipeline obtained from
	 * `window.pipeline`, encodes the result as a 16-bit mono PCM WAV and plays
	 * it through the `#audioPlayer` element.
	 */
	class SupertonicTTS {
	  constructor() {
	    this.tts = null; // cached pipeline instance
	    this.ttsDevice = null; // device key the cached pipeline was created for
	    this.audioContext = null; // created lazily, then reused
	    this.audioUrl = null; // object URL currently assigned to the player
	    this.statusTimer = null; // handle of the pending "hide status" timeout
	    this.isGenerating = false;
	    // init() is async; surface failures instead of leaving a floating promise.
	    this.init().catch((error) => console.error('Initialization failed:', error));
	  }

	  /** Attaches listeners, renders the initial character count and probes WebGPU. */
	  async init() {
	    this.bindEvents();
	    this.updateCharCount();
	    await this.checkWebGPU();
	  }

	  /** Registers the DOM event handlers for all interactive controls. */
	  bindEvents() {
	    const textInput = document.getElementById('textInput');
	    const generateBtn = document.getElementById('generateBtn');
	    const deviceToggle = document.getElementById('deviceToggle');
	    const playBtn = document.getElementById('playBtn');
	    const downloadBtn = document.getElementById('downloadBtn');

	    textInput.addEventListener('input', () => {
	      this.updateCharCount();
	      this.toggleGenerateBtn();
	    });

	    generateBtn.addEventListener('click', () => this.generateSpeech());
	    deviceToggle.addEventListener('change', () => this.updateDeviceMode());
	    playBtn.addEventListener('click', () => this.playAudio());
	    downloadBtn.addEventListener('click', () => this.downloadAudio());
	  }

	  /** Shows "<length>/500" and flags the counter with `warning` above 450 characters. */
	  updateCharCount() {
	    const textInput = document.getElementById('textInput');
	    const charCount = document.getElementById('charCount');
	    const length = textInput.value.length;
	    charCount.textContent = `${length}/500`;
	    charCount.className = length > 450 ? 'warning' : '';
	  }

	  /** Disables the generate button while the input is empty or whitespace-only. */
	  toggleGenerateBtn() {
	    const textInput = document.getElementById('textInput');
	    const generateBtn = document.getElementById('generateBtn');
	    generateBtn.disabled = !textInput.value.trim();
	  }

	  /** Disables the device toggle and labels it when `navigator.gpu` is unavailable. */
	  async checkWebGPU() {
	    if (navigator.gpu) return;
	    document.getElementById('deviceToggle').disabled = true;
	    document.getElementById('deviceText').textContent = 'WebGPU not supported';
	  }

	  /** Mirrors the device toggle state into the device label. */
	  updateDeviceMode() {
	    const deviceToggle = document.getElementById('deviceToggle');
	    const deviceText = document.getElementById('deviceText');
	    deviceText.textContent = deviceToggle.checked ? 'GPU Mode' : 'CPU Mode';
	  }

	  /**
	   * Switches the generate button between its busy and idle presentation.
	   * Missing `.spinner` / `.btn-text` children are tolerated so that the
	   * clean-up path in generateSpeech() can never throw on them.
	   *
	   * @param {boolean} isBusy - true while a generation is running.
	   */
	  setGeneratingUi(isBusy) {
	    const generateBtn = document.getElementById('generateBtn');
	    const spinner = generateBtn.querySelector('.spinner');
	    const label = generateBtn.querySelector('.btn-text');
	    if (spinner) spinner.style.display = isBusy ? 'inline-block' : 'none';
	    if (label) label.textContent = isBusy ? 'Generating...' : 'Generate Speech';
	    if (isBusy) {
	      generateBtn.disabled = true;
	    } else {
	      // Same rule as the input handler: enabled only when there is text.
	      this.toggleGenerateBtn();
	    }
	  }

	  /**
	   * Runs one text-to-speech generation for the current input and reports
	   * progress in the `#status` element. Re-entrant calls are ignored while a
	   * generation is in flight. Errors are logged and shown to the user; they
	   * are not rethrown.
	   */
	  async generateSpeech() {
	    if (this.isGenerating) return;

	    const textInput = document.getElementById('textInput');
	    const status = document.getElementById('status');
	    const audioSection = document.getElementById('audioSection');
	    const voiceSelect = document.getElementById('voiceSelect');
	    const deviceToggle = document.getElementById('deviceToggle');

	    const text = textInput.value.trim();
	    if (!text) return;

	    const voice = voiceSelect.value;
	    const useGPU = deviceToggle.checked;

	    this.isGenerating = true;
	    // A timer left over from the previous run must not hide the new status.
	    clearTimeout(this.statusTimer);

	    try {
	      // UI set-up lives inside the try so that `finally` always resets it.
	      this.setGeneratingUi(true);
	      status.classList.remove('hidden', 'success', 'error');
	      status.textContent = 'Loading TTS model...';
	      status.classList.add('loading');

	      // Create the pipeline once per device choice instead of on every click.
	      const deviceKey = useGPU && navigator.gpu ? 'webgpu' : 'default';
	      if (!this.tts || this.ttsDevice !== deviceKey) {
	        const options = deviceKey === 'webgpu' ? { device: 'webgpu' } : undefined;
	        this.tts = await window.pipeline('text-to-audio', 'onnx-community/mms-tts-eng', options);
	        this.ttsDevice = deviceKey;
	      }

	      status.textContent = 'Generating speech...';

	      // NOTE(review): whether this model honours `speaker` and the sampling
	      // options below cannot be confirmed from this file.
	      const speaker = this.getSpeakerEmbedding(voice);
	      const output = await this.tts(text, {
	        speaker,
	        generate_speech: true,
	        do_sample: true,
	        temperature: 0.7
	      });

	      const audioData = await this.createAudioBuffer(output);
	      // Awaited so that encoding failures are reported by the catch below.
	      await this.playAudioBuffer(audioData);

	      status.textContent = 'Speech generated successfully!';
	      status.classList.remove('loading');
	      status.classList.add('success');
	      audioSection.classList.remove('hidden');
	    } catch (error) {
	      console.error('TTS Error:', error);
	      const message = (error && error.message) || 'Failed to generate speech';
	      status.textContent = `Error: ${message}`;
	      status.classList.remove('loading');
	      status.classList.add('error');
	    } finally {
	      this.isGenerating = false;
	      this.setGeneratingUi(false);
	      this.statusTimer = setTimeout(() => status.classList.add('hidden'), 5000);
	    }
	  }

	  /**
	   * Maps a voice id to a placeholder speaker vector.
	   *
	   * @param {string} voice - one of 'F1', 'F2', 'M1', 'M2'.
	   * @returns {number[]} the vector for `voice`, or the 'F1' vector for unknown ids.
	   */
	  getSpeakerEmbedding(voice) {
	    // Simple speaker mapping - in a real implementation this would be proper embeddings
	    const speakers = {
	      'F1': [0.1, 0.2, 0.8],
	      'F2': [0.3, 0.1, 0.7],
	      'M1': [0.8, 0.2, 0.1],
	      'M2': [0.7, 0.3, 0.2]
	    };
	    return speakers[voice] || speakers['F1'];
	  }

	  /**
	   * Returns the shared AudioContext, creating it on first use.
	   * Reusing one context avoids allocating a new one for every generation.
	   */
	  getAudioContext() {
	    if (!this.audioContext) {
	      const AudioContextClass = window.AudioContext || window.webkitAudioContext;
	      this.audioContext = new AudioContextClass();
	    }
	    return this.audioContext;
	  }

	  /**
	   * Copies the model output into a mono Web Audio buffer.
	   *
	   * @param {{audio?: ArrayLike<number>, sampling_rate?: number}} audioOutput
	   *   Pipeline result. `sampling_rate` is used when it is a positive number;
	   *   otherwise the previous hard-coded 22050 Hz is assumed.
	   * @returns {Promise<AudioBuffer>} the decoded samples, or a 2 s 440 Hz
	   *   demo tone when the output carries no samples.
	   */
	  async createAudioBuffer(audioOutput) {
	    const context = this.getAudioContext();
	    const samples = audioOutput ? audioOutput.audio : null;

	    if (samples && samples.length > 0) {
	      const sampleRate = audioOutput.sampling_rate > 0 ? audioOutput.sampling_rate : 22050;
	      const buffer = context.createBuffer(1, samples.length, sampleRate);
	      buffer.getChannelData(0).set(samples);
	      return buffer;
	    }

	    // Fallback: generate simple tone for demo
	    return await this.generateTone(440, 2);
	  }

	  /**
	   * Builds a quiet sine tone at the context's own sample rate.
	   *
	   * @param {number} frequency - tone frequency in Hz.
	   * @param {number} duration - length in seconds.
	   * @returns {Promise<AudioBuffer>}
	   */
	  async generateTone(frequency, duration) {
	    const context = this.getAudioContext();
	    const sampleRate = context.sampleRate;
	    const buffer = context.createBuffer(1, duration * sampleRate, sampleRate);
	    const data = buffer.getChannelData(0);

	    for (let i = 0; i < data.length; i++) {
	      data[i] = Math.sin(2 * Math.PI * frequency * i / sampleRate) * 0.1;
	    }
	    return buffer;
	  }

	  /**
	   * Encodes `buffer` as WAV, points `#audioPlayer` at it and starts playback.
	   * The object URL of the previous clip is revoked so that blobs do not pile up.
	   *
	   * @param {AudioBuffer} buffer
	   */
	  async playAudioBuffer(buffer) {
	    const audioPlayer = document.getElementById('audioPlayer');
	    const url = await this.bufferToWave(buffer);

	    if (this.audioUrl) URL.revokeObjectURL(this.audioUrl);
	    this.audioUrl = url;
	    audioPlayer.src = url;

	    try {
	      await audioPlayer.play();
	    } catch (error) {
	      // play() returns a promise that can reject (e.g. playback not allowed);
	      // the clip stays loaded, so the user can still start it manually.
	      console.warn('Automatic playback failed:', error);
	    }
	  }

	  /** Resumes a suspended audio context (if one exists) and plays the loaded clip. */
	  async playAudio() {
	    const audioPlayer = document.getElementById('audioPlayer');
	    try {
	      // The context only exists after the first generation.
	      if (this.audioContext && this.audioContext.state === 'suspended') {
	        await this.audioContext.resume();
	      }
	      await audioPlayer.play();
	    } catch (error) {
	      console.error('Playback failed:', error);
	    }
	  }

	  /** Downloads the clip currently loaded in `#audioPlayer` as 'supertonic-speech.wav'. */
	  async downloadAudio() {
	    const audioPlayer = document.getElementById('audioPlayer');
	    // Nothing has been generated yet: there is no clip to download.
	    if (!audioPlayer.getAttribute('src')) return;

	    try {
	      const audioBlob = await this.audioToBlob(audioPlayer.src);
	      const url = URL.createObjectURL(audioBlob);
	      const a = document.createElement('a');
	      a.href = url;
	      a.download = 'supertonic-speech.wav';
	      a.click();
	      URL.revokeObjectURL(url);
	    } catch (error) {
	      console.error('Download failed:', error);
	    }
	  }

	  /**
	   * Serialises channel 0 of `buffer` as a 16-bit mono PCM WAV file.
	   * The header carries the buffer's own sample rate so playback speed
	   * matches the data.
	   *
	   * @param {{sampleRate: number, getChannelData: function(number): Float32Array}} buffer
	   * @returns {ArrayBuffer} 44-byte header followed by little-endian samples.
	   */
	  encodeWav(buffer) {
	    const bytesPerSample = 2;
	    const channelData = buffer.getChannelData(0);
	    const sampleRate = buffer.sampleRate;
	    const dataLength = channelData.length * bytesPerSample;
	    const arrayBuffer = new ArrayBuffer(44 + dataLength);
	    const view = new DataView(arrayBuffer);

	    const writeString = (offset, string) => {
	      for (let i = 0; i < string.length; i++) {
	        view.setUint8(offset + i, string.charCodeAt(i));
	      }
	    };

	    // WAV header
	    writeString(0, 'RIFF');
	    view.setUint32(4, 36 + dataLength, true);
	    writeString(8, 'WAVE');
	    writeString(12, 'fmt ');
	    view.setUint32(16, 16, true); // fmt chunk size
	    view.setUint16(20, 1, true); // format: PCM
	    view.setUint16(22, 1, true); // channels: mono
	    view.setUint32(24, sampleRate, true);
	    view.setUint32(28, sampleRate * bytesPerSample, true); // byte rate
	    view.setUint16(32, bytesPerSample, true); // block align
	    view.setUint16(34, 16, true); // bits per sample
	    writeString(36, 'data');
	    view.setUint32(40, dataLength, true);

	    let offset = 44;
	    for (let i = 0; i < channelData.length; i++) {
	      // Clamp to [-1, 1], then scale to the asymmetric int16 range.
	      const sample = Math.max(-1, Math.min(1, channelData[i]));
	      view.setInt16(offset, sample < 0 ? sample * 0x8000 : sample * 0x7FFF, true);
	      offset += bytesPerSample;
	    }

	    return arrayBuffer;
	  }

	  /**
	   * Encodes `buffer` as WAV and wraps it in an object URL.
	   *
	   * @param {AudioBuffer} buffer
	   * @returns {Promise<string>} a `blob:` URL; the caller is responsible for revoking it.
	   */
	  async bufferToWave(buffer) {
	    const wavBytes = this.encodeWav(buffer);
	    return URL.createObjectURL(new Blob([wavBytes], { type: 'audio/wav' }));
	  }

	  /**
	   * Fetches `src` and returns its body as a Blob.
	   *
	   * @param {string} src - URL to read.
	   * @returns {Promise<Blob>}
	   * @throws {Error} when the response status is not OK.
	   */
	  async audioToBlob(src) {
	    const response = await fetch(src);
	    if (!response.ok) {
	      throw new Error(`Failed to read audio (status ${response.status})`);
	    }
	    return await response.blob();
	  }
	}

	// Initialize the app once the DOM is available. If this script runs after
	// DOMContentLoaded has already fired, the listener would never be called,
	// so in that case the app is started immediately.
	if (document.readyState === 'loading') {
	  document.addEventListener('DOMContentLoaded', () => {
	    new SupertonicTTS();
	  }, { once: true });
	} else {
	  new SupertonicTTS();
	}