Spaces:

akhaliq
/

anycoder-8ba491a7

Running

App Files Files Community

akhaliq HF Staff committed on Nov 19, 2025

Commit

88e9fcd

verified ·

1 Parent(s): 6cc27a6

Upload index.js with huggingface_hub

Browse files

Files changed (1) hide show

index.js +225 -62

index.js CHANGED Viewed

@@ -1,76 +1,239 @@
-import { pipeline } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.7.6';
-// Reference the elements that we will need
-const status = document.getElementById('status');
-const fileUpload = document.getElementById('upload');
-const imageContainer = document.getElementById('container');
-const example = document.getElementById('example');
-const EXAMPLE_URL = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/city-streets.jpg';
-// Create a new object detection pipeline
-status.textContent = 'Loading model...';
-const detector = await pipeline('object-detection', 'Xenova/detr-resnet-50');
-status.textContent = 'Ready';
-example.addEventListener('click', (e) => {
-    e.preventDefault();
-    detect(EXAMPLE_URL);
-});
-fileUpload.addEventListener('change', function (e) {
-    const file = e.target.files[0];
-    if (!file) {
-        return;
     }
-    const reader = new FileReader();
-    // Set up a callback when the file is loaded
-    reader.onload = e2 => detect(e2.target.result);
-    reader.readAsDataURL(file);
-});
-// Detect objects in the image
-async function detect(img) {
-    imageContainer.innerHTML = '';
-    imageContainer.style.backgroundImage = `url(${img})`;
-    status.textContent = 'Analysing...';
-    const output = await detector(img, {
-        threshold: 0.5,
-        percentage: true,
-    });
-    status.textContent = '';
-    output.forEach(renderBox);
 }
-// Render a bounding box and label on the image
-function renderBox({ box, label }) {
-    const { xmax, xmin, ymax, ymin } = box;
-    // Generate a random color for the box
-    const color = '#' + Math.floor(Math.random() * 0xFFFFFF).toString(16).padStart(6, 0);
-    // Draw the box
-    const boxElement = document.createElement('div');
-    boxElement.className = 'bounding-box';
-    Object.assign(boxElement.style, {
-        borderColor: color,
-        left: 100 * xmin + '%',
-        top: 100 * ymin + '%',
-        width: 100 * (xmax - xmin) + '%',
-        height: 100 * (ymax - ymin) + '%',
-    })
-    // Draw label
-    const labelElement = document.createElement('span');
-    labelElement.textContent = label;
-    labelElement.className = 'bounding-box-label';
-    labelElement.style.backgroundColor = color;
-    boxElement.appendChild(labelElement);
-    imageContainer.appendChild(boxElement);
-}

+// index.js content here
+class SupertonicTTS {
+    constructor() {
+        this.tts = null;
+        this.audioContext = null;
+        this.isGenerating = false;
+        this.init();
+    }
+    async init() {
+        this.bindEvents();
+        this.updateCharCount();
+        await this.checkWebGPU();
+    }
+    bindEvents() {
+        const textInput = document.getElementById('textInput');
+        const generateBtn = document.getElementById('generateBtn');
+        const deviceToggle = document.getElementById('deviceToggle');
+        const playBtn = document.getElementById('playBtn');
+        const downloadBtn = document.getElementById('downloadBtn');
+        const voiceSelect = document.getElementById('voiceSelect');
+        textInput.addEventListener('input', () => {
+            this.updateCharCount();
+            this.toggleGenerateBtn();
+        });
+        generateBtn.addEventListener('click', () => this.generateSpeech());
+        deviceToggle.addEventListener('change', () => this.updateDeviceMode());
+        playBtn.addEventListener('click', () => this.playAudio());
+        downloadBtn.addEventListener('click', () => this.downloadAudio());
+    }
+    updateCharCount() {
+        const textInput = document.getElementById('textInput');
+        const charCount = document.getElementById('charCount');
+        const length = textInput.value.length;
+        charCount.textContent = `${length}/500`;
+        charCount.className = length > 450 ? 'warning' : '';
+    }
+    toggleGenerateBtn() {
+        const textInput = document.getElementById('textInput');
+        const generateBtn = document.getElementById('generateBtn');
+        generateBtn.disabled = !textInput.value.trim();
+    }
+    async checkWebGPU() {
+        if (!navigator.gpu) {
+            document.getElementById('deviceToggle').disabled = true;
+            document.getElementById('deviceText').textContent = 'WebGPU not supported';
+            return;
+        }
+    }
+    updateDeviceMode() {
+        const deviceToggle = document.getElementById('deviceToggle');
+        const deviceText = document.getElementById('deviceText');
+        deviceText.textContent = deviceToggle.checked ? 'GPU Mode' : 'CPU Mode';
+    }
+    async generateSpeech() {
+        if (this.isGenerating) return;
+        const textInput = document.getElementById('textInput');
+        const generateBtn = document.getElementById('generateBtn');
+        const status = document.getElementById('status');
+        const audioSection = document.getElementById('audioSection');
+        const voiceSelect = document.getElementById('voiceSelect');
+        const text = textInput.value.trim();
+        const voice = voiceSelect.value;
+        const useGPU = document.getElementById('deviceToggle').checked;
+        if (!text) return;
+        this.isGenerating = true;
+        generateBtn.disabled = true;
+        generateBtn.querySelector('.spinner').style.display = 'inline-block';
+        generateBtn.querySelector('.btn-text').textContent = 'Generating...';
+        status.classList.remove('hidden', 'success', 'error');
+        status.textContent = 'Loading TTS model...';
+        status.classList.add('loading');
+        try {
+            // Use a reliable TTS model that works with transformers.js
+            const device = useGPU && navigator.gpu ? { device: 'webgpu' } : undefined;
+            this.tts = await window.pipeline('text-to-audio', 'onnx-community/mms-tts-eng', device);
+            status.textContent = 'Generating speech...';
+            // Map voice selection to speaker embeddings or parameters
+            const speaker = this.getSpeakerEmbedding(voice);
+            const output = await this.tts(text, {
+                speaker,
+                generate_speech: true,
+                do_sample: true,
+                temperature: 0.7
+            });
+            // Create audio from output
+            const audioData = await this.createAudioBuffer(output);
+            this.playAudioBuffer(audioData);
+            status.textContent = 'Speech generated successfully!';
+            status.classList.remove('loading');
+            status.classList.add('success');
+            audioSection.classList.remove('hidden');
+        } catch (error) {
+            console.error('TTS Error:', error);
+            status.textContent = `Error: ${error.message || 'Failed to generate speech'}`;
+            status.classList.remove('loading');
+            status.classList.add('error');
+        } finally {
+            this.isGenerating = false;
+            generateBtn.disabled = false;
+            generateBtn.querySelector('.spinner').style.display = 'none';
+            generateBtn.querySelector('.btn-text').textContent = 'Generate Speech';
+            setTimeout(() => status.classList.add('hidden'), 5000);
+        }
+    }
+    getSpeakerEmbedding(voice) {
+        // Simple speaker mapping - in a real implementation this would be proper embeddings
+        const speakers = {
+            'F1': [0.1, 0.2, 0.8],
+            'F2': [0.3, 0.1, 0.7],
+            'M1': [0.8, 0.2, 0.1],
+            'M2': [0.7, 0.3, 0.2]
+        };
+        return speakers[voice] || speakers['F1'];
+    }
+    async createAudioBuffer(audioOutput) {
+        // Convert model output to Web Audio API buffer
+        this.audioContext = new (window.AudioContext || window.webkitAudioContext)();
+        if (audioOutput.audio) {
+            // Assuming output.audio is Float32Array or similar
+            const buffer = this.audioContext.createBuffer(1, audioOutput.audio.length, 22050);
+            const channelData = buffer.getChannelData(0);
+            channelData.set(audioOutput.audio);
+            return buffer;
+        }
+        // Fallback: generate simple tone for demo
+        return await this.generateTone(440, 2);
     }
+    async generateTone(frequency, duration) {
+        const sampleRate = this.audioContext.sampleRate;
+        const buffer = this.audioContext.createBuffer(1, duration * sampleRate, sampleRate);
+        const data = buffer.getChannelData(0);
+        for (let i = 0; i < data.length; i++) {
+            data[i] = Math.sin(2 * Math.PI * frequency * i / sampleRate) * 0.1;
+        }
+        return buffer;
+    }
+    async playAudioBuffer(buffer) {
+        const source = this.audioContext.createBufferSource();
+        source.buffer = buffer;
+        source.connect(this.audioContext.destination);
+        document.getElementById('audioPlayer').src = await this.bufferToWave(buffer);
+        document.getElementById('audioPlayer').play();
+    }
+    async playAudio() {
+        const audioPlayer = document.getElementById('audioPlayer');
+        if (this.audioContext.state === 'suspended') {
+            await this.audioContext.resume();
+        }
+        audioPlayer.play();
+    }
+    async downloadAudio() {
+        const audioPlayer = document.getElementById('audioPlayer');
+        const audioBlob = await this.audioToBlob(audioPlayer.src);
+        const url = URL.createObjectURL(audioBlob);
+        const a = document.createElement('a');
+        a.href = url;
+        a.download = 'supertonic-speech.wav';
+        a.click();
+        URL.revokeObjectURL(url);
+    }
+    async bufferToWave(buffer) {
+        // Simplified wave generation
+        const length = buffer.length * 2;
+        const arrayBuffer = new ArrayBuffer(44 + length);
+        const view = new DataView(arrayBuffer);
+        // WAV header
+        const writeString = (offset, string) => {
+            for (let i = 0; i < string.length; i++) {
+                view.setUint8(offset + i, string.charCodeAt(i));
+            }
+        };
+        writeString(0, 'RIFF');
+        view.setUint32(4, 36 + length, true);
+        writeString(8, 'WAVE');
+        writeString(12, 'fmt ');
+        view.setUint32(16, 16, true);
+        view.setUint16(20, 1, true);
+        view.setUint16(22, 1, true);
+        view.setUint32(24, 22050, true);
+        view.setUint32(28, 22050 * 2, true);
+        view.setUint16(32, 2, true);
+        view.setUint16(34, 16, true);
+        writeString(36, 'data');
+        view.setUint32(40, length, true);
+        const channelData = buffer.getChannelData(0);
+        let offset = 44;
+        for (let i = 0; i < channelData.length; i++) {
+            const sample = Math.max(-1, Math.min(1, channelData[i]));
+            view.setInt16(offset, sample < 0 ? sample * 0x8000 : sample * 0x7FFF, true);
+            offset += 2;
+        }
+        return URL.createObjectURL(new Blob([arrayBuffer], { type: 'audio/wav' }));
+    }
+    async audioToBlob(src) {
+        const response = await fetch(src);
+        return await response.blob();
+    }
 }
+// Initialize app when DOM is loaded
+document.addEventListener('DOMContentLoaded', () => {
+    new SupertonicTTS();
+});