Chillarmo committed on
Commit 07ef252 · verified · 1 Parent(s): d5e2966

Update index.js

Files changed (1)
  1. index.js +310 -57
index.js CHANGED
@@ -1,76 +1,329 @@
  import { pipeline } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.7.6';

- // Reference the elements that we will need
  const status = document.getElementById('status');
- const fileUpload = document.getElementById('upload');
- const imageContainer = document.getElementById('container');
- const example = document.getElementById('example');

- const EXAMPLE_URL = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/city-streets.jpg';

- // Create a new object detection pipeline
- status.textContent = 'Loading model...';
- const detector = await pipeline('object-detection', 'Xenova/detr-resnet-50');
- status.textContent = 'Ready';
-
- example.addEventListener('click', (e) => {
-     e.preventDefault();
-     detect(EXAMPLE_URL);
- });
-
- fileUpload.addEventListener('change', function (e) {
-     const file = e.target.files[0];
-     if (!file) {
-         return;
      }

-     const reader = new FileReader();
-
-     // Set up a callback when the file is loaded
-     reader.onload = e2 => detect(e2.target.result);
-
-     reader.readAsDataURL(file);
- });

- // Detect objects in the image
- async function detect(img) {
-     imageContainer.innerHTML = '';
-     imageContainer.style.backgroundImage = `url(${img})`;

-     status.textContent = 'Analysing...';
-     const output = await detector(img, {
-         threshold: 0.5,
-         percentage: true,
      });
-     status.textContent = '';
-     output.forEach(renderBox);
  }

- // Render a bounding box and label on the image
- function renderBox({ box, label }) {
-     const { xmax, xmin, ymax, ymin } = box;

-     // Generate a random color for the box
-     const color = '#' + Math.floor(Math.random() * 0xFFFFFF).toString(16).padStart(6, 0);

-     // Draw the box
-     const boxElement = document.createElement('div');
-     boxElement.className = 'bounding-box';
-     Object.assign(boxElement.style, {
-         borderColor: color,
-         left: 100 * xmin + '%',
-         top: 100 * ymin + '%',
-         width: 100 * (xmax - xmin) + '%',
-         height: 100 * (ymax - ymin) + '%',
-     })

-     // Draw label
-     const labelElement = document.createElement('span');
-     labelElement.textContent = label;
-     labelElement.className = 'bounding-box-label';
-     labelElement.style.backgroundColor = color;

-     boxElement.appendChild(labelElement);
-     imageContainer.appendChild(boxElement);
  }

  import { pipeline } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.7.6';

+ // Get DOM elements
  const status = document.getElementById('status');
+ const startBtn = document.getElementById('startBtn');
+ const stopBtn = document.getElementById('stopBtn');
+ const clearBtn = document.getElementById('clearBtn');
+ const transcriptionContainer = document.getElementById('transcriptionContainer');
+ const chunkLengthSelect = document.getElementById('chunkLength');
+ const useWebGPUCheckbox = document.getElementById('useWebGPU');
+ const chunkCountDisplay = document.getElementById('chunkCount');
+ const recordingTimeDisplay = document.getElementById('recordingTime');
+ const visualizerBars = document.querySelectorAll('.bar');

+ // State
+ let transcriber = null;
+ let mediaStream = null;
+ let audioContext = null;
+ let mediaRecorder = null;
+ let recordedChunks = [];
+ let isRecording = false;
+ let chunkCount = 0;
+ let recordingStartTime = null;
+ let recordingInterval = null;
+ let analyser = null;
+ let animationId = null;

+ // Initialize the ATOM model
+ async function initModel() {
+     try {
+         status.textContent = 'Loading ATOM model... This may take a minute.';
+         status.className = 'loading';
+
+         const device = useWebGPUCheckbox.checked ? 'webgpu' : 'wasm';
+
+         // Load your custom ATOM model
+         transcriber = await pipeline(
+             'automatic-speech-recognition',
+             'Chillarmo/ATOM',
+             {
+                 device: device,
+                 progress_callback: (progress) => {
+                     if (progress.status === 'downloading') {
+                         const percent = Math.round((progress.loaded / progress.total) * 100);
+                         status.textContent = `Downloading ${progress.file}: ${percent}%`;
+                     } else if (progress.status === 'loading') {
+                         status.textContent = `Loading ${progress.file}...`;
+                     }
+                 }
+             }
+         );
+
+         status.textContent = 'Model loaded! Ready to transcribe Armenian speech.';
+         status.className = 'ready';
+         startBtn.disabled = false;
+     } catch (error) {
+         console.error('Model loading error:', error);
+         status.textContent = `Error loading model: ${error.message}`;
+         status.className = 'error';
      }
+ }

+ // Format time as MM:SS
+ function formatTime(seconds) {
+     const mins = Math.floor(seconds / 60);
+     const secs = Math.floor(seconds % 60);
+     return `${mins.toString().padStart(2, '0')}:${secs.toString().padStart(2, '0')}`;
+ }

+ // Update recording time
+ function updateRecordingTime() {
+     if (recordingStartTime) {
+         const elapsed = (Date.now() - recordingStartTime) / 1000;
+         recordingTimeDisplay.textContent = formatTime(elapsed);
+     }
+ }

+ // Visualize audio
+ function visualizeAudio() {
+     if (!analyser || !isRecording) return;
+
+     const dataArray = new Uint8Array(analyser.frequencyBinCount);
+     analyser.getByteFrequencyData(dataArray);
+
+     // Sample the data for visualization
+     const barCount = visualizerBars.length;
+     const step = Math.floor(dataArray.length / barCount);
+
+     visualizerBars.forEach((bar, index) => {
+         const value = dataArray[index * step];
+         const height = (value / 255) * 70 + 4; // 4px minimum, 74px maximum
+         bar.style.height = `${height}px`;
+     });
+
+     animationId = requestAnimationFrame(visualizeAudio);
+ }

+ // Start recording
+ async function startRecording() {
+     try {
+         // Request microphone access
+         mediaStream = await navigator.mediaDevices.getUserMedia({
+             audio: {
+                 channelCount: 1,
+                 sampleRate: 16000,
+             }
+         });
+
+         // Set up audio context for visualization
+         audioContext = new AudioContext({ sampleRate: 16000 });
+         const source = audioContext.createMediaStreamSource(mediaStream);
+         analyser = audioContext.createAnalyser();
+         analyser.fftSize = 256;
+         source.connect(analyser);
+
+         // Set up MediaRecorder
+         mediaRecorder = new MediaRecorder(mediaStream);
+         recordedChunks = [];
+
+         mediaRecorder.ondataavailable = (event) => {
+             if (event.data.size > 0) {
+                 recordedChunks.push(event.data);
+             }
+         };
+
+         mediaRecorder.onstop = async () => {
+             if (recordedChunks.length > 0) {
+                 await processAudioChunk(recordedChunks);
+                 recordedChunks = [];
+             }
+         };
+
+         // Start recording
+         const chunkDuration = parseInt(chunkLengthSelect.value) * 1000;
+         mediaRecorder.start();
+
+         // Schedule automatic chunk processing
+         const chunkInterval = setInterval(() => {
+             if (!isRecording) {
+                 clearInterval(chunkInterval);
+                 return;
+             }
+
+             mediaRecorder.stop();
+             mediaRecorder.start();
+         }, chunkDuration);
+
+         isRecording = true;
+         recordingStartTime = Date.now();
+         recordingInterval = setInterval(updateRecordingTime, 100);
+
+         status.textContent = 'Recording... Speak in Armenian';
+         status.className = 'recording';
+         startBtn.disabled = true;
+         stopBtn.disabled = false;
+
+         // Start visualization
+         visualizeAudio();
+
+     } catch (error) {
+         console.error('Error starting recording:', error);
+         status.textContent = `Error: ${error.message}`;
+         status.className = 'error';
+     }
+ }

+ // Stop recording
+ function stopRecording() {
+     isRecording = false;
+
+     if (mediaRecorder && mediaRecorder.state !== 'inactive') {
+         mediaRecorder.stop();
+     }
+
+     if (mediaStream) {
+         mediaStream.getTracks().forEach(track => track.stop());
+     }
+
+     if (audioContext) {
+         audioContext.close();
+     }
+
+     if (recordingInterval) {
+         clearInterval(recordingInterval);
+     }
+
+     if (animationId) {
+         cancelAnimationFrame(animationId);
+     }
+
+     // Reset visualizer
+     visualizerBars.forEach(bar => {
+         bar.style.height = '4px';
      });
+
+     status.textContent = 'Recording stopped. Ready for next recording.';
+     status.className = 'ready';
+     startBtn.disabled = false;
+     stopBtn.disabled = true;
  }

+ // Process audio chunk
+ async function processAudioChunk(chunks) {
+     try {
+         status.textContent = 'Processing audio...';
+         status.className = 'processing';
+
+         // Create audio blob
+         const audioBlob = new Blob(chunks, { type: 'audio/webm' });
+
+         // Convert to array buffer
+         const arrayBuffer = await audioBlob.arrayBuffer();
+
+         // Decode audio
+         const tempAudioContext = new (window.AudioContext || window.webkitAudioContext)();
+         const audioBuffer = await tempAudioContext.decodeAudioData(arrayBuffer);
+
+         // Get audio data as Float32Array
+         const audioData = audioBuffer.getChannelData(0);
+
+         // Transcribe with ATOM model
+         const result = await transcriber(audioData, {
+             sampling_rate: audioBuffer.sampleRate,
+         });
+
+         // Add to transcription
+         if (result && result.text && result.text.trim()) {
+             addTranscription(result.text.trim());
+             chunkCount++;
+             chunkCountDisplay.textContent = chunkCount;
+         }
+
+         if (isRecording) {
+             status.textContent = 'Recording... Speak in Armenian';
+             status.className = 'recording';
+         } else {
+             status.textContent = 'Ready for next recording.';
+             status.className = 'ready';
+         }
+
+         tempAudioContext.close();
+
+     } catch (error) {
+         console.error('Error processing audio:', error);
+         status.textContent = `Processing error: ${error.message}`;
+         status.className = 'error';
+
+         // Restore recording status if still recording
+         setTimeout(() => {
+             if (isRecording) {
+                 status.textContent = 'Recording... Speak in Armenian';
+                 status.className = 'recording';
+             }
+         }, 2000);
+     }
+ }

+ // Add transcription to UI
+ function addTranscription(text) {
+     // Remove empty state if present
+     const emptyState = transcriptionContainer.querySelector('.empty-state');
+     if (emptyState) {
+         emptyState.remove();
+     }
+
+     // Create transcription item
+     const item = document.createElement('div');
+     item.className = 'transcription-item';
+
+     const timestamp = document.createElement('div');
+     timestamp.className = 'timestamp';
+     timestamp.textContent = new Date().toLocaleTimeString();
+
+     const textDiv = document.createElement('div');
+     textDiv.className = 'text';
+     textDiv.textContent = text;
+
+     item.appendChild(timestamp);
+     item.appendChild(textDiv);
+
+     transcriptionContainer.appendChild(item);
+
+     // Auto-scroll to bottom
+     transcriptionContainer.scrollTop = transcriptionContainer.scrollHeight;
+ }

+ // Clear transcriptions
+ function clearTranscriptions() {
+     transcriptionContainer.innerHTML = `
+         <div class="empty-state">
+             <svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke="currentColor">
+                 <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M19 11a7 7 0 01-7 7m0 0a7 7 0 01-7-7m7 7v4m0 0H8m4 0h4m-4-8a3 3 0 01-3-3V5a3 3 0 116 0v6a3 3 0 01-3 3z" />
+             </svg>
+             <p>Click "Start Recording" to begin transcribing Armenian speech</p>
+         </div>
+     `;
+     chunkCount = 0;
+     chunkCountDisplay.textContent = '0';
+     recordingTimeDisplay.textContent = '00:00';
+ }

+ // Event listeners
+ startBtn.addEventListener('click', startRecording);
+ stopBtn.addEventListener('click', stopRecording);
+ clearBtn.addEventListener('click', clearTranscriptions);

+ // Check WebGPU support
+ if (useWebGPUCheckbox.checked && !navigator.gpu) {
+     status.textContent = 'WebGPU not supported, falling back to WASM';
+     status.className = 'error';
+     useWebGPUCheckbox.checked = false;
+     setTimeout(() => initModel(), 2000);
+ } else {
+     // Initialize model on load
+     initModel();
  }
+
+ // Re-initialize if WebGPU setting changes
+ useWebGPUCheckbox.addEventListener('change', () => {
+     if (isRecording) {
+         alert('Please stop recording before changing acceleration settings');
+         useWebGPUCheckbox.checked = !useWebGPUCheckbox.checked;
+         return;
+     }
+     status.textContent = 'Reinitializing model...';
+     status.className = 'loading';
+     startBtn.disabled = true;
+     initModel();
+ });
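
A note on the chunking strategy in startRecording(): the interval deliberately calls mediaRecorder.stop() and then mediaRecorder.start() rather than passing a timeslice to start(). Each stop()/start() cycle yields a self-contained WebM file with its own container header, which decodeAudioData() can decode in isolation; the trade-off is a few milliseconds of lost audio at each boundary. For contrast, a sketch of the timeslice approach and why it would not work here (allChunks is a hypothetical accumulator, not part of this commit):

    // Timeslice mode fires ondataavailable every 5 s, but only the first blob
    // carries the WebM container header; later blobs continue the same stream,
    // so decodeAudioData() on an individual later chunk fails.
    const allChunks = [];
    mediaRecorder.ondataavailable = (event) => allChunks.push(event.data);
    mediaRecorder.start(5000); // chunks are only decodable when concatenated from the start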
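A note on sample rates in processAudioChunk(): decodeAudioData() resamples to the rate of the context doing the decoding, and tempAudioContext is created without a sampleRate option, so audioData will typically be 44.1 or 48 kHz; it is also unclear whether the sampling_rate option passed to transcriber() resamples a raw Float32Array. Assuming ATOM, like other Whisper-family ASR models, expects 16 kHz mono input, it is safer either to decode with new AudioContext({ sampleRate: 16000 }) or to resample explicitly. A minimal sketch of the explicit route (resampleTo16kHz is an illustrative helper, not part of this commit):

    // Render the decoded buffer through a 16 kHz offline graph to resample it.
    async function resampleTo16kHz(audioBuffer) {
        const targetRate = 16000;
        const frameCount = Math.ceil(audioBuffer.duration * targetRate);
        const offline = new OfflineAudioContext(1, frameCount, targetRate);
        const source = offline.createBufferSource();
        source.buffer = audioBuffer;
        source.connect(offline.destination);
        source.start();
        const rendered = await offline.startRendering();
        return rendered.getChannelData(0); // Float32Array at 16 kHz
    }

The call site would then read const audioData = await resampleTo16kHz(audioBuffer); in place of audioBuffer.getChannelData(0).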
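A note on the WebGPU check: navigator.gpu being defined does not guarantee a usable adapter, since requestAdapter() can still return null (for example on a blocklisted GPU). A slightly stronger probe, as a sketch (webgpuAvailable is a hypothetical helper, not part of this commit):

    // navigator.gpu can exist while no adapter is actually available.
    async function webgpuAvailable() {
        if (!navigator.gpu) return false;
        try {
            return (await navigator.gpu.requestAdapter()) !== null;
        } catch {
            return false;
        }
    }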