Chillarmo committed on
Commit 07ef252 · verified · 1 Parent(s): d5e2966

Update index.js

Files changed (1)
  1. index.js +310 -57
index.js CHANGED
@@ -1,76 +1,329 @@
  import { pipeline } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.7.6';

- // Reference the elements that we will need
  const status = document.getElementById('status');
- const fileUpload = document.getElementById('upload');
- const imageContainer = document.getElementById('container');
- const example = document.getElementById('example');

- const EXAMPLE_URL = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/city-streets.jpg';

- // Create a new object detection pipeline
- status.textContent = 'Loading model...';
- const detector = await pipeline('object-detection', 'Xenova/detr-resnet-50');
- status.textContent = 'Ready';
-
- example.addEventListener('click', (e) => {
-     e.preventDefault();
-     detect(EXAMPLE_URL);
- });
-
- fileUpload.addEventListener('change', function (e) {
-     const file = e.target.files[0];
-     if (!file) {
-         return;
      }

-     const reader = new FileReader();
-
-     // Set up a callback when the file is loaded
-     reader.onload = e2 => detect(e2.target.result);
-
-     reader.readAsDataURL(file);
- });

- // Detect objects in the image
- async function detect(img) {
-     imageContainer.innerHTML = '';
-     imageContainer.style.backgroundImage = `url(${img})`;

-     status.textContent = 'Analysing...';
-     const output = await detector(img, {
-         threshold: 0.5,
-         percentage: true,
      });
-     status.textContent = '';
-     output.forEach(renderBox);
  }

- // Render a bounding box and label on the image
- function renderBox({ box, label }) {
-     const { xmax, xmin, ymax, ymin } = box;

-     // Generate a random color for the box
-     const color = '#' + Math.floor(Math.random() * 0xFFFFFF).toString(16).padStart(6, 0);

-     // Draw the box
-     const boxElement = document.createElement('div');
-     boxElement.className = 'bounding-box';
-     Object.assign(boxElement.style, {
-         borderColor: color,
-         left: 100 * xmin + '%',
-         top: 100 * ymin + '%',
-         width: 100 * (xmax - xmin) + '%',
-         height: 100 * (ymax - ymin) + '%',
-     })

-     // Draw label
-     const labelElement = document.createElement('span');
-     labelElement.textContent = label;
-     labelElement.className = 'bounding-box-label';
-     labelElement.style.backgroundColor = color;

-     boxElement.appendChild(labelElement);
-     imageContainer.appendChild(boxElement);
  }

  import { pipeline } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.7.6';

+ // Get DOM elements
  const status = document.getElementById('status');
+ const startBtn = document.getElementById('startBtn');
+ const stopBtn = document.getElementById('stopBtn');
+ const clearBtn = document.getElementById('clearBtn');
+ const transcriptionContainer = document.getElementById('transcriptionContainer');
+ const chunkLengthSelect = document.getElementById('chunkLength');
+ const useWebGPUCheckbox = document.getElementById('useWebGPU');
+ const chunkCountDisplay = document.getElementById('chunkCount');
+ const recordingTimeDisplay = document.getElementById('recordingTime');
+ const visualizerBars = document.querySelectorAll('.bar');

+ // State
+ let transcriber = null;
+ let mediaStream = null;
+ let audioContext = null;
+ let mediaRecorder = null;
+ let recordedChunks = [];
+ let isRecording = false;
+ let chunkCount = 0;
+ let recordingStartTime = null;
+ let recordingInterval = null;
+ let analyser = null;
+ let animationId = null;

+ // Initialize the ATOM model
+ async function initModel() {
+     try {
+         status.textContent = 'Loading ATOM model... This may take a minute.';
+         status.className = 'loading';
+
+         const device = useWebGPUCheckbox.checked ? 'webgpu' : 'wasm';
+
+         // Load your custom ATOM model
+         transcriber = await pipeline(
+             'automatic-speech-recognition',
+             'Chillarmo/ATOM',
+             {
+                 device: device,
+                 progress_callback: (progress) => {
+                     if (progress.status === 'downloading') {
+                         const percent = Math.round((progress.loaded / progress.total) * 100);
+                         status.textContent = `Downloading ${progress.file}: ${percent}%`;
+                     } else if (progress.status === 'loading') {
+                         status.textContent = `Loading ${progress.file}...`;
+                     }
+                 }
+             }
+         );
+
+         status.textContent = 'Model loaded! Ready to transcribe Armenian speech.';
+         status.className = 'ready';
+         startBtn.disabled = false;
+     } catch (error) {
+         console.error('Model loading error:', error);
+         status.textContent = `Error loading model: ${error.message}`;
+         status.className = 'error';
      }
+ }

+ // Format time as MM:SS
+ function formatTime(seconds) {
+     const mins = Math.floor(seconds / 60);
+     const secs = Math.floor(seconds % 60);
+     return `${mins.toString().padStart(2, '0')}:${secs.toString().padStart(2, '0')}`;
+ }

+ // Update recording time
+ function updateRecordingTime() {
+     if (recordingStartTime) {
+         const elapsed = (Date.now() - recordingStartTime) / 1000;
+         recordingTimeDisplay.textContent = formatTime(elapsed);
+     }
+ }

+ // Visualize audio
+ function visualizeAudio() {
+     if (!analyser || !isRecording) return;
+
+     const dataArray = new Uint8Array(analyser.frequencyBinCount);
+     analyser.getByteFrequencyData(dataArray);
+
+     // Sample the data for visualization
+     const barCount = visualizerBars.length;
+     const step = Math.floor(dataArray.length / barCount);
+
+     visualizerBars.forEach((bar, index) => {
+         const value = dataArray[index * step];
+         const height = (value / 255) * 70 + 4; // 4px minimum, 74px maximum
+         bar.style.height = `${height}px`;
+     });
+
+     animationId = requestAnimationFrame(visualizeAudio);
+ }

+ // Start recording
+ async function startRecording() {
+     try {
+         // Request microphone access
+         mediaStream = await navigator.mediaDevices.getUserMedia({
+             audio: {
+                 channelCount: 1,
+                 sampleRate: 16000,
+             }
+         });
+
+         // Set up audio context for visualization
+         audioContext = new AudioContext({ sampleRate: 16000 });
+         const source = audioContext.createMediaStreamSource(mediaStream);
+         analyser = audioContext.createAnalyser();
+         analyser.fftSize = 256;
+         source.connect(analyser);
+
+         // Set up MediaRecorder
+         mediaRecorder = new MediaRecorder(mediaStream);
+         recordedChunks = [];
+
+         mediaRecorder.ondataavailable = (event) => {
+             if (event.data.size > 0) {
+                 recordedChunks.push(event.data);
+             }
+         };
+
+         mediaRecorder.onstop = async () => {
+             if (recordedChunks.length > 0) {
+                 await processAudioChunk(recordedChunks);
+                 recordedChunks = [];
+             }
+         };
+
+         // Start recording
+         const chunkDuration = parseInt(chunkLengthSelect.value) * 1000;
+         mediaRecorder.start();
+
+         // Schedule automatic chunk processing
+         const chunkInterval = setInterval(() => {
+             if (!isRecording) {
+                 clearInterval(chunkInterval);
+                 return;
+             }
+
+             mediaRecorder.stop();
+             mediaRecorder.start();
+         }, chunkDuration);
+
+         isRecording = true;
+         recordingStartTime = Date.now();
+         recordingInterval = setInterval(updateRecordingTime, 100);
+
+         status.textContent = 'Recording... Speak in Armenian';
+         status.className = 'recording';
+         startBtn.disabled = true;
+         stopBtn.disabled = false;
+
+         // Start visualization
+         visualizeAudio();
+
+     } catch (error) {
+         console.error('Error starting recording:', error);
+         status.textContent = `Error: ${error.message}`;
+         status.className = 'error';
+     }
+ }

+ // Stop recording
+ function stopRecording() {
+     isRecording = false;
+
+     if (mediaRecorder && mediaRecorder.state !== 'inactive') {
+         mediaRecorder.stop();
+     }
+
+     if (mediaStream) {
+         mediaStream.getTracks().forEach(track => track.stop());
+     }
+
+     if (audioContext) {
+         audioContext.close();
+     }
+
+     if (recordingInterval) {
+         clearInterval(recordingInterval);
+     }
+
+     if (animationId) {
+         cancelAnimationFrame(animationId);
+     }
+
+     // Reset visualizer
+     visualizerBars.forEach(bar => {
+         bar.style.height = '4px';
      });
+
+     status.textContent = 'Recording stopped. Ready for next recording.';
+     status.className = 'ready';
+     startBtn.disabled = false;
+     stopBtn.disabled = true;
  }

+ // Process audio chunk
+ async function processAudioChunk(chunks) {
+     try {
+         status.textContent = 'Processing audio...';
+         status.className = 'processing';
+
+         // Create audio blob
+         const audioBlob = new Blob(chunks, { type: 'audio/webm' });
+
+         // Convert to array buffer
+         const arrayBuffer = await audioBlob.arrayBuffer();
+
+         // Decode audio
+         const tempAudioContext = new (window.AudioContext || window.webkitAudioContext)();
+         const audioBuffer = await tempAudioContext.decodeAudioData(arrayBuffer);
+
+         // Get audio data as Float32Array
+         const audioData = audioBuffer.getChannelData(0);
+
+         // Transcribe with ATOM model
+         const result = await transcriber(audioData, {
+             sampling_rate: audioBuffer.sampleRate,
+         });
+
+         // Add to transcription
+         if (result && result.text && result.text.trim()) {
+             addTranscription(result.text.trim());
+             chunkCount++;
+             chunkCountDisplay.textContent = chunkCount;
+         }
+
+         if (isRecording) {
+             status.textContent = 'Recording... Speak in Armenian';
+             status.className = 'recording';
+         } else {
+             status.textContent = 'Ready for next recording.';
+             status.className = 'ready';
+         }
+
+         tempAudioContext.close();
+
+     } catch (error) {
+         console.error('Error processing audio:', error);
+         status.textContent = `Processing error: ${error.message}`;
+         status.className = 'error';
+
+         // Restore recording status if still recording
+         setTimeout(() => {
+             if (isRecording) {
+                 status.textContent = 'Recording... Speak in Armenian';
+                 status.className = 'recording';
+             }
+         }, 2000);
+     }
+ }

+ // Add transcription to UI
+ function addTranscription(text) {
+     // Remove empty state if present
+     const emptyState = transcriptionContainer.querySelector('.empty-state');
+     if (emptyState) {
+         emptyState.remove();
+     }
+
+     // Create transcription item
+     const item = document.createElement('div');
+     item.className = 'transcription-item';
+
+     const timestamp = document.createElement('div');
+     timestamp.className = 'timestamp';
+     timestamp.textContent = new Date().toLocaleTimeString();
+
+     const textDiv = document.createElement('div');
+     textDiv.className = 'text';
+     textDiv.textContent = text;
+
+     item.appendChild(timestamp);
+     item.appendChild(textDiv);
+
+     transcriptionContainer.appendChild(item);
+
+     // Auto-scroll to bottom
+     transcriptionContainer.scrollTop = transcriptionContainer.scrollHeight;
+ }

+ // Clear transcriptions
+ function clearTranscriptions() {
+     transcriptionContainer.innerHTML = `
+         <div class="empty-state">
+             <svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke="currentColor">
+                 <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M19 11a7 7 0 01-7 7m0 0a7 7 0 01-7-7m7 7v4m0 0H8m4 0h4m-4-8a3 3 0 01-3-3V5a3 3 0 116 0v6a3 3 0 01-3 3z" />
+             </svg>
+             <p>Click "Start Recording" to begin transcribing Armenian speech</p>
+         </div>
+     `;
+     chunkCount = 0;
+     chunkCountDisplay.textContent = '0';
+     recordingTimeDisplay.textContent = '00:00';
+ }

+ // Event listeners
+ startBtn.addEventListener('click', startRecording);
+ stopBtn.addEventListener('click', stopRecording);
+ clearBtn.addEventListener('click', clearTranscriptions);

+ // Check WebGPU support
+ if (useWebGPUCheckbox.checked && !navigator.gpu) {
+     status.textContent = 'WebGPU not supported, falling back to WASM';
+     status.className = 'error';
+     useWebGPUCheckbox.checked = false;
+     setTimeout(() => initModel(), 2000);
+ } else {
+     // Initialize model on load
+     initModel();
  }
+
+ // Re-initialize if WebGPU setting changes
+ useWebGPUCheckbox.addEventListener('change', () => {
+     if (isRecording) {
+         alert('Please stop recording before changing acceleration settings');
+         useWebGPUCheckbox.checked = !useWebGPUCheckbox.checked;
+         return;
+     }
+     status.textContent = 'Reinitializing model...';
+     status.className = 'loading';
+     startBtn.disabled = true;
+     initModel();
+ });
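
A note on the chunking strategy in startRecording(): the interval deliberately calls mediaRecorder.stop() and then mediaRecorder.start() rather than passing a timeslice to start(). Each stop()/start() cycle yields a self-contained WebM file with its own container header, which decodeAudioData() can decode in isolation; the trade-off is a few milliseconds of lost audio at each boundary. For contrast, a sketch of the timeslice approach and why it would not work here (allChunks is a hypothetical accumulator, not part of this commit):

    // Timeslice mode fires ondataavailable every 5 s, but only the first blob
    // carries the WebM container header; later blobs continue the same stream,
    // so decodeAudioData() on an individual later chunk fails.
    const allChunks = [];
    mediaRecorder.ondataavailable = (event) => allChunks.push(event.data);
    mediaRecorder.start(5000); // chunks are only decodable when concatenated from the start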
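A note on sample rates in processAudioChunk(): decodeAudioData() resamples to the rate of the context doing the decoding, and tempAudioContext is created without a sampleRate option, so audioData will typically be 44.1 or 48 kHz; it is also unclear whether the sampling_rate option passed to transcriber() resamples a raw Float32Array. Assuming ATOM, like other Whisper-family ASR models, expects 16 kHz mono input, it is safer either to decode with new AudioContext({ sampleRate: 16000 }) or to resample explicitly. A minimal sketch of the explicit route (resampleTo16kHz is an illustrative helper, not part of this commit):

    // Render the decoded buffer through a 16 kHz offline graph to resample it.
    async function resampleTo16kHz(audioBuffer) {
        const targetRate = 16000;
        const frameCount = Math.ceil(audioBuffer.duration * targetRate);
        const offline = new OfflineAudioContext(1, frameCount, targetRate);
        const source = offline.createBufferSource();
        source.buffer = audioBuffer;
        source.connect(offline.destination);
        source.start();
        const rendered = await offline.startRendering();
        return rendered.getChannelData(0); // Float32Array at 16 kHz
    }

The call site would then read const audioData = await resampleTo16kHz(audioBuffer); in place of audioBuffer.getChannelData(0).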
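A note on the WebGPU check: navigator.gpu being defined does not guarantee a usable adapter, since requestAdapter() can still return null (for example on a blocklisted GPU). A slightly stronger probe, as a sketch (webgpuAvailable is a hypothetical helper, not part of this commit):

    // navigator.gpu can exist while no adapter is actually available.
    async function webgpuAvailable() {
        if (!navigator.gpu) return false;
        try {
            return (await navigator.gpu.requestAdapter()) !== null;
        } catch {
            return false;
        }
    }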