gsaon committed on
Commit
dbe1114
·
verified ·
1 Parent(s): 2abe16e

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.js +11 -5
  2. index.html +33 -7
app.js CHANGED
@@ -59,6 +59,7 @@ const clearBtn = document.getElementById('clearBtn');
59
  const progressSection = document.getElementById('progressSection');
60
  const progressFill = document.getElementById('progressFill');
61
  const progressText = document.getElementById('progressText');
 
62
  const gpuInfo = document.getElementById('gpuInfo');
63
 
64
  // Recording state
@@ -126,7 +127,7 @@ async function initModels() {
126
  const fileProgress = {};
127
  model = await GraniteSpeechForConditionalGeneration.from_pretrained(MODEL_ID, {
128
  dtype: {
129
- audio_encoder: 'q4f16',
130
  embed_tokens: 'q4f16',
131
  decoder_model_merged: 'q4f16',
132
  },
@@ -240,10 +241,15 @@ async function transcribe() {
240
  showProgress(true);
241
 
242
  try {
243
- // Get speech segments using VAD
244
- updateProgress(5, 'Detecting speech segments...');
245
- const segments = await getSpeechSegments(currentAudioData, SAMPLE_RATE);
246
- console.log(`VAD found ${segments.length} segment(s)`);
 
 
 
 
 
247
 
248
  // Start audio playback immediately
249
  audioPlayer.currentTime = 0;
 
59
  const progressSection = document.getElementById('progressSection');
60
  const progressFill = document.getElementById('progressFill');
61
  const progressText = document.getElementById('progressText');
62
+ const vadCheckbox = document.getElementById('vadCheckbox');
63
  const gpuInfo = document.getElementById('gpuInfo');
64
 
65
  // Recording state
 
127
  const fileProgress = {};
128
  model = await GraniteSpeechForConditionalGeneration.from_pretrained(MODEL_ID, {
129
  dtype: {
130
+ audio_encoder: 'q4',
131
  embed_tokens: 'q4f16',
132
  decoder_model_merged: 'q4f16',
133
  },
 
241
  showProgress(true);
242
 
243
  try {
244
+ // Get speech segments using VAD, or treat entire audio as one segment
245
+ let segments;
246
+ if (vadCheckbox.checked) {
247
+ updateProgress(5, 'Detecting speech segments...');
248
+ segments = await getSpeechSegments(currentAudioData, SAMPLE_RATE);
249
+ console.log(`VAD found ${segments.length} segment(s)`);
250
+ } else {
251
+ segments = [{ start: 0, end: currentAudioData.length / SAMPLE_RATE }];
252
+ }
253
 
254
  // Start audio playback immediately
255
  audioPlayer.currentTime = 0;
index.html CHANGED
@@ -11,8 +11,7 @@
11
  <div class="browser-error" id="browserError" style="display: none;">
12
  <div class="browser-error-content">
13
  <h2>Browser Not Supported</h2>
14
- <p>This demo requires <strong>WebGPU</strong>, which is not available in your browser.</p>
15
- <p>Please use <strong>Google Chrome 113+</strong> or <strong>Microsoft Edge 113+</strong> on desktop.</p>
16
  </div>
17
  </div>
18
 
@@ -90,6 +89,10 @@
90
  <option value="translate_ja">Translate to Japanese</option>
91
  </select>
92
  </div>
 
 
 
 
93
  <label class="checkbox-row">
94
  <input type="checkbox" id="punctuationCheckbox" checked>
95
  <span>Add punctuation (English only)</span>
@@ -127,7 +130,7 @@
127
  <!-- Footer -->
128
  <div class="footer">
129
  Made with
130
- <a href="https://huggingface.co/ibm-granite/granite-4.0-1b-speech" target="_blank">Granite 4.0 1B Speech</a> (quantized)
131
  and
132
  <a href="https://huggingface.co/docs/transformers.js" target="_blank">Transformers.js</a>
133
  <br>
@@ -137,10 +140,33 @@
137
  </div>
138
 
139
  <script>
140
- if (!navigator.gpu) {
141
- document.getElementById('browserError').style.display = 'flex';
142
- document.getElementById('appContainer').style.display = 'none';
143
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
144
  </script>
145
  <!-- ORT global is retained for VAD (vad.js) and punctuation (punctuator.js) which use WASM -->
146
  <script src="https://cdn.jsdelivr.net/npm/onnxruntime-web@1.24.3/dist/ort.all.min.js"></script>
 
11
  <div class="browser-error" id="browserError" style="display: none;">
12
  <div class="browser-error-content">
13
  <h2>Browser Not Supported</h2>
14
+ <p>This demo requires <strong>WebGPU</strong> on a desktop browser. Please use <strong>Google Chrome 113+</strong> or <strong>Microsoft Edge 113+</strong>.</p>
 
15
  </div>
16
  </div>
17
 
 
89
  <option value="translate_ja">Translate to Japanese</option>
90
  </select>
91
  </div>
92
+ <label class="checkbox-row">
93
+ <input type="checkbox" id="vadCheckbox" checked>
94
+ <span>Speech segmentation (VAD)</span>
95
+ </label>
96
  <label class="checkbox-row">
97
  <input type="checkbox" id="punctuationCheckbox" checked>
98
  <span>Add punctuation (English only)</span>
 
130
  <!-- Footer -->
131
  <div class="footer">
132
  Made with
133
+ <a href="https://huggingface.co/ibm-granite/granite-4.0-1b-speech" target="_blank">Granite 4.0 1B Speech</a>
134
  and
135
  <a href="https://huggingface.co/docs/transformers.js" target="_blank">Transformers.js</a>
136
  <br>
 
140
  </div>
141
 
142
  <script>
143
+ (async () => {
144
+ let supported = false;
145
+ let reason = '';
146
+ try {
147
+ if (navigator.gpu) {
148
+ const adapter = await navigator.gpu.requestAdapter();
149
+ if (adapter) {
150
+ const info = await adapter.requestAdapterInfo?.() || {};
151
+ const isSoftware = /swiftshader|llvmpipe|software/i.test(info.description || '');
152
+ if (!isSoftware) supported = true;
153
+ else reason = 'Software rendering detected — a GPU with WebGPU support is required.';
154
+ }
155
+ }
156
+ } catch (e) {}
157
+ const isMobile = /Android|iPhone|iPad|iPod|Mobile/i.test(navigator.userAgent);
158
+ if (isMobile) {
159
+ supported = false;
160
+ reason = 'Mobile browsers are not yet supported. This demo requires ~1.4 GB of GPU memory which exceeds mobile device limits. Please use a desktop browser.';
161
+ }
162
+ if (!supported) {
163
+ document.getElementById('browserError').style.display = 'flex';
164
+ document.getElementById('appContainer').style.display = 'none';
165
+ if (reason) {
166
+ document.querySelector('.browser-error-content p').textContent = reason;
167
+ }
168
+ }
169
+ })();
170
  </script>
171
  <!-- ORT global is retained for VAD (vad.js) and punctuation (punctuator.js) which use WASM -->
172
  <script src="https://cdn.jsdelivr.net/npm/onnxruntime-web@1.24.3/dist/ort.all.min.js"></script>