Spaces:
Running
Running
Upload 2 files
Browse files
- app.js +11 -5
- index.html +33 -7
app.js
CHANGED
|
@@ -59,6 +59,7 @@ const clearBtn = document.getElementById('clearBtn');
|
|
| 59 |
const progressSection = document.getElementById('progressSection');
|
| 60 |
const progressFill = document.getElementById('progressFill');
|
| 61 |
const progressText = document.getElementById('progressText');
|
|
|
|
| 62 |
const gpuInfo = document.getElementById('gpuInfo');
|
| 63 |
|
| 64 |
// Recording state
|
|
@@ -126,7 +127,7 @@ async function initModels() {
|
|
| 126 |
const fileProgress = {};
|
| 127 |
model = await GraniteSpeechForConditionalGeneration.from_pretrained(MODEL_ID, {
|
| 128 |
dtype: {
|
| 129 |
-
audio_encoder: '
|
| 130 |
embed_tokens: 'q4f16',
|
| 131 |
decoder_model_merged: 'q4f16',
|
| 132 |
},
|
|
@@ -240,10 +241,15 @@ async function transcribe() {
|
|
| 240 |
showProgress(true);
|
| 241 |
|
| 242 |
try {
|
| 243 |
-
// Get speech segments using VAD
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 247 |
|
| 248 |
// Start audio playback immediately
|
| 249 |
audioPlayer.currentTime = 0;
|
|
|
|
| 59 |
const progressSection = document.getElementById('progressSection');
|
| 60 |
const progressFill = document.getElementById('progressFill');
|
| 61 |
const progressText = document.getElementById('progressText');
|
| 62 |
+
const vadCheckbox = document.getElementById('vadCheckbox');
|
| 63 |
const gpuInfo = document.getElementById('gpuInfo');
|
| 64 |
|
| 65 |
// Recording state
|
|
|
|
| 127 |
const fileProgress = {};
|
| 128 |
model = await GraniteSpeechForConditionalGeneration.from_pretrained(MODEL_ID, {
|
| 129 |
dtype: {
|
| 130 |
+
audio_encoder: 'q4',
|
| 131 |
embed_tokens: 'q4f16',
|
| 132 |
decoder_model_merged: 'q4f16',
|
| 133 |
},
|
|
|
|
| 241 |
showProgress(true);
|
| 242 |
|
| 243 |
try {
|
| 244 |
+
// Get speech segments using VAD, or treat entire audio as one segment
|
| 245 |
+
let segments;
|
| 246 |
+
if (vadCheckbox.checked) {
|
| 247 |
+
updateProgress(5, 'Detecting speech segments...');
|
| 248 |
+
segments = await getSpeechSegments(currentAudioData, SAMPLE_RATE);
|
| 249 |
+
console.log(`VAD found ${segments.length} segment(s)`);
|
| 250 |
+
} else {
|
| 251 |
+
segments = [{ start: 0, end: currentAudioData.length / SAMPLE_RATE }];
|
| 252 |
+
}
|
| 253 |
|
| 254 |
// Start audio playback immediately
|
| 255 |
audioPlayer.currentTime = 0;
|
index.html
CHANGED
|
@@ -11,8 +11,7 @@
|
|
| 11 |
<div class="browser-error" id="browserError" style="display: none;">
|
| 12 |
<div class="browser-error-content">
|
| 13 |
<h2>Browser Not Supported</h2>
|
| 14 |
-
<p>This demo requires <strong>WebGPU</strong>
|
| 15 |
-
<p>Please use <strong>Google Chrome 113+</strong> or <strong>Microsoft Edge 113+</strong> on desktop.</p>
|
| 16 |
</div>
|
| 17 |
</div>
|
| 18 |
|
|
@@ -90,6 +89,10 @@
|
|
| 90 |
<option value="translate_ja">Translate to Japanese</option>
|
| 91 |
</select>
|
| 92 |
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
| 93 |
<label class="checkbox-row">
|
| 94 |
<input type="checkbox" id="punctuationCheckbox" checked>
|
| 95 |
<span>Add punctuation (English only)</span>
|
|
@@ -127,7 +130,7 @@
|
|
| 127 |
<!-- Footer -->
|
| 128 |
<div class="footer">
|
| 129 |
Made with
|
| 130 |
-
<a href="https://huggingface.co/ibm-granite/granite-4.0-1b-speech" target="_blank">Granite 4.0 1B Speech</a>
|
| 131 |
and
|
| 132 |
<a href="https://huggingface.co/docs/transformers.js" target="_blank">Transformers.js</a>
|
| 133 |
<br>
|
|
@@ -137,10 +140,33 @@
|
|
| 137 |
</div>
|
| 138 |
|
| 139 |
<script>
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 144 |
</script>
|
| 145 |
<!-- ORT global is retained for VAD (vad.js) and punctuation (punctuator.js) which use WASM -->
|
| 146 |
<script src="https://cdn.jsdelivr.net/npm/onnxruntime-web@1.24.3/dist/ort.all.min.js"></script>
|
|
|
|
| 11 |
<div class="browser-error" id="browserError" style="display: none;">
|
| 12 |
<div class="browser-error-content">
|
| 13 |
<h2>Browser Not Supported</h2>
|
| 14 |
+
<p>This demo requires <strong>WebGPU</strong> on a desktop browser. Please use <strong>Google Chrome 113+</strong> or <strong>Microsoft Edge 113+</strong>.</p>
|
|
|
|
| 15 |
</div>
|
| 16 |
</div>
|
| 17 |
|
|
|
|
| 89 |
<option value="translate_ja">Translate to Japanese</option>
|
| 90 |
</select>
|
| 91 |
</div>
|
| 92 |
+
<label class="checkbox-row">
|
| 93 |
+
<input type="checkbox" id="vadCheckbox" checked>
|
| 94 |
+
<span>Speech segmentation (VAD)</span>
|
| 95 |
+
</label>
|
| 96 |
<label class="checkbox-row">
|
| 97 |
<input type="checkbox" id="punctuationCheckbox" checked>
|
| 98 |
<span>Add punctuation (English only)</span>
|
|
|
|
| 130 |
<!-- Footer -->
|
| 131 |
<div class="footer">
|
| 132 |
Made with
|
| 133 |
+
<a href="https://huggingface.co/ibm-granite/granite-4.0-1b-speech" target="_blank">Granite 4.0 1B Speech</a>
|
| 134 |
and
|
| 135 |
<a href="https://huggingface.co/docs/transformers.js" target="_blank">Transformers.js</a>
|
| 136 |
<br>
|
|
|
|
| 140 |
</div>
|
| 141 |
|
| 142 |
<script>
|
| 143 |
+
/**
 * Reports whether WebGPU adapter info points at a CPU-based (software)
 * renderer rather than real GPU hardware.
 *
 * Every identifying field is checked, not just `description`: browsers may
 * leave `description`/`device` empty, while `vendor`/`architecture` can still
 * name the software rasterizer.
 *
 * @param {object} [info] - GPUAdapterInfo-like object; all fields are optional.
 * @returns {boolean} True when any field names a software renderer.
 */
function isSoftwareAdapter(info) {
  const details = info ?? {};
  if (details.isFallbackAdapter === true) {
    return true;
  }
  const fields = [details.vendor, details.architecture, details.device, details.description];
  return fields.some((field) => /swiftshader|llvmpipe|software/i.test(field ?? ''));
}

/**
 * Reports whether a user-agent string looks like a mobile browser.
 *
 * @param {string} [userAgent] - Value of `navigator.userAgent`.
 * @returns {boolean} True for Android / iOS / generic "Mobile" user agents.
 */
function isMobileUserAgent(userAgent) {
  return /Android|iPhone|iPad|iPod|Mobile/i.test(userAgent ?? '');
}

/**
 * Probes for a hardware-backed WebGPU adapter.
 *
 * @param {object} nav - A `navigator`-like object (only `gpu` is read).
 * @returns {Promise<{supported: boolean, reason: string}>} `reason` is empty
 *   when the page's default "not supported" text should be shown unchanged.
 */
async function detectWebGpuSupport(nav) {
  const unsupported = (reason = '') => ({ supported: false, reason });
  try {
    if (!nav?.gpu) {
      return unsupported();
    }
    const adapter = await nav.gpu.requestAdapter();
    if (!adapter) {
      return unsupported();
    }
    // `adapter.info` is the current API; `requestAdapterInfo()` is kept only
    // as a fallback for older browsers that predate the synchronous attribute.
    const info = adapter.info ?? (await adapter.requestAdapterInfo?.()) ?? {};
    if (adapter.isFallbackAdapter === true || isSoftwareAdapter(info)) {
      return unsupported('Software rendering detected — a GPU with WebGPU support is required.');
    }
    return { supported: true, reason: '' };
  } catch (err) {
    // A failed probe is treated as "unsupported", but is logged so that it
    // can be diagnosed instead of disappearing silently.
    console.warn('WebGPU support check failed:', err);
    return unsupported();
  }
}

// Run the check once on load; when the browser is unsupported, swap the app
// container for the error panel and, if known, show the specific reason.
(async () => {
  let { supported, reason } = await detectWebGpuSupport(navigator);
  if (isMobileUserAgent(navigator.userAgent)) {
    supported = false;
    reason = 'Mobile browsers are not yet supported. This demo requires ~1.4 GB of GPU memory which exceeds mobile device limits. Please use a desktop browser.';
  }
  if (supported) {
    return;
  }
  document.getElementById('browserError').style.display = 'flex';
  document.getElementById('appContainer').style.display = 'none';
  if (reason) {
    document.querySelector('.browser-error-content p').textContent = reason;
  }
})().catch((err) => {
  // Never leave the promise floating: surface unexpected failures.
  console.error('Browser support check failed:', err);
});
|
| 170 |
</script>
|
| 171 |
<!-- ORT global is retained for VAD (vad.js) and punctuation (punctuator.js) which use WASM -->
|
| 172 |
<script src="https://cdn.jsdelivr.net/npm/onnxruntime-web@1.24.3/dist/ort.all.min.js"></script>
|