Spaces:
Running
Running
<!doctype html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <!-- Document metadata: responsive viewport, page title, short description, and the versioned stylesheet. -->
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Granite Speech WebGPU</title>
  <meta name="description" content="Speech recognition and translation directly in your browser">
  <link rel="stylesheet" href="style.css?v=3">
</head>
| <body> | |
<!--
  Browser compatibility banner. Hidden by default; the inline capability-check
  script on this page switches it to display:flex when WebGPU is unusable and
  may replace the paragraph text with a more specific reason.
  role="alert" lets assistive technology announce the banner when it appears.
-->
<div class="browser-error" id="browserError" role="alert" style="display: none;">
  <div class="browser-error-content">
    <h2>Browser Not Supported</h2>
    <p>This demo requires <strong>WebGPU</strong> on a desktop browser. Please use <strong>Google Chrome 113+</strong> or <strong>Microsoft Edge 113+</strong>.</p>
  </div>
</div>
| <div class="container" id="appContainer"> | |
<!-- Header: page title followed by a one-line tagline -->
<h1>Granite Speech WebGPU</h1>
<h2>Speech recognition and translation directly in your browser</h2>
<!--
  Audio input card: a Record button (starts disabled), an Upload tile that
  wraps a hidden file input accepting audio files, and a progress bar.
  The SVG icons are decorative; the visible text labels name the controls.
-->
<div class="input-card">
  <div class="input-options">
    <button id="recordBtn" class="input-tile" type="button" disabled>
      <!-- Microphone icon (shown by default) -->
      <svg class="mic-icon" xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" aria-hidden="true">
        <path stroke-linecap="round" stroke-linejoin="round" d="M12 18.75a6 6 0 006-6v-1.5m-6 7.5a6 6 0 01-6-6v-1.5m6 7.5v3.75m-3.75 0h7.5M12 15.75a3 3 0 01-3-3V4.5a3 3 0 116 0v8.25a3 3 0 01-3 3z" />
      </svg>
      <!-- Stop icon (hidden by default) -->
      <svg class="stop-icon" xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" style="display: none;" aria-hidden="true">
        <rect x="6" y="6" width="12" height="12" rx="1" />
      </svg>
      <span>Record</span>
    </button>
    <div class="divider"></div>
    <!-- NOTE(review): the file input is hidden, so this tile is only reachable by pointer; confirm keyboard access is handled elsewhere. -->
    <label class="input-tile file-label">
      <svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" aria-hidden="true">
        <path stroke-linecap="round" stroke-linejoin="round" d="M3.75 9.776c.112-.017.227-.026.344-.026h15.812c.117 0 .232.009.344.026m-16.5 0a2.25 2.25 0 00-1.883 2.542l.857 6a2.25 2.25 0 002.227 1.932H19.05a2.25 2.25 0 002.227-1.932l.857-6a2.25 2.25 0 00-1.883-2.542m-16.5 0V6A2.25 2.25 0 016 3.75h3.879a1.5 1.5 0 011.06.44l2.122 2.12a1.5 1.5 0 001.06.44H18A2.25 2.25 0 0120.25 9v.776" />
      </svg>
      <span>Upload</span>
      <input type="file" id="audioFile" accept="audio/*" hidden>
    </label>
  </div>
  <div class="progress-bar">
    <div class="progress-fill" id="progressFill"></div>
  </div>
</div>
<!--
  Status line: a coloured dot plus a text message (initially "Loading...").
  role="status" makes this a polite live region so text changes are announced;
  the dot is decorative and hidden from assistive technology.
-->
<div class="status-section" id="statusSection" role="status">
  <span class="status-dot" id="statusDot" aria-hidden="true"></span>
  <span id="statusText">Loading...</span>
</div>
<!--
  Audio player (hidden initially): play/pause button, waveform canvas with a
  progress overlay, elapsed-time readout, clear button, and the backing
  <audio> element. Both buttons are icon-only, so each carries an aria-label;
  the icons themselves are hidden from assistive technology.
-->
<div class="audio-player" id="audioPreview" style="display: none;">
  <button class="play-btn" id="playBtn" type="button" aria-label="Play or pause audio">
    <svg class="play-icon" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" aria-hidden="true">
      <path d="M8 5v14l11-7z"/>
    </svg>
    <svg class="pause-icon" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" style="display: none;" aria-hidden="true">
      <path d="M6 19h4V5H6v14zm8-14v14h4V5h-4z"/>
    </svg>
  </button>
  <div class="waveform-container">
    <canvas id="waveformCanvas"></canvas>
    <div class="waveform-progress" id="waveformProgress"></div>
  </div>
  <span class="audio-time" id="audioTime">0:00</span>
  <button class="clear-btn" id="clearBtn" type="button" title="Clear" aria-label="Clear">
    <svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" aria-hidden="true">
      <path stroke-linecap="round" stroke-linejoin="round" d="M6 18L18 6M6 6l12 12" />
    </svg>
  </button>
  <audio id="audioPlayer" style="display: none;"></audio>
</div>
<!--
  Transcribe section (hidden initially): task selector (transcription or
  translation into one of six languages), two option checkboxes that are both
  checked by default, and the Transcribe button (starts disabled).
-->
<div class="transcribe-section" id="transcribeSection" style="display: none;">
  <div class="task-row">
    <label for="promptSelect">Task:</label>
    <select id="promptSelect">
      <option value="transcribe">Transcribe</option>
      <option value="translate_en">Translate to English</option>
      <option value="translate_fr">Translate to French</option>
      <option value="translate_de">Translate to German</option>
      <option value="translate_es">Translate to Spanish</option>
      <option value="translate_pt">Translate to Portuguese</option>
      <option value="translate_ja">Translate to Japanese</option>
    </select>
  </div>
  <div class="checkbox-group">
    <label class="checkbox-row">
      <input type="checkbox" id="vadCheckbox" checked>
      <span>VAD segmentation</span>
    </label>
    <label class="checkbox-row">
      <input type="checkbox" id="punctuationCheckbox" checked>
      <span>Punctuation</span>
    </label>
  </div>
  <button id="transcribeBtn" class="transcribe-btn" type="button" disabled>
    Transcribe
  </button>
</div>
<!-- Model loading progress (hidden initially): a single text line, initially "Loading model..." -->
<div class="model-progress" id="progressSection" style="display: none;">
  <span id="progressText">Loading model...</span>
</div>
<!--
  Transcript output card (hidden initially): a header with Copy and Download
  icon buttons, and the container that receives the transcript text.
  The buttons are icon-only, so each has an aria-label in addition to its
  tooltip; the icons are hidden from assistive technology.
-->
<div class="transcript-card" id="transcriptCard" style="display: none;">
  <div class="transcript-header">
    <span>Transcript</span>
    <div class="transcript-actions">
      <button id="copyBtn" class="icon-btn" type="button" title="Copy" aria-label="Copy">
        <svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" aria-hidden="true">
          <path stroke-linecap="round" stroke-linejoin="round" d="M15.666 3.888A2.25 2.25 0 0013.5 2.25h-3c-1.03 0-1.9.693-2.166 1.638m7.332 0c.055.194.084.4.084.612v0a.75.75 0 01-.75.75H9.75a.75.75 0 01-.75-.75v0c0-.212.03-.418.084-.612m7.332 0c.646.049 1.288.11 1.927.184 1.1.128 1.907 1.077 1.907 2.185V19.5a2.25 2.25 0 01-2.25 2.25H6.75A2.25 2.25 0 014.5 19.5V6.257c0-1.108.806-2.057 1.907-2.185a48.208 48.208 0 011.927-.184" />
        </svg>
      </button>
      <button id="downloadBtn" class="icon-btn" type="button" title="Download" aria-label="Download">
        <svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" aria-hidden="true">
          <path stroke-linecap="round" stroke-linejoin="round" d="M3 16.5v2.25A2.25 2.25 0 005.25 21h13.5A2.25 2.25 0 0021 18.75V16.5M16.5 12L12 16.5m0 0L7.5 12m4.5 4.5V3" />
        </svg>
      </button>
    </div>
  </div>
  <div class="transcript-output" id="outputText"></div>
</div>
<!--
  Footer: credits for the model and runtime, plus a privacy note.
  The links open in a new tab; rel="noopener" keeps the opened page from
  getting a window.opener handle back to this one.
-->
<div class="footer">
  Made with
  <a href="https://huggingface.co/ibm-granite/granite-4.0-1b-speech" target="_blank" rel="noopener">Granite 4.0 1B Speech</a> (quantized)
  and
  <a href="https://huggingface.co/docs/transformers.js" target="_blank" rel="noopener">Transformers.js</a>
  <br>
  <span class="privacy-note">Your audio and transcription never leave your device</span>
</div>
<!-- Empty placeholder; NOTE(review): presumably filled with adapter details by app.js (confirm) -->
<div class="gpu-info" id="gpuInfo"></div>
| </div> | |
<script>
  /*
   * WebGPU capability gate.
   *
   * Decides whether the demo can run in this browser and, if not, hides the
   * app container and shows the #browserError banner (optionally replacing
   * its paragraph with a specific reason).
   *
   * The page counts as supported only when:
   *   - navigator.gpu exists and requestAdapter() yields an adapter,
   *   - that adapter is not a software/fallback implementation, and
   *   - the user agent does not look like a mobile browser.
   */
  (async () => {
    let supported = false;
    let reason = '';

    try {
      if (navigator.gpu) {
        const adapter = await navigator.gpu.requestAdapter();
        if (adapter) {
          // GPUAdapter.info is the current synchronous API. requestAdapterInfo()
          // is its removed predecessor, kept only as a fallback for older
          // browsers. Without the first branch, info is always {} on current
          // browsers and the software check below can never trigger.
          const info = adapter.info ?? (await adapter.requestAdapterInfo?.()) ?? {};

          // Browsers may leave `description` (and `device`) empty by default,
          // so the renderer name can show up in any of these fields.
          const adapterText = [info.vendor, info.architecture, info.device, info.description]
            .filter(Boolean)
            .join(' ');

          // The fallback flag has lived on both the adapter and its info
          // object in different spec revisions; honour whichever is present.
          const isSoftware =
            adapter.isFallbackAdapter === true ||
            info.isFallbackAdapter === true ||
            /swiftshader|llvmpipe|software/i.test(adapterText);

          if (!isSoftware) supported = true;
          else reason = 'Software rendering detected — a GPU with WebGPU support is required.';
        }
      }
    } catch (e) {
      // Best effort: a failed probe means "not supported" and the generic
      // banner text is shown. Log the cause instead of discarding it.
      console.warn('WebGPU capability check failed:', e);
    }

    // Mobile browsers are rejected even if they expose a WebGPU adapter.
    const isMobile = /Android|iPhone|iPad|iPod|Mobile/i.test(navigator.userAgent);
    if (isMobile) {
      supported = false;
      reason = 'Mobile browsers are not yet supported. This demo requires ~1.4 GB of GPU memory which exceeds mobile device limits. Please use a desktop browser.';
    }

    if (!supported) {
      document.getElementById('browserError').style.display = 'flex';
      document.getElementById('appContainer').style.display = 'none';
      if (reason) {
        document.querySelector('.browser-error-content p').textContent = reason;
      }
    }
  })();
</script>
<!-- ORT global is retained for VAD (vad.js) and punctuation (punctuator.js) which use WASM -->
<!-- NOTE(review): third-party CDN script is version-pinned but has no Subresource Integrity hash; consider adding integrity + crossorigin. -->
<script src="https://cdn.jsdelivr.net/npm/onnxruntime-web@1.24.3/dist/ort.all.min.js"></script>
<!-- Classic scripts load in order before the deferred module entry point -->
<script src="vad.js?v=1"></script>
<script src="punctuator.js?v=3"></script>
<script type="module" src="app.js?v=55"></script>
| </body> | |
| </html> | |