<!--
granite-speech-webgpu / index.html
gsaon's picture
Update index.html
e8fd884 verified
-->
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Granite Speech WebGPU</title>
<link rel="stylesheet" href="style.css?v=3">
</head>
<body>
<!--
  Browser compatibility banner. Hidden by default; the inline support check
  near the end of <body> switches it to display:flex and may replace the
  paragraph text with a more specific reason. role="alert" makes assistive
  technology announce the banner when it is revealed.
-->
<div class="browser-error" id="browserError" role="alert" style="display: none;">
  <div class="browser-error-content">
    <h2>Browser Not Supported</h2>
    <p>This demo requires <strong>WebGPU</strong> on a desktop browser. Please use <strong>Google Chrome 113+</strong> or <strong>Microsoft Edge 113+</strong>.</p>
  </div>
</div>
<div class="container" id="appContainer">
<!-- Header -->
<h1>Granite Speech WebGPU</h1>
<h2>Speech recognition and translation directly in your browser</h2>
<!--
  Audio input card: a record button and a file-upload tile, with a progress bar below.
  The record button starts disabled. Icons are decorative (the visible <span> text
  names each control), so they are hidden from assistive technology.
-->
<div class="input-card">
  <div class="input-options">
    <!-- Explicit type="button" so the control never acts as a submit button -->
    <button type="button" id="recordBtn" class="input-tile" disabled>
      <svg class="mic-icon" xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" aria-hidden="true">
        <path stroke-linecap="round" stroke-linejoin="round" d="M12 18.75a6 6 0 006-6v-1.5m-6 7.5a6 6 0 01-6-6v-1.5m6 7.5v3.75m-3.75 0h7.5M12 15.75a3 3 0 01-3-3V4.5a3 3 0 116 0v8.25a3 3 0 01-3 3z" />
      </svg>
      <svg class="stop-icon" xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" aria-hidden="true" style="display: none;">
        <rect x="6" y="6" width="12" height="12" rx="1" />
      </svg>
      <span>Record</span>
    </button>
    <div class="divider"></div>
    <!-- NOTE(review): the file input is `hidden`, so it cannot receive keyboard focus; confirm the upload tile is reachable without a mouse -->
    <label class="input-tile file-label">
      <svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" aria-hidden="true">
        <path stroke-linecap="round" stroke-linejoin="round" d="M3.75 9.776c.112-.017.227-.026.344-.026h15.812c.117 0 .232.009.344.026m-16.5 0a2.25 2.25 0 00-1.883 2.542l.857 6a2.25 2.25 0 002.227 1.932H19.05a2.25 2.25 0 002.227-1.932l.857-6a2.25 2.25 0 00-1.883-2.542m-16.5 0V6A2.25 2.25 0 016 3.75h3.879a1.5 1.5 0 011.06.44l2.122 2.12a1.5 1.5 0 001.06.44H18A2.25 2.25 0 0120.25 9v.776" />
      </svg>
      <span>Upload</span>
      <input type="file" id="audioFile" accept="audio/*" hidden>
    </label>
  </div>
  <div class="progress-bar">
    <div class="progress-fill" id="progressFill"></div>
  </div>
</div>
<!--
  Status line: a coloured dot plus a text message that starts as "Loading...".
  role="status" makes this a polite live region so that changes to the text
  are announced by screen readers without stealing focus.
-->
<div class="status-section" id="statusSection" role="status">
  <span class="status-dot" id="statusDot"></span>
  <span id="statusText">Loading...</span>
</div>
<!--
  Custom audio player (hidden initially): play/pause button, waveform canvas with a
  progress overlay, elapsed-time readout, a clear button, and the hidden <audio> element.
  Both buttons are icon-only, so each carries an aria-label as its accessible name;
  the SVG icons themselves are hidden from assistive technology.
-->
<div class="audio-player" id="audioPreview" style="display: none;">
  <!-- The label covers both states because the play and pause icons share this one button -->
  <button type="button" class="play-btn" id="playBtn" aria-label="Play or pause audio">
    <svg class="play-icon" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" aria-hidden="true">
      <path d="M8 5v14l11-7z"/>
    </svg>
    <svg class="pause-icon" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" aria-hidden="true" style="display: none;">
      <path d="M6 19h4V5H6v14zm8-14v14h4V5h-4z"/>
    </svg>
  </button>
  <div class="waveform-container">
    <canvas id="waveformCanvas"></canvas>
    <div class="waveform-progress" id="waveformProgress"></div>
  </div>
  <span class="audio-time" id="audioTime">0:00</span>
  <button type="button" class="clear-btn" id="clearBtn" title="Clear" aria-label="Clear audio">
    <svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" aria-hidden="true">
      <path stroke-linecap="round" stroke-linejoin="round" d="M6 18L18 6M6 6l12 12" />
    </svg>
  </button>
  <audio id="audioPlayer" style="display: none;"></audio>
</div>
<!--
  Transcribe section (hidden initially): task selector, two option checkboxes
  (both checked by default), and the button that starts the run (disabled initially).
-->
<div class="transcribe-section" id="transcribeSection" style="display: none;">
  <div class="task-row">
    <label for="promptSelect">Task:</label>
    <select id="promptSelect">
      <option value="transcribe">Transcribe</option>
      <option value="translate_en">Translate to English</option>
      <option value="translate_fr">Translate to French</option>
      <option value="translate_de">Translate to German</option>
      <option value="translate_es">Translate to Spanish</option>
      <option value="translate_pt">Translate to Portuguese</option>
      <option value="translate_ja">Translate to Japanese</option>
    </select>
  </div>
  <div class="checkbox-group">
    <label class="checkbox-row">
      <input type="checkbox" id="vadCheckbox" checked>
      <span>VAD segmentation</span>
    </label>
    <label class="checkbox-row">
      <input type="checkbox" id="punctuationCheckbox" checked>
      <span>Punctuation</span>
    </label>
  </div>
  <!-- Explicit type="button" so the control never acts as a submit button -->
  <button type="button" id="transcribeBtn" class="transcribe-btn" disabled>
    Transcribe
  </button>
</div>
<!-- Model-loading progress message; the wrapper is hidden until it is needed -->
<div
  class="model-progress"
  id="progressSection"
  style="display: none;"
>
  <span id="progressText">Loading model...</span>
</div>
<!--
  Transcript card (hidden initially): a header with copy/download actions and the
  output area. The action buttons are icon-only, so each carries an aria-label as its
  accessible name (title alone is not reliably exposed); the SVG icons are decorative.
-->
<div class="transcript-card" id="transcriptCard" style="display: none;">
  <div class="transcript-header">
    <span>Transcript</span>
    <div class="transcript-actions">
      <button type="button" id="copyBtn" class="icon-btn" title="Copy" aria-label="Copy transcript">
        <svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" aria-hidden="true">
          <path stroke-linecap="round" stroke-linejoin="round" d="M15.666 3.888A2.25 2.25 0 0013.5 2.25h-3c-1.03 0-1.9.693-2.166 1.638m7.332 0c.055.194.084.4.084.612v0a.75.75 0 01-.75.75H9.75a.75.75 0 01-.75-.75v0c0-.212.03-.418.084-.612m7.332 0c.646.049 1.288.11 1.927.184 1.1.128 1.907 1.077 1.907 2.185V19.5a2.25 2.25 0 01-2.25 2.25H6.75A2.25 2.25 0 014.5 19.5V6.257c0-1.108.806-2.057 1.907-2.185a48.208 48.208 0 011.927-.184" />
        </svg>
      </button>
      <button type="button" id="downloadBtn" class="icon-btn" title="Download" aria-label="Download transcript">
        <svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" aria-hidden="true">
          <path stroke-linecap="round" stroke-linejoin="round" d="M3 16.5v2.25A2.25 2.25 0 005.25 21h13.5A2.25 2.25 0 0021 18.75V16.5M16.5 12L12 16.5m0 0L7.5 12m4.5 4.5V3" />
        </svg>
      </button>
    </div>
  </div>
  <div class="transcript-output" id="outputText"></div>
</div>
<!--
  Footer: credits and privacy note. Both links open in a new tab, so rel="noopener"
  is set explicitly to keep the opened page from reaching back via window.opener.
-->
<div class="footer">
  Made with
  <a href="https://huggingface.co/ibm-granite/granite-4.0-1b-speech" target="_blank" rel="noopener">Granite 4.0 1B Speech</a> (quantized)
  and
  <a href="https://huggingface.co/docs/transformers.js" target="_blank" rel="noopener">Transformers.js</a>
  <br>
  <span class="privacy-note">Your audio and transcription never leave your device</span>
</div>
<div class="gpu-info" id="gpuInfo"></div>
</div>
<script>
(async () => {
  // Startup gate: decides whether the demo can run in this browser. When it
  // cannot, the app container is hidden and the #browserError banner is shown,
  // with a specific reason when one is known.
  //
  // The browser counts as supported only if WebGPU is exposed, an adapter is
  // returned, the adapter is not a software/fallback renderer, and the user
  // agent does not look like a mobile device.

  // Names that identify CPU-based (software) WebGPU implementations.
  const SOFTWARE_RENDERER = /swiftshader|llvmpipe|software/i;

  let supported = false;
  let reason = '';

  try {
    if (navigator.gpu) {
      const adapter = await navigator.gpu.requestAdapter();
      if (adapter) {
        // `adapter.info` is the current API. `requestAdapterInfo()` was removed
        // from the WebGPU spec and is kept only as a fallback for older browsers;
        // relying on it alone yields `{}` on current browsers and silently
        // disables the software-renderer check.
        const info = adapter.info || (await adapter.requestAdapterInfo?.()) || {};

        // The fallback flag lives on the info object in newer implementations
        // and on the adapter itself in older ones.
        const isFallback = Boolean(info.isFallbackAdapter ?? adapter.isFallbackAdapter);

        // Browsers may leave `description` empty, so every identifying field is checked.
        const identity = [info.vendor, info.architecture, info.device, info.description]
          .filter(Boolean)
          .join(' ');

        if (isFallback || SOFTWARE_RENDERER.test(identity)) {
          reason = 'Software rendering detected — a GPU with WebGPU support is required.';
        } else {
          supported = true;
        }
      }
    }
  } catch (e) {
    // Best effort: any failure leaves `supported` false so the generic banner
    // is shown, but the cause is logged instead of being silently discarded.
    console.warn('WebGPU support check failed:', e);
  }

  // Mobile user agents are rejected regardless of WebGPU availability; this
  // reason replaces any reason set above.
  const isMobile = /Android|iPhone|iPad|iPod|Mobile/i.test(navigator.userAgent);
  if (isMobile) {
    supported = false;
    reason = 'Mobile browsers are not yet supported. This demo requires ~1.4 GB of GPU memory which exceeds mobile device limits. Please use a desktop browser.';
  }

  if (!supported) {
    document.getElementById('browserError').style.display = 'flex';
    document.getElementById('appContainer').style.display = 'none';
    if (reason) {
      // Replace the banner's default text only when a specific reason is known.
      const message = document.querySelector('.browser-error-content p');
      if (message) message.textContent = reason;
    }
  }
})();
</script>
<!-- ORT global is retained for VAD (vad.js) and punctuation (punctuator.js) which use WASM -->
<script src="https://cdn.jsdelivr.net/npm/onnxruntime-web@1.24.3/dist/ort.all.min.js"></script>
<script src="vad.js?v=1"></script>
<script src="punctuator.js?v=3"></script>
<script type="module" src="app.js?v=55"></script>
</body>
</html>