| | <!DOCTYPE html> |
| | <html lang="en"> |
| | <head> |
| | <!-- Encoding is declared first so the rest of the head is decoded as UTF-8. --> |
| | <meta charset="UTF-8"> |
| | <meta name="viewport" content="width=device-width, initial-scale=1.0"> |
| | <!-- Title separator is an em dash; the earlier "β" was a mis-decoded character. --> |
| | <title>Voxtral Realtime 4B — Live Speech-to-Text</title> |
| | <meta name="description" content="Real-time speech transcription running entirely in your browser via WebGPU"> |
| | <!-- Open connections to the Google Fonts origins ahead of the stylesheet request below. --> |
| | <link rel="preconnect" href="https://fonts.googleapis.com"> |
| | <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin> |
| | <!-- JetBrains Mono and Inter, weights 400-700, with display=swap. --> |
| | <link href="https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@400;500;600;700&family=Inter:wght@400;500;600;700&display=swap" rel="stylesheet"> |
| | <style> |
| | /* Global reset: border-box sizing, no default margins or padding. */ |
| | * { |
| |   margin: 0; |
| |   padding: 0; |
| |   box-sizing: border-box; |
| | } |
| | |
| | /* Page shell: a centered flex column on a cream background with a 40px grid |
| |    drawn from two 1px linear gradients. */ |
| | body { |
| |   min-height: 100vh; |
| |   padding: 2rem 1rem; |
| |   display: flex; |
| |   flex-direction: column; |
| |   align-items: center; |
| |   font-family: 'Inter', -apple-system, BlinkMacSystemFont, sans-serif; |
| |   color: #1E1E1E; |
| |   background-color: #FFFAEB; |
| |   background-image: |
| |     linear-gradient(#E9E2CB 1px, transparent 1px), |
| |     linear-gradient(90deg, #E9E2CB 1px, transparent 1px); |
| |   background-size: 40px 40px; |
| | } |
| | |
| | /* Links are orange and underlined only while hovered. */ |
| | a { |
| |   color: #FF8205; |
| |   text-decoration: none; |
| | } |
| | a:hover { |
| |   text-decoration: underline; |
| | } |
| | |
| | /* Content column: full width, capped at 680px. */ |
| | .container { |
| |   width: 100%; |
| |   max-width: 680px; |
| | } |
| | |
| | |
| | /* Header card: gradient panel with a thicker orange top border. */ |
| | .header-card { |
| |   margin-bottom: 1.25rem; |
| |   padding: 1.75rem 2rem; |
| |   background: linear-gradient(135deg, #FFFAEB 0%, #FFF0C3 100%); |
| |   border: 2px solid #E9E2CB; |
| |   border-top: 4px solid #FF8205; |
| |   border-radius: 8px; |
| |   box-shadow: 0 4px 24px rgba(0,0,0,0.06); |
| | } |
| | |
| | /* Title row: flex so the accent word and the rest of the title share a gap. */ |
| | .header-title { |
| |   display: flex; |
| |   align-items: center; |
| |   gap: 0.6rem; |
| |   font-size: 1.6rem; |
| |   font-weight: 700; |
| |   letter-spacing: -0.02em; |
| |   color: #1E1E1E; |
| | } |
| | .header-title .accent { |
| |   color: #FF8205; |
| | } |
| | |
| | /* One-line description under the title. */ |
| | .header-subtitle { |
| |   margin-top: 0.35rem; |
| |   font-size: 0.85rem; |
| |   color: #555; |
| | } |
| | |
| | /* Row of monospace links; wraps on narrow screens. */ |
| | .header-links { |
| |   margin-top: 0.6rem; |
| |   display: flex; |
| |   flex-wrap: wrap; |
| |   align-items: center; |
| |   gap: 0.5rem; |
| |   font-family: 'JetBrains Mono', monospace; |
| |   font-size: 0.72rem; |
| |   color: #888; |
| | } |
| | .header-links .sep { |
| |   color: #E9E2CB; |
| | } |
| | |
| | |
| | /* Model-loading card: quantization/backend pickers, load button, hint, and progress. */ |
| | .load-card { |
| |   background: rgba(255,255,255,0.6); |
| |   border: 2px solid #E9E2CB; |
| |   border-radius: 8px; |
| |   padding: 2rem; |
| |   text-align: center; |
| |   box-shadow: 0 4px 24px rgba(0,0,0,0.04); |
| |   margin-bottom: 1.25rem; |
| | } |
| | /* Toggle class that removes the card from layout. */ |
| | .load-card.hidden { display: none; } |
| | .config-row { |
| |   display: flex; gap: 0.5rem; justify-content: center; |
| |   margin-bottom: 1.25rem; |
| | } |
| | .config-row select { |
| |   background: #FFFAEB; border: 1.5px solid #E9E2CB; border-radius: 4px; |
| |   padding: 0.5rem 0.75rem; color: #1E1E1E; font-size: 0.8rem; |
| |   font-family: 'JetBrains Mono', monospace; cursor: pointer; |
| | } |
| | .config-row select:focus { outline: none; border-color: #FF8205; } |
| | /* The rule above removes the default outline; restore a clearly visible ring |
| |    for keyboard focus so the border tint is not the only focus indicator. */ |
| | .config-row select:focus-visible { |
| |   outline: 2px solid #FF8205; |
| |   outline-offset: 2px; |
| | } |
| | .load-btn { |
| |   background: #FF8205; border: none; border-radius: 4px; |
| |   padding: 0.75rem 2.5rem; color: #fff; |
| |   font-family: 'JetBrains Mono', monospace; |
| |   font-size: 0.8rem; font-weight: 700; |
| |   text-transform: uppercase; letter-spacing: 0.08em; |
| |   cursor: pointer; transition: all 0.2s; |
| | } |
| | /* Darken on hover only while enabled, so a disabled button does not look clickable. */ |
| | .load-btn:hover:not(:disabled) { background: #E67300; } |
| | .load-btn:disabled { opacity: 0.5; cursor: not-allowed; } |
| | .load-hint { |
| |   font-size: 0.7rem; color: #888; margin-top: 1rem; |
| |   font-family: 'JetBrains Mono', monospace; |
| | } |
| | /* Progress area stays in layout but is transparent until .visible is added. */ |
| | .progress-wrap { |
| |   margin-top: 1rem; opacity: 0; transition: opacity 0.3s; |
| | } |
| | .progress-wrap.visible { opacity: 1; } |
| | .progress-track { |
| |   width: 100%; height: 4px; background: #E9E2CB; border-radius: 2px; |
| |   overflow: hidden; |
| | } |
| | /* Fill starts at 0% width; width changes are animated over 0.3s. */ |
| | .progress-fill { |
| |   height: 100%; width: 0%; border-radius: 2px; |
| |   background: #FF8205; transition: width 0.3s; |
| | } |
| | /* min-height reserves one line so the card does not jump when status text appears. */ |
| | .load-status { |
| |   font-family: 'JetBrains Mono', monospace; |
| |   font-size: 0.7rem; color: #888; margin-top: 0.4rem; |
| |   min-height: 1.2em; |
| | } |
| | |
| | |
| | /* Transcript card shell; overflow is hidden so the header and footer follow the rounded corners. */ |
| | .transcript-card { |
| |   background: #FFFAEB; |
| |   border: 2px solid #E9E2CB; |
| |   border-radius: 8px; |
| |   box-shadow: 0 8px 32px rgba(0,0,0,0.06); |
| |   overflow: hidden; |
| |   margin-bottom: 0.75rem; |
| | } |
| | /* Header strip: title on the left, status badge on the right. */ |
| | .card-header { |
| |   background: rgba(255,255,255,0.6); |
| |   border-bottom: 1px solid #E9E2CB; |
| |   padding: 0.6rem 1rem; |
| |   display: flex; align-items: center; justify-content: space-between; |
| | } |
| | .card-header-left { |
| |   display: flex; align-items: center; gap: 0.6rem; |
| | } |
| | .card-title { |
| |   font-family: 'JetBrains Mono', monospace; |
| |   font-size: 0.6rem; font-weight: 700; color: #1E1E1E; |
| |   text-transform: uppercase; letter-spacing: 0.1em; |
| | } |
| | /* Status badge base; one of the .status-* classes below supplies the colors. */ |
| | .status-badge { |
| |   display: inline-flex; align-items: center; gap: 0.4rem; |
| |   padding: 0.2rem 0.6rem; border-radius: 2px; |
| |   font-family: 'JetBrains Mono', monospace; |
| |   font-size: 0.55rem; font-weight: 700; |
| |   text-transform: uppercase; letter-spacing: 0.08em; |
| | } |
| | .status-idle { |
| |   background: #f0f0f0; color: #888; border: 1px solid #E9E2CB; |
| | } |
| | .status-listening { |
| |   background: rgba(255,130,5,0.15); color: #CC6A04; |
| |   border: 1px solid #FF8205; |
| | } |
| | .status-transcribing { |
| |   background: rgba(255,130,5,0.25); color: #CC6A04; |
| |   border: 1px solid #FF8205; |
| | } |
| | /* The dot takes the badge text color through currentColor. */ |
| | .status-dot { |
| |   width: 6px; height: 6px; border-radius: 50%; |
| |   background: currentColor; |
| | } |
| | .status-dot.pulse { |
| |   animation: pulse 1.2s ease-in-out infinite; |
| | } |
| | @keyframes pulse { |
| |   0%, 100% { opacity: 1; transform: scale(1); } |
| |   50% { opacity: 0.4; transform: scale(0.7); } |
| | } |
| | /* Respect the reduced-motion preference: keep the dot static instead of pulsing forever. */ |
| | @media (prefers-reduced-motion: reduce) { |
| |   .status-dot.pulse { animation: none; } |
| | } |
| | |
| | /* Transcript body: faint 20px grid, with a minimum height so the empty card keeps its shape. */ |
| | .card-content { |
| |   padding: 1.25rem 1.25rem 1.5rem; |
| |   min-height: 180px; |
| |   background-image: |
| |     linear-gradient(rgba(0,0,0,0.02) 1px, transparent 1px), |
| |     linear-gradient(90deg, rgba(0,0,0,0.02) 1px, transparent 1px); |
| |   background-size: 20px 20px; |
| | } |
| | /* pre-wrap preserves whitespace and line breaks in the text; long words may break. */ |
| | #transcript { |
| |   font-family: 'JetBrains Mono', monospace; |
| |   font-size: 0.95rem; line-height: 1.8; color: #1E1E1E; |
| |   white-space: pre-wrap; word-break: break-word; |
| | } |
| | /* Muted italic look while the placeholder class is present. */ |
| | #transcript.placeholder { |
| |   color: #bbb; font-style: italic; |
| | } |
| | /* Blinking block cursor. */ |
| | .transcript-cursor { |
| |   display: inline-block; width: 8px; height: 16px; |
| |   background: #FF8205; margin-left: 3px; vertical-align: middle; |
| |   animation: blink 1s step-end infinite; |
| | } |
| | @keyframes blink { |
| |   0%, 100% { opacity: 1; } |
| |   50% { opacity: 0; } |
| | } |
| | /* Respect the reduced-motion preference: show a steady cursor instead of a blinking one. */ |
| | @media (prefers-reduced-motion: reduce) { |
| |   .transcript-cursor { animation: none; } |
| | } |
| | /* Footer strip: two items pushed to opposite ends. */ |
| | .card-footer { |
| |   background: rgba(255,255,255,0.4); |
| |   border-top: 1px solid #E9E2CB; |
| |   padding: 0.4rem 1rem; |
| |   font-family: 'JetBrains Mono', monospace; |
| |   font-size: 0.6rem; color: #888; |
| |   display: flex; justify-content: space-between; |
| | } |
| | |
| | |
| | /* Controls column: mic button stacked above the waveform strip. */ |
| | .controls { |
| |   display: flex; flex-direction: column; align-items: center; |
| |   gap: 0.75rem; margin-bottom: 1.25rem; |
| | } |
| | /* Round 88px mic button. */ |
| | .mic-btn { |
| |   width: 88px; height: 88px; border-radius: 50%; |
| |   border: 3px solid #E9E2CB; background: rgba(255,255,255,0.7); |
| |   cursor: pointer; display: flex; align-items: center; justify-content: center; |
| |   transition: all 0.2s; |
| |   box-shadow: 0 4px 16px rgba(0,0,0,0.06); |
| | } |
| | .mic-btn:hover { border-color: #FF8205; background: rgba(255,130,5,0.05); } |
| | /* Listening state: orange border plus an expanding ring animation. */ |
| | .mic-btn.listening { |
| |   border-color: #FF8205; background: rgba(255,130,5,0.1); |
| |   box-shadow: 0 0 0 0 rgba(255,130,5,0.3); |
| |   animation: ring 2s ease-out infinite; |
| | } |
| | /* Class-based disabled state; pointer-events: none blocks clicks and hover. */ |
| | .mic-btn.disabled { opacity: 0.3; cursor: not-allowed; pointer-events: none; } |
| | @keyframes ring { |
| |   0% { box-shadow: 0 0 0 0 rgba(255,130,5,0.3); } |
| |   100% { box-shadow: 0 0 0 20px rgba(255,130,5,0); } |
| | } |
| | /* Respect the reduced-motion preference: the orange border and fill still mark the listening state. */ |
| | @media (prefers-reduced-motion: reduce) { |
| |   .mic-btn.listening { animation: none; } |
| | } |
| | .mic-btn svg { width: 36px; height: 36px; fill: #999; transition: fill 0.2s; } |
| | .mic-btn.listening svg { fill: #FF8205; } |
| | |
| | /* Waveform strip; the canvas stretches to fill its wrapper. */ |
| | .waveform { width: 100%; max-width: 480px; height: 44px; } |
| | .waveform canvas { |
| |   width: 100%; height: 100%; display: block; border-radius: 4px; |
| |   border: 1px solid #E9E2CB; background: rgba(255,255,255,0.4); |
| | } |
| | |
| | |
| | /* Page footer: small, centered, monospace note below the cards. */ |
| | .footer { |
| |   margin-top: 0.5rem; |
| |   text-align: center; |
| |   font-family: 'JetBrains Mono', monospace; |
| |   font-size: 0.65rem; |
| |   color: #aaa; |
| | } |
| | </style> |
| | <!-- Application bundle, loaded as an ES module (module scripts are deferred by default). |
| |      NOTE(review): the Load Model button calls window.__loadModel, which this file does not |
| |      define; presumably this bundle provides it. TODO confirm. --> |
| | <script type="module" crossorigin src="/assets/index-BuZjYFHI.js"></script> |
| | </head> |
| | <body> |
| | <div class="container"> |
| |
|
| | |
| | <!-- Header card: page title, one-line description, and links to the model, ONNX weights, and runtime docs. --> |
| | <div class="header-card"> |
| |   <!-- The title is the page's only top-level heading, so it is an h1; .header-title sets its size and weight. --> |
| |   <h1 class="header-title"> |
| |     <span class="accent">Voxtral</span> Realtime 4B |
| |   </h1> |
| |   <p class="header-subtitle"> |
| |     Real-time speech transcription running entirely in your browser via WebGPU |
| |   </p> |
| |   <!-- Links open in a new tab; rel="noopener" keeps the opened page from reaching window.opener. --> |
| |   <div class="header-links"> |
| |     <a href="https://huggingface.co/mistralai/Voxtral-Mini-4B-Realtime-2602" target="_blank" rel="noopener">mistralai/Voxtral-Mini-4B-Realtime-2602</a> |
| |     <span class="sep">·</span> |
| |     <a href="https://huggingface.co/onnx-community/Voxtral-Mini-4B-Realtime-2602-ONNX" target="_blank" rel="noopener">ONNX weights</a> |
| |     <span class="sep">·</span> |
| |     <a href="https://huggingface.co/docs/transformers.js" target="_blank" rel="noopener">transformers.js</a> |
| |   </div> |
| | </div> |
| |
|
| | |
| | <!-- Load card: choose quantization and backend, then start loading the model. --> |
| | <div class="load-card" id="loadSection"> |
| |   <div class="config-row"> |
| |     <!-- title alone is not a reliable accessible name, so each select also carries aria-label. --> |
| |     <select id="dtype" title="Quantization" aria-label="Quantization"> |
| |       <option value="q4">q4 (~1.5 GB)</option> |
| |       <option value="q4f16">q4f16 (~1.5 GB)</option> |
| |       <option value="fp16">fp16 (~8 GB)</option> |
| |     </select> |
| |     <select id="device" title="Backend" aria-label="Backend"> |
| |       <option value="webgpu">WebGPU</option> |
| |       <option value="wasm">WASM (CPU)</option> |
| |     </select> |
| |   </div> |
| |   <!-- NOTE(review): window.__loadModel is not defined in this file; presumably the module script |
| |        in the head provides it. The inline handler is kept because that contract is not visible here. --> |
| |   <button type="button" class="load-btn" id="loadBtn" onclick="window.__loadModel()">Load Model</button> |
| |   <div class="load-hint">Requires WebGPU (Chrome 113+, Edge 113+). Models are cached after first download.</div> |
| |   <div class="progress-wrap" id="progressWrap"> |
| |     <div class="progress-track"><div class="progress-fill" id="progressFill"></div></div> |
| |     <!-- Polite live region: status text written here later is announced by screen readers. --> |
| |     <div class="load-status" id="loadStatus" role="status" aria-live="polite"></div> |
| |   </div> |
| | </div> |
| |
|
| | |
| | <!-- Recording controls; hidden by the inline display:none until shown (presumably by the app script). --> |
| | <div id="activeUI" style="display:none"> |
| |   <div class="controls"> |
| |     <!-- Icon-only button: aria-label gives it an accessible name and type="button" makes it a plain action button. --> |
| |     <button type="button" class="mic-btn" id="micBtn" title="Start / stop listening" aria-label="Start or stop listening"> |
| |       <!-- Decorative microphone glyph, hidden from assistive technology. --> |
| |       <svg viewBox="0 0 24 24" aria-hidden="true" focusable="false"><path d="M12 14c1.66 0 3-1.34 3-3V5c0-1.66-1.34-3-3-3S9 3.34 9 5v6c0 1.66 1.34 3 3 3zm-1-9c0-.55.45-1 1-1s1 .45 1 1v6c0 .55-.45 1-1 1s-1-.45-1-1V5zm6 6c0 2.76-2.24 5-5 5s-5-2.24-5-5H5c0 3.53 2.61 6.43 6 6.92V21h2v-3.08c3.39-.49 6-3.39 6-6.92h-2z"/></svg> |
| |     </button> |
| |     <!-- NOTE(review): the canvas is assumed to be a purely visual waveform, so it is hidden from assistive technology. Confirm. --> |
| |     <div class="waveform"><canvas id="waveCanvas" aria-hidden="true"></canvas></div> |
| |   </div> |
| | </div> |
| |
|
| | |
| | <!-- Transcript card: header with status badge, transcript text, and a footer with a timing slot. |
| |      Hidden by the inline display:none until shown (presumably by the app script). --> |
| | <div class="transcript-card" id="transcriptCard" style="display:none"> |
| |   <div class="card-header"> |
| |     <div class="card-header-left"> |
| |       <span class="card-title">Transcript</span> |
| |     </div> |
| |     <!-- role="status" makes the badge a polite live region, so a change of its text is announced. --> |
| |     <span class="status-badge status-idle" id="statusBadge" role="status"> |
| |       <span class="status-dot" id="statusDot"></span> |
| |       <span id="statusText">Idle</span> |
| |     </span> |
| |   </div> |
| |   <div class="card-content"> |
| |     <!-- Kept on one line: #transcript uses white-space: pre-wrap, so inner whitespace would be rendered. --> |
| |     <div id="transcript" class="placeholder">Press the mic button and start speaking...</div> |
| |   </div> |
| |   <div class="card-footer"> |
| |     <span id="timing"></span> |
| |     <span>voxtral-realtime-4b · in-browser</span> |
| |   </div> |
| | </div> |
| |
|
| | <!-- Page footer, marked up as a footer element; the .footer class still applies its styling. --> |
| | <footer class="footer"> |
| |   No data leaves your device · Powered by <a href="https://huggingface.co/docs/transformers.js" target="_blank" rel="noopener">transformers.js</a> + WebGPU |
| | </footer> |
| | </div> |
| |
|
| | </body> |
| | </html> |
| |
|