Upload folder using huggingface_hub
Browse files- assets/{index-CDAO7ueL.js → index-_bf5fS8E.js} +0 -0
- index.html +151 -65
assets/{index-CDAO7ueL.js → index-_bf5fS8E.js}
RENAMED
|
The diff for this file is too large to render.
See raw diff
|
|
|
index.html
CHANGED
|
@@ -6,114 +6,200 @@
|
|
| 6 |
<title>Voxtral Realtime 4B — Live Speech-to-Text</title>
|
| 7 |
<style>
|
| 8 |
* { box-sizing: border-box; margin: 0; padding: 0; }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
body {
|
| 10 |
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif;
|
| 11 |
-
background:
|
| 12 |
min-height: 100vh; display: flex; flex-direction: column;
|
| 13 |
align-items: center; padding: 2rem 1rem;
|
| 14 |
}
|
|
|
|
|
|
|
| 15 |
.container { max-width: 580px; width: 100%; }
|
| 16 |
-
h1 { font-size: 1.3rem; text-align: center; }
|
| 17 |
-
.subtitle { color: #666; font-size: 0.8rem; text-align: center; margin: 0.25rem 0 1.5rem; }
|
| 18 |
|
| 19 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
.mic-btn {
|
| 21 |
-
width:
|
| 22 |
-
border:
|
| 23 |
cursor: pointer; display: flex; align-items: center; justify-content: center;
|
| 24 |
-
transition: all 0.2s;
|
| 25 |
}
|
| 26 |
-
.mic-btn:hover { border-color:
|
| 27 |
-
.mic-btn.
|
| 28 |
-
border-color:
|
| 29 |
-
box-shadow: 0 0 0 0 rgba(
|
| 30 |
animation: ring 2s ease-out infinite;
|
| 31 |
}
|
| 32 |
-
.mic-btn.disabled { opacity: 0.
|
| 33 |
@keyframes ring {
|
| 34 |
-
0% { box-shadow: 0 0 0 0 rgba(
|
| 35 |
-
100% { box-shadow: 0 0 0
|
| 36 |
}
|
| 37 |
-
.mic-btn svg { width:
|
| 38 |
-
.mic-btn.
|
| 39 |
|
| 40 |
-
|
|
|
|
| 41 |
.waveform canvas { width: 100%; height: 100%; display: block; border-radius: 6px; }
|
| 42 |
|
|
|
|
| 43 |
#status {
|
| 44 |
-
text-align: center; font-size: 0.
|
| 45 |
-
min-height: 1.2em; margin: 0.
|
| 46 |
-
}
|
| 47 |
-
|
| 48 |
-
.progress-bar {
|
| 49 |
-
width: 100%; height: 3px; background: #1a1a1a; border-radius: 2px;
|
| 50 |
-
overflow: hidden; margin: 0.5rem 0; opacity: 0; transition: opacity 0.3s;
|
| 51 |
-
}
|
| 52 |
-
.progress-bar.visible { opacity: 1; }
|
| 53 |
-
.progress-bar .fill {
|
| 54 |
-
height: 100%; background: #2563eb; border-radius: 2px;
|
| 55 |
-
transition: width 0.3s; width: 0%;
|
| 56 |
}
|
| 57 |
|
|
|
|
| 58 |
.transcript-card {
|
| 59 |
-
background:
|
| 60 |
-
padding: 1.25rem; margin-top:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
}
|
| 62 |
-
.transcript-label { font-size: 0.7rem; color: #555; text-transform: uppercase; letter-spacing: 0.08em; margin-bottom: 0.75rem; }
|
| 63 |
#transcript {
|
| 64 |
font-family: 'SF Mono', 'Fira Code', 'Cascadia Code', monospace;
|
| 65 |
-
font-size: 0.
|
| 66 |
white-space: pre-wrap; word-break: break-word;
|
| 67 |
}
|
| 68 |
-
#transcript.placeholder { color: #
|
| 69 |
|
| 70 |
-
|
| 71 |
-
display: flex; gap: 0.5rem; justify-content: center; margin-top: 1.25rem;
|
| 72 |
-
}
|
| 73 |
-
.config-row select {
|
| 74 |
-
background: #141414; border: 1px solid #282828; border-radius: 6px;
|
| 75 |
-
padding: 0.4rem 0.6rem; color: #999; font-size: 0.75rem;
|
| 76 |
-
}
|
| 77 |
.timing {
|
| 78 |
-
text-align: center; font-size: 0.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
}
|
| 80 |
</style>
|
| 81 |
-
<script type="module" crossorigin src="/assets/index-
|
| 82 |
</head>
|
| 83 |
<body>
|
| 84 |
<div class="container">
|
| 85 |
-
<
|
| 86 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 87 |
|
| 88 |
-
<
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 92 |
</div>
|
| 93 |
|
| 94 |
-
<
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 95 |
|
| 96 |
-
|
| 97 |
-
|
|
|
|
| 98 |
|
| 99 |
-
<div class="transcript-card">
|
| 100 |
<div class="transcript-label">Transcript</div>
|
| 101 |
<div id="transcript" class="placeholder">Speak into your microphone...</div>
|
| 102 |
</div>
|
|
|
|
| 103 |
|
| 104 |
-
<div class="
|
| 105 |
-
<
|
| 106 |
-
|
| 107 |
-
<option value="q4f16">q4f16</option>
|
| 108 |
-
<option value="q8">q8</option>
|
| 109 |
-
<option value="fp16">fp16</option>
|
| 110 |
-
</select>
|
| 111 |
-
<select id="device">
|
| 112 |
-
<option value="webgpu">WebGPU</option>
|
| 113 |
-
<option value="wasm">WASM</option>
|
| 114 |
-
</select>
|
| 115 |
</div>
|
| 116 |
-
<div class="timing" id="timing"></div>
|
| 117 |
</div>
|
| 118 |
|
| 119 |
</body>
|
|
|
|
| 6 |
<title>Voxtral Realtime 4B — Live Speech-to-Text</title>
|
| 7 |
<style>
|
| 8 |
* { box-sizing: border-box; margin: 0; padding: 0; }
|
| 9 |
+
:root {
|
| 10 |
+
--mistral-orange: #FF7000;
|
| 11 |
+
--mistral-amber: #FF8C00;
|
| 12 |
+
--mistral-dark: #1A1008;
|
| 13 |
+
--mistral-surface: #181210;
|
| 14 |
+
--mistral-border: #2A2018;
|
| 15 |
+
--mistral-muted: #887860;
|
| 16 |
+
--mistral-text: #F0E8E0;
|
| 17 |
+
--mistral-green: #4ADE80;
|
| 18 |
+
}
|
| 19 |
body {
|
| 20 |
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif;
|
| 21 |
+
background: var(--mistral-dark); color: var(--mistral-text);
|
| 22 |
min-height: 100vh; display: flex; flex-direction: column;
|
| 23 |
align-items: center; padding: 2rem 1rem;
|
| 24 |
}
|
| 25 |
+
a { color: var(--mistral-orange); text-decoration: none; }
|
| 26 |
+
a:hover { text-decoration: underline; }
|
| 27 |
.container { max-width: 580px; width: 100%; }
|
|
|
|
|
|
|
| 28 |
|
| 29 |
+
/* Header */
|
| 30 |
+
.header { text-align: center; margin-bottom: 2rem; }
|
| 31 |
+
h1 {
|
| 32 |
+
font-size: 1.5rem; font-weight: 700;
|
| 33 |
+
background: linear-gradient(135deg, var(--mistral-orange), var(--mistral-amber));
|
| 34 |
+
-webkit-background-clip: text; -webkit-text-fill-color: transparent;
|
| 35 |
+
background-clip: text;
|
| 36 |
+
}
|
| 37 |
+
.subtitle { color: var(--mistral-muted); font-size: 0.8rem; margin-top: 0.25rem; }
|
| 38 |
+
.model-link {
|
| 39 |
+
display: inline-flex; align-items: center; gap: 0.3rem;
|
| 40 |
+
font-size: 0.75rem; margin-top: 0.5rem; color: var(--mistral-muted);
|
| 41 |
+
}
|
| 42 |
+
.model-link a { color: var(--mistral-orange); }
|
| 43 |
+
|
| 44 |
+
/* Load section */
|
| 45 |
+
.load-section {
|
| 46 |
+
background: var(--mistral-surface); border: 1px solid var(--mistral-border);
|
| 47 |
+
border-radius: 12px; padding: 1.5rem; margin-bottom: 1.5rem;
|
| 48 |
+
text-align: center;
|
| 49 |
+
}
|
| 50 |
+
.load-section.hidden { display: none; }
|
| 51 |
+
.config-row {
|
| 52 |
+
display: flex; gap: 0.5rem; justify-content: center; margin-bottom: 1rem;
|
| 53 |
+
}
|
| 54 |
+
.config-row select {
|
| 55 |
+
background: var(--mistral-dark); border: 1px solid var(--mistral-border);
|
| 56 |
+
border-radius: 8px; padding: 0.5rem 0.75rem; color: var(--mistral-text);
|
| 57 |
+
font-size: 0.8rem; cursor: pointer;
|
| 58 |
+
}
|
| 59 |
+
.config-row select:focus { outline: 1px solid var(--mistral-orange); }
|
| 60 |
+
.load-btn {
|
| 61 |
+
background: linear-gradient(135deg, var(--mistral-orange), var(--mistral-amber));
|
| 62 |
+
border: none; border-radius: 10px; padding: 0.7rem 2rem;
|
| 63 |
+
color: #fff; font-size: 0.9rem; font-weight: 600;
|
| 64 |
+
cursor: pointer; transition: all 0.2s; letter-spacing: 0.02em;
|
| 65 |
+
}
|
| 66 |
+
.load-btn:hover { filter: brightness(1.1); transform: translateY(-1px); }
|
| 67 |
+
.load-btn:active { transform: translateY(0); }
|
| 68 |
+
.load-btn:disabled { opacity: 0.5; cursor: not-allowed; transform: none; filter: none; }
|
| 69 |
+
.load-hint { font-size: 0.7rem; color: var(--mistral-muted); margin-top: 0.75rem; }
|
| 70 |
+
|
| 71 |
+
/* Progress */
|
| 72 |
+
.progress-bar {
|
| 73 |
+
width: 100%; height: 4px; background: var(--mistral-dark); border-radius: 2px;
|
| 74 |
+
overflow: hidden; margin-top: 0.75rem; opacity: 0; transition: opacity 0.3s;
|
| 75 |
+
}
|
| 76 |
+
.progress-bar.visible { opacity: 1; }
|
| 77 |
+
.progress-bar .fill {
|
| 78 |
+
height: 100%; border-radius: 2px;
|
| 79 |
+
background: linear-gradient(90deg, var(--mistral-orange), var(--mistral-amber));
|
| 80 |
+
transition: width 0.3s; width: 0%;
|
| 81 |
+
}
|
| 82 |
+
#loadStatus { font-size: 0.75rem; color: var(--mistral-muted); margin-top: 0.5rem; min-height: 1.2em; }
|
| 83 |
+
|
| 84 |
+
/* Mic */
|
| 85 |
+
.mic-wrap { display: flex; justify-content: center; margin: 1.25rem 0; }
|
| 86 |
.mic-btn {
|
| 87 |
+
width: 80px; height: 80px; border-radius: 50%;
|
| 88 |
+
border: 2px solid var(--mistral-border); background: var(--mistral-surface);
|
| 89 |
cursor: pointer; display: flex; align-items: center; justify-content: center;
|
| 90 |
+
transition: all 0.2s;
|
| 91 |
}
|
| 92 |
+
.mic-btn:hover { border-color: var(--mistral-orange); }
|
| 93 |
+
.mic-btn.listening {
|
| 94 |
+
border-color: var(--mistral-green); background: rgba(74,222,128,0.05);
|
| 95 |
+
box-shadow: 0 0 0 0 rgba(74,222,128,0.3);
|
| 96 |
animation: ring 2s ease-out infinite;
|
| 97 |
}
|
| 98 |
+
.mic-btn.disabled { opacity: 0.25; cursor: not-allowed; pointer-events: none; }
|
| 99 |
@keyframes ring {
|
| 100 |
+
0% { box-shadow: 0 0 0 0 rgba(74,222,128,0.3); }
|
| 101 |
+
100% { box-shadow: 0 0 0 18px rgba(74,222,128,0); }
|
| 102 |
}
|
| 103 |
+
.mic-btn svg { width: 32px; height: 32px; fill: var(--mistral-muted); transition: fill 0.2s; }
|
| 104 |
+
.mic-btn.listening svg { fill: var(--mistral-green); }
|
| 105 |
|
| 106 |
+
/* Waveform */
|
| 107 |
+
.waveform { height: 44px; margin: 0.25rem 0; }
|
| 108 |
.waveform canvas { width: 100%; height: 100%; display: block; border-radius: 6px; }
|
| 109 |
|
| 110 |
+
/* Status */
|
| 111 |
#status {
|
| 112 |
+
text-align: center; font-size: 0.75rem; color: var(--mistral-muted);
|
| 113 |
+
min-height: 1.2em; margin: 0.25rem 0;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 114 |
}
|
| 115 |
|
| 116 |
+
/* Transcript */
|
| 117 |
.transcript-card {
|
| 118 |
+
background: var(--mistral-surface); border: 1px solid var(--mistral-border);
|
| 119 |
+
border-radius: 12px; padding: 1.25rem; margin-top: 0.75rem; min-height: 140px;
|
| 120 |
+
}
|
| 121 |
+
.transcript-label {
|
| 122 |
+
font-size: 0.65rem; color: var(--mistral-muted);
|
| 123 |
+
text-transform: uppercase; letter-spacing: 0.1em; margin-bottom: 0.75rem;
|
| 124 |
}
|
|
|
|
| 125 |
#transcript {
|
| 126 |
font-family: 'SF Mono', 'Fira Code', 'Cascadia Code', monospace;
|
| 127 |
+
font-size: 0.9rem; line-height: 1.7; color: var(--mistral-text);
|
| 128 |
white-space: pre-wrap; word-break: break-word;
|
| 129 |
}
|
| 130 |
+
#transcript.placeholder { color: #554830; font-style: italic; font-family: inherit; }
|
| 131 |
|
| 132 |
+
/* Timing */
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 133 |
.timing {
|
| 134 |
+
text-align: center; font-size: 0.65rem; color: var(--mistral-muted);
|
| 135 |
+
margin-top: 0.5rem;
|
| 136 |
+
}
|
| 137 |
+
|
| 138 |
+
/* Footer */
|
| 139 |
+
.footer {
|
| 140 |
+
margin-top: 2rem; text-align: center; font-size: 0.65rem;
|
| 141 |
+
color: var(--mistral-muted); line-height: 1.6;
|
| 142 |
}
|
| 143 |
</style>
|
| 144 |
+
<script type="module" crossorigin src="/assets/index-_bf5fS8E.js"></script>
|
| 145 |
</head>
|
| 146 |
<body>
|
| 147 |
<div class="container">
|
| 148 |
+
<div class="header">
|
| 149 |
+
<h1>Voxtral Realtime 4B</h1>
|
| 150 |
+
<p class="subtitle">Speech-to-Text in the browser with transformers.js + WebGPU</p>
|
| 151 |
+
<p class="model-link">
|
| 152 |
+
Model: <a href="https://huggingface.co/mistralai/Voxtral-Mini-4B-Realtime-2602" target="_blank" rel="noopener">
|
| 153 |
+
mistralai/Voxtral-Mini-4B-Realtime-2602
|
| 154 |
+
</a>
|
| 155 |
+
·
|
| 156 |
+
ONNX: <a href="https://huggingface.co/onnx-community/Voxtral-Mini-4B-Realtime-2602-ONNX" target="_blank" rel="noopener">
|
| 157 |
+
onnx-community/Voxtral-Mini-4B-Realtime-2602-ONNX
|
| 158 |
+
</a>
|
| 159 |
+
</p>
|
| 160 |
+
</div>
|
| 161 |
|
| 162 |
+
<!-- Load Model -->
|
| 163 |
+
<div class="load-section" id="loadSection">
|
| 164 |
+
<div class="config-row">
|
| 165 |
+
<select id="dtype" title="Quantization">
|
| 166 |
+
<option value="q4">q4 (~1.5 GB)</option>
|
| 167 |
+
<option value="q4f16">q4f16 (~1.5 GB)</option>
|
| 168 |
+
<option value="fp16">fp16 (~8 GB)</option>
|
| 169 |
+
</select>
|
| 170 |
+
<select id="device" title="Backend">
|
| 171 |
+
<option value="webgpu">WebGPU</option>
|
| 172 |
+
<option value="wasm">WASM (CPU)</option>
|
| 173 |
+
</select>
|
| 174 |
+
</div>
|
| 175 |
+
<button class="load-btn" id="loadBtn" onclick="window.__loadModel()">Load Model</button>
|
| 176 |
+
<div class="load-hint">Requires a browser with WebGPU support (Chrome 113+, Edge 113+)</div>
|
| 177 |
+
<div class="progress-bar" id="progressBar"><div class="fill" id="progressFill"></div></div>
|
| 178 |
+
<div id="loadStatus"></div>
|
| 179 |
</div>
|
| 180 |
|
| 181 |
+
<!-- Active UI (hidden until model loads) -->
|
| 182 |
+
<div id="activeUI" style="display:none">
|
| 183 |
+
<div class="mic-wrap">
|
| 184 |
+
<button class="mic-btn" id="micBtn" title="Click to start/stop listening">
|
| 185 |
+
<svg viewBox="0 0 24 24"><path d="M12 14c1.66 0 3-1.34 3-3V5c0-1.66-1.34-3-3-3S9 3.34 9 5v6c0 1.66 1.34 3 3 3zm-1-9c0-.55.45-1 1-1s1 .45 1 1v6c0 .55-.45 1-1 1s-1-.45-1-1V5zm6 6c0 2.76-2.24 5-5 5s-5-2.24-5-5H5c0 3.53 2.61 6.43 6 6.92V21h2v-3.08c3.39-.49 6-3.39 6-6.92h-2z"/></svg>
|
| 186 |
+
</button>
|
| 187 |
+
</div>
|
| 188 |
|
| 189 |
+
<div class="waveform"><canvas id="waveCanvas"></canvas></div>
|
| 190 |
+
<div id="status">Click the mic to start listening</div>
|
| 191 |
+
</div>
|
| 192 |
|
| 193 |
+
<div class="transcript-card" id="transcriptCard" style="display:none">
|
| 194 |
<div class="transcript-label">Transcript</div>
|
| 195 |
<div id="transcript" class="placeholder">Speak into your microphone...</div>
|
| 196 |
</div>
|
| 197 |
+
<div class="timing" id="timing"></div>
|
| 198 |
|
| 199 |
+
<div class="footer">
|
| 200 |
+
Powered by <a href="https://huggingface.co/docs/transformers.js" target="_blank">transformers.js</a>
|
| 201 |
+
· Runs entirely in your browser
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 202 |
</div>
|
|
|
|
| 203 |
</div>
|
| 204 |
|
| 205 |
</body>
|