shreyask committed on
Commit
e745b13
·
verified ·
1 Parent(s): 508140e

Upload folder using huggingface_hub

Browse files
README.md CHANGED
@@ -1,10 +1,19 @@
1
  ---
2
- title: Voxtral Realtime 4b
3
- emoji: 📚
4
- colorFrom: blue
5
  colorTo: green
6
  sdk: static
7
  pinned: false
 
 
8
  ---
9
 
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
1
  ---
2
+ title: Voxtral Realtime 4B
3
+ emoji: 🎙️
4
+ colorFrom: gray
5
  colorTo: green
6
  sdk: static
7
  pinned: false
8
+ license: apache-2.0
9
+ short_description: Speech-to-Text in the browser with transformers.js + WebGPU
10
  ---
11
 
12
+ # Voxtral Realtime 4B Live Speech-to-Text
13
+
14
+ Real-time speech transcription running entirely in your browser using [Voxtral-Mini-4B-Realtime](https://huggingface.co/mistralai/Voxtral-Mini-4B-Realtime-2602) via [transformers.js](https://github.com/huggingface/transformers.js) + WebGPU.
15
+
16
+ - Click the mic to start listening
17
+ - VAD automatically detects speech segments
18
+ - Words appear as the model generates them
19
+ - All processing happens locally — no server needed
assets/index-CDAO7ueL.js ADDED
The diff for this file is too large to render. See raw diff
 
assets/ort-wasm-simd-threaded.asyncify-COni_aV6.wasm ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1c17268b97461e8128bc564ba43e705d1dbad66cdf0d98e46d70e34afabadf5
3
+ size 22364280
index.html CHANGED
@@ -1,19 +1,120 @@
1
- <!doctype html>
2
- <html>
3
- <head>
4
- <meta charset="utf-8" />
5
- <meta name="viewport" content="width=device-width" />
6
- <title>My static Space</title>
7
- <link rel="stylesheet" href="style.css" />
8
- </head>
9
- <body>
10
- <div class="card">
11
- <h1>Welcome to your static Space!</h1>
12
- <p>You can modify this app directly by editing <i>index.html</i> in the Files and versions tab.</p>
13
- <p>
14
- Also don't forget to check the
15
- <a href="https://huggingface.co/docs/hub/spaces" target="_blank">Spaces documentation</a>.
16
- </p>
17
- </div>
18
- </body>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  </html>
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Voxtral Realtime 4B — Live Speech-to-Text</title>
7
+ <style>
8
+ * { box-sizing: border-box; margin: 0; padding: 0; }
9
+ body {
10
+ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif;
11
+ background: #0a0a0a; color: #e0e0e0;
12
+ min-height: 100vh; display: flex; flex-direction: column;
13
+ align-items: center; padding: 2rem 1rem;
14
+ }
15
+ .container { max-width: 580px; width: 100%; }
16
+ h1 { font-size: 1.3rem; text-align: center; }
17
+ .subtitle { color: #666; font-size: 0.8rem; text-align: center; margin: 0.25rem 0 1.5rem; }
18
+
19
+ .mic-wrap { display: flex; justify-content: center; margin: 1.5rem 0; }
20
+ .mic-btn {
21
+ width: 96px; height: 96px; border-radius: 50%;
22
+ border: 3px solid #333; background: #151515;
23
+ cursor: pointer; display: flex; align-items: center; justify-content: center;
24
+ transition: all 0.2s; position: relative;
25
+ }
26
+ .mic-btn:hover { border-color: #555; background: #1a1a1a; }
27
+ .mic-btn.recording {
28
+ border-color: #22c55e; background: #0a1a0a;
29
+ box-shadow: 0 0 0 0 rgba(34,197,94,0.4);
30
+ animation: ring 2s ease-out infinite;
31
+ }
32
+ .mic-btn.disabled { opacity: 0.3; cursor: not-allowed; }
33
+ @keyframes ring {
34
+ 0% { box-shadow: 0 0 0 0 rgba(34,197,94,0.4); }
35
+ 100% { box-shadow: 0 0 0 20px rgba(34,197,94,0); }
36
+ }
37
+ .mic-btn svg { width: 36px; height: 36px; fill: #888; transition: fill 0.2s; }
38
+ .mic-btn.recording svg { fill: #22c55e; }
39
+
40
+ .waveform { height: 48px; margin: 0.5rem 0; }
41
+ .waveform canvas { width: 100%; height: 100%; display: block; border-radius: 6px; }
42
+
43
+ #status {
44
+ text-align: center; font-size: 0.8rem; color: #666;
45
+ min-height: 1.2em; margin: 0.5rem 0;
46
+ }
47
+
48
+ .progress-bar {
49
+ width: 100%; height: 3px; background: #1a1a1a; border-radius: 2px;
50
+ overflow: hidden; margin: 0.5rem 0; opacity: 0; transition: opacity 0.3s;
51
+ }
52
+ .progress-bar.visible { opacity: 1; }
53
+ .progress-bar .fill {
54
+ height: 100%; background: #2563eb; border-radius: 2px;
55
+ transition: width 0.3s; width: 0%;
56
+ }
57
+
58
+ .transcript-card {
59
+ background: #111; border: 1px solid #1e1e1e; border-radius: 10px;
60
+ padding: 1.25rem; margin-top: 1rem; min-height: 160px;
61
+ }
62
+ .transcript-label { font-size: 0.7rem; color: #555; text-transform: uppercase; letter-spacing: 0.08em; margin-bottom: 0.75rem; }
63
+ #transcript {
64
+ font-family: 'SF Mono', 'Fira Code', 'Cascadia Code', monospace;
65
+ font-size: 0.95rem; line-height: 1.7; color: #d4d4d4;
66
+ white-space: pre-wrap; word-break: break-word;
67
+ }
68
+ #transcript.placeholder { color: #444; font-style: italic; font-family: inherit; }
69
+
70
+ .config-row {
71
+ display: flex; gap: 0.5rem; justify-content: center; margin-top: 1.25rem;
72
+ }
73
+ .config-row select {
74
+ background: #141414; border: 1px solid #282828; border-radius: 6px;
75
+ padding: 0.4rem 0.6rem; color: #999; font-size: 0.75rem;
76
+ }
77
+ .timing {
78
+ text-align: center; font-size: 0.7rem; color: #555; margin-top: 0.5rem;
79
+ }
80
+ </style>
81
+ <script type="module" crossorigin src="/assets/index-CDAO7ueL.js"></script>
82
+ </head>
83
+ <body>
84
+ <div class="container">
85
+ <h1>Voxtral Realtime 4B</h1>
86
+ <p class="subtitle">Speech-to-Text in the browser with transformers.js (local build)</p>
87
+
88
+ <div class="mic-wrap">
89
+ <!-- Mic toggle: explicit type, accessible name for the icon-only control, decorative icon hidden from assistive tech --><button class="mic-btn disabled" id="micBtn" type="button" title="Click to start/stop listening" aria-label="Start or stop listening">
90
+ <svg viewBox="0 0 24 24" aria-hidden="true" focusable="false"><path d="M12 14c1.66 0 3-1.34 3-3V5c0-1.66-1.34-3-3-3S9 3.34 9 5v6c0 1.66 1.34 3 3 3zm-1-9c0-.55.45-1 1-1s1 .45 1 1v6c0 .55-.45 1-1 1s-1-.45-1-1V5zm6 6c0 2.76-2.24 5-5 5s-5-2.24-5-5H5c0 3.53 2.61 6.43 6 6.92V21h2v-3.08c3.39-.49 6-3.39 6-6.92h-2z"/></svg>
91
+ </button>
92
+ </div>
93
+
94
+ <div class="waveform"><canvas id="waveCanvas"></canvas></div>
95
+
96
+ <!-- Status line: role="status" makes it a polite live region so text changes are announced --><div id="status" role="status">Loading model...</div>
97
+ <div class="progress-bar" id="progressBar"><div class="fill" id="progressFill"></div></div>
98
+
99
+ <div class="transcript-card">
100
+ <div class="transcript-label">Transcript</div>
101
+ <div id="transcript" class="placeholder">Speak into your microphone...</div>
102
+ </div>
103
+
104
+ <!-- Runtime options: each select gets an aria-label because there is no visible label element --><div class="config-row">
105
+ <select id="dtype" aria-label="Model precision (dtype)">
106
+ <option value="q4">q4</option>
107
+ <option value="q4f16">q4f16</option>
108
+ <option value="q8">q8</option>
109
+ <option value="fp16">fp16</option>
110
+ </select>
111
+ <select id="device" aria-label="Inference device">
112
+ <option value="webgpu">WebGPU</option>
113
+ <option value="wasm">WASM</option>
114
+ </select>
115
+ </div>
116
+ <div class="timing" id="timing"></div>
117
+ </div>
118
+
119
+ </body>
120
  </html>