MinaNasser committed on
Commit
b3ecc6c
·
1 Parent(s): 387e8b6

3rd_LOcal

Browse files
Client/client.html DELETED
@@ -1,684 +0,0 @@
1
- <!DOCTYPE html>
2
- <html lang="en">
3
- <head>
4
- <meta charset="UTF-8" />
5
- <meta name="viewport" content="width=device-width, initial-scale=1.0" />
6
- <title>TRANSCRIBE — Live Audio</title>
7
- <link href="https://fonts.googleapis.com/css2?family=IBM+Plex+Mono:wght@300;400;600&family=Barlow+Condensed:wght@300;500;700&display=swap" rel="stylesheet" />
8
- <style>
9
- *, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }
10
-
11
- :root {
12
- --bg: #0a0a0a;
13
- --surface: #111111;
14
- --border: #222222;
15
- --accent: #e8ff47;
16
- --accent2: #ff4747;
17
- --text: #d4d4d4;
18
- --muted: #555;
19
- --mono: 'IBM Plex Mono', monospace;
20
- --display: 'Barlow Condensed', sans-serif;
21
- }
22
-
23
- body {
24
- background: var(--bg);
25
- color: var(--text);
26
- font-family: var(--mono);
27
- font-size: 13px;
28
- min-height: 100vh;
29
- display: flex;
30
- flex-direction: column;
31
- }
32
-
33
- /* ── Header ── */
34
- header {
35
- display: flex;
36
- align-items: center;
37
- justify-content: space-between;
38
- padding: 18px 32px;
39
- border-bottom: 1px solid var(--border);
40
- position: sticky;
41
- top: 0;
42
- background: var(--bg);
43
- z-index: 10;
44
- }
45
-
46
- .logo {
47
- font-family: var(--display);
48
- font-weight: 700;
49
- font-size: 26px;
50
- letter-spacing: 6px;
51
- color: #fff;
52
- text-transform: uppercase;
53
- }
54
- .logo span { color: var(--accent); }
55
-
56
- .status-pill {
57
- display: flex;
58
- align-items: center;
59
- gap: 8px;
60
- font-size: 11px;
61
- letter-spacing: 2px;
62
- text-transform: uppercase;
63
- color: var(--muted);
64
- }
65
- .dot {
66
- width: 8px; height: 8px;
67
- border-radius: 50%;
68
- background: var(--muted);
69
- transition: background 0.3s, box-shadow 0.3s;
70
- }
71
- .dot.live { background: var(--accent2); box-shadow: 0 0 8px var(--accent2); }
72
- .dot.ready { background: var(--accent); box-shadow: 0 0 8px var(--accent); }
73
-
74
- /* ── Main layout ── */
75
- main {
76
- display: grid;
77
- grid-template-columns: 340px 1fr;
78
- flex: 1;
79
- overflow: hidden;
80
- }
81
-
82
- /* ── Sidebar ── */
83
- aside {
84
- border-right: 1px solid var(--border);
85
- padding: 28px 24px;
86
- display: flex;
87
- flex-direction: column;
88
- gap: 28px;
89
- overflow-y: auto;
90
- }
91
-
92
- .section-label {
93
- font-size: 10px;
94
- letter-spacing: 3px;
95
- text-transform: uppercase;
96
- color: var(--muted);
97
- margin-bottom: 10px;
98
- }
99
-
100
- /* Input */
101
- .field { display: flex; flex-direction: column; gap: 6px; }
102
- label { font-size: 10px; letter-spacing: 2px; text-transform: uppercase; color: var(--muted); }
103
-
104
- input[type="text"] {
105
- background: var(--surface);
106
- border: 1px solid var(--border);
107
- color: var(--text);
108
- font-family: var(--mono);
109
- font-size: 13px;
110
- padding: 10px 14px;
111
- outline: none;
112
- transition: border-color 0.2s;
113
- width: 100%;
114
- }
115
- input[type="text"]:focus { border-color: var(--accent); }
116
-
117
- /* Buttons */
118
- .btn {
119
- font-family: var(--display);
120
- font-weight: 700;
121
- font-size: 15px;
122
- letter-spacing: 3px;
123
- text-transform: uppercase;
124
- border: none;
125
- cursor: pointer;
126
- padding: 12px 20px;
127
- transition: all 0.15s;
128
- width: 100%;
129
- }
130
- .btn-primary {
131
- background: var(--accent);
132
- color: #000;
133
- }
134
- .btn-primary:hover { background: #fff; }
135
- .btn-primary:active { transform: scale(0.98); }
136
-
137
- .btn-danger {
138
- background: transparent;
139
- color: var(--accent2);
140
- border: 1px solid var(--accent2);
141
- }
142
- .btn-danger:hover { background: var(--accent2); color: #fff; }
143
-
144
- .btn-ghost {
145
- background: transparent;
146
- color: var(--text);
147
- border: 1px solid var(--border);
148
- font-size: 12px;
149
- }
150
- .btn-ghost:hover { border-color: var(--text); }
151
-
152
- .btn:disabled {
153
- opacity: 0.35;
154
- cursor: not-allowed;
155
- }
156
-
157
- /* Upload */
158
- .upload-zone {
159
- border: 1px dashed var(--border);
160
- padding: 24px 16px;
161
- text-align: center;
162
- cursor: pointer;
163
- transition: border-color 0.2s, background 0.2s;
164
- position: relative;
165
- }
166
- .upload-zone:hover { border-color: var(--accent); background: rgba(232,255,71,0.03); }
167
- .upload-zone input { position: absolute; inset: 0; opacity: 0; cursor: pointer; }
168
- .upload-zone .icon { font-size: 28px; margin-bottom: 8px; }
169
- .upload-zone p { color: var(--muted); font-size: 11px; line-height: 1.7; }
170
- .upload-zone .filename { color: var(--accent); margin-top: 6px; font-size: 11px; }
171
-
172
- /* Visualizer */
173
- .viz-wrap {
174
- border: 1px solid var(--border);
175
- height: 64px;
176
- display: flex;
177
- align-items: center;
178
- justify-content: center;
179
- overflow: hidden;
180
- background: var(--surface);
181
- }
182
- canvas#viz { width: 100%; height: 100%; display: block; }
183
-
184
- /* Chunk counter */
185
- .stats-row {
186
- display: flex;
187
- justify-content: space-between;
188
- border: 1px solid var(--border);
189
- padding: 12px 16px;
190
- }
191
- .stat { display: flex; flex-direction: column; gap: 2px; align-items: center; }
192
- .stat-val { font-family: var(--display); font-size: 22px; font-weight: 700; color: var(--accent); }
193
- .stat-key { font-size: 9px; letter-spacing: 2px; text-transform: uppercase; color: var(--muted); }
194
-
195
- /* ── Transcript panel ── */
196
- .transcript-panel {
197
- display: flex;
198
- flex-direction: column;
199
- overflow: hidden;
200
- }
201
-
202
- .panel-toolbar {
203
- display: flex;
204
- align-items: center;
205
- justify-content: space-between;
206
- padding: 16px 28px;
207
- border-bottom: 1px solid var(--border);
208
- gap: 12px;
209
- }
210
- .panel-toolbar h2 {
211
- font-family: var(--display);
212
- font-size: 18px;
213
- font-weight: 500;
214
- letter-spacing: 4px;
215
- text-transform: uppercase;
216
- color: #fff;
217
- }
218
- .toolbar-actions { display: flex; gap: 8px; }
219
- .toolbar-actions .btn { width: auto; padding: 8px 16px; font-size: 11px; }
220
-
221
- #transcript-container {
222
- flex: 1;
223
- overflow-y: auto;
224
- padding: 28px;
225
- display: flex;
226
- flex-direction: column;
227
- gap: 14px;
228
- }
229
-
230
- .empty-state {
231
- flex: 1;
232
- display: flex;
233
- flex-direction: column;
234
- align-items: center;
235
- justify-content: center;
236
- gap: 12px;
237
- color: var(--muted);
238
- }
239
- .empty-state .big-icon { font-size: 48px; opacity: 0.3; }
240
- .empty-state p { font-size: 11px; letter-spacing: 2px; text-transform: uppercase; }
241
-
242
- /* Transcript entry */
243
- .entry {
244
- display: grid;
245
- grid-template-columns: 52px 1fr auto;
246
- gap: 16px;
247
- align-items: start;
248
- padding: 16px;
249
- border: 1px solid var(--border);
250
- background: var(--surface);
251
- animation: slideIn 0.25s ease;
252
- transition: border-color 0.2s;
253
- }
254
- .entry:hover { border-color: #333; }
255
-
256
- @keyframes slideIn {
257
- from { opacity: 0; transform: translateY(6px); }
258
- to { opacity: 1; transform: translateY(0); }
259
- }
260
-
261
- .entry-chunk {
262
- font-family: var(--display);
263
- font-size: 28px;
264
- font-weight: 700;
265
- color: var(--border);
266
- line-height: 1;
267
- text-align: right;
268
- padding-top: 2px;
269
- }
270
-
271
- .entry-body { display: flex; flex-direction: column; gap: 4px; }
272
- .entry-text { color: #e8e8e8; line-height: 1.65; font-size: 13px; }
273
- .entry-meta { display: flex; gap: 10px; color: var(--muted); font-size: 10px; letter-spacing: 1px; }
274
-
275
- .lang-badge {
276
- background: #1a1a1a;
277
- border: 1px solid var(--border);
278
- padding: 2px 8px;
279
- font-size: 10px;
280
- letter-spacing: 1px;
281
- text-transform: uppercase;
282
- color: var(--accent);
283
- }
284
-
285
- .entry-time {
286
- font-size: 10px;
287
- color: var(--muted);
288
- white-space: nowrap;
289
- letter-spacing: 1px;
290
- padding-top: 3px;
291
- }
292
-
293
- /* Error toast */
294
- #toast {
295
- position: fixed;
296
- bottom: 28px;
297
- right: 28px;
298
- background: var(--accent2);
299
- color: #fff;
300
- padding: 12px 20px;
301
- font-size: 12px;
302
- letter-spacing: 1px;
303
- display: none;
304
- z-index: 100;
305
- max-width: 340px;
306
- }
307
-
308
- /* Scrollbar */
309
- ::-webkit-scrollbar { width: 4px; }
310
- ::-webkit-scrollbar-track { background: transparent; }
311
- ::-webkit-scrollbar-thumb { background: var(--border); }
312
-
313
- /* REC pulse */
314
- @keyframes pulse { 0%,100%{opacity:1} 50%{opacity:0.3} }
315
- .pulsing { animation: pulse 1s infinite; }
316
-
317
- @media (max-width: 768px) {
318
- main { grid-template-columns: 1fr; }
319
- aside { border-right: none; border-bottom: 1px solid var(--border); }
320
- }
321
- </style>
322
- </head>
323
- <body>
324
-
325
- <header>
326
- <div class="logo">Trans<span>·</span>cribe</div>
327
- <div class="status-pill">
328
- <div class="dot" id="status-dot"></div>
329
- <span id="status-text">IDLE</span>
330
- </div>
331
- </header>
332
-
333
- <main>
334
- <!-- ── Sidebar ── -->
335
- <aside>
336
-
337
- <!-- Session -->
338
- <div>
339
- <div class="section-label">Session</div>
340
- <div class="field" style="margin-bottom:10px">
341
- <label for="session-input">Session ID</label>
342
- <input type="text" id="session-input" placeholder="auto-generated" />
343
- </div>
344
- </div>
345
-
346
- <!-- Live Recording -->
347
- <div>
348
- <div class="section-label">Live Recording</div>
349
- <div class="viz-wrap" style="margin-bottom:10px">
350
- <canvas id="viz"></canvas>
351
- </div>
352
- <div class="stats-row" style="margin-bottom:12px">
353
- <div class="stat">
354
- <div class="stat-val" id="chunk-count">0</div>
355
- <div class="stat-key">Chunks</div>
356
- </div>
357
- <div class="stat">
358
- <div class="stat-val" id="word-count">0</div>
359
- <div class="stat-key">Words</div>
360
- </div>
361
- <div class="stat">
362
- <div class="stat-val" id="duration">0s</div>
363
- <div class="stat-key">Duration</div>
364
- </div>
365
- </div>
366
- <button class="btn btn-primary" id="btn-record">▶ START RECORDING</button>
367
- <div style="height:8px"></div>
368
- <button class="btn btn-danger" id="btn-stop" disabled>■ STOP</button>
369
- </div>
370
-
371
- <!-- File Upload -->
372
- <div>
373
- <div class="section-label">File Upload</div>
374
- <div class="upload-zone" id="upload-zone">
375
- <input type="file" id="file-input" accept="audio/*,video/*" />
376
- <div class="icon">⬆</div>
377
- <p>Drop audio file here<br/>or click to browse</p>
378
- <div class="filename" id="file-name"></div>
379
- </div>
380
- <div style="height:10px"></div>
381
- <button class="btn btn-ghost" id="btn-upload" disabled>TRANSCRIBE FILE</button>
382
- </div>
383
-
384
- <!-- Clear -->
385
- <div style="margin-top:auto">
386
- <button class="btn btn-ghost" id="btn-clear">CLEAR TRANSCRIPT</button>
387
- </div>
388
-
389
- </aside>
390
-
391
- <!-- ── Transcript Panel ── -->
392
- <section class="transcript-panel">
393
- <div class="panel-toolbar">
394
- <h2>Transcript</h2>
395
- <div class="toolbar-actions">
396
- <button class="btn btn-ghost" id="btn-copy">COPY ALL</button>
397
- <button class="btn btn-ghost" id="btn-export">EXPORT .TXT</button>
398
- </div>
399
- </div>
400
- <div id="transcript-container">
401
- <div class="empty-state" id="empty-state">
402
- <div class="big-icon">🎙</div>
403
- <p>Start recording or upload a file</p>
404
- </div>
405
- </div>
406
- </section>
407
- </main>
408
-
409
- <div id="toast"></div>
410
-
411
- <script>
412
- // ── Config ──────────────────────────────────────────────────────────────
413
- // Point these at your FastAPI server — change the port if needed (default uvicorn: 8000)
414
- const API_HOST = `${window.location.hostname}:6060`;
415
- const BASE_URL = `http://${API_HOST}`;
416
- const WS_BASE = `ws://${API_HOST}`;
417
- const APP_PATH = '/IntegraAI/voiceapi';
418
-
419
- // ── State ────────────────────────────────────────────────────────────────
420
- let ws = null;
421
- let mediaRecorder = null;
422
- let audioCtx = null, analyser = null, animId = null;
423
- let chunkNumber = 0;
424
- let wordCount = 0;
425
- let recSeconds = 0;
426
- let timerHandle = null;
427
- let entries = [];
428
-
429
- // ── DOM refs ─────────────────────────────────────────────────────────────
430
- const sessionInput = document.getElementById('session-input');
431
- const btnRecord = document.getElementById('btn-record');
432
- const btnStop = document.getElementById('btn-stop');
433
- const btnUpload = document.getElementById('btn-upload');
434
- const btnClear = document.getElementById('btn-clear');
435
- const btnCopy = document.getElementById('btn-copy');
436
- const btnExport = document.getElementById('btn-export');
437
- const fileInput = document.getElementById('file-input');
438
- const fileName = document.getElementById('file-name');
439
- const container = document.getElementById('transcript-container');
440
- const emptyState = document.getElementById('empty-state');
441
- const statusDot = document.getElementById('status-dot');
442
- const statusText = document.getElementById('status-text');
443
- const chunkEl = document.getElementById('chunk-count');
444
- const wordEl = document.getElementById('word-count');
445
- const durationEl = document.getElementById('duration');
446
- const canvas = document.getElementById('viz');
447
- const ctx2d = canvas.getContext('2d');
448
- const toast = document.getElementById('toast');
449
-
450
- // ── Helpers ───────────────────────────────────────────────────────────────
451
- function genSessionId() {
452
- return 'ses_' + Math.random().toString(36).slice(2, 10);
453
- }
454
-
455
- function setStatus(state) {
456
- statusDot.className = 'dot';
457
- if (state === 'live') { statusDot.classList.add('live'); statusText.textContent = 'RECORDING'; statusDot.classList.add('pulsing'); }
458
- if (state === 'ready') { statusDot.classList.add('ready'); statusText.textContent = 'CONNECTED'; }
459
- if (state === 'idle') { statusText.textContent = 'IDLE'; }
460
- if (state === 'busy') { statusText.textContent = 'PROCESSING'; }
461
- }
462
-
463
- function showToast(msg, duration = 4000) {
464
- toast.textContent = msg;
465
- toast.style.display = 'block';
466
- clearTimeout(toast._t);
467
- toast._t = setTimeout(() => toast.style.display = 'none', duration);
468
- }
469
-
470
- function addEntry(chunk, text, language, time) {
471
- emptyState.style.display = 'none';
472
- entries.push({ chunk, text, language, time });
473
-
474
- const words = text.trim().split(/\s+/).length;
475
- wordCount += words;
476
- wordEl.textContent = wordCount;
477
-
478
- const el = document.createElement('div');
479
- el.className = 'entry';
480
- el.innerHTML = `
481
- <div class="entry-chunk">${String(chunk).padStart(2,'0')}</div>
482
- <div class="entry-body">
483
- <div class="entry-text">${escapeHtml(text)}</div>
484
- <div class="entry-meta">
485
- <span class="lang-badge">${language || '??'}</span>
486
- <span>${words} word${words !== 1 ? 's' : ''}</span>
487
- </div>
488
- </div>
489
- <div class="entry-time">${time}</div>
490
- `;
491
- container.appendChild(el);
492
- container.scrollTop = container.scrollHeight;
493
- }
494
-
495
- function escapeHtml(s) {
496
- return s.replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;');
497
- }
498
-
499
- function nowTime() {
500
- return new Date().toLocaleTimeString('en-US', { hour12: false });
501
- }
502
-
503
- // ── Visualizer ────────────────────────────────────────────────────────────
504
- function startViz(stream) {
505
- if (!audioCtx) audioCtx = new (window.AudioContext || window.webkitAudioContext)();
506
- analyser = audioCtx.createAnalyser();
507
- analyser.fftSize = 128;
508
- audioCtx.createMediaStreamSource(stream).connect(analyser);
509
- const buf = new Uint8Array(analyser.frequencyBinCount);
510
-
511
- function draw() {
512
- animId = requestAnimationFrame(draw);
513
- canvas.width = canvas.offsetWidth;
514
- canvas.height = canvas.offsetHeight;
515
- analyser.getByteFrequencyData(buf);
516
- ctx2d.clearRect(0, 0, canvas.width, canvas.height);
517
- const bw = canvas.width / buf.length;
518
- buf.forEach((v, i) => {
519
- const h = (v / 255) * canvas.height;
520
- ctx2d.fillStyle = `rgba(232,255,71,${0.3 + (v/255)*0.7})`;
521
- ctx2d.fillRect(i * bw, canvas.height - h, bw - 1, h);
522
- });
523
- }
524
- draw();
525
- }
526
-
527
- function stopViz() {
528
- if (animId) cancelAnimationFrame(animId);
529
- ctx2d.clearRect(0, 0, canvas.width, canvas.height);
530
- }
531
-
532
- // ── WebSocket recording ────────────────────────────────────────────────────
533
- async function startRecording() {
534
- const sessionId = sessionInput.value.trim() || genSessionId();
535
- sessionInput.value = sessionId;
536
-
537
- try {
538
- const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
539
-
540
- // WebSocket
541
- ws = new WebSocket(`${WS_BASE}${APP_PATH}/ws/${sessionId}`);
542
- ws.binaryType = 'arraybuffer';
543
-
544
- ws.onopen = () => {
545
- setStatus('live');
546
- btnRecord.disabled = true;
547
- btnStop.disabled = false;
548
- chunkNumber = 0; recSeconds = 0;
549
-
550
- const CHUNK_MS = 10000; // 10 seconds per chunk
551
-
552
- function startChunk() {
553
- if (ws.readyState !== WebSocket.OPEN) return;
554
-
555
- const chunks = [];
556
- mediaRecorder = new MediaRecorder(stream, { mimeType: 'audio/webm' });
557
-
558
- mediaRecorder.ondataavailable = (e) => {
559
- if (e.data.size > 0) chunks.push(e.data);
560
- };
561
-
562
- mediaRecorder.onstop = () => {
563
- if (chunks.length && ws.readyState === WebSocket.OPEN) {
564
- // Complete WebM per cycle: own EBML header, timestamps reset to 0
565
- const blob = new Blob(chunks, { type: 'audio/webm' });
566
- ws.send(blob);
567
- chunkNumber++;
568
- chunkEl.textContent = chunkNumber;
569
- }
570
- if (ws.readyState === WebSocket.OPEN) startChunk();
571
- };
572
-
573
- mediaRecorder.start();
574
- setTimeout(() => {
575
- if (mediaRecorder.state === 'recording') mediaRecorder.stop();
576
- }, CHUNK_MS);
577
- }
578
-
579
- startChunk();
580
-
581
- timerHandle = setInterval(() => {
582
- recSeconds++;
583
- durationEl.textContent = recSeconds < 60
584
- ? recSeconds + 's'
585
- : Math.floor(recSeconds/60) + 'm' + (recSeconds%60) + 's';
586
- }, 1000);
587
-
588
- startViz(stream);
589
- };
590
-
591
- ws.onmessage = (e) => {
592
- const data = JSON.parse(e.data);
593
- if (data.error) { showToast('Server: ' + data.error); return; }
594
- addEntry(data.chunk_number, data.text, data.language, nowTime());
595
- };
596
-
597
- ws.onerror = () => showToast('WebSocket error — check server.');
598
- ws.onclose = () => { setStatus('idle'); };
599
-
600
- } catch (err) {
601
- showToast('Microphone access denied or unavailable.');
602
- }
603
- }
604
-
605
- function stopRecording() {
606
- if (ws) ws.close(); // close WS first so onstop doesn't start a new chunk
607
- if (mediaRecorder && mediaRecorder.state !== 'inactive') mediaRecorder.stop();
608
- clearInterval(timerHandle);
609
- stopViz();
610
- setStatus('idle');
611
- btnRecord.disabled = false;
612
- btnStop.disabled = true;
613
- }
614
-
615
- // ── File upload ────────────────────────────────────────────────────────────
616
- fileInput.addEventListener('change', () => {
617
- if (fileInput.files.length > 0) {
618
- fileName.textContent = fileInput.files[0].name;
619
- btnUpload.disabled = false;
620
- }
621
- });
622
-
623
- btnUpload.addEventListener('click', async () => {
624
- if (!fileInput.files.length) return;
625
- const file = fileInput.files[0];
626
- const sessionId = sessionInput.value.trim() || genSessionId();
627
- sessionInput.value = sessionId;
628
-
629
- const form = new FormData();
630
- form.append('file', file);
631
- form.append('session_id', sessionId);
632
- form.append('chunk_number', '0');
633
-
634
- btnUpload.disabled = true;
635
- btnUpload.textContent = 'UPLOADING…';
636
- setStatus('busy');
637
-
638
- try {
639
- const resp = await fetch(`${BASE_URL}${APP_PATH}/transcribe`, { method: 'POST', body: form });
640
- if (!resp.ok) throw new Error(`HTTP ${resp.status}`);
641
- const data = await resp.json();
642
- addEntry(data.chunk_number, data.text, data.language, nowTime());
643
- setStatus('idle');
644
- } catch (err) {
645
- showToast('Upload failed: ' + err.message);
646
- setStatus('idle');
647
- } finally {
648
- btnUpload.textContent = 'TRANSCRIBE FILE';
649
- btnUpload.disabled = false;
650
- }
651
- });
652
-
653
- // ── Controls ──────────────────────────────────────────────────────────────
654
- btnRecord.addEventListener('click', startRecording);
655
- btnStop.addEventListener('click', stopRecording);
656
-
657
- btnClear.addEventListener('click', () => {
658
- entries = []; wordCount = 0; chunkNumber = 0; recSeconds = 0;
659
- wordEl.textContent = '0'; chunkEl.textContent = '0'; durationEl.textContent = '0s';
660
- container.innerHTML = '';
661
- container.appendChild(emptyState);
662
- emptyState.style.display = '';
663
- });
664
-
665
- btnCopy.addEventListener('click', () => {
666
- const text = entries.map(e => `[${e.time}][${e.language}] ${e.text}`).join('\n');
667
- navigator.clipboard.writeText(text).then(() => showToast('Copied to clipboard!', 2000));
668
- });
669
-
670
- btnExport.addEventListener('click', () => {
671
- const text = entries.map(e => `[Chunk ${e.chunk}][${e.time}][${e.language}]\n${e.text}\n`).join('\n');
672
- const blob = new Blob([text], { type: 'text/plain' });
673
- const a = document.createElement('a');
674
- a.href = URL.createObjectURL(blob);
675
- a.download = `transcript_${sessionInput.value || 'session'}.txt`;
676
- a.click();
677
- });
678
-
679
- // ── Auto-generate session on load ─────────────────────────────────────────
680
- sessionInput.value = genSessionId();
681
- </script>
682
-
683
- </body>
684
- </html>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
CustomSTT_COLAB.ipynb DELETED
@@ -1,259 +0,0 @@
1
- {
2
- "nbformat": 4,
3
- "nbformat_minor": 0,
4
- "metadata": {
5
- "colab": {
6
- "provenance": [],
7
- "gpuType": "T4"
8
- },
9
- "kernelspec": {
10
- "name": "python3",
11
- "display_name": "Python 3"
12
- },
13
- "language_info": {
14
- "name": "python"
15
- },
16
- "accelerator": "GPU"
17
- },
18
- "cells": [
19
- {
20
- "cell_type": "markdown",
21
- "source": [
22
- "## Run First two cells only"
23
- ],
24
- "metadata": {
25
- "id": "jRmIrf0il4AC"
26
- }
27
- },
28
- {
29
- "cell_type": "code",
30
- "source": [
31
- "# ── 1. Install dependencies ───────────────────────────────────────────────────\n",
32
- "!pip install -q fastapi uvicorn python-multipart pyngrok faster-whisper\n",
33
- "\n",
34
- "# ── 2. Imports ────────────────────────────────────────────────────────────────\n",
35
- "import os\n",
36
- "import asyncio\n",
37
- "import tempfile\n",
38
- "import threading\n",
39
- "import time\n",
40
- "import functools\n",
41
- "import logging\n",
42
- "\n",
43
- "from fastapi import FastAPI, File, UploadFile, HTTPException\n",
44
- "from fastapi.responses import JSONResponse\n",
45
- "import uvicorn\n",
46
- "from faster_whisper import WhisperModel\n",
47
- "from faster_whisper.audio import decode_audio\n",
48
- "from pyngrok import ngrok\n",
49
- "\n",
50
- "# ── 3. Logging setup ─────────────────────────────────────────────────────────\n",
51
- "LOG_FILE = \"/content/stt_server.log\"\n",
52
- "\n",
53
- "for handler in logging.root.handlers[:]:\n",
54
- " logging.root.removeHandler(handler)\n",
55
- "\n",
56
- "logging.basicConfig(\n",
57
- " level=logging.INFO,\n",
58
- " format=\"%(asctime)s [%(levelname)s] %(message)s\",\n",
59
- " handlers=[\n",
60
- " logging.FileHandler(LOG_FILE),\n",
61
- " logging.StreamHandler()\n",
62
- " ],\n",
63
- " force=True # ← overrides uvicorn's logger hijack\n",
64
- ")\n",
65
- "logger = logging.getLogger(__name__)\n",
66
- "\n",
67
- "# ── 4. Ngrok auth ─────────────────────────────────────────────────────────────\n",
68
- "ngrok.set_auth_token(\"3491amoA7gGNvQYJDLUYwNdlTqu_83s75o2cESPNrWdfhTDBN\")\n",
69
- "\n",
70
- "# ── 5. Load both models ───────────────────────────────────────────────────────\n",
71
- "logger.info(\"Loading Arabic model...\")\n",
72
- "arabic_model = WhisperModel(\"MinaNasser/Whisper-Small-MN-int8\", compute_type=\"int8\", device=\"cuda\")\n",
73
- "\n",
74
- "logger.info(\"Loading English model...\")\n",
75
- "english_model = WhisperModel(\"Systran/faster-whisper-large-v3\", compute_type=\"int8\", device=\"cuda\")\n",
76
- "\n",
77
- "logger.info(\"✅ Both models loaded.\")\n",
78
- "\n",
79
- "# ── 6. FastAPI app ────────────────────────────────────────────────────────────\n",
80
- "app = FastAPI(title=\"Custom Arabic/English STT\")\n",
81
- "\n",
82
- "@app.get(\"/health\")\n",
83
- "async def health():\n",
84
- " return {\"status\": \"ok\", \"models\": [\"arabic\", \"english\"]}\n",
85
- "\n",
86
- "@app.post(\"/transcribe\")\n",
87
- "async def transcribe(file: UploadFile = File(...)):\n",
88
- " with tempfile.NamedTemporaryFile(delete=False, suffix=\".wav\") as tmp:\n",
89
- " tmp.write(await file.read())\n",
90
- " tmp_path = tmp.name\n",
91
- "\n",
92
- " try:\n",
93
- " loop = asyncio.get_event_loop()\n",
94
- "\n",
95
- " # ── Step 1: detect language\n",
96
- " def detect():\n",
97
- " waveform = decode_audio(tmp_path)\n",
98
- " language, probability, _ = english_model.detect_language(waveform)\n",
99
- " return language, probability\n",
100
- "\n",
101
- " language, probability = await loop.run_in_executor(None, detect)\n",
102
- " logger.info(f\"Detected language: {language} ({probability:.2f})\")\n",
103
- "\n",
104
- " # ── Step 2: route to correct model\n",
105
- " if language == \"ar\":\n",
106
- " logger.info(f\"used OUR MODEL\")\n",
107
- " fn = functools.partial(arabic_model.transcribe, tmp_path, language=\"ar\",vad_filter=True)\n",
108
- " else:\n",
109
- " logger.info(f\"used ENG MODEL\")\n",
110
- " fn = functools.partial(english_model.transcribe, tmp_path, language=language,vad_filter=True)\n",
111
- "\n",
112
- " segments, info = await loop.run_in_executor(None, fn)\n",
113
- " transcript = \" \".join(seg.text for seg in segments)\n",
114
- "\n",
115
- " logger.info(f\"Transcript [{info.language}]: {transcript[:80]}...\")\n",
116
- "\n",
117
- " return JSONResponse(content={\n",
118
- " \"text\": transcript,\n",
119
- " \"language\": info.language,\n",
120
- " \"language_probability\": info.language_probability,\n",
121
- " })\n",
122
- " except Exception as e:\n",
123
- " logger.error(f\"Transcription failed: {e}\")\n",
124
- " raise HTTPException(status_code=500, detail=str(e))\n",
125
- " finally:\n",
126
- " if os.path.exists(tmp_path):\n",
127
- " os.unlink(tmp_path)\n",
128
- "\n",
129
- "# ── 7. Cleanup & start server ─────────────────────────────────────────────────\n",
130
- "try:\n",
131
- " ngrok.kill()\n",
132
- "except Exception:\n",
133
- " pass\n",
134
- "\n",
135
- "!fuser -k 8000/tcp || true\n",
136
- "time.sleep(1)\n",
137
- "\n",
138
- "def run_server():\n",
139
- " uvicorn.run(app, host=\"0.0.0.0\", port=8000, log_level=\"info\", log_config=None)\n",
140
- "\n",
141
- "thread = threading.Thread(target=run_server, daemon=True)\n",
142
- "thread.start()\n",
143
- "time.sleep(5)\n",
144
- "\n",
145
- "# ── 8. Expose via ngrok ───────────────────────────────────────────────────────\n",
146
- "public_url = ngrok.connect(8000).public_url\n",
147
- "logger.info(f\"Public URL: {public_url}\")\n",
148
- "print(f\"\\n Public URL : {public_url}\")\n",
149
- "print(f\" Health check: {public_url}/health\")\n",
150
- "print(f\" Transcribe : {public_url}/transcribe\")\n",
151
- "print(f\"\\n Set CUSTOM_STT_URL={public_url}\")"
152
- ],
153
- "metadata": {
154
- "id": "CETOGHw9ZIzs"
155
- },
156
- "execution_count": null,
157
- "outputs": []
158
- },
159
- {
160
- "cell_type": "code",
161
- "source": [
162
- "!tail -f /content/stt_server.log"
163
- ],
164
- "metadata": {
165
- "id": "INTTVax9ZLYO"
166
- },
167
- "execution_count": null,
168
- "outputs": []
169
- },
170
- {
171
- "cell_type": "code",
172
- "source": [
173
- "# ── 1. Install dependencies ───────────────────────────────────────────────────\n",
174
- "!pip install -q fastapi uvicorn python-multipart pyngrok faster-whisper\n",
175
- "\n",
176
- "# ── 2. Imports ────────────────────────────────────────────────────────────────\n",
177
- "import os\n",
178
- "import asyncio\n",
179
- "import tempfile\n",
180
- "import threading\n",
181
- "import time\n",
182
- "import functools\n",
183
- "\n",
184
- "from fastapi import FastAPI, File, UploadFile, HTTPException\n",
185
- "from fastapi.responses import JSONResponse\n",
186
- "import uvicorn\n",
187
- "from faster_whisper import WhisperModel\n",
188
- "from pyngrok import ngrok\n",
189
- "\n",
190
- "# ── 3. Ngrok auth ─────────────────────────────────────────────────────────────\n",
191
- "ngrok.set_auth_token(\"3491amoA7gGNvQYJDLUYwNdlTqu_83s75o2cESPNrWdfhTDBN\")\n",
192
- "\n",
193
- "# ── 4. Load model ─────────────────────────────────────────────────────────────\n",
194
- "MODEL_NAME = \"Systran/faster-whisper-large-v3\"\n",
195
- "#MODEL_NAME = \"MinaNasser/Whisper-Base-MN-EG-int8\"\n",
196
- "#MODEL_NAME = \"MinaNasser/Whisper-Small-MN-int8\"\n",
197
- "model = WhisperModel(MODEL_NAME, compute_type=\"int8\", device=\"cuda\")\n",
198
- "\n",
199
- "# ── 5. FastAPI app ────────────────────────────────────────────────────────────\n",
200
- "app = FastAPI(title=\"Custom Arabic STT\")\n",
201
- "\n",
202
- "@app.get(\"/health\")\n",
203
- "async def health():\n",
204
- " return {\"status\": \"ok\"}\n",
205
- "\n",
206
- "@app.post(\"/transcribe\")\n",
207
- "async def transcribe(file: UploadFile = File(...)):\n",
208
- " with tempfile.NamedTemporaryFile(delete=False, suffix=\".wav\") as tmp:\n",
209
- " tmp.write(await file.read())\n",
210
- " tmp_path = tmp.name\n",
211
- "\n",
212
- " try:\n",
213
- " loop = asyncio.get_event_loop()\n",
214
- " # functools.partial lets us pass keyword args through run_in_executor\n",
215
- " fn = functools.partial(model.transcribe, tmp_path) #, language=\"ar\"\n",
216
- " segments, info = await loop.run_in_executor(None, fn)\n",
217
- " transcript = \" \".join(seg.text for seg in segments)\n",
218
- " return JSONResponse(content={\n",
219
- " \"text\": transcript,\n",
220
- " \"language\": info.language,\n",
221
- " \"language_probability\": info.language_probability,\n",
222
- " })\n",
223
- " except Exception as e:\n",
224
- " raise HTTPException(status_code=500, detail=str(e))\n",
225
- " finally:\n",
226
- " if os.path.exists(tmp_path):\n",
227
- " os.unlink(tmp_path)\n",
228
- "\n",
229
- "# ── 6. Cleanup & start server ─────────────────────────────────────────────────\n",
230
- "try:\n",
231
- " ngrok.kill()\n",
232
- "except Exception:\n",
233
- " pass\n",
234
- "\n",
235
- "!fuser -k 8000/tcp || true\n",
236
- "time.sleep(1)\n",
237
- "\n",
238
- "def run_server():\n",
239
- " uvicorn.run(app, host=\"0.0.0.0\", port=8000, log_level=\"info\")\n",
240
- "\n",
241
- "thread = threading.Thread(target=run_server, daemon=True)\n",
242
- "thread.start()\n",
243
- "time.sleep(5) # wait for server to be ready\n",
244
- "\n",
245
- "# ── 7. Expose via ngrok ───────────────────────────────────────────────────────\n",
246
- "public_url = ngrok.connect(8000).public_url\n",
247
- "print(f\"\\n Public URL : {public_url}\")\n",
248
- "print(f\" Health check: {public_url}/health\")\n",
249
- "print(f\" Transcribe : {public_url}/transcribe\")\n",
250
- "print(f\"\\n Set CUSTOM_STT_URL={public_url} (no trailing slash, no /transcribe)\")"
251
- ],
252
- "metadata": {
253
- "id": "QupX525ER_Kw"
254
- },
255
- "execution_count": null,
256
- "outputs": []
257
- }
258
- ]
259
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/database.py CHANGED
@@ -18,7 +18,9 @@ engine = create_async_engine(
18
  DB_URL,
19
  echo=True,
20
  future=True,
21
- connect_args={"ssl": "require"}
 
 
22
  )
23
  AsyncSessionLocal = sessionmaker(
24
  bind=engine, class_=AsyncSession, expire_on_commit=False
 
18
  DB_URL,
19
  echo=True,
20
  future=True,
21
+ connect_args={"ssl": "require",
22
+ "statement_cache_size": 0,
23
+ }
24
  )
25
  AsyncSessionLocal = sessionmaker(
26
  bind=engine, class_=AsyncSession, expire_on_commit=False
routes/transcripe.py CHANGED
@@ -111,10 +111,10 @@ async def load_models():
111
  print("Loading Whisper models...")
112
  if get_settings().INFERENCE_TYPE == "local":
113
  if get_settings().LOCAL_INFERENCE_MODEL_SIZE == "small":
114
- models["small_arabic"] = WhisperModel("Whisper-Small-MN-int8", device="cpu", compute_type="int8")
115
  models["small_english"] = WhisperModel("small", device="cpu", compute_type="int8")
116
  elif get_settings().LOCAL_INFERENCE_MODEL_SIZE == "base":
117
- models["base_arabic"] = WhisperModel("Whisper-Base-MN-EG-int8", device="cpu", compute_type="int8")
118
  models["base_english"] = WhisperModel("base", device="cpu", compute_type="int8")
119
 
120
  print("Models loaded successfully ")
 
111
  print("Loading Whisper models...")
112
  if get_settings().INFERENCE_TYPE == "local":
113
  if get_settings().LOCAL_INFERENCE_MODEL_SIZE == "small":
114
+ models["small_arabic"] = WhisperModel("MinaNasser/Whisper-Small-MN-int8", device="cpu", compute_type="int8")
115
  models["small_english"] = WhisperModel("small", device="cpu", compute_type="int8")
116
  elif get_settings().LOCAL_INFERENCE_MODEL_SIZE == "base":
117
+ models["base_arabic"] = WhisperModel("MinaNasser/Whisper-Base-MN-EG-int8", device="cpu", compute_type="int8")
118
  models["base_english"] = WhisperModel("base", device="cpu", compute_type="int8")
119
 
120
  print("Models loaded successfully ")