github-actions[bot] committed on
Commit ·
9faedb3
1
Parent(s): a3e732c
Auto-deploy from GitHub: aedc4d235d2b7ddfee906065b70131a3a237be87
Browse files- app/api/routes.py +83 -2
- app/services/streaming.py +103 -0
- index.html +277 -0
- pyproject.toml +5 -2
app/api/routes.py
CHANGED
|
@@ -1,15 +1,21 @@
|
|
| 1 |
-
from fastapi import APIRouter, UploadFile, File, Form, HTTPException
|
| 2 |
from fastapi.responses import FileResponse, JSONResponse
|
| 3 |
import os
|
| 4 |
import uuid
|
|
|
|
|
|
|
| 5 |
import aiofiles
|
| 6 |
from app.core.config import settings
|
| 7 |
from custom_logger import logger_config as logger
|
| 8 |
from app.db import crud
|
| 9 |
from app.services.worker import start_worker, is_worker_running
|
|
|
|
| 10 |
|
| 11 |
router = APIRouter()
|
| 12 |
|
|
|
|
|
|
|
|
|
|
| 13 |
def allowed_file(filename):
    """Return True when *filename* has an extension listed in settings.ALLOWED_EXTENSIONS.

    The extension is the text after the last dot, compared in lower case.
    A missing filename (``None`` or an empty string) or a name without a dot
    is rejected instead of raising.
    """
    if not filename or '.' not in filename:
        return False
    extension = filename.rsplit('.', 1)[1].lower()
    return extension in settings.ALLOWED_EXTENSIONS
|
| 15 |
|
|
@@ -106,5 +112,80 @@ async def health():
|
|
| 106 |
return {
|
| 107 |
'status': 'healthy',
|
| 108 |
'service': 'stt-backend',
|
| 109 |
-
'worker_running': is_worker_running()
|
|
|
|
| 110 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import APIRouter, UploadFile, File, Form, HTTPException, WebSocket, WebSocketDisconnect
|
| 2 |
from fastapi.responses import FileResponse, JSONResponse
|
| 3 |
import os
|
| 4 |
import uuid
|
| 5 |
+
import json
|
| 6 |
+
import asyncio
|
| 7 |
import aiofiles
|
| 8 |
from app.core.config import settings
|
| 9 |
from custom_logger import logger_config as logger
|
| 10 |
from app.db import crud
|
| 11 |
from app.services.worker import start_worker, is_worker_running
|
| 12 |
+
from app.services.streaming import StreamingSTT
|
| 13 |
|
| 14 |
router = APIRouter()
|
| 15 |
|
| 16 |
+
ACTIVE_WS_CONNECTIONS = 0
|
| 17 |
+
MAX_WS_CONNECTIONS = 4
|
| 18 |
+
|
| 19 |
def allowed_file(filename):
    """Return True when *filename* has an extension listed in settings.ALLOWED_EXTENSIONS.

    The extension is the text after the last dot, compared in lower case.
    A missing filename (``None`` or an empty string) or a name without a dot
    is rejected instead of raising.
    """
    if not filename or '.' not in filename:
        return False
    extension = filename.rsplit('.', 1)[1].lower()
    return extension in settings.ALLOWED_EXTENSIONS
|
| 21 |
|
|
|
|
| 112 |
return {
|
| 113 |
'status': 'healthy',
|
| 114 |
'service': 'stt-backend',
|
| 115 |
+
'worker_running': is_worker_running(),
|
| 116 |
+
'ws_connections': ACTIVE_WS_CONNECTIONS,
|
| 117 |
}
|
| 118 |
+
|
| 119 |
+
@router.websocket("/ws/transcribe")
async def websocket_transcribe(websocket: WebSocket):
    """Stream live speech-to-text over a WebSocket.

    Protocol, as implemented below:
      1. The client sends one text frame holding a JSON object; its optional
         ``"model"`` key names the model to load (default ``"base"``).
      2. The server answers ``{"type": "ready", "sample_rate": ...}``.
      3. The client sends binary audio frames, which are handed to
         ``StreamingSTT.add_audio``.
      4. A background task calls ``StreamingSTT.process`` every 1.5 seconds
         and sends each result as a ``{"type": "transcript", ...}`` message.

    At most ``MAX_WS_CONNECTIONS`` sessions run at once; additional clients
    receive a single ``{"type": "error"}`` message and are closed.
    """
    global ACTIVE_WS_CONNECTIONS

    if ACTIVE_WS_CONNECTIONS >= MAX_WS_CONNECTIONS:
        await websocket.accept()
        await websocket.send_json({"type": "error", "message": "Server busy, try again later"})
        await websocket.close()
        return

    # Reserve the slot before the first ``await``: if the counter were bumped
    # only after ``accept()`` returned, several handshakes in flight at the
    # same time could all pass the check above and exceed the limit.
    ACTIVE_WS_CONNECTIONS += 1

    stt = None
    task = None
    connected = True

    try:
        await websocket.accept()
        logger.info(f"WebSocket connected ({ACTIVE_WS_CONNECTIONS}/{MAX_WS_CONNECTIONS})")

        config_text = await websocket.receive_text()
        try:
            config = json.loads(config_text)
        except json.JSONDecodeError:
            config = None
        model_name = config.get("model", "base") if isinstance(config, dict) else None
        if not isinstance(model_name, str) or not model_name:
            await websocket.send_json({"type": "error", "message": "Invalid configuration message"})
            await websocket.close()
            return

        # NOTE(review): model_name is client-controlled and is passed to
        # StreamingSTT unchanged; consider restricting it to an allow-list.
        loop = asyncio.get_running_loop()
        # Building StreamingSTT loads the model, which is slow; run it in the
        # default executor so the event loop keeps serving other requests.
        stt = await loop.run_in_executor(
            None, lambda: StreamingSTT(model_name=model_name, device="cpu")
        )
        await websocket.send_json({"type": "ready", "sample_rate": stt.sample_rate})

        async def bg_process():
            """Poll the recognizer and forward results until cancelled or a send fails."""
            while True:
                await asyncio.sleep(1.5)
                try:
                    results = await loop.run_in_executor(None, stt.process)
                    for r in results:
                        try:
                            await websocket.send_json({"type": "transcript", **r})
                        except Exception:
                            # The socket is no longer writable; stop polling.
                            return
                except asyncio.CancelledError:
                    return
                except Exception as e:
                    logger.error(f"bg_process error: {e}")
                    return

        task = asyncio.create_task(bg_process())

        while True:
            data = await websocket.receive_bytes()
            stt.add_audio(data)

    except WebSocketDisconnect:
        connected = False
        logger.info("WebSocket disconnected")
    except Exception as e:
        connected = False
        logger.error(f"WebSocket error: {e}")
    finally:
        if task:
            task.cancel()
            try:
                await task
            except (asyncio.CancelledError, Exception):
                pass
        if stt:
            # Both handlers above clear ``connected``, so this final flush only
            # runs when the try block was left some other way.
            if connected:
                try:
                    remaining = stt.flush()
                    for r in remaining:
                        await websocket.send_json({"type": "transcript", **r, "is_final": True})
                except Exception:
                    pass
            stt.cleanup()
        ACTIVE_WS_CONNECTIONS -= 1
        logger.info(f"WebSocket closed ({ACTIVE_WS_CONNECTIONS}/{MAX_WS_CONNECTIONS})")
|
app/services/streaming.py
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
class StreamingSTT:
    """Incremental transcription of a growing 16-bit PCM stream with faster-whisper.

    Audio is appended with :meth:`add_audio`. :meth:`process` transcribes one
    ``chunk_duration``-second window per call and advances by
    ``chunk_duration - stride_duration`` seconds, so consecutive windows
    overlap. :meth:`flush` transcribes whatever is left and finalizes the
    session. :meth:`cleanup` drops the model and the buffer.

    Buffer bookkeeping is guarded by a lock so ``add_audio`` may be called
    from one thread while ``process`` runs in another.
    """

    def __init__(self, model_name="base", device="cpu", sample_rate=16000):
        import threading

        self.sample_rate = sample_rate
        self.model_name = model_name
        self.buffer = np.array([], dtype=np.float32)
        # Index into ``buffer`` of the next sample to transcribe.
        self.processed_until = 0
        self.chunk_duration = 5
        self.stride_duration = 2
        self.last_segment_end = 0
        self.is_finalized = False
        # Samples already dropped from the front of ``buffer``; needed to keep
        # reported timestamps relative to the start of the stream.
        self._trimmed_samples = 0
        # Trailing byte of a frame that ended in the middle of a sample.
        self._pending = b""
        self._lock = threading.Lock()

        compute = "int8" if device == "cpu" else "float16"
        from faster_whisper import WhisperModel
        self.model = WhisperModel(model_name, device=device, compute_type=compute)

    def add_audio(self, audio_bytes: bytes):
        """Append int16 PCM bytes to the buffer as float32 samples scaled by 1/32768.

        A frame with an odd number of bytes no longer raises: the dangling
        byte is kept and joined with the start of the next frame.
        """
        with self._lock:
            data = self._pending + bytes(audio_bytes)
            usable = len(data) - (len(data) % 2)
            self._pending = data[usable:]
            if not usable:
                return
            audio_float = (
                np.frombuffer(data[:usable], dtype=np.int16).astype(np.float32) / 32768.0
            )
            self.buffer = np.append(self.buffer, audio_float)

    def _trim_buffer(self):
        """Drop old audio once more than 120 s is buffered, keeping 30 s before the cursor.

        Must be called with ``self._lock`` held.
        """
        max_buffered = self.sample_rate * 120
        if len(self.buffer) > max_buffered:
            trim_to = self.processed_until - self.sample_rate * 30
            if trim_to > 0:
                self.buffer = self.buffer[trim_to:]
                self.processed_until -= trim_to
                self._trimmed_samples += trim_to

    def process(self):
        """Transcribe the next window, if a full one is buffered.

        Returns a list of ``{"start", "end", "text"}`` dicts with times in
        seconds from the start of the stream, or ``[]`` when there is not
        enough audio, the session is finalized, or transcription fails.
        """
        if self.is_finalized:
            return []
        model = self.model
        if model is None:
            return []

        chunk_samples = self.chunk_duration * self.sample_rate
        stride_samples = self.stride_duration * self.sample_rate

        with self._lock:
            if len(self.buffer) - self.processed_until < chunk_samples:
                return []

            chunk = self.buffer[self.processed_until : self.processed_until + chunk_samples]
            # Include trimmed samples so timestamps keep increasing after the
            # buffer has been shortened.
            time_offset = (self._trimmed_samples + self.processed_until) / self.sample_rate
            self.processed_until += chunk_samples - stride_samples
            self._trim_buffer()

        # Transcription runs outside the lock so add_audio is never blocked by it.
        try:
            segments, _ = model.transcribe(
                chunk, beam_size=1, vad_filter=True, language="en"
            )
            results = []
            for seg in segments:
                start = seg.start + time_offset
                end = seg.end + time_offset
                text = seg.text.strip()
                if text:
                    self.last_segment_end = end
                    results.append({
                        "start": round(start, 2),
                        "end": round(end, 2),
                        "text": text,
                    })
            return results
        except Exception as e:
            print(f"[StreamingSTT] error: {e}")
            return []

    def flush(self):
        """Finalize the session and transcribe the audio after the cursor.

        Returns ``[]`` if already finalized, if less than half a second of
        audio remains, or if transcription fails.
        """
        with self._lock:
            if self.is_finalized:
                return []
            self.is_finalized = True
            remaining = self.buffer[self.processed_until:]
            time_offset = (self._trimmed_samples + self.processed_until) / self.sample_rate

        model = self.model
        if model is None or len(remaining) < self.sample_rate * 0.5:
            return []

        try:
            segments, _ = model.transcribe(
                remaining, beam_size=1, vad_filter=True, language="en"
            )
            results = []
            for seg in segments:
                start = seg.start + time_offset
                end = seg.end + time_offset
                text = seg.text.strip()
                if text:
                    results.append({
                        "start": round(start, 2),
                        "end": round(end, 2),
                        "text": text,
                    })
            return results
        except Exception as e:
            print(f"[StreamingSTT] flush error: {e}")
            return []

    def cleanup(self):
        """Release the model and buffered audio, then run a garbage collection pass."""
        self.model = None
        with self._lock:
            self.buffer = np.array([], dtype=np.float32)
            self._pending = b""
        import gc
        gc.collect()
|
index.html
CHANGED
|
@@ -272,6 +272,58 @@
|
|
| 272 |
box-shadow: 6px 6px 0px 0px #15803d;
|
| 273 |
}
|
| 274 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 275 |
/* --- Specific UI Elements --- */
|
| 276 |
.status-modal-content {
|
| 277 |
max-width: 450px;
|
|
@@ -420,6 +472,11 @@
|
|
| 420 |
<div id="healthDot" class="w-4 h-4 rounded-full bg-crayon-green shadow-[0_0_12px_rgba(22,163,74,0.5)]">
|
| 421 |
</div>
|
| 422 |
<span id="healthText" class="text-headline-md text-crayon-green text-2xl">Service Online</span>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 423 |
<div class="text-crayon-orange flex items-center justify-center rotate-[15deg]">
|
| 424 |
<span class="material-symbols-outlined text-4xl">light_mode</span>
|
| 425 |
</div>
|
|
@@ -580,6 +637,52 @@
|
|
| 580 |
</div>
|
| 581 |
</div>
|
| 582 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 583 |
<!-- Application Logic -->
|
| 584 |
<script>
|
| 585 |
// --- Configuration ---
|
|
@@ -808,6 +911,180 @@
|
|
| 808 |
UI.resultModal.classList.add('active');
|
| 809 |
};
|
| 810 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 811 |
// --- Lifecycle ---
|
| 812 |
loadTasks();
|
| 813 |
setInterval(loadTasks, 5000);
|
|
|
|
| 272 |
box-shadow: 6px 6px 0px 0px #15803d;
|
| 273 |
}
|
| 274 |
|
| 275 |
+
.live-btn {
|
| 276 |
+
border: 4px solid #dc2626;
|
| 277 |
+
border-radius: 12px 8px 14px 10px / 8px 14px 10px 12px;
|
| 278 |
+
transition: all 0.2s ease;
|
| 279 |
+
filter: url('#crayon-texture');
|
| 280 |
+
cursor: pointer;
|
| 281 |
+
animation: livePulse 2s ease-in-out infinite;
|
| 282 |
+
}
|
| 283 |
+
|
| 284 |
+
.live-btn:hover {
|
| 285 |
+
transform: scale(1.05) rotate(-1deg);
|
| 286 |
+
box-shadow: 6px 6px 0px 0px rgba(0, 0, 0, 0.1);
|
| 287 |
+
}
|
| 288 |
+
|
| 289 |
+
@keyframes livePulse {
|
| 290 |
+
0%, 100% { opacity: 1; }
|
| 291 |
+
50% { opacity: 0.6; }
|
| 292 |
+
}
|
| 293 |
+
|
| 294 |
+
#liveModal .live-transcript {
|
| 295 |
+
border: 3px dashed #fca5a5 !important;
|
| 296 |
+
padding: 1.5rem !important;
|
| 297 |
+
border-radius: 20px !important;
|
| 298 |
+
font-family: 'Fredoka', sans-serif !important;
|
| 299 |
+
font-size: 1.3rem !important;
|
| 300 |
+
font-weight: 500 !important;
|
| 301 |
+
color: #fff !important;
|
| 302 |
+
white-space: pre-wrap !important;
|
| 303 |
+
word-break: break-all !important;
|
| 304 |
+
line-height: 1.6 !important;
|
| 305 |
+
filter: url('#crayon-texture');
|
| 306 |
+
min-height: 200px;
|
| 307 |
+
max-height: 400px;
|
| 308 |
+
overflow-y: auto;
|
| 309 |
+
}
|
| 310 |
+
|
| 311 |
+
#liveModal select {
|
| 312 |
+
border: 3px solid #7c3aed;
|
| 313 |
+
border-radius: 12px;
|
| 314 |
+
padding: 0.5rem 1rem;
|
| 315 |
+
font-family: 'Fredoka', sans-serif;
|
| 316 |
+
font-size: 1.1rem;
|
| 317 |
+
font-weight: 600;
|
| 318 |
+
background: white;
|
| 319 |
+
filter: url('#crayon-texture');
|
| 320 |
+
outline: none;
|
| 321 |
+
}
|
| 322 |
+
|
| 323 |
+
#liveModal select:focus {
|
| 324 |
+
border-color: #2563eb;
|
| 325 |
+
}
|
| 326 |
+
|
| 327 |
/* --- Specific UI Elements --- */
|
| 328 |
.status-modal-content {
|
| 329 |
max-width: 450px;
|
|
|
|
| 472 |
<div id="healthDot" class="w-4 h-4 rounded-full bg-crayon-green shadow-[0_0_12px_rgba(22,163,74,0.5)]">
|
| 473 |
</div>
|
| 474 |
<span id="healthText" class="text-headline-md text-crayon-green text-2xl">Service Online</span>
|
| 475 |
+
<button id="liveBtn"
|
| 476 |
+
class="hidden items-center gap-2 px-6 py-2 bg-crayon-red text-white text-headline-md font-bold live-btn shadow-md">
|
| 477 |
+
<span class="material-symbols-outlined text-3xl">mic</span>
|
| 478 |
+
LIVE
|
| 479 |
+
</button>
|
| 480 |
<div class="text-crayon-orange flex items-center justify-center rotate-[15deg]">
|
| 481 |
<span class="material-symbols-outlined text-4xl">light_mode</span>
|
| 482 |
</div>
|
|
|
|
| 637 |
</div>
|
| 638 |
</div>
|
| 639 |
|
| 640 |
+
<!-- Live STT Modal -->
|
| 641 |
+
<div id="liveModal" class="modal">
|
| 642 |
+
<div class="modal-content">
|
| 643 |
+
<div class="modal-sketch-bg"></div>
|
| 644 |
+
|
| 645 |
+
<span class="material-symbols-outlined modal-decoration text-6xl text-crayon-red top-4 left-4 -rotate-12">mic</span>
|
| 646 |
+
<span class="material-symbols-outlined modal-decoration text-8xl text-crayon-blue top-12 right-20 opacity-20">cloud</span>
|
| 647 |
+
<span class="material-symbols-outlined modal-decoration text-4xl text-crayon-orange bottom-10 left-10 rotate-45">star</span>
|
| 648 |
+
<span class="material-symbols-outlined modal-decoration text-7xl text-crayon-purple bottom-4 right-4 -rotate-6 opacity-40">cloud</span>
|
| 649 |
+
|
| 650 |
+
<div class="modal-header">
|
| 651 |
+
<div class="flex items-center gap-6">
|
| 652 |
+
<span class="text-headline-lg text-[#1e1b4b]">🔴 Live STT</span>
|
| 653 |
+
</div>
|
| 654 |
+
<button class="close-modal" onclick="closeLiveModal()">×</button>
|
| 655 |
+
</div>
|
| 656 |
+
<div class="modal-body">
|
| 657 |
+
<div class="flex items-center gap-6 mb-6 flex-wrap">
|
| 658 |
+
<label class="text-label-sm text-[#4b5563] font-bold">Model:</label>
|
| 659 |
+
<select id="liveModelSelect">
|
| 660 |
+
<option value="tiny">tiny</option>
|
| 661 |
+
<option value="base" selected>base</option>
|
| 662 |
+
<option value="small">small</option>
|
| 663 |
+
<option value="medium">medium</option>
|
| 664 |
+
<option value="large-v3">large-v3</option>
|
| 665 |
+
</select>
|
| 666 |
+
<button id="startLiveBtn"
|
| 667 |
+
class="flex items-center gap-2 px-8 py-3 bg-crayon-green text-white text-headline-md font-bold crayon-button border-crayon-green shadow-md">
|
| 668 |
+
<span class="material-symbols-outlined text-3xl">play_arrow</span>
|
| 669 |
+
Start
|
| 670 |
+
</button>
|
| 671 |
+
<button id="stopLiveBtn"
|
| 672 |
+
class="hidden items-center gap-2 px-8 py-3 bg-crayon-red text-white text-headline-md font-bold crayon-button border-crayon-red shadow-md">
|
| 673 |
+
<span class="material-symbols-outlined text-3xl">stop</span>
|
| 674 |
+
Stop
|
| 675 |
+
</button>
|
| 676 |
+
<div id="liveStatusBadge" class="hidden items-center gap-2 px-4 py-2 bg-crayon-red text-white text-label-sm font-bold rounded-full animate-pulse">
|
| 677 |
+
<span class="material-symbols-outlined text-2xl">mic</span>
|
| 678 |
+
RECORDING
|
| 679 |
+
</div>
|
| 680 |
+
</div>
|
| 681 |
+
<div id="liveTranscript" class="live-transcript">Waiting to start...</div>
|
| 682 |
+
</div>
|
| 683 |
+
</div>
|
| 684 |
+
</div>
|
| 685 |
+
|
| 686 |
<!-- Application Logic -->
|
| 687 |
<script>
|
| 688 |
// --- Configuration ---
|
|
|
|
| 911 |
UI.resultModal.classList.add('active');
|
| 912 |
};
|
| 913 |
|
| 914 |
+
// --- Live STT (WebSocket) ---
|
| 915 |
+
const liveBtn = document.getElementById('liveBtn');
|
| 916 |
+
const liveModal = document.getElementById('liveModal');
|
| 917 |
+
const liveModelSelect = document.getElementById('liveModelSelect');
|
| 918 |
+
const startLiveBtn = document.getElementById('startLiveBtn');
|
| 919 |
+
const stopLiveBtn = document.getElementById('stopLiveBtn');
|
| 920 |
+
const liveTranscript = document.getElementById('liveTranscript');
|
| 921 |
+
const liveStatusBadge = document.getElementById('liveStatusBadge');
|
| 922 |
+
|
| 923 |
+
const urlParams = new URLSearchParams(window.location.search);
|
| 924 |
+
if (urlParams.get('live') === 'true') {
|
| 925 |
+
liveBtn.classList.remove('hidden');
|
| 926 |
+
}
|
| 927 |
+
|
| 928 |
+
let liveWs = null;
|
| 929 |
+
let liveMicStream = null;
|
| 930 |
+
let liveAudioCtx = null;
|
| 931 |
+
let liveProcessor = null;
|
| 932 |
+
let isLiveStreaming = false;
|
| 933 |
+
let liveResampleBuf = [];
|
| 934 |
+
let livePreBuffer = [];
|
| 935 |
+
let wsReady = false;
|
| 936 |
+
const WS_SAMPLE_RATE = 16000;
|
| 937 |
+
|
| 938 |
+
// Show the live STT modal in its idle state: Start visible, Stop and the
// RECORDING badge hidden, transcript replaced by the usage hint.
function openLiveModal() {
    startLiveBtn.classList.remove('hidden');
    [stopLiveBtn, liveStatusBadge].forEach((el) => el.classList.add('hidden'));
    liveTranscript.innerText = 'Select a model and click Start.';
    liveModal.classList.add('active');
}
|
| 945 |
+
|
| 946 |
+
// Stop any running live session first, then hide the modal.
function closeLiveModal() {
    const { classList } = liveModal;
    stopLiveStreaming();
    classList.remove('active');
}
|
| 950 |
+
|
| 951 |
+
// Convert float samples to signed 16-bit PCM. Each sample is clamped to
// [-1, 1]; negatives scale by 0x8000 and non-negatives by 0x7FFF so both
// ends of the int16 range are reachable without overflow.
function float32ToInt16(float32) {
    return Int16Array.from(float32, (sample) => {
        const clamped = Math.max(-1, Math.min(1, sample));
        if (clamped < 0) {
            return clamped * 0x8000;
        }
        return clamped * 0x7FFF;
    });
}
|
| 959 |
+
|
| 960 |
+
// Linearly resample `floatData` from `nativeRate` to WS_SAMPLE_RATE, queue the
// result in liveResampleBuf, then send the queue as int16 frames of 0.4 s
// each. Frames are discarded when the socket is not open.
function resampleAndSend(floatData, nativeRate) {
    const ratio = WS_SAMPLE_RATE / nativeRate;
    const lastIdx = floatData.length - 1;
    const produced = Math.floor(floatData.length * ratio);

    let outPos = 0;
    while (outPos < produced) {
        const srcPos = outPos / ratio;
        const lo = Math.floor(srcPos);
        const hi = Math.min(lo + 1, lastIdx);
        const weight = srcPos - lo;
        liveResampleBuf.push(floatData[lo] * (1 - weight) + floatData[hi] * weight);
        outPos += 1;
    }

    const frameLen = Math.floor(WS_SAMPLE_RATE * 0.4);
    for (; liveResampleBuf.length >= frameLen;) {
        const frame = float32ToInt16(new Float32Array(liveResampleBuf.splice(0, frameLen)));
        if (!liveWs || liveWs.readyState !== WebSocket.OPEN) {
            continue;
        }
        liveWs.send(frame.buffer);
    }
}
|
| 979 |
+
|
| 980 |
+
// Start a live transcription session: capture the microphone, open the
// /ws/transcribe socket, send the chosen model, and stream resampled audio
// once the server reports it is ready. Audio captured before that point is
// held in livePreBuffer and sent as soon as "ready" arrives.
async function startLiveStreaming() {
    try {
        // Set up mic + AudioContext in the click handler (user gesture)
        const ctx = new (window.AudioContext || window.webkitAudioContext)();
        liveAudioCtx = ctx;
        const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
        // The session may have been stopped (or replaced) while the permission
        // prompt was open; release the mic instead of touching stale state.
        if (liveAudioCtx !== ctx) {
            stream.getTracks().forEach(t => t.stop());
            return;
        }
        liveMicStream = stream;
        const source = ctx.createMediaStreamSource(stream);

        liveProcessor = ctx.createScriptProcessor(4096, 1, 1);
        liveProcessor.onaudioprocess = (e) => {
            if (!isLiveStreaming) return;
            const data = e.inputBuffer.getChannelData(0);
            if (wsReady) {
                resampleAndSend(data, ctx.sampleRate);
            } else {
                livePreBuffer.push(...data);
            }
        };

        source.connect(liveProcessor);
        liveProcessor.connect(ctx.destination);
        isLiveStreaming = true;

        // Connect WebSocket
        const model = liveModelSelect.value;
        const wsUrl = `${window.location.origin.replace(/^http/, 'ws')}/ws/transcribe`;

        // Handlers compare against this local socket so that late events from
        // an earlier session cannot disturb a newer one.
        const ws = new WebSocket(wsUrl);
        liveWs = ws;
        ws.binaryType = 'arraybuffer';

        ws.onopen = () => {
            ws.send(JSON.stringify({ model }));
        };

        ws.onmessage = (e) => {
            if (liveWs !== ws) return;
            try {
                const msg = JSON.parse(e.data);
                if (msg.type === 'ready') {
                    wsReady = true;
                    // Flush pre-buffered audio
                    if (livePreBuffer.length > 0) {
                        resampleAndSend(
                            new Float32Array(livePreBuffer.splice(0)),
                            ctx.sampleRate
                        );
                    }
                    liveTranscript.innerText = '🎤 Listening...';
                } else if (msg.type === 'transcript') {
                    const line = `[${msg.start}s → ${msg.end}s] ${msg.text}`;
                    if (liveTranscript.innerText === '🎤 Listening...') {
                        liveTranscript.innerText = line;
                    } else {
                        liveTranscript.innerText += '\n' + line;
                    }
                    liveTranscript.scrollTop = liveTranscript.scrollHeight;
                } else if (msg.type === 'error') {
                    liveTranscript.innerText = '❌ Error: ' + msg.message;
                }
            } catch (parseErr) { /* ignore non-JSON */ }
        };

        ws.onerror = () => {
            if (liveWs !== ws) return;
            liveTranscript.innerText = '❌ WebSocket error. Check if the server is running.';
            stopLiveStreaming();
        };

        ws.onclose = () => {
            if (liveWs !== ws) return;
            // Server-side close (e.g. "Server busy"): release the mic AND put
            // the controls back to idle. The transcript text is left as is so
            // any error message stays visible.
            stopLiveStreaming();
        };

        liveTranscript.innerText = 'Connecting...';
        startLiveBtn.classList.add('hidden');
        stopLiveBtn.classList.remove('hidden');
        liveStatusBadge.classList.remove('hidden');
        liveModelSelect.disabled = true;

    } catch (err) {
        liveTranscript.innerText = '❌ Error: ' + err.message;
        stopLiveStreaming();
    }
}
|
| 1060 |
+
|
| 1061 |
+
// Release everything on the capture side: clear the streaming flags and the
// queued audio, then tear down the processor node, the microphone tracks and
// the AudioContext if they exist.
function stopMicCapture() {
    isLiveStreaming = false;
    wsReady = false;
    livePreBuffer = [];
    liveResampleBuf = [];

    if (liveProcessor) {
        liveProcessor.disconnect();
        liveProcessor = null;
    }
    if (liveMicStream) {
        for (const track of liveMicStream.getTracks()) {
            track.stop();
        }
        liveMicStream = null;
    }
    if (liveAudioCtx) {
        // close() returns a promise; a rejection here is deliberately ignored.
        liveAudioCtx.close().catch(() => {});
        liveAudioCtx = null;
    }
}
|
| 1070 |
+
|
| 1071 |
+
// End the live session: close the socket if there is one, stop microphone
// capture, and return the controls to idle (Start visible, Stop and badge
// hidden, model selector enabled).
function stopLiveStreaming() {
    const socket = liveWs;
    if (socket) {
        socket.close();
        liveWs = null;
    }
    stopMicCapture();

    startLiveBtn.classList.remove('hidden');
    [stopLiveBtn, liveStatusBadge].forEach((el) => el.classList.add('hidden'));
    liveModelSelect.disabled = false;
}
|
| 1082 |
+
|
| 1083 |
+
liveBtn.onclick = openLiveModal;
|
| 1084 |
+
liveModal.onclick = (e) => { if (e.target === liveModal) closeLiveModal(); };
|
| 1085 |
+
startLiveBtn.onclick = () => { isLiveStreaming = true; startLiveStreaming(); };
|
| 1086 |
+
stopLiveBtn.onclick = stopLiveStreaming;
|
| 1087 |
+
|
| 1088 |
// --- Lifecycle ---
|
| 1089 |
loadTasks();
|
| 1090 |
setInterval(loadTasks, 5000);
|
pyproject.toml
CHANGED
|
@@ -8,12 +8,15 @@ version = "2.0.0"
|
|
| 8 |
description = "FastAPI backend for STT"
|
| 9 |
dependencies = [
|
| 10 |
"fastapi",
|
| 11 |
-
"uvicorn",
|
| 12 |
"aiosqlite",
|
| 13 |
"aiofiles",
|
| 14 |
"python-multipart",
|
|
|
|
|
|
|
| 15 |
"custom_logger @ git+https://github.com/jebin2/custom_logger.git",
|
| 16 |
-
"stt-runner[parakeet] @ git+https://github.com/jebin2/STT.git"
|
|
|
|
| 17 |
]
|
| 18 |
|
| 19 |
[project.scripts]
|
|
|
|
| 8 |
description = "FastAPI backend for STT"
|
| 9 |
dependencies = [
|
| 10 |
"fastapi",
|
| 11 |
+
"uvicorn[standard]",
|
| 12 |
"aiosqlite",
|
| 13 |
"aiofiles",
|
| 14 |
"python-multipart",
|
| 15 |
+
"numpy",
|
| 16 |
+
"websockets",
|
| 17 |
"custom_logger @ git+https://github.com/jebin2/custom_logger.git",
|
| 18 |
+
"stt-runner[parakeet] @ git+https://github.com/jebin2/STT.git",
|
| 19 |
+
"faster-whisper",
|
| 20 |
]
|
| 21 |
|
| 22 |
[project.scripts]
|