Joffrey Thomas committed on
Commit ·
2686978
1
Parent(s): 5bfc9b4
Inactivity disconnect
Browse files
app.py
CHANGED
|
@@ -23,7 +23,9 @@ SAMPLE_RATE = 16_000
|
|
| 23 |
WARMUP_DURATION = 2.0 # seconds of silence for warmup
|
| 24 |
WPM_WINDOW = 10 # seconds for running mean calculation
|
| 25 |
CALIBRATION_PERIOD = 5 # seconds before showing WPM
|
| 26 |
-
SESSION_TIMEOUT =
|
|
|
|
|
|
|
| 27 |
|
| 28 |
# Global config (shared across users)
|
| 29 |
ws_url = ""
|
|
@@ -40,6 +42,8 @@ class UserSession:
|
|
| 40 |
self.word_timestamps = []
|
| 41 |
self.current_wpm = "Calibrating..."
|
| 42 |
self.session_start_time = None
|
|
|
|
|
|
|
| 43 |
|
| 44 |
|
| 45 |
# Load CSS from external file
|
|
@@ -57,7 +61,8 @@ def get_header_html() -> str:
|
|
| 57 |
|
| 58 |
return f"""
|
| 59 |
<div class="header-card">
|
| 60 |
-
<h1 class="header-title">{logo_html}Speech
|
|
|
|
| 61 |
<p class="header-subtitle">Talk naturally. Talk fast. Talk ridiculously fast. I can handle it.</p>
|
| 62 |
</div>
|
| 63 |
"""
|
|
@@ -204,6 +209,15 @@ async def websocket_handler(session):
|
|
| 204 |
async def send_audio():
|
| 205 |
while session.is_running:
|
| 206 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 207 |
if session.session_start_time is not None:
|
| 208 |
elapsed = time.time() - session.session_start_time
|
| 209 |
if elapsed >= SESSION_TIMEOUT:
|
|
@@ -225,6 +239,7 @@ async def websocket_handler(session):
|
|
| 225 |
continue
|
| 226 |
except Exception as e:
|
| 227 |
print(f"Error sending audio: {e}")
|
|
|
|
| 228 |
break
|
| 229 |
|
| 230 |
async def receive_transcription():
|
|
@@ -252,11 +267,14 @@ async def websocket_handler(session):
|
|
| 252 |
session.current_wpm = calculate_wpm(session)
|
| 253 |
except Exception as e:
|
| 254 |
print(f"Error receiving transcription: {e}")
|
|
|
|
| 255 |
|
| 256 |
await asyncio.gather(send_audio(), receive_transcription(), return_exceptions=True)
|
| 257 |
except Exception as e:
|
| 258 |
print(f"WebSocket connection error: {e}")
|
| 259 |
session.status_message = "error"
|
|
|
|
|
|
|
| 260 |
|
| 261 |
|
| 262 |
def start_websocket(session):
|
|
@@ -269,6 +287,7 @@ def start_websocket(session):
|
|
| 269 |
except Exception as e:
|
| 270 |
print(f"WebSocket error: {e}")
|
| 271 |
finally:
|
|
|
|
| 272 |
try:
|
| 273 |
loop.close()
|
| 274 |
except Exception:
|
|
@@ -277,11 +296,16 @@ def start_websocket(session):
|
|
| 277 |
|
| 278 |
def auto_start_recording(session):
|
| 279 |
"""Automatically start the transcription service when audio begins."""
|
| 280 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 281 |
session.transcription_text = ""
|
| 282 |
session.word_timestamps = []
|
| 283 |
session.current_wpm = "Calibrating..."
|
| 284 |
session.session_start_time = time.time()
|
|
|
|
| 285 |
session.status_message = "connecting"
|
| 286 |
thread = threading.Thread(target=start_websocket, args=(session,), daemon=True)
|
| 287 |
thread.start()
|
|
@@ -292,6 +316,7 @@ def auto_start_recording(session):
|
|
| 292 |
def clear_history(session):
|
| 293 |
"""Stop the websocket connection and clear all history."""
|
| 294 |
session.is_running = False
|
|
|
|
| 295 |
|
| 296 |
# Clear the audio queue without blocking
|
| 297 |
try:
|
|
@@ -317,6 +342,9 @@ def process_audio(audio, session):
|
|
| 317 |
wpm = session.current_wpm if session.is_running else "Calibrating..."
|
| 318 |
return get_transcription_html(session.transcription_text, session.status_message, wpm)
|
| 319 |
|
|
|
|
|
|
|
|
|
|
| 320 |
# Auto-start if not running
|
| 321 |
if not session.is_running and session.status_message not in ["timeout", "error"]:
|
| 322 |
auto_start_recording(session)
|
|
@@ -393,7 +421,7 @@ with gr.Blocks(title="Voxtral Real-time Transcription") as demo:
|
|
| 393 |
)
|
| 394 |
|
| 395 |
# Info text
|
| 396 |
-
gr.HTML('<p class="info-text">Click "
|
| 397 |
|
| 398 |
# Event handlers
|
| 399 |
clear_btn.click(
|
|
|
|
| 23 |
WARMUP_DURATION = 2.0 # seconds of silence for warmup
|
| 24 |
WPM_WINDOW = 10 # seconds for running mean calculation
|
| 25 |
CALIBRATION_PERIOD = 5 # seconds before showing WPM
|
| 26 |
+
SESSION_TIMEOUT = 60 # 60 seconds session timeout
|
| 27 |
+
# Close the websocket after this many seconds without receiving any audio frames.
|
| 28 |
+
INACTIVITY_TIMEOUT = int(os.environ.get("INACTIVITY_TIMEOUT", "20"))
|
| 29 |
|
| 30 |
# Global config (shared across users)
|
| 31 |
ws_url = ""
|
|
|
|
| 42 |
self.word_timestamps = []
|
| 43 |
self.current_wpm = "Calibrating..."
|
| 44 |
self.session_start_time = None
|
| 45 |
+
self.last_audio_time = None
|
| 46 |
+
self._start_lock = threading.Lock()
|
| 47 |
|
| 48 |
|
| 49 |
# Load CSS from external file
|
|
|
|
| 61 |
|
| 62 |
return f"""
|
| 63 |
<div class="header-card">
|
| 64 |
+
<h1 class="header-title">{logo_html}Real-time Speech Transcription</h1>
|
| 65 |
+
<p class="header-subtitle">Click the microphone to start streaming transcriptions. The system will warm up automatically - so there will be a small delay</p>
|
| 66 |
<p class="header-subtitle">Talk naturally. Talk fast. Talk ridiculously fast. I can handle it.</p>
|
| 67 |
</div>
|
| 68 |
"""
|
|
|
|
| 209 |
async def send_audio():
|
| 210 |
while session.is_running:
|
| 211 |
try:
|
| 212 |
+
# Check for inactivity timeout
|
| 213 |
+
if session.last_audio_time is not None:
|
| 214 |
+
idle = time.time() - session.last_audio_time
|
| 215 |
+
if idle >= INACTIVITY_TIMEOUT:
|
| 216 |
+
print(f"Inactivity timeout reached ({INACTIVITY_TIMEOUT}s). Closing websocket.")
|
| 217 |
+
session.is_running = False
|
| 218 |
+
session.status_message = "ready"
|
| 219 |
+
break
|
| 220 |
+
|
| 221 |
if session.session_start_time is not None:
|
| 222 |
elapsed = time.time() - session.session_start_time
|
| 223 |
if elapsed >= SESSION_TIMEOUT:
|
|
|
|
| 239 |
continue
|
| 240 |
except Exception as e:
|
| 241 |
print(f"Error sending audio: {e}")
|
| 242 |
+
session.is_running = False
|
| 243 |
break
|
| 244 |
|
| 245 |
async def receive_transcription():
|
|
|
|
| 267 |
session.current_wpm = calculate_wpm(session)
|
| 268 |
except Exception as e:
|
| 269 |
print(f"Error receiving transcription: {e}")
|
| 270 |
+
session.is_running = False
|
| 271 |
|
| 272 |
await asyncio.gather(send_audio(), receive_transcription(), return_exceptions=True)
|
| 273 |
except Exception as e:
|
| 274 |
print(f"WebSocket connection error: {e}")
|
| 275 |
session.status_message = "error"
|
| 276 |
+
finally:
|
| 277 |
+
session.is_running = False
|
| 278 |
|
| 279 |
|
| 280 |
def start_websocket(session):
|
|
|
|
| 287 |
except Exception as e:
|
| 288 |
print(f"WebSocket error: {e}")
|
| 289 |
finally:
|
| 290 |
+
session.is_running = False
|
| 291 |
try:
|
| 292 |
loop.close()
|
| 293 |
except Exception:
|
|
|
|
| 296 |
|
| 297 |
def auto_start_recording(session):
|
| 298 |
"""Automatically start the transcription service when audio begins."""
|
| 299 |
+
# Protect against startup races: Gradio can call `process_audio` concurrently.
|
| 300 |
+
with session._start_lock:
|
| 301 |
+
if session.is_running:
|
| 302 |
+
return get_transcription_html(session.transcription_text, session.status_message, session.current_wpm)
|
| 303 |
+
|
| 304 |
session.transcription_text = ""
|
| 305 |
session.word_timestamps = []
|
| 306 |
session.current_wpm = "Calibrating..."
|
| 307 |
session.session_start_time = time.time()
|
| 308 |
+
session.last_audio_time = time.time()
|
| 309 |
session.status_message = "connecting"
|
| 310 |
thread = threading.Thread(target=start_websocket, args=(session,), daemon=True)
|
| 311 |
thread.start()
|
|
|
|
| 316 |
def clear_history(session):
|
| 317 |
"""Stop the websocket connection and clear all history."""
|
| 318 |
session.is_running = False
|
| 319 |
+
session.last_audio_time = None
|
| 320 |
|
| 321 |
# Clear the audio queue without blocking
|
| 322 |
try:
|
|
|
|
| 342 |
wpm = session.current_wpm if session.is_running else "Calibrating..."
|
| 343 |
return get_transcription_html(session.transcription_text, session.status_message, wpm)
|
| 344 |
|
| 345 |
+
# Update last audio time for inactivity tracking
|
| 346 |
+
session.last_audio_time = time.time()
|
| 347 |
+
|
| 348 |
# Auto-start if not running
|
| 349 |
if not session.is_running and session.status_message not in ["timeout", "error"]:
|
| 350 |
auto_start_recording(session)
|
|
|
|
| 421 |
)
|
| 422 |
|
| 423 |
# Info text
|
| 424 |
+
gr.HTML('<p class="info-text">Click "Clear History" and refresh the page to start a new session.</p>')
|
| 425 |
|
| 426 |
# Event handlers
|
| 427 |
clear_btn.click(
|