Joffrey Thomas committed on
Commit
2686978
·
1 Parent(s): 5bfc9b4

Inactivity disconnect

Browse files
Files changed (1) hide show
  1. app.py +32 -4
app.py CHANGED
@@ -23,7 +23,9 @@ SAMPLE_RATE = 16_000
23
  WARMUP_DURATION = 2.0 # seconds of silence for warmup
24
  WPM_WINDOW = 10 # seconds for running mean calculation
25
  CALIBRATION_PERIOD = 5 # seconds before showing WPM
26
- SESSION_TIMEOUT = 300 # 5 minutes session timeout
 
 
27
 
28
  # Global config (shared across users)
29
  ws_url = ""
@@ -40,6 +42,8 @@ class UserSession:
40
  self.word_timestamps = []
41
  self.current_wpm = "Calibrating..."
42
  self.session_start_time = None
 
 
43
 
44
 
45
  # Load CSS from external file
@@ -57,7 +61,8 @@ def get_header_html() -> str:
57
 
58
  return f"""
59
  <div class="header-card">
60
- <h1 class="header-title">{logo_html}Speech Recognition Speed Run 🏃‍♀️</h1>
 
61
  <p class="header-subtitle">Talk naturally. Talk fast. Talk ridiculously fast. I can handle it.</p>
62
  </div>
63
  """
@@ -204,6 +209,15 @@ async def websocket_handler(session):
204
  async def send_audio():
205
  while session.is_running:
206
  try:
 
 
 
 
 
 
 
 
 
207
  if session.session_start_time is not None:
208
  elapsed = time.time() - session.session_start_time
209
  if elapsed >= SESSION_TIMEOUT:
@@ -225,6 +239,7 @@ async def websocket_handler(session):
225
  continue
226
  except Exception as e:
227
  print(f"Error sending audio: {e}")
 
228
  break
229
 
230
  async def receive_transcription():
@@ -252,11 +267,14 @@ async def websocket_handler(session):
252
  session.current_wpm = calculate_wpm(session)
253
  except Exception as e:
254
  print(f"Error receiving transcription: {e}")
 
255
 
256
  await asyncio.gather(send_audio(), receive_transcription(), return_exceptions=True)
257
  except Exception as e:
258
  print(f"WebSocket connection error: {e}")
259
  session.status_message = "error"
 
 
260
 
261
 
262
  def start_websocket(session):
@@ -269,6 +287,7 @@ def start_websocket(session):
269
  except Exception as e:
270
  print(f"WebSocket error: {e}")
271
  finally:
 
272
  try:
273
  loop.close()
274
  except Exception:
@@ -277,11 +296,16 @@ def start_websocket(session):
277
 
278
  def auto_start_recording(session):
279
  """Automatically start the transcription service when audio begins."""
280
- if not session.is_running:
 
 
 
 
281
  session.transcription_text = ""
282
  session.word_timestamps = []
283
  session.current_wpm = "Calibrating..."
284
  session.session_start_time = time.time()
 
285
  session.status_message = "connecting"
286
  thread = threading.Thread(target=start_websocket, args=(session,), daemon=True)
287
  thread.start()
@@ -292,6 +316,7 @@ def auto_start_recording(session):
292
  def clear_history(session):
293
  """Stop the websocket connection and clear all history."""
294
  session.is_running = False
 
295
 
296
  # Clear the audio queue without blocking
297
  try:
@@ -317,6 +342,9 @@ def process_audio(audio, session):
317
  wpm = session.current_wpm if session.is_running else "Calibrating..."
318
  return get_transcription_html(session.transcription_text, session.status_message, wpm)
319
 
 
 
 
320
  # Auto-start if not running
321
  if not session.is_running and session.status_message not in ["timeout", "error"]:
322
  auto_start_recording(session)
@@ -393,7 +421,7 @@ with gr.Blocks(title="Voxtral Real-time Transcription") as demo:
393
  )
394
 
395
  # Info text
396
- gr.HTML('<p class="info-text">Click "Stop Recording" and "Clear History" to start a new session.</p>')
397
 
398
  # Event handlers
399
  clear_btn.click(
 
23
  WARMUP_DURATION = 2.0 # seconds of silence for warmup
24
  WPM_WINDOW = 10 # seconds for running mean calculation
25
  CALIBRATION_PERIOD = 5 # seconds before showing WPM
26
+ SESSION_TIMEOUT = 60 # 60 seconds session timeout
27
+ # Close the websocket after this many seconds without receiving any audio frames.
28
+ INACTIVITY_TIMEOUT = int(os.environ.get("INACTIVITY_TIMEOUT", "20"))
29
 
30
  # Global config (shared across users)
31
  ws_url = ""
 
42
  self.word_timestamps = []
43
  self.current_wpm = "Calibrating..."
44
  self.session_start_time = None
45
+ self.last_audio_time = None
46
+ self._start_lock = threading.Lock()
47
 
48
 
49
  # Load CSS from external file
 
61
 
62
  return f"""
63
  <div class="header-card">
64
+ <h1 class="header-title">{logo_html}Real-time Speech Transcription</h1>
65
+ <p class="header-subtitle">Click the microphone to start streaming transcriptions. The system will warm up automatically - so there will be a small delay</p>
66
  <p class="header-subtitle">Talk naturally. Talk fast. Talk ridiculously fast. I can handle it.</p>
67
  </div>
68
  """
 
209
  async def send_audio():
210
  while session.is_running:
211
  try:
212
+ # Check for inactivity timeout
213
+ if session.last_audio_time is not None:
214
+ idle = time.time() - session.last_audio_time
215
+ if idle >= INACTIVITY_TIMEOUT:
216
+ print(f"Inactivity timeout reached ({INACTIVITY_TIMEOUT}s). Closing websocket.")
217
+ session.is_running = False
218
+ session.status_message = "ready"
219
+ break
220
+
221
  if session.session_start_time is not None:
222
  elapsed = time.time() - session.session_start_time
223
  if elapsed >= SESSION_TIMEOUT:
 
239
  continue
240
  except Exception as e:
241
  print(f"Error sending audio: {e}")
242
+ session.is_running = False
243
  break
244
 
245
  async def receive_transcription():
 
267
  session.current_wpm = calculate_wpm(session)
268
  except Exception as e:
269
  print(f"Error receiving transcription: {e}")
270
+ session.is_running = False
271
 
272
  await asyncio.gather(send_audio(), receive_transcription(), return_exceptions=True)
273
  except Exception as e:
274
  print(f"WebSocket connection error: {e}")
275
  session.status_message = "error"
276
+ finally:
277
+ session.is_running = False
278
 
279
 
280
  def start_websocket(session):
 
287
  except Exception as e:
288
  print(f"WebSocket error: {e}")
289
  finally:
290
+ session.is_running = False
291
  try:
292
  loop.close()
293
  except Exception:
 
296
 
297
  def auto_start_recording(session):
298
  """Automatically start the transcription service when audio begins."""
299
+ # Protect against startup races: Gradio can call `process_audio` concurrently.
300
+ with session._start_lock:
301
+ if session.is_running:
302
+ return get_transcription_html(session.transcription_text, session.status_message, session.current_wpm)
303
+
304
  session.transcription_text = ""
305
  session.word_timestamps = []
306
  session.current_wpm = "Calibrating..."
307
  session.session_start_time = time.time()
308
+ session.last_audio_time = time.time()
309
  session.status_message = "connecting"
310
  thread = threading.Thread(target=start_websocket, args=(session,), daemon=True)
311
  thread.start()
 
316
  def clear_history(session):
317
  """Stop the websocket connection and clear all history."""
318
  session.is_running = False
319
+ session.last_audio_time = None
320
 
321
  # Clear the audio queue without blocking
322
  try:
 
342
  wpm = session.current_wpm if session.is_running else "Calibrating..."
343
  return get_transcription_html(session.transcription_text, session.status_message, wpm)
344
 
345
+ # Update last audio time for inactivity tracking
346
+ session.last_audio_time = time.time()
347
+
348
  # Auto-start if not running
349
  if not session.is_running and session.status_message not in ["timeout", "error"]:
350
  auto_start_recording(session)
 
421
  )
422
 
423
  # Info text
424
+ gr.HTML('<p class="info-text">Click "Clear History" and refresh the page to start a new session.</p>')
425
 
426
  # Event handlers
427
  clear_btn.click(