Spaces:

mistralai
/

Voxtral-Mini-Realtime

Paused

App Files Files Community

Joffrey Thomas committed on Feb 4

Commit

2686978

1 Parent(s): 5bfc9b4

Inactivity disconnect

Browse files

Files changed (1) hide show

app.py +32 -4

app.py CHANGED Viewed

@@ -23,7 +23,9 @@ SAMPLE_RATE = 16_000
 WARMUP_DURATION = 2.0  # seconds of silence for warmup
 WPM_WINDOW = 10  # seconds for running mean calculation
 CALIBRATION_PERIOD = 5  # seconds before showing WPM
-SESSION_TIMEOUT = 300  # 5 minutes session timeout
 # Global config (shared across users)
 ws_url = ""
@@ -40,6 +42,8 @@ class UserSession:
         self.word_timestamps = []
         self.current_wpm = "Calibrating..."
         self.session_start_time = None
 # Load CSS from external file
@@ -57,7 +61,8 @@ def get_header_html() -> str:
     return f"""
     <div class="header-card">
-        <h1 class="header-title">{logo_html}Speech Recognition Speed Run 🏃‍♀️</h1>
         <p class="header-subtitle">Talk naturally. Talk fast. Talk ridiculously fast. I can handle it.</p>
     </div>
     """
@@ -204,6 +209,15 @@ async def websocket_handler(session):
             async def send_audio():
                 while session.is_running:
                     try:
                         if session.session_start_time is not None:
                             elapsed = time.time() - session.session_start_time
                             if elapsed >= SESSION_TIMEOUT:
@@ -225,6 +239,7 @@ async def websocket_handler(session):
                         continue
                     except Exception as e:
                         print(f"Error sending audio: {e}")
                         break
             async def receive_transcription():
@@ -252,11 +267,14 @@ async def websocket_handler(session):
                             session.current_wpm = calculate_wpm(session)
                 except Exception as e:
                     print(f"Error receiving transcription: {e}")
             await asyncio.gather(send_audio(), receive_transcription(), return_exceptions=True)
     except Exception as e:
         print(f"WebSocket connection error: {e}")
         session.status_message = "error"
 def start_websocket(session):
@@ -269,6 +287,7 @@ def start_websocket(session):
     except Exception as e:
         print(f"WebSocket error: {e}")
     finally:
         try:
             loop.close()
         except Exception:
@@ -277,11 +296,16 @@ def start_websocket(session):
 def auto_start_recording(session):
     """Automatically start the transcription service when audio begins."""
-    if not session.is_running:
         session.transcription_text = ""
         session.word_timestamps = []
         session.current_wpm = "Calibrating..."
         session.session_start_time = time.time()
         session.status_message = "connecting"
         thread = threading.Thread(target=start_websocket, args=(session,), daemon=True)
         thread.start()
@@ -292,6 +316,7 @@ def auto_start_recording(session):
 def clear_history(session):
     """Stop the websocket connection and clear all history."""
     session.is_running = False
     # Clear the audio queue without blocking
     try:
@@ -317,6 +342,9 @@ def process_audio(audio, session):
             wpm = session.current_wpm if session.is_running else "Calibrating..."
             return get_transcription_html(session.transcription_text, session.status_message, wpm)
         # Auto-start if not running
         if not session.is_running and session.status_message not in ["timeout", "error"]:
             auto_start_recording(session)
@@ -393,7 +421,7 @@ with gr.Blocks(title="Voxtral Real-time Transcription") as demo:
     )
     # Info text
-    gr.HTML('<p class="info-text">Click "Stop Recording" and "Clear History" to start a new session.</p>')
     # Event handlers
     clear_btn.click(

 WARMUP_DURATION = 2.0  # seconds of silence for warmup
 WPM_WINDOW = 10  # seconds for running mean calculation
 CALIBRATION_PERIOD = 5  # seconds before showing WPM
+SESSION_TIMEOUT = 60  # 60 seconds session timeout
+# Close the websocket after this many seconds without receiving any audio frames.
+INACTIVITY_TIMEOUT = int(os.environ.get("INACTIVITY_TIMEOUT", "20"))
 # Global config (shared across users)
 ws_url = ""
         self.word_timestamps = []
         self.current_wpm = "Calibrating..."
         self.session_start_time = None
+        self.last_audio_time = None
+        self._start_lock = threading.Lock()
 # Load CSS from external file
     return f"""
     <div class="header-card">
+        <h1 class="header-title">{logo_html}Real-time Speech Transcription</h1>
+        <p class="header-subtitle">Click the microphone to start streaming transcriptions. The system will warm up automatically - so there will be a small delay</p>
         <p class="header-subtitle">Talk naturally. Talk fast. Talk ridiculously fast. I can handle it.</p>
     </div>
     """
             async def send_audio():
                 while session.is_running:
                     try:
+                        # Check for inactivity timeout
+                        if session.last_audio_time is not None:
+                            idle = time.time() - session.last_audio_time
+                            if idle >= INACTIVITY_TIMEOUT:
+                                print(f"Inactivity timeout reached ({INACTIVITY_TIMEOUT}s). Closing websocket.")
+                                session.is_running = False
+                                session.status_message = "ready"
+                                break
                         if session.session_start_time is not None:
                             elapsed = time.time() - session.session_start_time
                             if elapsed >= SESSION_TIMEOUT:
                         continue
                     except Exception as e:
                         print(f"Error sending audio: {e}")
+                        session.is_running = False
                         break
             async def receive_transcription():
                             session.current_wpm = calculate_wpm(session)
                 except Exception as e:
                     print(f"Error receiving transcription: {e}")
+                    session.is_running = False
             await asyncio.gather(send_audio(), receive_transcription(), return_exceptions=True)
     except Exception as e:
         print(f"WebSocket connection error: {e}")
         session.status_message = "error"
+    finally:
+        session.is_running = False
 def start_websocket(session):
     except Exception as e:
         print(f"WebSocket error: {e}")
     finally:
+        session.is_running = False
         try:
             loop.close()
         except Exception:
 def auto_start_recording(session):
     """Automatically start the transcription service when audio begins."""
+    # Protect against startup races: Gradio can call `process_audio` concurrently.
+    with session._start_lock:
+        if session.is_running:
+            return get_transcription_html(session.transcription_text, session.status_message, session.current_wpm)
         session.transcription_text = ""
         session.word_timestamps = []
         session.current_wpm = "Calibrating..."
         session.session_start_time = time.time()
+        session.last_audio_time = time.time()
         session.status_message = "connecting"
         thread = threading.Thread(target=start_websocket, args=(session,), daemon=True)
         thread.start()
 def clear_history(session):
     """Stop the websocket connection and clear all history."""
     session.is_running = False
+    session.last_audio_time = None
     # Clear the audio queue without blocking
     try:
             wpm = session.current_wpm if session.is_running else "Calibrating..."
             return get_transcription_html(session.transcription_text, session.status_message, wpm)
+        # Update last audio time for inactivity tracking
+        session.last_audio_time = time.time()
         # Auto-start if not running
         if not session.is_running and session.status_message not in ["timeout", "error"]:
             auto_start_recording(session)
     )
     # Info text
+    gr.HTML('<p class="info-text">Click "Clear History" and refresh the page to start a new session.</p>')
     # Event handlers
     clear_btn.click(