Voxtral-Mini-Realtime

Sleeping

App Files Files Community

Joffrey Thomas committed on Feb 4

Commit

68d5702

1 Parent(s): f259931

change app.py

Browse files

Files changed (1) hide show

app.py +109 -49

app.py CHANGED Viewed

@@ -3,9 +3,9 @@ import asyncio
 import base64
 import json
 import os
-import queue
 import threading
 import time
 import gradio as gr
 import numpy as np
@@ -22,19 +22,48 @@ SAMPLE_RATE = 16_000
 WARMUP_DURATION = 2.0  # seconds of silence for warmup
 WPM_WINDOW = 10  # seconds for running mean calculation
 CALIBRATION_PERIOD = 5  # seconds before showing WPM
-SESSION_TIMEOUT = 60  # 60 seconds session timeout
-# Close the websocket after this many seconds without receiving any audio frames.
 INACTIVITY_TIMEOUT = int(os.environ.get("INACTIVITY_TIMEOUT", "20"))
 # Global config (shared across users)
 ws_url = ""
 model = ""
 class UserSession:
     """Per-user session state."""
     def __init__(self):
-        self.audio_queue = queue.Queue(maxsize=100)  # Limit queue size
         self.transcription_text = ""
         self.is_running = False
         self.status_message = "ready"
@@ -43,6 +72,7 @@ class UserSession:
         self.session_start_time = None
         self.last_audio_time = None
         self._start_lock = threading.Lock()
 # Load CSS from external file
@@ -212,7 +242,6 @@ async def websocket_handler(session):
                         if session.last_audio_time is not None:
                             idle = time.time() - session.last_audio_time
                             if idle >= INACTIVITY_TIMEOUT:
-                                print(f"Inactivity timeout reached ({INACTIVITY_TIMEOUT}s). Closing websocket.")
                                 session.is_running = False
                                 session.status_message = "ready"
                                 break
@@ -220,24 +249,23 @@ async def websocket_handler(session):
                         if session.session_start_time is not None:
                             elapsed = time.time() - session.session_start_time
                             if elapsed >= SESSION_TIMEOUT:
-                                print(f"Session timeout reached ({SESSION_TIMEOUT}s)")
                                 session.is_running = False
                                 session.status_message = "timeout"
                                 break
-                        chunk = await asyncio.get_event_loop().run_in_executor(
-                            None, lambda: session.audio_queue.get(timeout=0.1)
-                        )
-                        if session.is_running:
-                            await ws.send(
-                                json.dumps(
-                                    {"type": "input_audio_buffer.append", "audio": chunk}
                                 )
-                            )
-                    except queue.Empty:
-                        continue
                     except Exception as e:
-                        print(f"Error sending audio: {e}")
                         session.is_running = False
                         break
@@ -264,37 +292,45 @@ async def websocket_handler(session):
                                 session.word_timestamps.append(time.time())
                             session.current_wpm = calculate_wpm(session)
                 except Exception as e:
-                    print(f"Error receiving transcription: {e}")
                     session.is_running = False
             await asyncio.gather(send_audio(), receive_transcription(), return_exceptions=True)
     except websockets.exceptions.ConnectionClosed:
-        # Normal closure, not an error
-        pass
     except Exception as e:
         error_msg = str(e) if str(e) else type(e).__name__
-        print(f"WebSocket connection error: {error_msg}")
         session.status_message = "error"
     finally:
         session.is_running = False
 def start_websocket(session):
-    """Start WebSocket connection in background thread."""
     session.is_running = True
-    loop = asyncio.new_event_loop()
-    asyncio.set_event_loop(loop)
-    try:
-        loop.run_until_complete(websocket_handler(session))
-    except Exception as e:
-        print(f"WebSocket error: {e}")
-    finally:
-        session.is_running = False
-        try:
-            loop.close()
-        except Exception:
-            pass
 def auto_start_recording(session):
@@ -304,14 +340,21 @@ def auto_start_recording(session):
         if session.is_running:
             return get_transcription_html(session.transcription_text, session.status_message, session.current_wpm)
         session.transcription_text = ""
         session.word_timestamps = []
         session.current_wpm = "Calibrating..."
         session.session_start_time = time.time()
         session.last_audio_time = time.time()
         session.status_message = "connecting"
-        thread = threading.Thread(target=start_websocket, args=(session,), daemon=True)
-        thread.start()
     return get_transcription_html(session.transcription_text, session.status_message, session.current_wpm)
@@ -321,12 +364,17 @@ def clear_history(session):
     session.is_running = False
     session.last_audio_time = None
-    # Clear the audio queue without blocking
-    try:
-        while True:
-            session.audio_queue.get_nowait()
-    except queue.Empty:
-        pass
     session.transcription_text = ""
     session.word_timestamps = []
@@ -381,11 +429,12 @@ def process_audio(audio, session):
         pcm16 = (audio_float * 32767).astype(np.int16)
         b64_chunk = base64.b64encode(pcm16.tobytes()).decode("utf-8")
-        # Non-blocking put to queue
         try:
-            session.audio_queue.put_nowait(b64_chunk)
-        except queue.Full:
-            pass  # Skip if queue is full
         return get_transcription_html(session.transcription_text, session.status_message, session.current_wpm)
     except Exception as e:
@@ -393,6 +442,14 @@ def process_audio(audio, session):
         return get_transcription_html(session.transcription_text, session.status_message, session.current_wpm)
 # Gradio interface
 with gr.Blocks(title="Voxtral Real-time Transcription") as demo:
     session_state = gr.State(value=lambda: UserSession())
@@ -437,7 +494,7 @@ with gr.Blocks(title="Voxtral Real-time Transcription") as demo:
         inputs=[audio_input, session_state],
         outputs=[transcription_display],
         show_progress="hidden",
-        concurrency_limit=None,
     )
 model = os.environ.get("MODEL", "mistralai/Voxtral-Mini-4B-Realtime-2602")
@@ -445,5 +502,8 @@ host = os.environ.get("HOST", "")
 ws_url = f"wss://{host}/v1/realtime"
-demo.queue(default_concurrency_limit=20)
-demo.launch(css=CUSTOM_CSS, theme=gr.themes.Base(), ssr_mode=False, max_threads=40)

 import base64
 import json
 import os
 import threading
 import time
+import uuid
 import gradio as gr
 import numpy as np
 WARMUP_DURATION = 2.0  # seconds of silence for warmup
 WPM_WINDOW = 10  # seconds for running mean calculation
 CALIBRATION_PERIOD = 5  # seconds before showing WPM
+SESSION_TIMEOUT = int(os.environ.get("SESSION_TIMEOUT", "60"))
 INACTIVITY_TIMEOUT = int(os.environ.get("INACTIVITY_TIMEOUT", "20"))
+MAX_CONCURRENT_SESSIONS = int(os.environ.get("MAX_SESSIONS", "50"))
 # Global config (shared across users)
 ws_url = ""
 model = ""
# Global event loop for all websocket connections (runs in single background thread)
_event_loop = None
_loop_thread = None
_loop_lock = threading.Lock()
# Track active sessions for resource management
_active_sessions = {}
_sessions_lock = threading.Lock()
# Upper bound (seconds) on waiting for the loop thread to report it is running
_LOOP_START_TIMEOUT = 5.0


def get_event_loop():
    """Get or create the shared event loop.

    The loop runs forever in a single daemon thread. A new loop (and thread)
    is created only when none exists yet or the previous one has stopped.

    Returns:
        The running ``asyncio`` event loop shared by all sessions.

    Raises:
        RuntimeError: if the background thread does not report a running loop
            within ``_LOOP_START_TIMEOUT`` seconds.
    """
    global _event_loop, _loop_thread
    with _loop_lock:
        if _event_loop is None or not _event_loop.is_running():
            loop = asyncio.new_event_loop()
            started = threading.Event()
            thread = threading.Thread(
                target=_run_event_loop, args=(loop, started), daemon=True
            )
            thread.start()
            # Wait for an explicit "running" signal instead of sleeping a fixed
            # interval: a slow thread start would otherwise make the next call
            # see a non-running loop and replace it, orphaning queued work.
            if not started.wait(timeout=_LOOP_START_TIMEOUT):
                raise RuntimeError("Shared event loop failed to start")
            # Publish only once the loop is known to be running.
            _event_loop = loop
            _loop_thread = thread
    return _event_loop


def _run_event_loop(loop=None, started=None):
    """Run the event loop in background thread.

    Args:
        loop: the loop to run; defaults to the module-level ``_event_loop``
            so a bare ``_run_event_loop()`` call keeps working.
        started: optional ``threading.Event`` that is set from inside the
            loop's first iteration, i.e. once ``loop.is_running()`` is true.
    """
    if loop is None:
        loop = _event_loop
    asyncio.set_event_loop(loop)
    if started is not None:
        # Scheduled before run_forever(); executes as the loop's first callback.
        loop.call_soon(started.set)
    loop.run_forever()
 class UserSession:
     """Per-user session state."""
     def __init__(self):
+        self.session_id = str(uuid.uuid4())
+        self.audio_queue = asyncio.Queue(maxsize=100)  # Use async queue
         self.transcription_text = ""
         self.is_running = False
         self.status_message = "ready"
         self.session_start_time = None
         self.last_audio_time = None
         self._start_lock = threading.Lock()
+        self._task = None  # Track the async task
 # Load CSS from external file
                         if session.last_audio_time is not None:
                             idle = time.time() - session.last_audio_time
                             if idle >= INACTIVITY_TIMEOUT:
                                 session.is_running = False
                                 session.status_message = "ready"
                                 break
                         if session.session_start_time is not None:
                             elapsed = time.time() - session.session_start_time
                             if elapsed >= SESSION_TIMEOUT:
                                 session.is_running = False
                                 session.status_message = "timeout"
                                 break
+                        try:
+                            chunk = await asyncio.wait_for(session.audio_queue.get(), timeout=0.1)
+                            if session.is_running:
+                                await ws.send(
+                                    json.dumps(
+                                        {"type": "input_audio_buffer.append", "audio": chunk}
+                                    )
                                 )
+                        except asyncio.TimeoutError:
+                            continue
                     except Exception as e:
+                        if session.is_running:  # Only log if unexpected
+                            print(f"Error sending audio: {e}")
                         session.is_running = False
                         break
                                 session.word_timestamps.append(time.time())
                             session.current_wpm = calculate_wpm(session)
+                except asyncio.CancelledError:
+                    pass  # Normal cancellation
                 except Exception as e:
+                    if session.is_running:
+                        print(f"Error receiving transcription: {e}")
                     session.is_running = False
             await asyncio.gather(send_audio(), receive_transcription(), return_exceptions=True)
+    except asyncio.CancelledError:
+        pass  # Normal cancellation
     except websockets.exceptions.ConnectionClosed:
+        pass  # Normal closure
     except Exception as e:
         error_msg = str(e) if str(e) else type(e).__name__
+        if "ConnectionReset" not in error_msg:  # Suppress common disconnect errors
+            print(f"WebSocket error: {error_msg}")
         session.status_message = "error"
     finally:
         session.is_running = False
+        # Remove from active sessions
+        with _sessions_lock:
+            _active_sessions.pop(session.session_id, None)
 def start_websocket(session):
     """Start WebSocket connection using the shared event loop.
     Marks the session as running, registers it in ``_active_sessions`` and
     submits ``websocket_handler(session)`` to the shared loop without
     blocking. The resulting future is stored on ``session._task``.
     If the coroutine cannot be submitted, the registration and running flag
     are rolled back (so the session does not occupy a slot forever) and the
     original exception is re-raised.
     """
     session.is_running = True
     # Register this session
     with _sessions_lock:
         _active_sessions[session.session_id] = session
     try:
         # Submit to the shared event loop
         loop = get_event_loop()
         session._task = asyncio.run_coroutine_threadsafe(websocket_handler(session), loop)
     except Exception:
         # Submission failed: websocket_handler's finally block will never run,
         # so undo the bookkeeping here before propagating the error.
         with _sessions_lock:
             _active_sessions.pop(session.session_id, None)
         session.is_running = False
         session.status_message = "error"
         raise
     # Don't block - the coroutine runs in the background
     # Cleanup happens in websocket_handler's finally block
 def auto_start_recording(session):
         if session.is_running:
             return get_transcription_html(session.transcription_text, session.status_message, session.current_wpm)
+        # Check if we've hit max concurrent sessions
+        with _sessions_lock:
+            if len(_active_sessions) >= MAX_CONCURRENT_SESSIONS:
+                session.status_message = "error"
+                return get_transcription_html("Server at capacity. Please try again later.", "error", "")
         session.transcription_text = ""
         session.word_timestamps = []
         session.current_wpm = "Calibrating..."
         session.session_start_time = time.time()
         session.last_audio_time = time.time()
         session.status_message = "connecting"
+        # Start websocket (now non-blocking, uses shared event loop)
+        start_websocket(session)
     return get_transcription_html(session.transcription_text, session.status_message, session.current_wpm)
     session.is_running = False
     session.last_audio_time = None
+    # Cancel the running task if any
+    if session._task is not None:
+        session._task.cancel()
+        session._task = None
+    # Remove from active sessions
+    with _sessions_lock:
+        _active_sessions.pop(session.session_id, None)
+    # Create a fresh async queue (old one may have items)
+    session.audio_queue = asyncio.Queue(maxsize=100)
     session.transcription_text = ""
     session.word_timestamps = []
         pcm16 = (audio_float * 32767).astype(np.int16)
         b64_chunk = base64.b64encode(pcm16.tobytes()).decode("utf-8")
+        # Non-blocking put to async queue (thread-safe)
         try:
+            loop = get_event_loop()
+            loop.call_soon_threadsafe(lambda: _safe_queue_put(session.audio_queue, b64_chunk))
+        except Exception:
+            pass  # Skip if queue is full or loop issues
         return get_transcription_html(session.transcription_text, session.status_message, session.current_wpm)
     except Exception as e:
         return get_transcription_html(session.transcription_text, session.status_message, session.current_wpm)
+def _safe_queue_put(q, item):
+    """Safely put item in async queue without blocking."""
+    try:
+        q.put_nowait(item)
+    except asyncio.QueueFull:
+        pass  # Drop frame if queue is full
 # Gradio interface
 with gr.Blocks(title="Voxtral Real-time Transcription") as demo:
     session_state = gr.State(value=lambda: UserSession())
         inputs=[audio_input, session_state],
         outputs=[transcription_display],
         show_progress="hidden",
+        concurrency_limit=100,  # Allow many concurrent audio streams
     )
 model = os.environ.get("MODEL", "mistralai/Voxtral-Mini-4B-Realtime-2602")
 ws_url = f"wss://{host}/v1/realtime"
+# Initialize the shared event loop at startup
+get_event_loop()
+demo.queue(default_concurrency_limit=50)
+demo.launch(css=CUSTOM_CSS, theme=gr.themes.Base(), ssr_mode=False, max_threads=100)