Spaces:
Runtime error
Runtime error
Create app.py
Browse files
app.py
ADDED
|
@@ -0,0 +1,837 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import asyncio
|
| 2 |
+
import base64
|
| 3 |
+
import os
|
| 4 |
+
import queue
|
| 5 |
+
import threading
|
| 6 |
+
import time
|
| 7 |
+
import uuid
|
| 8 |
+
from typing import AsyncIterator
|
| 9 |
+
|
| 10 |
+
import gradio as gr
|
| 11 |
+
import numpy as np
|
| 12 |
+
|
| 13 |
+
from mistralai import Mistral
|
| 14 |
+
from mistralai.extra.realtime import UnknownRealtimeEvent
|
| 15 |
+
from mistralai.models import (
|
| 16 |
+
AudioFormat,
|
| 17 |
+
RealtimeTranscriptionError,
|
| 18 |
+
RealtimeTranscriptionSessionCreated,
|
| 19 |
+
TranscriptionStreamDone,
|
| 20 |
+
TranscriptionStreamTextDelta,
|
| 21 |
+
)
|
| 22 |
+
|
| 23 |
+
# --- SECURITY ENHANCEMENT ---
|
| 24 |
+
# kkk = Your Mistral API Key
|
| 25 |
+
# ppp = The password you want users to enter
|
| 26 |
+
MISTRAL_PRIVATE_KEY = os.environ.get("kkk", "")
|
| 27 |
+
APP_PASSWORD = os.environ.get("ppp", "")
|
| 28 |
+
# ----------------------------
|
| 29 |
+
|
| 30 |
+
# Load Voxtral icon as base64
|
| 31 |
+
VOXTRAL_ICON_B64 = ""
|
| 32 |
+
icon_path = os.path.join(os.path.dirname(__file__), "assets", "voxtral.png")
|
| 33 |
+
if os.path.exists(icon_path):
|
| 34 |
+
with open(icon_path, "rb") as f:
|
| 35 |
+
VOXTRAL_ICON_B64 = base64.b64encode(f.read()).decode("utf-8")
|
| 36 |
+
|
| 37 |
+
SAMPLE_RATE = 16_000
|
| 38 |
+
WARMUP_DURATION = 2.0 # seconds of silence for warmup
|
| 39 |
+
WPM_WINDOW = 10 # seconds for running mean calculation
|
| 40 |
+
CALIBRATION_PERIOD = 5 # seconds before showing WPM
|
| 41 |
+
SESSION_TIMEOUT = int(os.environ.get("SESSION_TIMEOUT", "35")) # Max 30s per session
|
| 42 |
+
INACTIVITY_TIMEOUT = int(os.environ.get("INACTIVITY_TIMEOUT", "10")) # Close after 10s silence
|
| 43 |
+
MAX_CONCURRENT_SESSIONS = int(os.environ.get("MAX_SESSIONS", "50"))
|
| 44 |
+
|
| 45 |
+
# Global config (shared across users)
|
| 46 |
+
MISTRAL_BASE_URL = "wss://api.mistral.ai"
|
| 47 |
+
MODEL = "voxtral-mini-transcribe-realtime-2602"
|
| 48 |
+
|
| 49 |
+
# Global event loop for all websocket connections (runs in single background thread)
|
| 50 |
+
_event_loop = None
|
| 51 |
+
_loop_thread = None
|
| 52 |
+
_loop_lock = threading.Lock()
|
| 53 |
+
|
| 54 |
+
# Track active sessions for resource management
|
| 55 |
+
_active_sessions = {}
|
| 56 |
+
_sessions_lock = threading.Lock()
|
| 57 |
+
|
| 58 |
+
# Global session registry - sessions are stored here and looked up by ID
|
| 59 |
+
_session_registry = {}
|
| 60 |
+
_registry_lock = threading.Lock()
|
| 61 |
+
_last_cleanup = time.time()
|
| 62 |
+
SESSION_REGISTRY_CLEANUP_INTERVAL = 30 # seconds
|
| 63 |
+
SESSION_MAX_AGE = 30 # 30 seconds - remove sessions older than this
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
def get_or_create_session(session_id: str = None) -> "UserSession":
|
| 67 |
+
"""Get existing session by ID or create a new one."""
|
| 68 |
+
global _last_cleanup
|
| 69 |
+
|
| 70 |
+
# Periodic cleanup of stale sessions
|
| 71 |
+
now = time.time()
|
| 72 |
+
if now - _last_cleanup > SESSION_REGISTRY_CLEANUP_INTERVAL:
|
| 73 |
+
_cleanup_stale_sessions()
|
| 74 |
+
_last_cleanup = now
|
| 75 |
+
|
| 76 |
+
with _registry_lock:
|
| 77 |
+
if session_id and session_id in _session_registry:
|
| 78 |
+
session = _session_registry[session_id]
|
| 79 |
+
# Validate the session is actually a UserSession instance
|
| 80 |
+
if isinstance(session, UserSession):
|
| 81 |
+
session._last_accessed = now
|
| 82 |
+
return session
|
| 83 |
+
else:
|
| 84 |
+
# Corrupted registry entry - remove and create new
|
| 85 |
+
print(f"WARNING: Corrupted session registry entry for {session_id}: {type(session)}")
|
| 86 |
+
del _session_registry[session_id]
|
| 87 |
+
|
| 88 |
+
# Create new session
|
| 89 |
+
session = UserSession()
|
| 90 |
+
session._last_accessed = now
|
| 91 |
+
_session_registry[session.session_id] = session
|
| 92 |
+
return session
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
def _cleanup_stale_sessions():
|
| 96 |
+
"""Remove sessions that haven't been accessed recently."""
|
| 97 |
+
now = time.time()
|
| 98 |
+
to_remove_from_registry = []
|
| 99 |
+
to_remove_from_active = []
|
| 100 |
+
|
| 101 |
+
# Need both locks to safely check both dictionaries
|
| 102 |
+
with _registry_lock:
|
| 103 |
+
with _sessions_lock:
|
| 104 |
+
# Find stale sessions in registry
|
| 105 |
+
for session_id, session in _session_registry.items():
|
| 106 |
+
# NEVER remove if still in active_sessions (websocket still running)
|
| 107 |
+
if session_id in _active_sessions:
|
| 108 |
+
continue
|
| 109 |
+
|
| 110 |
+
last_accessed = getattr(session, '_last_accessed', 0)
|
| 111 |
+
# Remove if: not running AND not active AND old
|
| 112 |
+
if not session.is_running and (now - last_accessed > SESSION_MAX_AGE):
|
| 113 |
+
to_remove_from_registry.append(session_id)
|
| 114 |
+
|
| 115 |
+
# Find orphaned sessions in active_sessions (not in registry anymore)
|
| 116 |
+
for session_id, session in list(_active_sessions.items()):
|
| 117 |
+
if session_id not in _session_registry:
|
| 118 |
+
# Orphaned - mark for removal
|
| 119 |
+
if not session.is_running:
|
| 120 |
+
to_remove_from_active.append(session_id)
|
| 121 |
+
|
| 122 |
+
# Clean up registry
|
| 123 |
+
for session_id in to_remove_from_registry:
|
| 124 |
+
_session_registry.pop(session_id, None)
|
| 125 |
+
|
| 126 |
+
# Clean up orphaned active sessions
|
| 127 |
+
for session_id in to_remove_from_active:
|
| 128 |
+
_active_sessions.pop(session_id, None)
|
| 129 |
+
|
| 130 |
+
active_count = len(_active_sessions)
|
| 131 |
+
registry_count = len(_session_registry)
|
| 132 |
+
|
| 133 |
+
total_cleaned = len(to_remove_from_registry) + len(to_remove_from_active)
|
| 134 |
+
if total_cleaned > 0:
|
| 135 |
+
print(f"Cleaned up {len(to_remove_from_registry)} stale + {len(to_remove_from_active)} orphaned sessions. Registry: {registry_count}, Active: {active_count}")
|
| 136 |
+
|
| 137 |
+
|
| 138 |
+
def cleanup_session(session_id: str):
|
| 139 |
+
"""Remove session from registry."""
|
| 140 |
+
with _registry_lock:
|
| 141 |
+
_session_registry.pop(session_id, None)
|
| 142 |
+
|
| 143 |
+
|
| 144 |
+
def kill_all_sessions():
|
| 145 |
+
"""Emergency cleanup - kill ALL active sessions to free capacity."""
|
| 146 |
+
killed_count = 0
|
| 147 |
+
|
| 148 |
+
with _sessions_lock:
|
| 149 |
+
sessions_to_kill = list(_active_sessions.values())
|
| 150 |
+
|
| 151 |
+
for session in sessions_to_kill:
|
| 152 |
+
try:
|
| 153 |
+
session.is_running = False
|
| 154 |
+
session._stopped_by_user = True
|
| 155 |
+
|
| 156 |
+
# Signal stop event
|
| 157 |
+
if session._stop_event is not None:
|
| 158 |
+
loop = get_event_loop()
|
| 159 |
+
try:
|
| 160 |
+
asyncio.run_coroutine_threadsafe(
|
| 161 |
+
_set_stop_event_sync(session._stop_event), loop
|
| 162 |
+
)
|
| 163 |
+
except Exception:
|
| 164 |
+
pass
|
| 165 |
+
session._stop_event = None
|
| 166 |
+
|
| 167 |
+
# Cancel the task
|
| 168 |
+
if session._task is not None:
|
| 169 |
+
session._task.cancel()
|
| 170 |
+
session._task = None
|
| 171 |
+
|
| 172 |
+
killed_count += 1
|
| 173 |
+
except Exception as e:
|
| 174 |
+
print(f"Error killing session {session.session_id[:8]}: {e}")
|
| 175 |
+
|
| 176 |
+
# Clear both dictionaries
|
| 177 |
+
with _registry_lock:
|
| 178 |
+
with _sessions_lock:
|
| 179 |
+
_active_sessions.clear()
|
| 180 |
+
_session_registry.clear()
|
| 181 |
+
|
| 182 |
+
print(f"CAPACITY RESET: Killed {killed_count} sessions. All sessions cleared.")
|
| 183 |
+
|
| 184 |
+
|
| 185 |
+
async def _set_stop_event_sync(event):
|
| 186 |
+
"""Helper to set asyncio event."""
|
| 187 |
+
event.set()
|
| 188 |
+
|
| 189 |
+
|
| 190 |
+
def get_event_loop():
|
| 191 |
+
"""Get or create the shared event loop."""
|
| 192 |
+
global _event_loop, _loop_thread
|
| 193 |
+
with _loop_lock:
|
| 194 |
+
if _event_loop is None or not _event_loop.is_running():
|
| 195 |
+
_event_loop = asyncio.new_event_loop()
|
| 196 |
+
_loop_thread = threading.Thread(target=_run_event_loop, daemon=True)
|
| 197 |
+
_loop_thread.start()
|
| 198 |
+
# Wait for loop to start
|
| 199 |
+
time.sleep(0.1)
|
| 200 |
+
return _event_loop
|
| 201 |
+
|
| 202 |
+
|
| 203 |
+
def _run_event_loop():
|
| 204 |
+
"""Run the event loop in background thread."""
|
| 205 |
+
asyncio.set_event_loop(_event_loop)
|
| 206 |
+
_event_loop.run_forever()
|
| 207 |
+
|
| 208 |
+
|
| 209 |
+
class UserSession:
|
| 210 |
+
"""Per-user session state."""
|
| 211 |
+
def __init__(self, api_key: str = None):
|
| 212 |
+
self.session_id = str(uuid.uuid4())
|
| 213 |
+
self.api_key = api_key
|
| 214 |
+
# Use a thread-safe queue for cross-thread communication
|
| 215 |
+
self._audio_queue = queue.Queue(maxsize=200)
|
| 216 |
+
self.transcription_text = ""
|
| 217 |
+
self.is_running = False
|
| 218 |
+
self.status_message = "ready"
|
| 219 |
+
self.word_timestamps = []
|
| 220 |
+
self.current_wpm = "Calibrating..."
|
| 221 |
+
self.session_start_time = None
|
| 222 |
+
self.last_audio_time = None
|
| 223 |
+
self._start_lock = threading.Lock()
|
| 224 |
+
self._task = None # Track the async task
|
| 225 |
+
self._stop_event = None # Event to signal stop
|
| 226 |
+
self._stopped_by_user = False # Track if user explicitly stopped
|
| 227 |
+
|
| 228 |
+
@property
|
| 229 |
+
def audio_queue(self):
|
| 230 |
+
"""Return the thread-safe queue."""
|
| 231 |
+
return self._audio_queue
|
| 232 |
+
|
| 233 |
+
def reset_queue(self):
|
| 234 |
+
"""Reset the audio queue."""
|
| 235 |
+
self._audio_queue = queue.Queue(maxsize=200)
|
| 236 |
+
|
| 237 |
+
|
| 238 |
+
# Load CSS from external file
|
| 239 |
+
css_path = os.path.join(os.path.dirname(__file__), "style.css")
|
| 240 |
+
with open(css_path, "r") as f:
|
| 241 |
+
CUSTOM_CSS = f.read()
|
| 242 |
+
|
| 243 |
+
|
| 244 |
+
def get_header_html() -> str:
|
| 245 |
+
"""Generate the header HTML with Voxtral logo."""
|
| 246 |
+
if VOXTRAL_ICON_B64:
|
| 247 |
+
logo_html = f'<img src="data:image/png;base64,{VOXTRAL_ICON_B64}" alt="Voxtral" class="header-logo" />'
|
| 248 |
+
else:
|
| 249 |
+
logo_html = ''
|
| 250 |
+
|
| 251 |
+
return f"""
|
| 252 |
+
<div class="header-card">
|
| 253 |
+
<h1 class="header-title">{logo_html}Real-time Speech Transcription</h1>
|
| 254 |
+
<p class="header-subtitle">Enter the <b>App Password</b> below, then click the microphone to start streaming transcriptions.</p>
|
| 255 |
+
<p class="header-subtitle">Talk naturally. Talk fast. Talk ridiculously fast. I can handle it.</p>
|
| 256 |
+
</div>
|
| 257 |
+
"""
|
| 258 |
+
|
| 259 |
+
|
| 260 |
+
def get_status_html(status: str) -> str:
|
| 261 |
+
"""Generate status badge HTML based on current status."""
|
| 262 |
+
status_configs = {
|
| 263 |
+
"ready": ("STANDBY", "status-ready", ""),
|
| 264 |
+
"connecting": ("CONNECTING", "status-connecting", "fast"),
|
| 265 |
+
"warming": ("WARMING UP", "status-warming", "fast"),
|
| 266 |
+
"listening": ("LISTENING", "status-listening", "animate"),
|
| 267 |
+
"timeout": ("TIMEOUT", "status-timeout", ""),
|
| 268 |
+
"error": ("ERROR", "status-error", ""),
|
| 269 |
+
}
|
| 270 |
+
label, css_class, dot_class = status_configs.get(status, status_configs["ready"])
|
| 271 |
+
dot_anim = f" {dot_class}" if dot_class else ""
|
| 272 |
+
|
| 273 |
+
return f"""<div class="status-badge {css_class}"><span class="status-dot{dot_anim}"></span><span style="color: inherit !important;">{label}</span></div>"""
|
| 274 |
+
|
| 275 |
+
|
| 276 |
+
def get_transcription_html(transcript: str, status: str, wpm: str = "Calibrating...") -> str:
|
| 277 |
+
"""Generate the full transcription card HTML."""
|
| 278 |
+
status_badge = get_status_html(status)
|
| 279 |
+
wpm_badge = f'<div class="wpm-badge"><span style="color: #1E1E1E !important;">{wpm}</span></div>'
|
| 280 |
+
|
| 281 |
+
if transcript:
|
| 282 |
+
cursor_html = '<span class="transcript-cursor"></span>' if status == "listening" else ""
|
| 283 |
+
content_html = f"""
|
| 284 |
+
<div class="transcript-text" style="color: #000000 !important;">
|
| 285 |
+
{transcript}{cursor_html}
|
| 286 |
+
</div>
|
| 287 |
+
"""
|
| 288 |
+
elif status in ["listening", "warming", "connecting"]:
|
| 289 |
+
content_html = """
|
| 290 |
+
<div class="empty-state">
|
| 291 |
+
<div class="empty-dots">
|
| 292 |
+
<div class="empty-dot"></div>
|
| 293 |
+
<div class="empty-dot"></div>
|
| 294 |
+
<div class="empty-dot"></div>
|
| 295 |
+
</div>
|
| 296 |
+
<p class="empty-text" style="color: #555555 !important;">Listening for audio...</p>
|
| 297 |
+
</div>
|
| 298 |
+
"""
|
| 299 |
+
elif status == "timeout":
|
| 300 |
+
content_html = """
|
| 301 |
+
<div class="empty-state">
|
| 302 |
+
<p class="empty-text" style="color: #B30400 !important;">Session timeout (5 minutes)</p>
|
| 303 |
+
<p class="empty-text" style="color: #555555 !important;">Click 'Clear History' and refresh to restart.</p>
|
| 304 |
+
</div>
|
| 305 |
+
"""
|
| 306 |
+
else:
|
| 307 |
+
content_html = """
|
| 308 |
+
<div class="empty-state">
|
| 309 |
+
<p class="empty-text" style="color: #555555 !important;">// Awaiting audio input...</p>
|
| 310 |
+
<p class="empty-text" style="color: #555555 !important;">// Click the microphone to start.</p>
|
| 311 |
+
</div>
|
| 312 |
+
"""
|
| 313 |
+
|
| 314 |
+
# Use base64 image if available
|
| 315 |
+
if VOXTRAL_ICON_B64:
|
| 316 |
+
icon_html = f'<img src="data:image/png;base64,{VOXTRAL_ICON_B64}" alt="Voxtral" class="voxtral-icon" />'
|
| 317 |
+
else:
|
| 318 |
+
icon_html = '<span style="font-size:20px;">🎙️</span>'
|
| 319 |
+
|
| 320 |
+
return f"""
|
| 321 |
+
<div class="transcription-card">
|
| 322 |
+
<div class="card-header">
|
| 323 |
+
<div class="card-header-left">
|
| 324 |
+
{icon_html}
|
| 325 |
+
<span class="card-title" style="color: #1E1E1E !important;">Transcription Output</span>
|
| 326 |
+
</div>
|
| 327 |
+
<div class="card-header-right">
|
| 328 |
+
{wpm_badge}
|
| 329 |
+
{status_badge}
|
| 330 |
+
</div>
|
| 331 |
+
</div>
|
| 332 |
+
<div class="card-content">
|
| 333 |
+
{content_html}
|
| 334 |
+
</div>
|
| 335 |
+
<div class="card-footer">
|
| 336 |
+
<span style="color: #555555 !important;">Voxtral Mini</span>
|
| 337 |
+
<span style="color: #555555 !important;">Real-time Audio Transcription</span>
|
| 338 |
+
</div>
|
| 339 |
+
</div>
|
| 340 |
+
"""
|
| 341 |
+
|
| 342 |
+
|
| 343 |
+
def calculate_wpm(session):
|
| 344 |
+
"""Calculate words per minute based on running mean of last WPM_WINDOW seconds."""
|
| 345 |
+
if session.session_start_time is not None:
|
| 346 |
+
elapsed = time.time() - session.session_start_time
|
| 347 |
+
if elapsed < CALIBRATION_PERIOD:
|
| 348 |
+
return "Calibrating..."
|
| 349 |
+
|
| 350 |
+
if len(session.word_timestamps) < 2:
|
| 351 |
+
return "0.0 WPM"
|
| 352 |
+
|
| 353 |
+
current_time = time.time()
|
| 354 |
+
cutoff_time = current_time - WPM_WINDOW
|
| 355 |
+
session.word_timestamps = [ts for ts in session.word_timestamps if ts >= cutoff_time]
|
| 356 |
+
|
| 357 |
+
if len(session.word_timestamps) < 2:
|
| 358 |
+
return "0.0 WPM"
|
| 359 |
+
|
| 360 |
+
time_span = current_time - session.word_timestamps[0]
|
| 361 |
+
if time_span == 0:
|
| 362 |
+
return "0.0 WPM"
|
| 363 |
+
|
| 364 |
+
word_count = len(session.word_timestamps)
|
| 365 |
+
wpm = (word_count / time_span) * 60
|
| 366 |
+
return f"{round(wpm, 1)} WPM"
|
| 367 |
+
|
| 368 |
+
|
| 369 |
+
async def audio_stream_from_queue(session) -> AsyncIterator[bytes]:
|
| 370 |
+
"""Async generator that yields audio bytes from the session queue."""
|
| 371 |
+
# First, send silence for warmup
|
| 372 |
+
session.status_message = "warming"
|
| 373 |
+
num_samples = int(SAMPLE_RATE * WARMUP_DURATION)
|
| 374 |
+
silence = np.zeros(num_samples, dtype=np.int16)
|
| 375 |
+
chunk_size = int(SAMPLE_RATE * 0.1) # 100ms chunks
|
| 376 |
+
|
| 377 |
+
for i in range(0, num_samples, chunk_size):
|
| 378 |
+
if not session.is_running:
|
| 379 |
+
return
|
| 380 |
+
chunk = silence[i:i + chunk_size]
|
| 381 |
+
yield chunk.tobytes()
|
| 382 |
+
await asyncio.sleep(0.05)
|
| 383 |
+
|
| 384 |
+
session.status_message = "listening"
|
| 385 |
+
|
| 386 |
+
# Then stream real audio from the queue
|
| 387 |
+
while session.is_running:
|
| 388 |
+
# Check for inactivity timeout
|
| 389 |
+
if session.last_audio_time is not None:
|
| 390 |
+
idle = time.time() - session.last_audio_time
|
| 391 |
+
if idle >= INACTIVITY_TIMEOUT:
|
| 392 |
+
session.is_running = False
|
| 393 |
+
session.status_message = "ready"
|
| 394 |
+
return
|
| 395 |
+
|
| 396 |
+
# Check for session timeout
|
| 397 |
+
if session.session_start_time is not None:
|
| 398 |
+
elapsed = time.time() - session.session_start_time
|
| 399 |
+
if elapsed >= SESSION_TIMEOUT:
|
| 400 |
+
session.is_running = False
|
| 401 |
+
session.status_message = "timeout"
|
| 402 |
+
return
|
| 403 |
+
|
| 404 |
+
# Check if stop was requested
|
| 405 |
+
if session._stop_event and session._stop_event.is_set():
|
| 406 |
+
return
|
| 407 |
+
|
| 408 |
+
# Get audio from queue
|
| 409 |
+
try:
|
| 410 |
+
# The queue contains base64-encoded PCM16 audio
|
| 411 |
+
b64_chunk = session.audio_queue.get_nowait()
|
| 412 |
+
# Decode base64 to raw bytes
|
| 413 |
+
audio_bytes = base64.b64decode(b64_chunk)
|
| 414 |
+
yield audio_bytes
|
| 415 |
+
except queue.Empty:
|
| 416 |
+
# No audio available, yield control briefly
|
| 417 |
+
await asyncio.sleep(0.05)
|
| 418 |
+
continue
|
| 419 |
+
|
| 420 |
+
|
| 421 |
+
async def mistral_transcription_handler(session):
|
| 422 |
+
"""Connect to Mistral realtime API and handle transcription."""
|
| 423 |
+
try:
|
| 424 |
+
if not session.api_key:
|
| 425 |
+
session.status_message = "error"
|
| 426 |
+
print(f"Session {session.session_id[:8]}: No API key provided")
|
| 427 |
+
return
|
| 428 |
+
|
| 429 |
+
# Create Mistral client
|
| 430 |
+
client = Mistral(api_key=session.api_key, server_url=MISTRAL_BASE_URL)
|
| 431 |
+
audio_format = AudioFormat(encoding="pcm_s16le", sample_rate=SAMPLE_RATE)
|
| 432 |
+
|
| 433 |
+
session.status_message = "connecting"
|
| 434 |
+
|
| 435 |
+
# Create the audio stream generator
|
| 436 |
+
audio_stream = audio_stream_from_queue(session)
|
| 437 |
+
|
| 438 |
+
print(f"Session {session.session_id[:8]}: Connecting to Mistral realtime API...")
|
| 439 |
+
|
| 440 |
+
async for event in client.audio.realtime.transcribe_stream(
|
| 441 |
+
audio_stream=audio_stream,
|
| 442 |
+
model=MODEL,
|
| 443 |
+
audio_format=audio_format,
|
| 444 |
+
):
|
| 445 |
+
if not session.is_running:
|
| 446 |
+
break
|
| 447 |
+
|
| 448 |
+
if isinstance(event, RealtimeTranscriptionSessionCreated):
|
| 449 |
+
print(f"Session {session.session_id[:8]}: Connected to Mistral")
|
| 450 |
+
# Status is already set by audio_stream_from_queue
|
| 451 |
+
|
| 452 |
+
elif isinstance(event, TranscriptionStreamTextDelta):
|
| 453 |
+
delta = event.text
|
| 454 |
+
session.transcription_text += delta
|
| 455 |
+
|
| 456 |
+
# Track words for WPM calculation
|
| 457 |
+
words = delta.split()
|
| 458 |
+
for _ in words:
|
| 459 |
+
session.word_timestamps.append(time.time())
|
| 460 |
+
|
| 461 |
+
session.current_wpm = calculate_wpm(session)
|
| 462 |
+
|
| 463 |
+
elif isinstance(event, TranscriptionStreamDone):
|
| 464 |
+
print(f"Session {session.session_id[:8]}: Transcription done")
|
| 465 |
+
break
|
| 466 |
+
|
| 467 |
+
elif isinstance(event, RealtimeTranscriptionError):
|
| 468 |
+
print(f"Session {session.session_id[:8]}: Error - {event.error}")
|
| 469 |
+
session.status_message = "error"
|
| 470 |
+
break
|
| 471 |
+
|
| 472 |
+
elif isinstance(event, UnknownRealtimeEvent):
|
| 473 |
+
continue # Ignore unknown events
|
| 474 |
+
|
| 475 |
+
except asyncio.CancelledError:
|
| 476 |
+
pass # Normal cancellation
|
| 477 |
+
except Exception as e:
|
| 478 |
+
error_msg = str(e) if str(e) else type(e).__name__
|
| 479 |
+
if "ConnectionReset" not in error_msg and "CancelledError" not in error_msg:
|
| 480 |
+
print(f"Session {session.session_id[:8]}: Mistral API error - {error_msg}")
|
| 481 |
+
session.status_message = "error"
|
| 482 |
+
finally:
|
| 483 |
+
session.is_running = False
|
| 484 |
+
|
| 485 |
+
# Only remove and log if not already handled by stop_session
|
| 486 |
+
if not session._stopped_by_user:
|
| 487 |
+
with _sessions_lock:
|
| 488 |
+
removed = _active_sessions.pop(session.session_id, None)
|
| 489 |
+
active_count = len(_active_sessions)
|
| 490 |
+
if removed:
|
| 491 |
+
print(f"Session {session.session_id[:8]} ended. Active sessions: {active_count}")
|
| 492 |
+
|
| 493 |
+
|
| 494 |
+
def start_transcription(session):
|
| 495 |
+
"""Start Mistral transcription using the shared event loop."""
|
| 496 |
+
session.is_running = True
|
| 497 |
+
session._stop_event = asyncio.Event()
|
| 498 |
+
|
| 499 |
+
# Register this session
|
| 500 |
+
with _sessions_lock:
|
| 501 |
+
_active_sessions[session.session_id] = session
|
| 502 |
+
active_count = len(_active_sessions)
|
| 503 |
+
|
| 504 |
+
print(f"Starting session {session.session_id[:8]}. Active sessions: {active_count}")
|
| 505 |
+
|
| 506 |
+
# Submit to the shared event loop
|
| 507 |
+
loop = get_event_loop()
|
| 508 |
+
future = asyncio.run_coroutine_threadsafe(mistral_transcription_handler(session), loop)
|
| 509 |
+
session._task = future
|
| 510 |
+
|
| 511 |
+
# Don't block - the coroutine runs in the background
|
| 512 |
+
# Cleanup happens in mistral_transcription_handler's finally block
|
| 513 |
+
|
| 514 |
+
|
| 515 |
+
def ensure_session(session_id):
|
| 516 |
+
"""Get or create a valid UserSession from a session_id."""
|
| 517 |
+
# Handle various invalid inputs
|
| 518 |
+
if session_id is None or callable(session_id):
|
| 519 |
+
session = get_or_create_session()
|
| 520 |
+
return session
|
| 521 |
+
|
| 522 |
+
# If it's already a UserSession object (legacy), return it
|
| 523 |
+
if isinstance(session_id, UserSession):
|
| 524 |
+
return session_id
|
| 525 |
+
|
| 526 |
+
# Otherwise treat it as a session_id string
|
| 527 |
+
session = get_or_create_session(str(session_id))
|
| 528 |
+
|
| 529 |
+
# Defensive check - this should never happen but helps debug
|
| 530 |
+
if not isinstance(session, UserSession):
|
| 531 |
+
print(f"WARNING: ensure_session returned non-UserSession: {type(session)}")
|
| 532 |
+
return get_or_create_session()
|
| 533 |
+
|
| 534 |
+
return session
|
| 535 |
+
|
| 536 |
+
|
| 537 |
+
def auto_start_recording(session):
|
| 538 |
+
"""Automatically start the transcription service when audio begins."""
|
| 539 |
+
# Protect against startup races: Gradio can call `process_audio` concurrently.
|
| 540 |
+
with session._start_lock:
|
| 541 |
+
if session.is_running:
|
| 542 |
+
return get_transcription_html(session.transcription_text, session.status_message, session.current_wpm)
|
| 543 |
+
|
| 544 |
+
# Check if API key is set
|
| 545 |
+
if not session.api_key:
|
| 546 |
+
session.status_message = "error"
|
| 547 |
+
return get_transcription_html("Please enter the correct App Password above to start.", "error", "")
|
| 548 |
+
|
| 549 |
+
# Check if we've hit max concurrent sessions - kill all if so
|
| 550 |
+
with _sessions_lock:
|
| 551 |
+
active_at_capacity = len(_active_sessions) >= MAX_CONCURRENT_SESSIONS
|
| 552 |
+
with _registry_lock:
|
| 553 |
+
registry_over = len(_session_registry) > MAX_CONCURRENT_SESSIONS
|
| 554 |
+
|
| 555 |
+
if active_at_capacity or registry_over:
|
| 556 |
+
kill_all_sessions()
|
| 557 |
+
session.status_message = "error"
|
| 558 |
+
return get_transcription_html("Server reset due to capacity. Please click the microphone to restart.", "error", "")
|
| 559 |
+
|
| 560 |
+
session.transcription_text = ""
|
| 561 |
+
session.word_timestamps = []
|
| 562 |
+
session.current_wpm = "Calibrating..."
|
| 563 |
+
session.session_start_time = time.time()
|
| 564 |
+
session.last_audio_time = time.time()
|
| 565 |
+
session.status_message = "connecting"
|
| 566 |
+
|
| 567 |
+
# Start Mistral transcription (now non-blocking, uses shared event loop)
|
| 568 |
+
start_transcription(session)
|
| 569 |
+
|
| 570 |
+
return get_transcription_html(session.transcription_text, session.status_message, session.current_wpm)
|
| 571 |
+
|
| 572 |
+
|
| 573 |
+
def stop_session(session_id, api_key=None):
|
| 574 |
+
"""Stop the transcription and invalidate the session.
|
| 575 |
+
|
| 576 |
+
Returns None for session_id so a fresh session is created on next recording.
|
| 577 |
+
This prevents duplicate session issues when users stop and restart quickly.
|
| 578 |
+
"""
|
| 579 |
+
session = ensure_session(session_id)
|
| 580 |
+
old_transcript = session.transcription_text
|
| 581 |
+
old_wpm = session.current_wpm
|
| 582 |
+
|
| 583 |
+
if session.is_running:
|
| 584 |
+
session.is_running = False
|
| 585 |
+
session.last_audio_time = None
|
| 586 |
+
session._stopped_by_user = True # Mark as user-stopped to avoid duplicate logging
|
| 587 |
+
|
| 588 |
+
# Signal the stop event to terminate the audio stream
|
| 589 |
+
if session._stop_event is not None:
|
| 590 |
+
loop = get_event_loop()
|
| 591 |
+
try:
|
| 592 |
+
asyncio.run_coroutine_threadsafe(
|
| 593 |
+
_set_stop_event(session._stop_event), loop
|
| 594 |
+
)
|
| 595 |
+
except Exception:
|
| 596 |
+
pass
|
| 597 |
+
session._stop_event = None
|
| 598 |
+
|
| 599 |
+
# Cancel the running task if any
|
| 600 |
+
if session._task is not None:
|
| 601 |
+
session._task.cancel()
|
| 602 |
+
session._task = None
|
| 603 |
+
|
| 604 |
+
# Remove from active sessions
|
| 605 |
+
with _sessions_lock:
|
| 606 |
+
_active_sessions.pop(session.session_id, None)
|
| 607 |
+
active_count = len(_active_sessions)
|
| 608 |
+
|
| 609 |
+
print(f"Mic stopped - session {session.session_id[:8]} ended. Active sessions: {active_count}")
|
| 610 |
+
|
| 611 |
+
# Remove from registry - the session is done
|
| 612 |
+
cleanup_session(session.session_id)
|
| 613 |
+
|
| 614 |
+
# Return None for session_id - a fresh session will be created on next recording
|
| 615 |
+
# This ensures no duplicate sessions when users stop/start quickly
|
| 616 |
+
return get_transcription_html(old_transcript, "ready", old_wpm), None
|
| 617 |
+
|
| 618 |
+
|
| 619 |
+
async def _set_stop_event(event):
|
| 620 |
+
"""Helper to set asyncio event from sync context."""
|
| 621 |
+
event.set()
|
| 622 |
+
|
| 623 |
+
|
| 624 |
+
def clear_history(session_id, api_key=None):
|
| 625 |
+
"""Stop the transcription and clear all history."""
|
| 626 |
+
session = ensure_session(session_id)
|
| 627 |
+
session.is_running = False
|
| 628 |
+
session.last_audio_time = None
|
| 629 |
+
session._stopped_by_user = True # Mark as user-stopped
|
| 630 |
+
|
| 631 |
+
# Signal the stop event
|
| 632 |
+
if session._stop_event is not None:
|
| 633 |
+
loop = get_event_loop()
|
| 634 |
+
try:
|
| 635 |
+
asyncio.run_coroutine_threadsafe(
|
| 636 |
+
_set_stop_event(session._stop_event), loop
|
| 637 |
+
)
|
| 638 |
+
except Exception:
|
| 639 |
+
pass
|
| 640 |
+
session._stop_event = None
|
| 641 |
+
|
| 642 |
+
# Cancel the running task if any
|
| 643 |
+
if session._task is not None:
|
| 644 |
+
session._task.cancel()
|
| 645 |
+
session._task = None
|
| 646 |
+
|
| 647 |
+
# Remove from active sessions
|
| 648 |
+
with _sessions_lock:
|
| 649 |
+
_active_sessions.pop(session.session_id, None)
|
| 650 |
+
|
| 651 |
+
# Reset the queue
|
| 652 |
+
session.reset_queue()
|
| 653 |
+
|
| 654 |
+
session.transcription_text = ""
|
| 655 |
+
session.word_timestamps = []
|
| 656 |
+
session.current_wpm = "Calibrating..."
|
| 657 |
+
session.session_start_time = None
|
| 658 |
+
session.status_message = "ready"
|
| 659 |
+
|
| 660 |
+
# Return the session_id to maintain state
|
| 661 |
+
return get_transcription_html("", "ready", "Calibrating..."), None, session.session_id
|
| 662 |
+
|
| 663 |
+
|
| 664 |
+
def process_audio(audio, session_id, api_key_input):
|
| 665 |
+
"""Process incoming audio and queue for streaming."""
|
| 666 |
+
|
| 667 |
+
# --- PASSWORD VALIDATION ---
|
| 668 |
+
provided_password = api_key_input.strip() if api_key_input else ""
|
| 669 |
+
if provided_password != APP_PASSWORD:
|
| 670 |
+
return get_transcription_html(
|
| 671 |
+
"Invalid App Password. Access Denied.",
|
| 672 |
+
"error",
|
| 673 |
+
""
|
| 674 |
+
), None
|
| 675 |
+
|
| 676 |
+
# If password is correct, use the private key from environment
|
| 677 |
+
actual_api_key = MISTRAL_PRIVATE_KEY
|
| 678 |
+
# ---------------------------
|
| 679 |
+
|
| 680 |
+
# Check capacity - if at or above max, kill ALL sessions to reset
|
| 681 |
+
with _sessions_lock:
|
| 682 |
+
active_count = len(_active_sessions)
|
| 683 |
+
is_active_user = session_id and any(s.session_id == session_id for s in _active_sessions.values())
|
| 684 |
+
|
| 685 |
+
with _registry_lock:
|
| 686 |
+
registry_count = len(_session_registry)
|
| 687 |
+
|
| 688 |
+
# Kill all if:
|
| 689 |
+
# 1. Registry exceeds limit (memory safety)
|
| 690 |
+
# 2. Active sessions exceed limit
|
| 691 |
+
# 3. At active capacity AND new user trying to join
|
| 692 |
+
if registry_count > MAX_CONCURRENT_SESSIONS or active_count > MAX_CONCURRENT_SESSIONS or (active_count >= MAX_CONCURRENT_SESSIONS and not is_active_user):
|
| 693 |
+
kill_all_sessions()
|
| 694 |
+
return get_transcription_html(
|
| 695 |
+
"Server reset due to capacity. Please click the microphone to restart.",
|
| 696 |
+
"error",
|
| 697 |
+
""
|
| 698 |
+
), None
|
| 699 |
+
|
| 700 |
+
# Always ensure we have a valid session first
|
| 701 |
+
try:
|
| 702 |
+
session = ensure_session(session_id)
|
| 703 |
+
# Update API key on the session with the PRIVATE KEY
|
| 704 |
+
session.api_key = actual_api_key
|
| 705 |
+
except Exception as e:
|
| 706 |
+
print(f"Error creating session: {e}")
|
| 707 |
+
# Create a fresh session if ensure_session fails
|
| 708 |
+
session = UserSession(api_key=actual_api_key)
|
| 709 |
+
_session_registry[session.session_id] = session
|
| 710 |
+
|
| 711 |
+
# Cache session_id early in case of later errors
|
| 712 |
+
current_session_id = session.session_id
|
| 713 |
+
|
| 714 |
+
try:
|
| 715 |
+
# Quick return if audio is None
|
| 716 |
+
if audio is None:
|
| 717 |
+
wpm = session.current_wpm if session.is_running else "Calibrating..."
|
| 718 |
+
return get_transcription_html(session.transcription_text, session.status_message, wpm), current_session_id
|
| 719 |
+
|
| 720 |
+
# Update last audio time for inactivity tracking
|
| 721 |
+
session.last_audio_time = time.time()
|
| 722 |
+
|
| 723 |
+
# Auto-start if not running
|
| 724 |
+
if not session.is_running and session.status_message not in ["timeout", "error"]:
|
| 725 |
+
auto_start_recording(session)
|
| 726 |
+
|
| 727 |
+
# Skip processing if session stopped
|
| 728 |
+
if not session.is_running:
|
| 729 |
+
return get_transcription_html(session.transcription_text, session.status_message, session.current_wpm), current_session_id
|
| 730 |
+
|
| 731 |
+
sample_rate, audio_data = audio
|
| 732 |
+
|
| 733 |
+
# Convert to mono if stereo
|
| 734 |
+
if len(audio_data.shape) > 1:
|
| 735 |
+
audio_data = audio_data.mean(axis=1)
|
| 736 |
+
|
| 737 |
+
# Normalize to float
|
| 738 |
+
if audio_data.dtype == np.int16:
|
| 739 |
+
audio_float = audio_data.astype(np.float32) / 32767.0
|
| 740 |
+
else:
|
| 741 |
+
audio_float = audio_data.astype(np.float32)
|
| 742 |
+
|
| 743 |
+
# Resample to 16kHz if needed
|
| 744 |
+
if sample_rate != SAMPLE_RATE:
|
| 745 |
+
num_samples = int(len(audio_float) * SAMPLE_RATE / sample_rate)
|
| 746 |
+
audio_float = np.interp(
|
| 747 |
+
np.linspace(0, len(audio_float) - 1, num_samples),
|
| 748 |
+
np.arange(len(audio_float)),
|
| 749 |
+
audio_float,
|
| 750 |
+
)
|
| 751 |
+
|
| 752 |
+
# Convert to PCM16 and base64 encode
|
| 753 |
+
pcm16 = (audio_float * 32767).astype(np.int16)
|
| 754 |
+
b64_chunk = base64.b64encode(pcm16.tobytes()).decode("utf-8")
|
| 755 |
+
|
| 756 |
+
# Put directly into thread-safe queue (no event loop needed)
|
| 757 |
+
try:
|
| 758 |
+
session.audio_queue.put_nowait(b64_chunk)
|
| 759 |
+
except Exception:
|
| 760 |
+
pass # Skip if queue is full
|
| 761 |
+
|
| 762 |
+
return get_transcription_html(session.transcription_text, session.status_message, session.current_wpm), current_session_id
|
| 763 |
+
except Exception as e:
|
| 764 |
+
print(f"Error processing audio: {e}")
|
| 765 |
+
# Return safe defaults - always include session_id to maintain state
|
| 766 |
+
return get_transcription_html("", "error", ""), current_session_id
|
| 767 |
+
|
| 768 |
+
|
| 769 |
+
# Gradio interface
|
| 770 |
+
with gr.Blocks(title="Voxtral Real-time Transcription") as demo:
|
| 771 |
+
# Store just the session_id string - much more reliable than complex objects
|
| 772 |
+
session_state = gr.State(value=None)
|
| 773 |
+
|
| 774 |
+
# Header
|
| 775 |
+
gr.HTML(get_header_html())
|
| 776 |
+
|
| 777 |
+
# Password input (Replaces API Key input)
|
| 778 |
+
with gr.Row():
|
| 779 |
+
api_key_input = gr.Textbox(
|
| 780 |
+
label="App Password",
|
| 781 |
+
placeholder="Enter the password to access this app...",
|
| 782 |
+
type="password",
|
| 783 |
+
elem_id="api-key-input",
|
| 784 |
+
info="This app is private. Please enter the authorized password."
|
| 785 |
+
)
|
| 786 |
+
|
| 787 |
+
# Transcription output
|
| 788 |
+
transcription_display = gr.HTML(
|
| 789 |
+
value=get_transcription_html("", "ready", "Calibrating..."),
|
| 790 |
+
elem_id="transcription-output"
|
| 791 |
+
)
|
| 792 |
+
|
| 793 |
+
# Audio input
|
| 794 |
+
audio_input = gr.Audio(
|
| 795 |
+
sources=["microphone"],
|
| 796 |
+
streaming=True,
|
| 797 |
+
type="numpy",
|
| 798 |
+
format="wav",
|
| 799 |
+
elem_id="audio-input",
|
| 800 |
+
label="Microphone Input"
|
| 801 |
+
)
|
| 802 |
+
|
| 803 |
+
# Clear button
|
| 804 |
+
clear_btn = gr.Button(
|
| 805 |
+
"Clear History",
|
| 806 |
+
elem_classes=["clear-btn"]
|
| 807 |
+
)
|
| 808 |
+
|
| 809 |
+
# Info text
|
| 810 |
+
gr.HTML('<p class="info-text">To start again - click on Clear History AND refresh your website.</p>')
|
| 811 |
+
|
| 812 |
+
# Event handlers
|
| 813 |
+
clear_btn.click(
|
| 814 |
+
clear_history,
|
| 815 |
+
inputs=[session_state, api_key_input],
|
| 816 |
+
outputs=[transcription_display, audio_input, session_state]
|
| 817 |
+
)
|
| 818 |
+
|
| 819 |
+
|
| 820 |
+
audio_input.stop_recording(
|
| 821 |
+
stop_session,
|
| 822 |
+
inputs=[session_state, api_key_input],
|
| 823 |
+
outputs=[transcription_display, session_state]
|
| 824 |
+
)
|
| 825 |
+
|
| 826 |
+
audio_input.stream(
|
| 827 |
+
process_audio,
|
| 828 |
+
inputs=[audio_input, session_state, api_key_input],
|
| 829 |
+
outputs=[transcription_display, session_state],
|
| 830 |
+
show_progress="hidden",
|
| 831 |
+
concurrency_limit=500,
|
| 832 |
+
)
|
| 833 |
+
|
| 834 |
+
get_event_loop()
|
| 835 |
+
|
| 836 |
+
demo.queue(default_concurrency_limit=200)
|
| 837 |
+
demo.launch(css=CUSTOM_CSS, theme=gr.themes.Base(), ssr_mode=False, max_threads=200)
|