mgbam commited on
Commit
0e94c69
·
verified ·
1 Parent(s): 4425127

Update stt_handler.pyc

Browse files
Files changed (1) hide show
  1. stt_handler.pyc +44 -21
stt_handler.pyc CHANGED
@@ -2,34 +2,57 @@
2
  import whisper
3
  import numpy as np
4
  import asyncio
5
- import os
6
- from io import BytesIO
7
 
8
- # Load the model once when the module is imported
 
9
  print("Loading Whisper model...")
10
- model = whisper.load_model("base.en")
11
- print("Whisper model loaded.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
  async def transcribe_audio_chunk(audio_chunk: np.ndarray) -> str:
14
  """
15
- Transcribes an audio chunk using Whisper.
16
- Runs the blocking whisper call in a separate thread.
17
  """
18
- # The audio data is 16-bit PCM, 8000 Hz. Whisper expects float32.
19
- audio_float32 = audio_chunk.astype(np.float32) / 32768.0
20
 
21
- # Using an in-memory buffer
22
- wav_buffer = BytesIO()
23
-
24
- # We must provide the sample rate to whisper's transcribe function
25
  loop = asyncio.get_event_loop()
26
- result = await loop.run_in_executor(
27
- None, # Use the default executor (a ThreadPoolExecutor)
28
- lambda: model.transcribe(
29
- audio_float32,
30
- language="en",
31
- fp16=False # Set to False if not using a GPU
32
- )
33
  )
34
 
35
- return result.get("text", "").strip()
 
2
  import whisper
3
  import numpy as np
4
  import asyncio
5
+ from concurrent.futures import ThreadPoolExecutor
 
6
 
7
# --- Model Loading ---
# This is a CPU/memory intensive operation, so it's done once when the server starts.
print("Loading Whisper model...")
try:
    # Use a smaller model for faster loading and lower resource usage, ideal for real-time.
    # 'base.en' is a good starting point.
    model = whisper.load_model("base.en")
    print("Whisper model 'base.en' loaded successfully.")
except Exception as e:
    # Surface the failure and abort: the app is non-functional without the model.
    print(f"Error loading Whisper model: {e}")
    # sys.exit() raises SystemExit reliably; the bare exit() builtin is
    # injected by the `site` module and may not exist (python -S, frozen
    # builds, some embedded interpreters).
    import sys
    sys.exit(1)
# --- End Model Loading ---
20
+
21
+
22
# We use a thread pool to run the blocking Whisper transcription
# without blocking the main async event loop.
# max_workers=4 caps how many transcriptions run at once; Whisper inference
# is CPU-bound, so unbounded workers would contend rather than speed things up.
executor = ThreadPoolExecutor(max_workers=4)
25
+
26
def _transcribe(audio_np: np.ndarray):
    """Blocking Whisper transcription; meant to run on a worker thread.

    Converts raw 16-bit PCM samples into the float32 [-1.0, 1.0] range
    that Whisper expects, transcribes them, and returns the stripped
    transcript text (empty string when Whisper produced no "text" key).
    """
    # Scale int16 PCM down by 2**15 to normalize into [-1.0, 1.0].
    normalized = audio_np.astype(np.float32) / 32768.0

    transcription = model.transcribe(
        normalized,
        language="en",
        fp16=False,  # half precision is only useful on GPU; force fp32 on CPU
    )
    return transcription.get("text", "").strip()
40
+
41
 
42
async def transcribe_audio_chunk(audio_chunk: np.ndarray) -> str:
    """
    Transcribes an audio chunk using Whisper in a non-blocking way.

    Args:
        audio_chunk: Raw 16-bit PCM samples as a numpy array.

    Returns:
        The transcribed text with surrounding whitespace stripped;
        an empty string when the chunk contains no samples.
    """
    # Nothing to transcribe — skip the thread-pool round trip entirely.
    if audio_chunk.size == 0:
        return ""

    # get_running_loop() is the correct call from inside a coroutine:
    # asyncio.get_event_loop() is deprecated here (since 3.10) and may
    # create a new, unintended loop when none is set on the thread.
    loop = asyncio.get_running_loop()

    # Run the blocking _transcribe function in the dedicated thread pool
    # so the event loop stays responsive while Whisper works.
    text = await loop.run_in_executor(executor, _transcribe, audio_chunk)

    return text