mgbam commited on
Commit
0e94c69
·
verified ·
1 Parent(s): 4425127

Update stt_handler.pyc

Browse files
Files changed (1) hide show
  1. stt_handler.pyc +44 -21
stt_handler.pyc CHANGED
@@ -2,34 +2,57 @@
2
  import whisper
3
  import numpy as np
4
  import asyncio
5
- import os
6
- from io import BytesIO
7
 
8
- # Load the model once when the module is imported
 
9
  print("Loading Whisper model...")
10
- model = whisper.load_model("base.en")
11
- print("Whisper model loaded.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
  async def transcribe_audio_chunk(audio_chunk: np.ndarray) -> str:
14
  """
15
- Transcribes an audio chunk using Whisper.
16
- Runs the blocking whisper call in a separate thread.
17
  """
18
- # The audio data is 16-bit PCM, 8000 Hz. Whisper expects float32.
19
- audio_float32 = audio_chunk.astype(np.float32) / 32768.0
20
 
21
- # Using an in-memory buffer
22
- wav_buffer = BytesIO()
23
-
24
- # We must provide the sample rate to whisper's transcribe function
25
  loop = asyncio.get_event_loop()
26
- result = await loop.run_in_executor(
27
- None, # Use the default executor (a ThreadPoolExecutor)
28
- lambda: model.transcribe(
29
- audio_float32,
30
- language="en",
31
- fp16=False # Set to False if not using a GPU
32
- )
33
  )
34
 
35
- return result.get("text", "").strip()
 
2
  import whisper
3
  import numpy as np
4
  import asyncio
5
+ from concurrent.futures import ThreadPoolExecutor
 
6
 
7
# --- Model Loading ---
# This is a CPU/memory intensive operation, so it's done once when the server starts.
print("Loading Whisper model...")
try:
    # Use a smaller model for faster loading and lower resource usage, ideal for real-time.
    # 'base.en' is a good starting point.
    model = whisper.load_model("base.en")
    print("Whisper model 'base.en' loaded successfully.")
except Exception as e:
    # Surface the failure and abort: the app is non-functional without the model.
    print(f"Error loading Whisper model: {e}")
    # sys.exit() raises SystemExit reliably; the bare exit() builtin is
    # injected by the `site` module and may not exist (python -S, frozen
    # builds, some embedded interpreters).
    import sys
    sys.exit(1)
# --- End Model Loading ---
20
+
21
+
22
# We use a thread pool to run the blocking Whisper transcription
# without blocking the main async event loop.
# max_workers=4 caps how many transcriptions run at once; Whisper inference
# is CPU-bound, so unbounded workers would contend rather than speed things up.
executor = ThreadPoolExecutor(max_workers=4)
25
+
26
def _transcribe(audio_np: np.ndarray):
    """Blocking Whisper transcription; meant to run on a worker thread.

    Converts raw 16-bit PCM samples into the float32 [-1.0, 1.0] range
    that Whisper expects, transcribes them, and returns the stripped
    transcript text (empty string when Whisper produced no "text" key).
    """
    # Scale int16 PCM down by 2**15 to normalize into [-1.0, 1.0].
    normalized = audio_np.astype(np.float32) / 32768.0

    transcription = model.transcribe(
        normalized,
        language="en",
        fp16=False,  # half precision is only useful on GPU; force fp32 on CPU
    )
    return transcription.get("text", "").strip()
40
+
41
 
42
async def transcribe_audio_chunk(audio_chunk: np.ndarray) -> str:
    """
    Transcribes an audio chunk using Whisper in a non-blocking way.

    Args:
        audio_chunk: Raw 16-bit PCM samples as a numpy array.

    Returns:
        The transcribed text with surrounding whitespace stripped;
        an empty string when the chunk contains no samples.
    """
    # Nothing to transcribe — skip the thread-pool round trip entirely.
    if audio_chunk.size == 0:
        return ""

    # get_running_loop() is the correct call from inside a coroutine:
    # asyncio.get_event_loop() is deprecated here (since 3.10) and may
    # create a new, unintended loop when none is set on the thread.
    loop = asyncio.get_running_loop()

    # Run the blocking _transcribe function in the dedicated thread pool
    # so the event loop stays responsive while Whisper works.
    text = await loop.run_in_executor(executor, _transcribe, audio_chunk)

    return text