| # rentbot/stt_handler.py | |
| import whisper | |
| import numpy as np | |
| import asyncio | |
| from concurrent.futures import ThreadPoolExecutor | |
# --- Model Loading ---
# Loading the model is CPU/memory intensive, so it happens exactly once at
# import time (i.e. when the server process starts).
print("Loading Whisper model...")
try:
    # 'base.en' trades a little accuracy for fast loading and a small
    # footprint — a good fit for near-real-time transcription.
    model = whisper.load_model("base.en")
    print("Whisper model 'base.en' loaded successfully.")
except Exception as e:
    print(f"Error loading Whisper model: {e}")
    # The app is non-functional without the model, so abort startup.
    # `raise SystemExit(1)` is preferred over the bare `exit()` builtin:
    # it does not depend on the `site` module and reports a NON-ZERO exit
    # status, so process supervisors can tell startup failed.
    raise SystemExit(1)
# --- End Model Loading ---

# Thread pool used to run the blocking Whisper transcription without
# stalling the main asyncio event loop.
executor = ThreadPoolExecutor(max_workers=4)
def _transcribe(audio_np: np.ndarray) -> str:
    """Run the blocking Whisper transcription; executes in a worker thread.

    Args:
        audio_np: Mono audio samples. Integer arrays (e.g. 16-bit PCM) are
            normalized to [-1.0, 1.0]; float arrays are assumed to already
            be normalized and are only cast to float32.

    Returns:
        The transcribed text with surrounding whitespace stripped
        ("" if nothing was recognized).
    """
    # Whisper expects float32 samples in [-1.0, 1.0]. Only rescale when the
    # input is integer PCM — dividing already-normalized float audio by
    # 32768 would silently flatten the signal to near-silence.
    if np.issubdtype(audio_np.dtype, np.integer):
        audio_float32 = audio_np.astype(np.float32) / 32768.0
    else:
        audio_float32 = audio_np.astype(np.float32)
    result = model.transcribe(
        audio_float32,
        language="en",
        fp16=False,  # fp16 is only meaningful on GPU; force fp32 on CPU
    )
    return result.get("text", "").strip()
async def transcribe_audio_chunk(audio_chunk: np.ndarray) -> str:
    """Transcribe an audio chunk with Whisper without blocking the event loop.

    Args:
        audio_chunk: Audio samples to transcribe (see ``_transcribe`` for
            the accepted dtypes).

    Returns:
        The transcribed text, or "" for an empty chunk.
    """
    if audio_chunk.size == 0:
        return ""
    # get_running_loop() is the correct call from inside a coroutine; the
    # deprecated get_event_loop() emits a DeprecationWarning on 3.10+ and
    # could create a brand-new loop when none was running.
    loop = asyncio.get_running_loop()
    # Off-load the blocking Whisper call to the shared thread pool so the
    # event loop stays responsive.
    text = await loop.run_in_executor(executor, _transcribe, audio_chunk)
    return text