# rentbot/stt_handler.py
"""Speech-to-text handling: non-blocking Whisper transcription for async callers.

Loads the Whisper model once at import time (the app is non-functional
without it) and exposes `transcribe_audio_chunk`, which offloads the
blocking transcription call to a thread pool so the asyncio event loop
stays responsive.
"""

import asyncio
import sys
from concurrent.futures import ThreadPoolExecutor

import numpy as np
import whisper

# --- Model Loading ---
# This is a CPU/memory intensive operation, so it's done once when the server starts.
print("Loading Whisper model...")
try:
    # Use a smaller model for faster loading and lower resource usage, ideal for real-time.
    # 'base.en' is a good starting point.
    model = whisper.load_model("base.en")
    print("Whisper model 'base.en' loaded successfully.")
except Exception as e:
    print(f"Error loading Whisper model: {e}")
    # Exit if the model can't be loaded, as the app is non-functional without it.
    # NOTE: use sys.exit rather than the bare `exit()` builtin — `exit` is a
    # site-module convenience and is not guaranteed to exist (e.g. under
    # `python -S` or in frozen/embedded interpreters).
    sys.exit(1)
# --- End Model Loading ---

# We use a thread pool to run the blocking Whisper transcription
# without blocking the main async event loop.
executor = ThreadPoolExecutor(max_workers=4)


def _transcribe(audio_np: np.ndarray) -> str:
    """Run Whisper synchronously on one audio chunk.

    Internal helper intended to run in a worker thread (via `executor`),
    never directly on the event loop.

    Args:
        audio_np: Raw audio samples, assumed to be 16-bit PCM integers.

    Returns:
        The transcribed text, stripped of surrounding whitespace
        ("" if Whisper produced no text).
    """
    # The audio data is 16-bit PCM. Whisper expects 32-bit float.
    # Normalize the audio from integers to the range [-1.0, 1.0].
    audio_float32 = audio_np.astype(np.float32) / 32768.0
    result = model.transcribe(
        audio_float32,
        language="en",
        fp16=False,  # Set to False for CPU-based inference
    )
    return result.get("text", "").strip()


async def transcribe_audio_chunk(audio_chunk: np.ndarray) -> str:
    """Transcribe an audio chunk with Whisper without blocking the event loop.

    Args:
        audio_chunk: Raw 16-bit PCM samples as a NumPy array.

    Returns:
        The transcribed text ("" for an empty chunk).
    """
    if audio_chunk.size == 0:
        return ""

    # get_running_loop() is the correct call inside a coroutine;
    # get_event_loop() here is deprecated since Python 3.10.
    loop = asyncio.get_running_loop()

    # Run the blocking _transcribe function in the thread pool.
    text = await loop.run_in_executor(
        executor,
        _transcribe,
        audio_chunk
    )
    return text