"""Core of the real-time processing pipeline.

Receives the audio stream sent by clients over a WebSocket, buffers and
segments it, and forwards it to a speech-to-text (STT) model.
"""
import numpy as np
import uvicorn
from fastapi import FastAPI, WebSocket, WebSocketDisconnect
from faster_whisper import WhisperModel

app = FastAPI()

# --- Load the model ONCE at startup; loading per-connection would be slow ---
model = WhisperModel("small", compute_type="int8")

SAMPLE_RATE = 16000            # Hz; expected client sample rate
CHUNK_SECONDS = 1.0            # transcribe once this much audio is buffered
BUFFER_SIZE = int(SAMPLE_RATE * CHUNK_SECONDS)
CONTEXT_SECONDS = 0.5          # trailing audio kept as context between chunks


@app.websocket("/ws/transcribe")
async def websocket_transcribe(ws: WebSocket) -> None:
    """Stream PCM audio in, stream transcribed text out.

    Clients send raw little-endian int16 mono PCM at SAMPLE_RATE. Each time
    at least BUFFER_SIZE samples have accumulated, the buffer is transcribed
    and the joined segment text is sent back. The last CONTEXT_SECONDS of
    audio is retained so the next chunk has acoustic context.
    """
    await ws.accept()
    print("Client connected")
    audio_buffer = np.array([], dtype=np.float32)
    try:
        while True:
            try:
                data = await ws.receive_bytes()
            except WebSocketDisconnect:
                # Normal client disconnect; anything else propagates to the
                # outer boundary handler instead of being silently swallowed.
                print("Client disconnected")
                break

            # int16 PCM -> float32 in [-1, 1), the format the model expects.
            chunk = np.frombuffer(data, np.int16).astype(np.float32) / 32768.0
            audio_buffer = np.concatenate((audio_buffer, chunk))

            # Transcribe only once enough audio has accumulated.
            if len(audio_buffer) >= BUFFER_SIZE:
                segments, _ = model.transcribe(audio_buffer, language="en")
                text = " ".join(seg.text for seg in segments)
                await ws.send_text(text)
                # Rolling buffer: keep the last CONTEXT_SECONDS for context.
                audio_buffer = audio_buffer[-int(SAMPLE_RATE * CONTEXT_SECONDS):]
    except Exception as e:
        # Top-level boundary: log unexpected failures rather than letting
        # one bad connection crash the server process.
        print("WebSocket closed", e)


if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000)