# Hugging Face "Spaces: Sleeping" status text captured with the page scrape — not part of the code.
import numpy as np
import uvicorn
from fastapi import FastAPI, WebSocket, WebSocketDisconnect
from faster_whisper import WhisperModel
| """The core of the real-time processing. It receives the audio stream sent by clients, | |
| segments it and transmits it to a speech recognition model (STT).""" | |
app = FastAPI()
# --- Load the model only ONCE, at import time, so it is shared by all connections ---
model = WhisperModel("small", compute_type="int8")
SAMPLE_RATE = 16000  # Hz — expected sample rate of the client's PCM audio
CHUNK_SECONDS = 1.0  # seconds of audio to accumulate before each transcription pass
BUFFER_SIZE = int(SAMPLE_RATE * CHUNK_SECONDS)  # minimum samples before transcribing
@app.websocket("/ws")
async def websocket_transcribe(ws: WebSocket):
    """Stream transcription endpoint.

    The client sends raw PCM16 audio frames over the websocket; once at
    least CHUNK_SECONDS of audio has accumulated, the whole buffer is
    transcribed and the resulting text is sent back as a text message.
    A 0.5 s tail of audio is kept after each pass as acoustic context.

    Fix: the handler was defined but never registered on ``app`` — without
    the ``@app.websocket`` decorator the endpoint was unreachable.
    """
    await ws.accept()
    print("Client connected")
    audio_buffer = np.array([], dtype=np.float32)
    try:
        while True:
            try:
                data = await ws.receive_bytes()
            except WebSocketDisconnect:
                # Narrowed from a bare ``except:`` which also swallowed
                # KeyboardInterrupt / CancelledError; unexpected errors now
                # reach the outer boundary below instead of being mislabeled
                # as a disconnect.
                print("Client disconnected")
                break
            # PCM16 (assumed little-endian — TODO confirm client format)
            # -> float32 in [-1, 1), the range Whisper expects.
            chunk = np.frombuffer(data, np.int16).astype(np.float32) / 32768.0
            audio_buffer = np.concatenate((audio_buffer, chunk))
            # Transcribe only once we have at least one full chunk of audio.
            if len(audio_buffer) >= BUFFER_SIZE:
                segments, _ = model.transcribe(audio_buffer, language="en")
                text = " ".join(seg.text for seg in segments)
                await ws.send_text(text)
                # Rolling buffer: keep the last 0.5 s as context for the
                # next pass so words spanning chunk boundaries survive.
                audio_buffer = audio_buffer[-int(SAMPLE_RATE * 0.5):]
    except Exception as e:
        # Top-level boundary: log and let the connection close rather than
        # crashing the server task.
        print("WebSocket closed", e)
| if __name__ == "__main__": | |
| uvicorn.run(app, host="0.0.0.0", port=8000) | |