# Relay / server.py
# Author: FredyHoundayi
# Initial commit: Whisper WebSocket API for Hugging Face (commit da12a71)
import numpy as np
import uvicorn
from fastapi import FastAPI, WebSocket, WebSocketDisconnect
from faster_whisper import WhisperModel
"""The core of the real-time processing. It receives the audio stream sent by clients,
segments it and transmits it to a speech recognition model (STT)."""
app = FastAPI()
# --- Charger le modèle UNE seule fois ---
model = WhisperModel("small", compute_type="int8")
SAMPLE_RATE = 16000
CHUNK_SECONDS = 1.0
BUFFER_SIZE = int(SAMPLE_RATE * CHUNK_SECONDS)
@app.websocket("/ws/transcribe")
async def websocket_transcribe(ws: WebSocket):
await ws.accept()
print("Client connected")
audio_buffer = np.array([], dtype=np.float32)
try:
while True:
try:
data = await ws.receive_bytes()
except:
print("Client disconnected")
break
chunk = np.frombuffer(data, np.int16).astype(np.float32) / 32768.0
audio_buffer = np.concatenate((audio_buffer, chunk))
# Transcrire seulement si on a assez
if len(audio_buffer) >= BUFFER_SIZE:
segments, _ = model.transcribe(audio_buffer, language="en")
text = " ".join([seg.text for seg in segments])
await ws.send_text(text)
# Rolling buffer : garder dernier 0.5s pour contexte
audio_buffer = audio_buffer[-int(SAMPLE_RATE * 0.5):]
except Exception as e:
print("WebSocket closed", e)
if __name__ == "__main__":
uvicorn.run(app, host="0.0.0.0", port=8000)