import numpy as np
from fastapi import FastAPI, WebSocket, WebSocketDisconnect
from faster_whisper import WhisperModel
import uvicorn
"""The core of the real-time processing. It receives the audio stream sent by clients,
segments it and transmits it to a speech recognition model (STT)."""
app = FastAPI()
# --- Charger le modèle UNE seule fois ---
model = WhisperModel("small", compute_type="int8")
SAMPLE_RATE = 16000
CHUNK_SECONDS = 1.0
BUFFER_SIZE = int(SAMPLE_RATE * CHUNK_SECONDS)
@app.websocket("/ws/transcribe")
async def websocket_transcribe(ws: WebSocket):
await ws.accept()
print("Client connected")
audio_buffer = np.array([], dtype=np.float32)
try:
while True:
try:
data = await ws.receive_bytes()
except:
print("Client disconnected")
break
chunk = np.frombuffer(data, np.int16).astype(np.float32) / 32768.0
audio_buffer = np.concatenate((audio_buffer, chunk))
# Transcrire seulement si on a assez
if len(audio_buffer) >= BUFFER_SIZE:
segments, _ = model.transcribe(audio_buffer, language="en")
text = " ".join([seg.text for seg in segments])
await ws.send_text(text)
# Rolling buffer : garder dernier 0.5s pour contexte
audio_buffer = audio_buffer[-int(SAMPLE_RATE * 0.5):]
except Exception as e:
print("WebSocket closed", e)
if __name__ == "__main__":
uvicorn.run(app, host="0.0.0.0", port=8000)
|