Spaces:
Running
Running
| from fastapi import FastAPI, WebSocket | |
| from fastapi.staticfiles import StaticFiles | |
| from fastapi.responses import HTMLResponse | |
| from app.asr_worker import create_recognizer, stream_audio | |
| import json | |
| from starlette.websockets import WebSocketDisconnect | |
# ASGI application instance for this module.
app = FastAPI()

# Expose the files under app/static (path is relative to the process working
# directory) at the /static URL prefix.
app.mount(
    path="/static",
    app=StaticFiles(directory="app/static"),
    name="static",
)
async def root():
    """Return the front-end page as an HTML response.

    Reads ``app/static/index.html`` (relative to the process working
    directory) on every call and wraps its contents in an ``HTMLResponse``.

    NOTE(review): no route decorator is visible on this function in this
    view; confirm how it is registered on ``app``.
    """
    # Decode explicitly as UTF-8. Without an encoding argument, open() falls
    # back to the platform locale, which can mis-decode or raise on any
    # non-ASCII character in the markup.
    with open("app/static/index.html", encoding="utf-8") as f:
        return HTMLResponse(f.read())
async def websocket_endpoint(websocket: WebSocket):
    """Run one streaming speech-recognition session over a WebSocket.

    Protocol, as implemented below:

    * A JSON text frame with ``"type": "config"`` (re)creates the recognizer
      and its stream. Recognised keys: ``sampleRate``, ``model``,
      ``precision``, ``hotwords``, ``hotwordsScore``, ``epRule1``,
      ``epRule2``, ``epRule3``.
    * Every binary frame received after a config message is passed to
      ``stream_audio``. The server replies with
      ``{"partial": ..., "volume": ...}`` and, when the recognizer reports an
      endpoint, with ``{"final": ...}`` (only if the text is non-blank).
    * Binary frames received before any config message are dropped.

    The loop ends when the client disconnects or an exception is raised; the
    socket is then closed on a best-effort basis.

    NOTE(review): no route decorator is visible on this function in this
    view; confirm how it is registered on ``app``.
    """
    print("[DEBUG main] ▶ Attempting to accept WebSocket…")
    await websocket.accept()
    print("[DEBUG main] ▶ WebSocket.accept() returned → client is connected!")

    recognizer = None
    stream = None
    orig_sr = 48000  # default fallback
    client_gone = False

    try:
        while True:
            data = await websocket.receive()
            kind = data.get("type")

            if kind == "websocket.disconnect":
                # Calling receive() again after a disconnect message raises,
                # so leave the loop instead of treating it as an error.
                print(f"[DEBUG main] Received control/frame: {data}")
                client_gone = True
                break

            # "websocket.receive_bytes" is kept for backward compatibility;
            # the ASGI spec only defines "websocket.receive" for payloads.
            if kind not in ("websocket.receive", "websocket.receive_bytes"):
                print(f"[DEBUG main] Received control/frame: {data}")
                continue

            # ASGI allows both "text" and "bytes" keys to be present with one
            # of them set to None, so test the values, not key membership.
            text = data.get("text")
            if kind == "websocket.receive" and text is not None:
                try:
                    config_msg = json.loads(text)
                except (ValueError, RecursionError) as e:
                    # json.JSONDecodeError is a ValueError subclass.
                    print(f"[ERROR main] JSON parse failed: {e}")
                    continue

                # Valid JSON that is not an object (e.g. a list) has no
                # "type" field and is ignored rather than crashing on .get().
                if isinstance(config_msg, dict) and config_msg.get("type") == "config":
                    recognizer, stream, orig_sr = _create_session(config_msg, orig_sr)
                    print("[INFO main] WebSocket connection accepted; created a streaming context.")
                    continue

            # Don't process audio until after config
            if recognizer is None or stream is None:
                continue

            raw_audio = data.get("bytes")
            if raw_audio is None:
                continue

            await _process_audio(websocket, raw_audio, stream, recognizer, orig_sr)

    except WebSocketDisconnect:
        client_gone = True
        print("[INFO main] Client disconnected.")
    except Exception as e:
        print(f"[ERROR main] Unexpected exception: {e}")

    if not client_gone:
        try:
            await websocket.close()
        except Exception as e:
            # Best-effort close: the socket may already be closed. Catching
            # Exception (not a bare except) lets task cancellation propagate.
            print(f"[DEBUG main] websocket.close() failed: {e}")

    print("[INFO main] WebSocket closed, cleanup complete.")


def _create_session(config_msg, default_sr):
    """Build a recognizer and stream from a parsed ``config`` message.

    Args:
        config_msg: The decoded JSON object sent by the client.
        default_sr: Sample rate to use when ``sampleRate`` is absent.

    Returns:
        A ``(recognizer, stream, sample_rate)`` tuple.

    Raises:
        TypeError, ValueError: If a numeric field cannot be converted.
    """
    # 1) sample rate
    sample_rate = int(config_msg.get("sampleRate", default_sr))
    print(f"[INFO main] Set original sample rate to {sample_rate}")

    # 2) model & precision
    model_id = config_msg.get("model")
    precision = config_msg.get("precision")
    print(f"[INFO main] Selected model: {model_id}, precision: {precision}")

    # 3) hotwords & boost score
    hotwords = config_msg.get("hotwords", [])
    hotwords_score = float(config_msg.get("hotwordsScore", 0.0))
    print(f"[INFO main] Hotwords: {hotwords}, score: {hotwords_score}")

    # 4) endpoint detection rules
    ep1 = float(config_msg.get("epRule1", 2.4))
    ep2 = float(config_msg.get("epRule2", 1.2))
    ep3 = int(config_msg.get("epRule3", 300))
    print(f"[INFO main] Endpoint rules: rule1={ep1}s, rule2={ep2}s, rule3={ep3}ms")

    # 5) create recognizer with endpoint settings & biasing
    recognizer = create_recognizer(
        model_id,
        precision,
        hotwords=hotwords,
        hotwords_score=hotwords_score,
        ep_rule1=ep1,
        ep_rule2=ep2,
        ep_rule3=ep3,
    )
    return recognizer, recognizer.create_stream(), sample_rate


async def _process_audio(websocket: WebSocket, raw_audio, stream, recognizer, orig_sr):
    """Feed one audio chunk to the recognizer and send the results.

    Always sends the interim result; on endpoint detection also sends the
    final text (if non-blank) and resets the stream.
    """
    result, rms = stream_audio(raw_audio, stream, recognizer, orig_sr)

    # 1) send the interim result; volume is capped at 1.0
    await websocket.send_json({"partial": result, "volume": min(rms, 1.0)})

    # 2) if endpoint, emit final and reset
    if recognizer.is_endpoint(stream):
        if result.strip():
            print(f"[DEBUG main] Emitting final: {result!r}")
            await websocket.send_json({"final": result})
        recognizer.reset(stream)