| from fastapi import FastAPI, WebSocket |
| from fastapi.responses import HTMLResponse |
| from app.asr import create_recognizer, stream_audio |
| |
| import json |
| from pydantic import BaseModel |
|
|
# Application instance on which this module's route decorators register
# their handlers.
app = FastAPI()
|
|
| |
|
|
@app.get("/")
async def root():
    """Serve the page stored at ``app/index.html``.

    The path is relative, so it is resolved against the process's current
    working directory.

    Returns:
        HTMLResponse: The file's contents as the response body.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # Pin the encoding: without it, open() decodes with the platform's
    # locale encoding, which can differ between hosts.
    with open("app/index.html", encoding="utf-8") as f:
        return HTMLResponse(f.read())
|
|
|
|
@app.websocket("/ws")
async def websocket_endpoint(websocket: WebSocket):
    """Stream speech-recognition results for audio sent over a WebSocket.

    Protocol, as implemented here:

    * The client sends a JSON text frame
      ``{"type": "config", "sampleRate": <int>}``. This creates the
      recognizer and its stream; binary frames that arrive before a valid
      config message are dropped. Other text frames are ignored.
    * Every later binary frame is passed to ``stream_audio``. The server
      replies with ``{"partial": <text>, "volume": <rms capped at 1.0>}``
      and, when the recognizer reports an endpoint and the text is not
      blank, with ``{"final": <text>}``. The stream is reset after every
      endpoint.

    Args:
        websocket: The connection handed over by FastAPI.
    """
    print("[DEBUG main] ▶ Attempting to accept WebSocket…")
    await websocket.accept()
    print("[DEBUG main] ▶ WebSocket.accept() returned → client is connected!")

    recognizer = None
    stream = None
    orig_sr = 48000  # Placeholder until a config message supplies the rate.
    client_disconnected = False

    try:
        while True:
            data = await websocket.receive()
            kind = data.get("type")

            if kind == "websocket.disconnect":
                # Calling receive() again after a disconnect raises, so
                # leave the loop here instead of skipping the message.
                print(f"[DEBUG main] Received control/frame: {data}")
                client_disconnected = True
                break

            if kind != "websocket.receive":
                print(f"[DEBUG main] Received control/frame: {data}")
                continue

            # Either payload key may be present with a None value, so test
            # the value rather than key membership.
            text_payload = data.get("text")
            if text_payload is not None:
                try:
                    config_msg = json.loads(text_payload)
                except (ValueError, RecursionError) as e:
                    print(f"[ERROR main] JSON parse failed: {e}")
                    continue

                # Valid JSON is not necessarily an object; ignore anything
                # that is not a config message.
                if not isinstance(config_msg, dict):
                    continue
                if config_msg.get("type") != "config":
                    continue

                try:
                    sample_rate = int(config_msg["sampleRate"])
                except (KeyError, TypeError, ValueError, OverflowError) as e:
                    # A bad config must not tear down the whole connection.
                    print(f"[ERROR main] Invalid config message: {e!r}")
                    continue
                if sample_rate <= 0:
                    print(f"[ERROR main] Invalid sample rate: {sample_rate}")
                    continue

                orig_sr = sample_rate
                print(f"[INFO main] Set original sample rate to {orig_sr}")

                recognizer = create_recognizer()
                stream = recognizer.create_stream()
                print("[INFO main] WebSocket connection accepted; created a streaming context.")
                continue

            raw_audio = data.get("bytes")
            if raw_audio is None or recognizer is None or stream is None:
                # No audio payload, or audio sent before the config message.
                continue

            # NOTE(review): stream_audio is called synchronously; if it is
            # CPU-heavy it will stall the event loop - confirm.
            result, rms = stream_audio(raw_audio, stream, recognizer, orig_sr)
            await websocket.send_json({"partial": result, "volume": min(rms, 1.0)})

            if recognizer.is_endpoint(stream):
                if result.strip():
                    print(f"[DEBUG main] Emitting final: {result!r}")
                    await websocket.send_json({"final": result})
                recognizer.reset(stream)
    except Exception as e:
        # Top-level boundary for this connection: log and fall through to
        # the cleanup below.
        print(f"[ERROR main] Unexpected exception: {e}")
    finally:
        if not client_disconnected:
            try:
                await websocket.close()
            except Exception as close_err:
                # Best effort: the connection may already be gone.
                print(f"[DEBUG main] WebSocket close failed: {close_err!r}")
        print("[INFO main] WebSocket closed, cleanup complete.")
|
|
|
|
# Pydantic model with a single required string field, ``text``.
# NOTE(review): presumably the request body of a translation endpoint
# defined elsewhere; nothing in the visible code uses it - confirm.
class TranslationRequest(BaseModel):
    text: str
|
|
|
|
| |
| |
| |
| |
|
|