Spaces:

auralodyssey
/

api

Running

App Files Files Community

auralodyssey committed on Jan 6

Commit

9bca27a

verified ·

1 Parent(s): 667ab5c

Update app.py

Browse files

Files changed (1) hide show

app.py +343 -158

app.py CHANGED Viewed

@@ -295,32 +295,28 @@
 # if __name__ == "__main__":
 #     uvicorn.run(final_app, host="0.0.0.0", port=7860)
 import os
-import re
 import time
 import asyncio
 from concurrent.futures import ThreadPoolExecutor
 import numpy as np
-import torch
 from fastapi import FastAPI, WebSocket, WebSocketDisconnect
 import uvicorn
-from kokoro import KPipeline
-# ----------------------------
-# CPU THREAD CAP (HF free tier is typically 2 vCPU)
-# ----------------------------
 os.environ.setdefault("OMP_NUM_THREADS", "2")
 os.environ.setdefault("MKL_NUM_THREADS", "2")
 os.environ.setdefault("NUMEXPR_NUM_THREADS", "2")
-try:
-    torch.set_num_threads(int(os.environ.get("TORCH_NUM_THREADS", "2")))
-    torch.set_num_interop_threads(int(os.environ.get("TORCH_NUM_INTEROP_THREADS", "1")))
-except Exception:
-    pass
-# Optional uvloop (safe to skip if not installed)
 try:
     import uvloop  # type: ignore
     asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
@@ -329,12 +325,13 @@ except Exception:
 SAMPLE_RATE = 24000
-print("🚀 BOOTING KOKORO API ONLY (OFFICIAL PIPELINE)")
-# ----------------------------
-# VOICES (UI label -> kokoro voice id)
-# Client can send either label or id.
-# ----------------------------
 VOICE_CHOICES = {
     "🇺🇸 🚺 Heart": "af_heart", "🇺🇸 🚺 Bella": "af_bella", "🇺🇸 🚺 Nicole": "af_nicole",
     "🇺🇸 🚺 Aoede": "af_aoede", "🇺🇸 🚺 Kore": "af_kore", "🇺🇸 🚺 Sarah": "af_sarah",
@@ -350,140 +347,330 @@ VOICE_CHOICES = {
 ALLOWED_VOICE_IDS = set(VOICE_CHOICES.values())
 # ✅ DEFAULT VOICE = ONYX
-DEFAULT_VOICE_LABEL = "🇺🇸 🚹 Onyx"
-DEFAULT_VOICE_ID = VOICE_CHOICES[DEFAULT_VOICE_LABEL]
 DEFAULT_SPEED = 1.0
-def voice_to_lang_code(voice_id: str) -> str:
-    if voice_id.startswith("bf_") or voice_id.startswith("bm_"):
-        return "b"  # British
-    return "a"      # American
-# ----------------------------
-# PIPELINES (keep hot in RAM)
-# ----------------------------
-PIPELINES = {
-    "a": KPipeline(lang_code="a"),
-    "b": KPipeline(lang_code="b"),
-}
-# ----------------------------
-# TEXT NORMALIZATION (from your provided docs)
-# ----------------------------
-_SENT_BOUNDARY = re.compile(r"([.!?;:])\s+")
-_MULTI_NL = re.compile(r"\n{3,}")
-_CAMEL = re.compile(r"([a-z])([A-Z])")
-_ALLCAPS = re.compile(r"\b([A-Z]{2,})\b")
-def normalize_text(text: str) -> str:
-    if not text:
-        return ""
-    return text.replace("Kokoro", "[Kokoro](/kˈOkəɹO/)")
-def reduce_name_skips(text: str) -> str:
     if not text:
         return ""
-    text = _ALLCAPS.sub(lambda m: " ".join(list(m.group(1))), text)
-    text = _CAMEL.sub(r"\1 \2", text)
     return text
-def inject_newlines_for_fast_stream(text: str) -> str:
-    text = normalize_text(text).strip()
-    if not text:
-        return ""
-    text = _SENT_BOUNDARY.sub(r"\1\n", text)
-    text = _MULTI_NL.sub("\n\n", text)
-    # Ensure a small first segment for faster first audio
-    if "\n" not in text and len(text) > 90:
-        cut = text.rfind(" ", 0, 70)
-        if cut < 35:
-            cut = 70
-        text = text[:cut].strip() + "\n" + text[cut:].strip()
-    return text
-# ----------------------------
-# AUDIO CONVERSION
-# ----------------------------
-def audio_to_int16_np(audio):
-    if isinstance(audio, torch.Tensor):
-        a = audio.detach().cpu()
-        a = torch.clamp(a, -1.0, 1.0)
-        return (a * 32767.0).to(torch.int16).numpy()
-    a = np.asarray(audio)
-    a = np.clip(a, -1.0, 1.0)
-    return (a * 32767.0).astype(np.int16)
-def audio_to_pcm_bytes(audio) -> bytes:
-    return audio_to_int16_np(audio).tobytes()
-# ----------------------------
-# OFFICIAL GENERATION PATH (single pipeline call per request)
-# ----------------------------
-def kokoro_audio_iter(text: str, voice_id: str, speed: float):
-    lang_code = voice_to_lang_code(voice_id)
-    pipeline = PIPELINES[lang_code]
-    prepared = inject_newlines_for_fast_stream(text)
-    if not prepared:
-        return
-    with torch.inference_mode():
-        gen = pipeline(
-            prepared,
-            voice=voice_id,
-            speed=float(speed),
-            split_pattern=r"\n+",
-        )
-        for _, _, audio in gen:
-            yield audio
-def warmup():
-    try:
-        t0 = time.time()
-        for _ in kokoro_audio_iter("Hello.", DEFAULT_VOICE_ID, 1.0):
-            break
-        print(f"✅ WARMUP DONE in {time.time() - t0:.2f}s")
-    except Exception as e:
-        print(f"⚠️ WARMUP FAILED: {e}")
-# ----------------------------
-# FASTAPI APP (API ONLY)
-# ----------------------------
 api = FastAPI()
 INFERENCE_EXECUTOR = ThreadPoolExecutor(max_workers=1)
-INFERENCE_QUEUE: asyncio.Queue = asyncio.Queue(maxsize=64)
 @api.get("/health")
 async def health():
-    return {"ok": True, "model": "kokoro", "sample_rate": SAMPLE_RATE, "default_voice": DEFAULT_VOICE_ID}
-async def audio_engine_loop():
     print("⚡ API AUDIO PIPELINE STARTED")
     loop = asyncio.get_running_loop()
     while True:
-        ws, voice_id, speed, text = await INFERENCE_QUEUE.get()
         if ws.client_state.value > 1:
             continue
         frame_q: asyncio.Queue = asyncio.Queue(maxsize=8)
-        def _worker():
             try:
                 first = True
-                started = time.time()
-                for audio in kokoro_audio_iter(text, voice_id, speed):
-                    b = audio_to_pcm_bytes(audio)
-                    loop.call_soon_threadsafe(frame_q.put_nowait, b)
                     if first:
                         first = False
-                        dt = time.time() - started
-                        print(f"⚡ first audio ready in {dt:.2f}s")
                 loop.call_soon_threadsafe(frame_q.put_nowait, None)
             except Exception as e:
                 print(f"API Worker Error: {e}")
                 try:
@@ -491,7 +678,10 @@ async def audio_engine_loop():
                 except Exception:
                     pass
-        INFERENCE_EXECUTOR.submit(_worker)
         while True:
             frame = await frame_q.get()
@@ -499,36 +689,23 @@ async def audio_engine_loop():
                 break
             if ws.client_state.value > 1:
                 break
             try:
                 await ws.send_bytes(frame)
             except Exception:
                 break
-@api.on_event("startup")
-async def startup():
-    loop = asyncio.get_running_loop()
-    await loop.run_in_executor(INFERENCE_EXECUTOR, warmup)
-    asyncio.create_task(audio_engine_loop())
-def resolve_voice(value: str) -> str:
-    if not value:
-        return DEFAULT_VOICE_ID
-    if value in VOICE_CHOICES:
-        vid = VOICE_CHOICES[value]
-    else:
-        vid = value.strip()
-    if vid not in ALLOWED_VOICE_IDS:
-        return DEFAULT_VOICE_ID
-    return vid
 @api.websocket("/ws/audio")
 async def websocket_endpoint(ws: WebSocket):
     await ws.accept()
     voice_id = DEFAULT_VOICE_ID  # ✅ default Onyx
     speed = DEFAULT_SPEED
@@ -554,34 +731,42 @@ async def websocket_endpoint(ws: WebSocket):
             except Exception:
                 break
-            is_config = ("config" in data) or (data.get("type") == "config")
-            if is_config:
                 voice_id = resolve_voice(str(data.get("voice", voice_id)))
                 try:
                     speed = float(data.get("speed", speed))
                 except Exception:
                     speed = DEFAULT_SPEED
-            has_text = ("text" in data) or (data.get("type") == "text")
-            if has_text:
-                raw = data.get("text", "")
-                raw = reduce_name_skips(raw)
-                raw = normalize_text(raw)
-                if raw and raw.strip():
-                    try:
-                        INFERENCE_QUEUE.put_nowait((ws, voice_id, speed, raw))
-                    except asyncio.QueueFull:
-                        try:
-                            await ws.send_json({"type": "error", "message": "server_busy"})
-                        except Exception:
-                            pass
-            if "flush" in data or data.get("type") == "flush":
                 try:
-                    await ws.send_json({"type": "flushed"})
-                except Exception:
-                    pass
     finally:
         heartbeat_task.cancel()

 # if __name__ == "__main__":
 #     uvicorn.run(final_app, host="0.0.0.0", port=7860)
 import os
+import json
 import time
+import re
 import asyncio
+import threading
 from concurrent.futures import ThreadPoolExecutor
 import numpy as np
+import onnxruntime as ort
+from huggingface_hub import hf_hub_download
+from misaki import en
+from functools import lru_cache
 from fastapi import FastAPI, WebSocket, WebSocketDisconnect
 import uvicorn
+# =========================================================
+# HF CPU BOX TUNING (2 vCPU)
+# =========================================================
 os.environ.setdefault("OMP_NUM_THREADS", "2")
 os.environ.setdefault("MKL_NUM_THREADS", "2")
 os.environ.setdefault("NUMEXPR_NUM_THREADS", "2")
 try:
     import uvloop  # type: ignore
     asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
 SAMPLE_RATE = 24000
+# =========================================================
+# ONNX KOKORO CONFIG (YOUR ONNX STYLE)
+# =========================================================
+MODEL_REPO = "onnx-community/Kokoro-82M-v1.0-ONNX"
+MODEL_FILE = "onnx/model.onnx"
+TOKENIZER_FILE = "tokenizer.json"
 VOICE_CHOICES = {
     "🇺🇸 🚺 Heart": "af_heart", "🇺🇸 🚺 Bella": "af_bella", "🇺🇸 🚺 Nicole": "af_nicole",
     "🇺🇸 🚺 Aoede": "af_aoede", "🇺🇸 🚺 Kore": "af_kore", "🇺🇸 🚺 Sarah": "af_sarah",
 ALLOWED_VOICE_IDS = set(VOICE_CHOICES.values())
 # ✅ DEFAULT VOICE = ONYX
+DEFAULT_VOICE_ID = "am_onyx"
 DEFAULT_SPEED = 1.0
+print("🚀 BOOTING ONNX KOKORO API (LOW LATENCY, API ONLY)")
+# =========================================================
+# 1) G2P
+# =========================================================
+G2P = en.G2P(trf=False, british=False, fallback=None)
+# =========================================================
+# 2) TOKENIZER
+# =========================================================
+# Fetch the tokenizer JSON from MODEL_REPO; the returned value is opened as a local file path.
+vocab_path = hf_hub_download(repo_id=MODEL_REPO, filename=TOKENIZER_FILE)
+with open(vocab_path, "r", encoding="utf-8") as f:
+    data = json.load(f)
+# Prefer the nested "model" -> "vocab" table; otherwise use a top-level "vocab",
+# or an empty mapping when neither key is present.
+TOKENIZER = data["model"]["vocab"] if "model" in data else data.get("vocab", {})
+# =========================================================
+# 3) VOICES (LAZY LOAD, CACHE)
+# =========================================================
+VOICE_CACHE = {}  # voice_id -> np.ndarray (T,1,256)
+def _load_voice_bin(voice_id: str) -> np.ndarray:
+    """Fetch ``voices/<voice_id>.bin`` from MODEL_REPO and return it as a float32 array reshaped to (-1, 1, 256)."""
+    local_file = hf_hub_download(
+        repo_id=MODEL_REPO,
+        filename=f"voices/{voice_id}.bin",
+    )
+    flat = np.fromfile(local_file, dtype=np.float32)
+    return flat.reshape(-1, 1, 256)
+def get_voice(voice_id_or_label: str) -> np.ndarray:
+    """Return the style array for a voice, loading and caching it on first use.
+
+    Accepts either a UI label (a key of VOICE_CHOICES) or a raw voice id.
+    Empty/None input and ids outside ALLOWED_VOICE_IDS resolve to
+    DEFAULT_VOICE_ID. If loading the requested voice fails, the error is
+    reported and the "af_bella" array is returned instead; nothing is cached
+    under the requested id in that case.
+    """
+    # Strip before the label lookup so padded labels and a None/empty value
+    # are handled instead of raising or silently missing the table.
+    key = (voice_id_or_label or "").strip()
+    vid = VOICE_CHOICES.get(key, key).strip()
+    if vid not in ALLOWED_VOICE_IDS:
+        vid = DEFAULT_VOICE_ID
+    if vid in VOICE_CACHE:
+        return VOICE_CACHE[vid]
+    try:
+        print(f"⬇️ Loading Voice: {vid}")
+        VOICE_CACHE[vid] = _load_voice_bin(vid)
+    except Exception as e:
+        # Surface the cause; the fallback below would otherwise hide it.
+        print(f"⚠️ Voice load error for {vid}: {e}")
+        if "af_bella" not in VOICE_CACHE:
+            print("⚠️ Voice load failed, falling back to af_bella")
+            VOICE_CACHE["af_bella"] = _load_voice_bin("af_bella")
+        return VOICE_CACHE["af_bella"]
+    return VOICE_CACHE[vid]
+# =========================================================
+# 4) ONNX SESSION (TUNED FOR 2 vCPU)
+# =========================================================
+# Resolve the ONNX model file from MODEL_REPO to a local path.
+model_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE)
+sess_options = ort.SessionOptions()
+# Request the ORT_ENABLE_ALL graph optimization level.
+sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
+# NOTE(review): presumably stops intra-op worker threads from busy-waiting — confirm against the ONNX Runtime session config key docs.
+sess_options.add_session_config_entry("session.intra_op.allow_spinning", "0")
+# On 2 vCPU, keep it tight
+# Thread counts can be overridden through ORT_INTRA_OP_THREADS / ORT_INTER_OP_THREADS (defaults "2" and "1").
+sess_options.intra_op_num_threads = int(os.environ.get("ORT_INTRA_OP_THREADS", "2"))
+sess_options.inter_op_num_threads = int(os.environ.get("ORT_INTER_OP_THREADS", "1"))
+# CPU-only session; infer_tokens() runs the model through this object.
+SESSION = ort.InferenceSession(model_path, sess_options, providers=["CPUExecutionProvider"])
+print("✅ ONNX SESSION READY")
+# =========================================================
+# TEXT QUALITY FIXES (NAMES, ACRONYMS, CAMELCASE)
+# =========================================================
+# A standalone run of two or more uppercase ASCII letters (e.g. "AI").
+RE_ALLCAPS = re.compile(r"\b([A-Z]{2,})\b")
+# A lowercase ASCII letter immediately followed by an uppercase one (camelCase seam).
+RE_CAMEL = re.compile(r"([a-z])([A-Z])")
+# One or more punctuation/newline characters; the capture group makes split() keep the delimiters.
+RE_SENT_SPLIT = re.compile(r'([.,!?;:\n]+)')
+def normalize_names(text: str) -> str:
+    """Rewrite acronyms and camelCase words so each part is spelled out separately.
+
+    Runs of uppercase letters matched by RE_ALLCAPS get a space between every
+    letter, then a space is inserted at each lowercase-to-uppercase seam
+    matched by RE_CAMEL. Falsy input returns an empty string.
+    """
     if not text:
         return ""
+    # AI -> A I
+    text = RE_ALLCAPS.sub(lambda m: " ".join(list(m.group(1))), text)
+    # OpenAI -> Open AI
+    text = RE_CAMEL.sub(r"\1 \2", text)
     return text
+@lru_cache(maxsize=10000)
+def get_tokens_cached(text: str):
+    """Convert text to a tuple of model token ids (memoized per input string).
+
+    The text is phonemized with G2P and each phoneme is looked up in
+    TOKENIZER. Phonemes missing from the vocab are dropped.
+    Returns an empty tuple when nothing maps to a token.
+    """
+    # IPA hint from v1: the previous revision passed the pronunciation in
+    # link form ("[word](/phonemes/)") rather than as bare IPA, which G2P
+    # would phonemize again as ordinary text.
+    text = text.replace("Kokoro", "[Kokoro](/kˈOkəɹO/)")
+    phonemes, _ = G2P(text)
+    # Skip unknown phonemes instead of mapping them to 0: infer_tokens() uses
+    # id 0 as the sequence boundary, so a 0 mid-sequence is not a neutral filler.
+    return tuple(TOKENIZER[p] for p in phonemes if p in TOKENIZER)
+def tuned_splitter(text: str):
+    """Yield stripped text chunks that end on punctuation and grow in size.
+
+    The text is split on RE_SENT_SPLIT (delimiters kept) and pieces are
+    accumulated until the buffer ends with punctuation/newline and reaches a
+    minimum length: 60, 120, 180 characters for the first three chunks, 280
+    afterwards. Whatever is left at the end is yielded as a final chunk.
+    """
+    # Fast first audio, bigger later chunks
+    early_minimums = (60, 120, 180)
+    pending = ""
+    emitted = 0
+    for piece in RE_SENT_SPLIT.split(text):
+        pending += piece
+        minimum = early_minimums[emitted] if emitted < len(early_minimums) else 280
+        if not pending or len(pending) < minimum:
+            continue
+        if not re.search(r"[.,!?;:\n]$", pending):
+            continue
+        chunk = pending.strip()
+        pending = ""
+        if chunk:
+            emitted += 1
+            yield chunk
+    tail = pending.strip()
+    if tail:
+        yield tail
+# =========================================================
+# AUDIO POST (LESS AGGRESSIVE TRIM + CROSSFADE TO REMOVE "DROPS")
+# =========================================================
+def trim_leading(audio_f32: np.ndarray, threshold=0.01, pad=80) -> np.ndarray:
+    """Drop leading samples whose magnitude is at or below ``threshold``.
+
+    Up to ``pad`` samples before the first louder sample are kept. The input
+    is returned unchanged when it is empty or never exceeds the threshold.
+    """
+    loud = np.abs(audio_f32) > threshold
+    # Covers both the empty array and the all-quiet array.
+    if not loud.any():
+        return audio_f32
+    first_loud = int(np.argmax(loud))
+    return audio_f32[max(0, first_loud - pad):]
+def trim_trailing(audio_f32: np.ndarray, threshold=0.01, pad=120) -> np.ndarray:
+    """Drop trailing samples whose magnitude is at or below ``threshold``.
+
+    Up to ``pad`` samples after the last louder sample are kept. The input is
+    returned unchanged when it is empty or never exceeds the threshold.
+    """
+    loud = np.abs(audio_f32) > threshold
+    # Covers both the empty array and the all-quiet array.
+    if not loud.any():
+        return audio_f32
+    # Count of quiet samples at the end = index of first loud sample in the reversed mask.
+    quiet_tail = int(np.argmax(loud[::-1]))
+    end = int(len(loud) - quiet_tail)
+    return audio_f32[:min(len(audio_f32), end + pad)]
+def float_to_pcm_bytes(audio_f32: np.ndarray) -> bytes:
+    """Clip samples to [-1.0, 1.0], scale by 32767 and return them as raw int16 bytes."""
+    clipped = np.clip(audio_f32, -1.0, 1.0).astype(np.float32)
+    scaled = clipped * 32767.0
+    return scaled.astype(np.int16).tobytes()
+def crossfade_bytes_stream(chunks_f32, overlap=1200):
+    """
+    Yield int16 PCM byte frames for a sequence of float32 chunks, crossfading
+    each chunk's tail into the head of the next one.
+
+    overlap=1200 samples ~= 50ms at 24kHz
+    We hold the last overlap of each chunk, blend into next chunk head,
+    then stream without clicks or "drops".
+
+    Chunks that are None or empty are skipped. A chunk no longer than
+    2 * overlap is emitted without holding a tail back. With overlap <= 0
+    no blending is done and every chunk is emitted as-is.
+    """
+    if overlap <= 0:
+        # Without this guard a zero overlap makes a[:-overlap] empty and
+        # a[-overlap:] the whole chunk, and blending the next chunk then
+        # fails on mismatched shapes.
+        for a in chunks_f32:
+            if a is not None and a.size:
+                yield float_to_pcm_bytes(a)
+        return
+    # The ramps depend only on `overlap`, so build them once.
+    fade_out = np.linspace(1.0, 0.0, overlap, dtype=np.float32)
+    fade_in = 1.0 - fade_out
+    prev_tail = None
+    for a in chunks_f32:
+        if a is None or a.size == 0:
+            continue
+        if prev_tail is None:
+            if a.size <= overlap * 2:
+                # too short to split into body + held tail
+                yield float_to_pcm_bytes(a)
+                continue
+            prev_tail = a[-overlap:]
+            yield float_to_pcm_bytes(a[:-overlap])
+            continue
+        if a.size < overlap:
+            # too small, just append
+            blended = np.concatenate([prev_tail, a])
+            prev_tail = None
+            yield float_to_pcm_bytes(blended)
+            continue
+        blended = (prev_tail * fade_out) + (a[:overlap] * fade_in)
+        if a.size <= overlap * 2:
+            # nothing meaningful to hold
+            prev_tail = None
+            yield float_to_pcm_bytes(np.concatenate([blended, a[overlap:]]))
+            continue
+        prev_tail = a[-overlap:]
+        yield float_to_pcm_bytes(np.concatenate([blended, a[overlap:-overlap]]))
+    # Flush the tail that was held back from the last chunk.
+    if prev_tail is not None and prev_tail.size > 0:
+        yield float_to_pcm_bytes(prev_tail)
+# =========================================================
+# ONNX INFER (FAST)
+# =========================================================
+def infer_tokens(tokens, voice_vec, speed: float):
+    """Run the ONNX session on up to 510 token ids and return float32 audio.
+
+    Returns None when there are no tokens. The style row is picked from
+    ``voice_vec`` by token count, clamped to the last available row.
+    """
+    ids = tokens[:510]
+    if not ids:
+        return None
+    # voice_vec shape: (T,1,256) -> selected row is (1,256)
+    row = min(len(ids), voice_vec.shape[0] - 1)
+    feeds = {
+        "input_ids": np.array([[0, *ids, 0]], dtype=np.int64),
+        "style": voice_vec[row],
+        "speed": np.array([float(speed)], dtype=np.float32),
+    }
+    outputs = SESSION.run(None, feeds)
+    # first output, expected shape: (1, N)
+    return outputs[0][0].astype(np.float32, copy=False)
+# =========================================================
+# API ONLY (FASTAPI + WS)
+# =========================================================
 api = FastAPI()
+# Single worker thread for full job generation (tokens + onnx + crossfade)
 INFERENCE_EXECUTOR = ThreadPoolExecutor(max_workers=1)
+# Queue of jobs: each job is 1 full text for 1 websocket
+JOB_QUEUE: asyncio.Queue = asyncio.Queue(maxsize=64)
+def resolve_voice(value: str) -> str:
+    """Map a UI label or raw voice id to an allowed voice id, else DEFAULT_VOICE_ID."""
+    if value:
+        candidate = VOICE_CHOICES.get(value, value).strip()
+        if candidate in ALLOWED_VOICE_IDS:
+            return candidate
+    return DEFAULT_VOICE_ID
 @api.get("/health")
 async def health():
+    """Return a static status payload: ok flag, engine name, sample rate and default voice id."""
+    return {
+        "ok": True,
+        "engine": "onnxruntime",
+        "sample_rate": SAMPLE_RATE,
+        "default_voice": DEFAULT_VOICE_ID,
+    }
+def warmup_once():
+    """Load the default voice and run one short inference so the first request is not cold.
+
+    Any failure is reported and swallowed; warmup is best-effort.
+    """
+    try:
+        # Use the array get_voice() returns: on a failed load it hands back
+        # the af_bella fallback without caching anything under
+        # DEFAULT_VOICE_ID, so indexing VOICE_CACHE directly would raise
+        # KeyError and skip the warmup inference.
+        voice_vec = get_voice(DEFAULT_VOICE_ID)
+        tokens = get_tokens_cached("Hello.")  # cached tuple
+        infer_tokens(tokens, voice_vec, 1.0)
+        print("✅ WARMUP OK")
+    except Exception as e:
+        print(f"⚠️ WARMUP FAILED: {e}")
+@api.on_event("startup")
+async def startup():
+    """Warm the model on the inference executor, then start the job-processing loop."""
+    loop = asyncio.get_running_loop()
+    await loop.run_in_executor(INFERENCE_EXECUTOR, warmup_once)
+    # The event loop keeps only weak references to tasks, so hold a strong
+    # one for the app's lifetime; otherwise the long-running loop task can be
+    # garbage-collected mid-execution.
+    api.state.engine_task = asyncio.create_task(engine_loop())
+async def engine_loop():
     print("⚡ API AUDIO PIPELINE STARTED")
     loop = asyncio.get_running_loop()
     while True:
+        ws, voice_id, speed, text = await JOB_QUEUE.get()
         if ws.client_state.value > 1:
             continue
+        # This queue carries PCM frames from the worker thread back to asyncio
         frame_q: asyncio.Queue = asyncio.Queue(maxsize=8)
+        stop_flag = threading.Event()
+        def _worker_full_job():
             try:
+                t0 = time.time()
+                voice_vec = get_voice(voice_id)
+                # Build per-chunk float32 audio list, with light leading trim
+                audio_chunks = []
                 first = True
+                for chunk in tuned_splitter(text):
+                    if stop_flag.is_set():
+                        break
+                    # tokenize (cached)
+                    tokens = get_tokens_cached(chunk)
+                    if not tokens:
+                        continue
+                    a = infer_tokens(tokens, voice_vec, speed)
+                    if a is None or a.size == 0:
+                        continue
+                    # do NOT aggressively trim every chunk, only leading a bit
                     if first:
+                        a = trim_leading(a, threshold=0.01, pad=120)
                         first = False
+                    else:
+                        a = trim_leading(a, threshold=0.01, pad=60)
+                    audio_chunks.append(a)
+                    # Push first audio as soon as we have it, no waiting for the full list
+                    if len(audio_chunks) == 1:
+                        for frame in crossfade_bytes_stream(audio_chunks, overlap=1200):
+                            loop.call_soon_threadsafe(frame_q.put_nowait, frame)
+                        audio_chunks.clear()
+                # Flush remaining with crossfade
+                if not stop_flag.is_set():
+                    if audio_chunks:
+                        # trim trailing only at the very end to avoid cutting words mid stream
+                        audio_chunks[-1] = trim_trailing(audio_chunks[-1], threshold=0.01, pad=160)
+                        for frame in crossfade_bytes_stream(audio_chunks, overlap=1200):
+                            loop.call_soon_threadsafe(frame_q.put_nowait, frame)
                 loop.call_soon_threadsafe(frame_q.put_nowait, None)
+                dt = time.time() - t0
+                print(f"✅ job done in {dt:.2f}s")
             except Exception as e:
                 print(f"API Worker Error: {e}")
                 try:
                 except Exception:
                     pass
+        INFERENCE_EXECUTOR.submit(_worker_full_job)
+        first_sent = False
+        started = time.time()
         while True:
             frame = await frame_q.get()
                 break
             if ws.client_state.value > 1:
+                stop_flag.set()
                 break
             try:
                 await ws.send_bytes(frame)
+                if not first_sent:
+                    first_sent = True
+                    print(f"⚡ first audio sent in {time.time() - started:.2f}s")
             except Exception:
+                stop_flag.set()
                 break
 @api.websocket("/ws/audio")
 async def websocket_endpoint(ws: WebSocket):
     await ws.accept()
+    # per-connection state
     voice_id = DEFAULT_VOICE_ID  # ✅ default Onyx
     speed = DEFAULT_SPEED
             except Exception:
                 break
+            # client config
+            if "config" in data or data.get("type") == "config":
                 voice_id = resolve_voice(str(data.get("voice", voice_id)))
                 try:
                     speed = float(data.get("speed", speed))
                 except Exception:
                     speed = DEFAULT_SPEED
+                # preload voice immediately so the next text has no voice load delay
+                try:
+                    get_voice(voice_id)
+                except Exception:
+                    voice_id = DEFAULT_VOICE_ID
+                    get_voice(voice_id)
+            # client text
+            if "text" in data or data.get("type") == "text":
+                raw = str(data.get("text", ""))
+                raw = raw.strip()
+                if not raw:
+                    continue
+                # name + acronym fix so it stops skipping brands and people names
+                raw = normalize_names(raw)
+                # hard cap to prevent one user blocking the box forever
+                if len(raw) > 6000:
+                    await ws.send_json({"type": "error", "message": "text_too_long", "max_chars": 6000})
+                    continue
                 try:
+                    JOB_QUEUE.put_nowait((ws, voice_id, speed, raw))
+                except asyncio.QueueFull:
+                    await ws.send_json({"type": "error", "message": "server_busy"})
+            if "flush" in data or data.get("type") == "flush":
+                await ws.send_json({"type": "flushed"})
     finally:
         heartbeat_task.cancel()