Spaces:

thecollabagepatch
/

magenta-retry

Running

App Files Files Community

thecollabagepatch committed on Jan 24

Commit

989061c

1 Parent(s): 1c7440e

updating web tester and simplifying websockets route

Browse files

Files changed (2) hide show

app.py +196 -326
magentaRT_rt_tester.html +31 -79

app.py CHANGED Viewed

@@ -1497,19 +1497,24 @@ async def log_requests(request: Request, call_next):
 # ----------------------------
-# websockets route
 # ----------------------------
 @app.websocket("/ws/jam")
 async def ws_jam(websocket: WebSocket):
     await websocket.accept()
-    sid = None
-    worker = None
     binary_audio = False
-    mode = "rt"  # or "bar"
-    # NEW: capture ws in closure
     async def send_json(obj):
         return await send_json_safe(websocket, obj)
@@ -1519,335 +1524,193 @@ async def ws_jam(websocket: WebSocket):
             msg = json.loads(raw)
             mtype = msg.get("type")
-            # --- START ---
             if mtype == "start":
                 binary_audio = bool(msg.get("binary_audio", False))
-                mode = msg.get("mode", "rt")
                 params = msg.get("params", {}) or {}
-                sid = msg.get("session_id")
-                # attach or create
-                if sid:
-                    with jam_lock:
-                        worker = jam_registry.get(sid)
-                    if worker is None or not worker.is_alive():
-                        await send_json({"type":"error","error":"Session not found"})
-                        continue
-                else:
-                    # optionally accept base64 loop and start a new worker (bar-mode)
-                    if mode == "bar":
-                        loop_b64 = msg.get("loop_audio_b64")
-                        if not loop_b64:
-                            await send_json({"type":"error","error":"loop_audio_b64 required for mode=bar when no session_id"})
-                            continue
-                        loop_bytes = base64.b64decode(loop_b64)
-                        # mimic /jam/start
-                        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
-                            tmp.write(loop_bytes); tmp_path = tmp.name
-                        # build JamParams similar to /jam/start
-                        mrt = get_mrt()
-                        model_sr = int(mrt.sample_rate)  # typically 48000
-                        # Defaults for WS: raw loudness @ model SR, unless overridden by client:
-                        target_sr = int(params.get("target_sr", model_sr))
-                        loudness_mode = params.get("loudness_mode", "none")
-                        headroom_db = float(params.get("headroom_db", 1.0))
-                        loop = au.Waveform.from_file(tmp_path).resample(mrt.sample_rate).as_stereo()
-                        codec_fps = float(mrt.codec.frame_rate)
-                        ctx_seconds = float(mrt.config.context_length_frames) / codec_fps
-                        bpm = float(params.get("bpm", 120.0))
-                        bpb = int(params.get("beats_per_bar", 4))
-                        loop_tail = take_bar_aligned_tail(loop, bpm, bpb, ctx_seconds)
-                        # style vector (loop + extra styles)
-                        embeds, weights = [mrt.embed_style(loop_tail)], [float(params.get("loop_weight", 1.0))]
-                        extra = [s for s in (params.get("styles","").split(",")) if s.strip()]
-                        sw = [float(x) for x in params.get("style_weights","").split(",") if x.strip()]
-                        for i, s in enumerate(extra):
-                            embeds.append(mrt.embed_style(s.strip()))
-                            weights.append(sw[i] if i < len(sw) else 1.0)
-                        wsum = sum(weights) or 1.0
-                        weights = [w/wsum for w in weights]
-                        style_vec = np.sum([w*e for w, e in zip(weights, embeds)], axis=0).astype(np.float32)
-                        # target SR fallback: input SR
-                        inp_info = sf.info(tmp_path)
-                        target_sr = int(params.get("target_sr", int(inp_info.samplerate)))
-                        # Build JamParams for WS bar-mode
-                        jp = JamParams(
-                            bpm=bpm, beats_per_bar=bpb, bars_per_chunk=int(params.get("bars_per_chunk", 8)),
-                            target_sr=target_sr,
-                            loudness_mode=loudness_mode, headroom_db=headroom_db,
-                            style_vec=style_vec,
-                            ref_loop=None if loudness_mode == "none" else loop_tail,  # disable match by default
-                            combined_loop=loop,
-                            guidance_weight=float(params.get("guidance_weight", 1.1)),
-                            temperature=float(params.get("temperature", 1.1)),
-                            topk=int(params.get("topk", 40)),
-                        )
-                        worker = JamWorker(get_mrt(), jp)
-                        sid = str(uuid.uuid4())
-                        with jam_lock:
-                            # single active jam per GPU, mirroring /jam/start
-                            for _sid, w in list(jam_registry.items()):
-                                if w.is_alive():
-                                    await send_json({"type":"error","error":"A jam is already running"})
-                                    worker = None; sid = None
-                                    break
-                            if worker is not None:
-                                jam_registry[sid] = worker
-                                worker.start()
-                    else:
-                        # mode == "rt" (Colab-style, no loop context)
-                        mrt = get_mrt()
-                        state = mrt.init_state()
-                        # Build silent context (10s) tokens
-                        codec_fps   = float(mrt.codec.frame_rate)
-                        ctx_seconds = float(mrt.config.context_length_frames) / codec_fps
-                        sr = int(mrt.sample_rate)
-                        samples = int(max(1, round(ctx_seconds * sr)))
-                        silent = au.Waveform(np.zeros((samples, 2), np.float32), sr)
-                        tokens = mrt.codec.encode(silent).astype(np.int32)[:, :mrt.config.decoder_codec_rvq_depth]
-                        state.context_tokens = tokens
-                        # Parse params (including steering)
-                        asset_manager.ensure_assets_loaded(get_mrt())
-                        styles_str        = params.get("styles", "warmup") or ""
-                        style_weights_str = params.get("style_weights", "") or ""
-                        mean_w            = float(params.get("mean", 0.0) or 0.0)
-                        cw_str            = str(params.get("centroid_weights", "") or "")
-                        text_list = [s.strip() for s in styles_str.split(",") if s.strip()]
-                        try:
-                            text_w = [float(x) for x in style_weights_str.split(",")] if style_weights_str else []
-                        except ValueError:
-                            text_w = []
-                        try:
-                            cw = [float(x) for x in cw_str.split(",") if x.strip() != ""]
-                        except ValueError:
-                            cw = []
-                        # Clamp centroid weights to available centroids
-                        if _CENTROIDS is not None and len(cw) > int(_CENTROIDS.shape[0]):
-                            cw = cw[: int(_CENTROIDS.shape[0])]
-                        # Build initial style vector (no loop_embed in rt mode)
-                        style_vec = build_style_vector(
-                            mrt,
-                            text_styles=text_list,
-                            text_weights=text_w,
-                            loop_embed=None,
-                            loop_weight=None,
-                            mean_weight=mean_w,
-                            centroid_weights=cw,
-                        )
-                        # Stash rt session fields
-                        websocket._mrt   = mrt
-                        websocket._state = state
-                        websocket._style_cur = style_vec
-                        websocket._style_tgt = style_vec
-                        websocket._style_ramp_s = float(params.get("style_ramp_seconds", 0.0))
-                        websocket._rt_mean              = mean_w
-                        websocket._rt_centroid_weights  = cw
-                        websocket._rt_running           = True
-                        websocket._rt_sr                = sr
-                        websocket._rt_topk              = int(params.get("topk", 40))
-                        websocket._rt_temp              = float(params.get("temperature", 1.1))
-                        websocket._rt_guid              = float(params.get("guidance_weight", 1.1))
-                        websocket._pace                 = params.get("pace", "asap")  # "realtime" | "asap"
-                        # (Optional) report whether steering assets were loaded
-                        assets_ok = (_MEAN_EMBED is not None) or (_CENTROIDS is not None)
-                        await send_json({"type": "started", "mode": "rt", "steering_assets": "loaded" if assets_ok else "none"})
-                        # kick off the ~2s streaming loop
-                        async def _rt_loop():
-                            try:
-                                mrt = websocket._mrt
-                                chunk_secs = (mrt.config.chunk_length_frames * mrt.config.frame_length_samples) / float(mrt.sample_rate)
-                                target_next = time.perf_counter()
-                                while websocket._rt_running:
-                                    mrt.guidance_weight = websocket._rt_guid
-                                    mrt.temperature     = websocket._rt_temp
-                                    mrt.topk            = websocket._rt_topk
-                                    # ramp style
-                                    ramp = float(getattr(websocket, "_style_ramp_s", 0.0) or 0.0)
-                                    if ramp <= 0.0:
-                                        websocket._style_cur = websocket._style_tgt
-                                    else:
-                                        step = min(1.0, chunk_secs / ramp)
-                                        websocket._style_cur = websocket._style_cur + step * (websocket._style_tgt - websocket._style_cur)
-                                    wav, new_state = mrt.generate_chunk(state=websocket._state, style=websocket._style_cur)
-                                    websocket._state = new_state
-                                    x = wav.samples.astype(np.float32, copy=False)
-                                    buf = io.BytesIO()
-                                    sf.write(buf, x, mrt.sample_rate, subtype="FLOAT", format="WAV")
-                                    ok = True
-                                    if binary_audio:
-                                        try:
-                                            await websocket.send_bytes(buf.getvalue())
-                                            ok = await send_json({"type": "chunk_meta", "metadata": {"sample_rate": mrt.sample_rate}})
-                                        except Exception:
-                                            ok = False
-                                    else:
-                                        b64 = base64.b64encode(buf.getvalue()).decode("utf-8")
-                                        ok = await send_json({"type": "chunk", "audio_base64": b64,
-                                                            "metadata": {"sample_rate": mrt.sample_rate}})
-                                    if not ok:
-                                        break
-                                    if getattr(websocket, "_pace", "asap") == "realtime":
-                                        t1 = time.perf_counter()
-                                        target_next += chunk_secs
-                                        sleep_s = max(0.0, target_next - t1 - 0.02)
-                                        if sleep_s > 0:
-                                            await asyncio.sleep(sleep_s)
-                            except asyncio.CancelledError:
-                                pass
-                            except Exception:
-                                pass
-                        websocket._rt_task = asyncio.create_task(_rt_loop())
-                        continue  # skip the “bar-mode started” message below
-                await send_json({"type":"started","session_id": sid, "mode": mode})
-                # if we’re in bar-mode, begin pushing chunks as they arrive
-                if mode == "bar" and worker is not None:
-                    async def _pump():
-                        while True:
-                            if not worker.is_alive():
-                                break
-                            chunk = worker.get_next_chunk(timeout=60.0)
-                            if chunk is None:
-                                continue
-                            audio_base64 = base64.b64encode(chunk.audio_bytes).decode("utf-8")
                             if binary_audio:
-                                await websocket.send_bytes(chunk.audio_bytes)
-                                await send_json({"type":"chunk_meta","index":chunk.index,"metadata":chunk.metadata})
                             else:
-                                await send_json({"type":"chunk","index":chunk.index,
-                                                 "audio_base64":audio_base64,"metadata":chunk.metadata})
-                    asyncio.create_task(_pump())
-            # --- UPDATES (bar or rt) ---
-            elif mtype == "update":
-                if mode == "bar":
-                    if not sid:
-                        await send_json({"type":"error","error":"No session_id yet"}); return
-                    # fan values straight into your existing HTTP handler:
-                    res = jam_update(
-                        session_id=sid,
-                        guidance_weight=msg.get("guidance_weight"),
-                        temperature=msg.get("temperature"),
-                        topk=msg.get("topk"),
-                        styles=msg.get("styles",""),
-                        style_weights=msg.get("style_weights",""),
-                        loop_weight=msg.get("loop_weight"),
-                        use_current_mix_as_style=bool(msg.get("use_current_mix_as_style", False)),
-                    )
-                    await send_json({"type":"status", **res})  # {"ok": True}
-                else:
-                    # rt-mode: there’s no JamWorker; update the local knobs/state
-                    websocket._rt_temp = float(msg.get("temperature", websocket._rt_temp))
-                    websocket._rt_topk = int(msg.get("topk", websocket._rt_topk))
-                    websocket._rt_guid = float(msg.get("guidance_weight", websocket._rt_guid))
-                    # NEW steering fields
-                    if "mean" in msg and msg["mean"] is not None:
-                        try: websocket._rt_mean = float(msg["mean"])
-                        except: websocket._rt_mean = 0.0
-                    if "centroid_weights" in msg:
-                        cw = [w.strip() for w in str(msg["centroid_weights"]).split(",") if w.strip() != ""]
-                        try:
-                            websocket._rt_centroid_weights = [float(x) for x in cw]
-                        except:
-                            websocket._rt_centroid_weights = []
-                    # styles / text weights (optional, comma-separated)
-                    styles_str = msg.get("styles", None)
-                    style_weights_str = msg.get("style_weights", "")
-                    text_list = [s for s in (styles_str.split(",") if styles_str else []) if s.strip()]
-                    text_w = [float(x) for x in style_weights_str.split(",")] if style_weights_str else []
-                    asset_manager.ensure_assets_loaded(get_mrt())
-                    websocket._style_tgt = build_style_vector(
-                        websocket._mrt,
-                        text_styles=text_list,
-                        text_weights=text_w,
-                        loop_embed=None,
-                        loop_weight=None,
-                        mean_weight=float(websocket._rt_mean),
-                        centroid_weights=websocket._rt_centroid_weights,
-                    )
-                    # optionally allow live changes to ramp:
-                    if "style_ramp_seconds" in msg:
-                        try: websocket._style_ramp_s = float(msg["style_ramp_seconds"])
-                        except: pass
-                    await send_json({"type":"status","updated":"rt-knobs+style"})
-            elif mtype == "consume" and mode == "bar":
-                with jam_lock:
-                    worker = jam_registry.get(msg.get("session_id"))
-                if worker is not None:
-                    worker.mark_chunk_consumed(int(msg.get("chunk_index", -1)))
-            elif mtype == "reseed" and mode == "bar":
-                with jam_lock:
-                    worker = jam_registry.get(msg.get("session_id"))
-                if worker is None or not worker.is_alive():
-                    await send_json({"type":"error","error":"Session not found"}); continue
-                loop_b64 = msg.get("loop_audio_b64")
-                if not loop_b64:
-                    await send_json({"type":"error","error":"loop_audio_b64 required"}); continue
-                loop_bytes = base64.b64decode(loop_b64)
-                with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
-                    tmp.write(loop_bytes); path = tmp.name
-                wav = au.Waveform.from_file(path).resample(worker.mrt.sample_rate).as_stereo()
-                worker.reseed_from_waveform(wav)
-                await send_json({"type":"status","reseeded":True})
-            elif mtype == "reseed_splice" and mode == "bar":
-                with jam_lock:
-                    worker = jam_registry.get(msg.get("session_id"))
-                if worker is None or not worker.is_alive():
-                    await send_json({"type":"error","error":"Session not found"}); continue
-                anchor = float(msg.get("anchor_bars", 2.0))
-                b64 = msg.get("combined_audio_b64")
-                if b64:
-                    data = base64.b64decode(b64)
-                    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
-                        tmp.write(data); path = tmp.name
-                    wav = au.Waveform.from_file(path).resample(worker.mrt.sample_rate).as_stereo()
-                    worker.reseed_splice(wav, anchor_bars=anchor)
-                else:
-                    # fallback: model-side stream splice
-                    worker.reseed_splice(worker.params.combined_loop, anchor_bars=anchor)
-                await send_json({"type":"status","splice":anchor})
-            elif mtype == "stop":
-                if mode == "rt":
-                    websocket._rt_running = False
-                    task = getattr(websocket, "_rt_task", None)
-                    if task is not None:
-                        task.cancel()
-                        try: await task
-                        except asyncio.CancelledError: pass
-                    await send_json({"type":"stopped"})
-                    break  # <- add this if you want to end the socket after stop
             elif mtype == "ping":
                 await send_json({"type":"pong"})
@@ -1855,7 +1718,6 @@ async def ws_jam(websocket: WebSocket):
                 await send_json({"type":"error","error":f"Unknown type {mtype}"})
     except WebSocketDisconnect:
-        # best-effort cleanup for bar-mode sessions started within this socket (optional)
         pass
     except Exception as e:
         try:
@@ -1863,6 +1725,14 @@ async def ws_jam(websocket: WebSocket):
         except Exception:
             pass
     finally:
         try:
             if websocket.client_state != WebSocketState.DISCONNECTED:
                 await websocket.close()

 # ----------------------------
+# websockets route (rt-mode only)
 # ----------------------------
 @app.websocket("/ws/jam")
 async def ws_jam(websocket: WebSocket):
+    """
+    Real-time streaming WebSocket endpoint for MagentaRT.
+    This route operates in 'rt' mode only - for bar-aligned jam sessions,
+    use the HTTP endpoints (/jam/start, /jam/chunk, etc.) instead.
+    The server handles crossfading internally via MagentaRTState, so clients
+    can simply play back chunks sequentially without additional crossfade logic.
+    """
     await websocket.accept()
     binary_audio = False
     async def send_json(obj):
         return await send_json_safe(websocket, obj)
             msg = json.loads(raw)
             mtype = msg.get("type")
+            # --- START (rt-mode only) ---
             if mtype == "start":
                 binary_audio = bool(msg.get("binary_audio", False))
                 params = msg.get("params", {}) or {}
+                # Initialize MagentaRT state
+                mrt = get_mrt()
+                state = mrt.init_state()
+                # Build silent context (10s) tokens
+                codec_fps   = float(mrt.codec.frame_rate)
+                ctx_seconds = float(mrt.config.context_length_frames) / codec_fps
+                sr = int(mrt.sample_rate)
+                samples = int(max(1, round(ctx_seconds * sr)))
+                silent = au.Waveform(np.zeros((samples, 2), np.float32), sr)
+                tokens = mrt.codec.encode(silent).astype(np.int32)[:, :mrt.config.decoder_codec_rvq_depth]
+                state.context_tokens = tokens
+                # Parse params (including steering)
+                asset_manager.ensure_assets_loaded(get_mrt())
+                styles_str        = params.get("styles", "warmup") or ""
+                style_weights_str = params.get("style_weights", "") or ""
+                mean_w            = float(params.get("mean", 0.0) or 0.0)
+                cw_str            = str(params.get("centroid_weights", "") or "")
+                text_list = [s.strip() for s in styles_str.split(",") if s.strip()]
+                try:
+                    text_w = [float(x) for x in style_weights_str.split(",")] if style_weights_str else []
+                except ValueError:
+                    text_w = []
+                try:
+                    cw = [float(x) for x in cw_str.split(",") if x.strip() != ""]
+                except ValueError:
+                    cw = []
+                # Clamp centroid weights to available centroids
+                if _CENTROIDS is not None and len(cw) > int(_CENTROIDS.shape[0]):
+                    cw = cw[: int(_CENTROIDS.shape[0])]
+                # Build initial style vector (no loop_embed in rt mode)
+                style_vec = build_style_vector(
+                    mrt,
+                    text_styles=text_list,
+                    text_weights=text_w,
+                    loop_embed=None,
+                    loop_weight=None,
+                    mean_weight=mean_w,
+                    centroid_weights=cw,
+                )
+                # Stash rt session fields on the websocket object
+                websocket._mrt   = mrt
+                websocket._state = state
+                websocket._style_cur = style_vec
+                websocket._style_tgt = style_vec
+                websocket._style_ramp_s = float(params.get("style_ramp_seconds", 0.0))
+                websocket._rt_mean              = mean_w
+                websocket._rt_centroid_weights  = cw
+                websocket._rt_running           = True
+                websocket._rt_sr                = sr
+                websocket._rt_topk              = int(params.get("topk", 40))
+                websocket._rt_temp              = float(params.get("temperature", 1.1))
+                websocket._rt_guid              = float(params.get("guidance_weight", 1.1))
+                websocket._pace                 = params.get("pace", "asap")  # "realtime" | "asap"
+                # Report whether steering assets were loaded
+                assets_ok = (_MEAN_EMBED is not None) or (_CENTROIDS is not None)
+                await send_json({"type": "started", "mode": "rt", "steering_assets": "loaded" if assets_ok else "none"})
+                # Kick off the ~2s streaming loop
+                async def _rt_loop():
+                    try:
+                        mrt = websocket._mrt
+                        chunk_secs = (mrt.config.chunk_length_frames * mrt.config.frame_length_samples) / float(mrt.sample_rate)
+                        target_next = time.perf_counter()
+                        while websocket._rt_running:
+                            mrt.guidance_weight = websocket._rt_guid
+                            mrt.temperature     = websocket._rt_temp
+                            mrt.topk            = websocket._rt_topk
+                            # Ramp style toward target
+                            ramp = float(getattr(websocket, "_style_ramp_s", 0.0) or 0.0)
+                            if ramp <= 0.0:
+                                websocket._style_cur = websocket._style_tgt
+                            else:
+                                step = min(1.0, chunk_secs / ramp)
+                                websocket._style_cur = websocket._style_cur + step * (websocket._style_tgt - websocket._style_cur)
+                            # Generate chunk (crossfading handled internally by MagentaRT)
+                            wav, new_state = mrt.generate_chunk(state=websocket._state, style=websocket._style_cur)
+                            websocket._state = new_state
+                            x = wav.samples.astype(np.float32, copy=False)
+                            buf = io.BytesIO()
+                            sf.write(buf, x, mrt.sample_rate, subtype="FLOAT", format="WAV")
+                            ok = True
                             if binary_audio:
+                                try:
+                                    await websocket.send_bytes(buf.getvalue())
+                                    ok = await send_json({"type": "chunk_meta", "metadata": {"sample_rate": mrt.sample_rate}})
+                                except Exception:
+                                    ok = False
                             else:
+                                b64 = base64.b64encode(buf.getvalue()).decode("utf-8")
+                                ok = await send_json({"type": "chunk", "audio_base64": b64,
+                                                    "metadata": {"sample_rate": mrt.sample_rate}})
+                            if not ok:
+                                break
+                            if getattr(websocket, "_pace", "asap") == "realtime":
+                                t1 = time.perf_counter()
+                                target_next += chunk_secs
+                                sleep_s = max(0.0, target_next - t1 - 0.02)
+                                if sleep_s > 0:
+                                    await asyncio.sleep(sleep_s)
+                    except asyncio.CancelledError:
+                        pass
+                    except Exception:
+                        pass
+                websocket._rt_task = asyncio.create_task(_rt_loop())
+            # --- UPDATE (rt-mode knobs & style) ---
+            elif mtype == "update":
+                if not hasattr(websocket, "_rt_running"):
+                    await send_json({"type":"error","error":"Session not started"}); continue
+                websocket._rt_temp = float(msg.get("temperature", websocket._rt_temp))
+                websocket._rt_topk = int(msg.get("topk", websocket._rt_topk))
+                websocket._rt_guid = float(msg.get("guidance_weight", websocket._rt_guid))
+                # Steering fields
+                if "mean" in msg and msg["mean"] is not None:
+                    try: websocket._rt_mean = float(msg["mean"])
+                    except: websocket._rt_mean = 0.0
+                if "centroid_weights" in msg:
+                    cw = [w.strip() for w in str(msg["centroid_weights"]).split(",") if w.strip() != ""]
+                    try:
+                        websocket._rt_centroid_weights = [float(x) for x in cw]
+                    except:
+                        websocket._rt_centroid_weights = []
+                # Styles / text weights (optional, comma-separated)
+                styles_str = msg.get("styles", None)
+                style_weights_str = msg.get("style_weights", "")
+                text_list = [s for s in (styles_str.split(",") if styles_str else []) if s.strip()]
+                text_w = [float(x) for x in style_weights_str.split(",")] if style_weights_str else []
+                asset_manager.ensure_assets_loaded(get_mrt())
+                websocket._style_tgt = build_style_vector(
+                    websocket._mrt,
+                    text_styles=text_list,
+                    text_weights=text_w,
+                    loop_embed=None,
+                    loop_weight=None,
+                    mean_weight=float(websocket._rt_mean),
+                    centroid_weights=websocket._rt_centroid_weights,
+                )
+                # Optionally allow live changes to ramp:
+                if "style_ramp_seconds" in msg:
+                    try: websocket._style_ramp_s = float(msg["style_ramp_seconds"])
+                    except: pass
+                await send_json({"type":"status","updated":"rt-knobs+style"})
+            # --- BUFFER STATUS (from frontend for adaptive pacing - acknowledged silently) ---
+            elif mtype == "buffer_status":
+                # Frontend reports its buffer level; could be used for adaptive pacing
+                # For now we just acknowledge receipt without action
+                pass
+            # --- STOP ---
+            elif mtype == "stop":
+                websocket._rt_running = False
+                task = getattr(websocket, "_rt_task", None)
+                if task is not None:
+                    task.cancel()
+                    try: await task
+                    except asyncio.CancelledError: pass
+                await send_json({"type":"stopped"})
+                break
+            # --- PING/PONG ---
             elif mtype == "ping":
                 await send_json({"type":"pong"})
                 await send_json({"type":"error","error":f"Unknown type {mtype}"})
     except WebSocketDisconnect:
         pass
     except Exception as e:
         try:
         except Exception:
             pass
     finally:
+        # Ensure streaming loop is stopped
+        if hasattr(websocket, "_rt_running"):
+            websocket._rt_running = False
+        task = getattr(websocket, "_rt_task", None)
+        if task is not None:
+            task.cancel()
+            try: await asyncio.wait_for(task, timeout=1.0)
+            except: pass
         try:
             if websocket.client_state != WebSocketState.DISCONNECTED:
                 await websocket.close()

magentaRT_rt_tester.html CHANGED Viewed

@@ -256,93 +256,45 @@
   const rngC4 = $("rngC4"), numC4 = $("numC4");
   const rngC5 = $("rngC5"), numC5 = $("numC5");
-  const XFADE_MS = 40; // crossfade length
   let pending = [];            // decoded AudioBuffers waiting to be scheduled
-let playing = false;         // have we started playback?
-const START_CUSHION = 0.12;  // already used
-const fade = XFADE_MS / 1000;
-// Equal-power crossfading functions
-function equalPowerFadeOut(t) {
-  // cos²(t * π/2) where t goes from 0 to 1
-  return Math.cos(t * Math.PI / 2) ** 2;
-}
-function equalPowerFadeIn(t) {
-  // sin²(t * π/2) where t goes from 0 to 1
-  return Math.sin(t * Math.PI / 2) ** 2;
-}
-function scheduleAudioBuffer(abuf) {
-  // Equal-power crossfade scheduling
-  const src = ctx.createBufferSource();
-  const g = ctx.createGain();
-  src.buffer = abuf;
-  src.connect(g); g.connect(gain);
-  if (nextTime < ctx.currentTime + 0.05) nextTime = ctx.currentTime + START_CUSHION;
-  const startAt = nextTime;
-  const dur = abuf.duration;
-  // Overlap by 'fade' so there's no dip
-  nextTime = startAt + Math.max(0, dur - fade);
-  // Equal-power crossfading using custom curves
-  const numPoints = 64; // More points for smoother curves
-  const times = [];
-  const values = [];
-  // Fade in from 0 to 1 over fade duration
-  for (let i = 0; i <= numPoints; i++) {
-    const t = i / numPoints;
-    const time = startAt + t * fade;
-    const value = equalPowerFadeIn(t);
-    times.push(time);
-    values.push(value);
-  }
-  // Hold at 1.0 during the main portion
-  const holdStart = startAt + fade;
-  const holdEnd = startAt + Math.max(0, dur - fade);
-  if (holdEnd > holdStart) {
-    times.push(holdStart);
-    values.push(1.0);
-    times.push(holdEnd);
-    values.push(1.0);
-  }
-  // Fade out from 1 to 0 over fade duration
-  for (let i = 0; i <= numPoints; i++) {
-    const t = i / numPoints;
-    const time = startAt + Math.max(0, dur - fade) + t * fade;
-    const value = equalPowerFadeOut(t);
-    times.push(time);
-    values.push(value);
-  }
-  // Apply the envelope
-  g.gain.setValueAtTime(values[0], times[0]);
-  for (let i = 1; i < times.length; i++) {
-    g.gain.linearRampToValueAtTime(values[i], times[i]);
   }
-  src.start(startAt);
-  scheduled.push({ src, when: startAt, dur });
-  updateQueueUI();
-  src.onended = () => { scheduled = scheduled.filter(s => s.src !== src); updateQueueUI(); };
-}
-function beginPlaybackFromPending() {
-  if (playing) return;
-  playing = true;
-  nextTime = ctx.currentTime + START_CUSHION;
-  while (pending.length) {
-    const abuf = pending.shift();
-    scheduleAudioBuffer(abuf);
   }
-}
   // Audio chain
   let AudioCtx = window.AudioContext || window.webkitAudioContext;

   const rngC4 = $("rngC4"), numC4 = $("numC4");
   const rngC5 = $("rngC5"), numC5 = $("numC5");
+  // Simplified playback - server handles crossfading via MagentaRTState
+  // Chunks arrive pre-crossfaded; we just schedule them back-to-back
   let pending = [];            // decoded AudioBuffers waiting to be scheduled
+  let playing = false;         // have we started playback?
+  const START_CUSHION = 0.12;  // initial buffer before first playback
+  function scheduleAudioBuffer(abuf) {
+    // Queue one buffer to start at the nextTime cursor, then advance the cursor by the buffer's duration (back-to-back; no client-side crossfade or per-chunk gain envelope).
+    const src = ctx.createBufferSource();
+    src.buffer = abuf;
+    src.connect(gain);  // wired straight into `gain`, which is declared outside this function
+    // If the cursor is less than 50 ms ahead of the audio clock (or already behind it), restart it START_CUSHION seconds from now instead of scheduling in the past.
+    if (nextTime < ctx.currentTime + 0.05) {
+      nextTime = ctx.currentTime + START_CUSHION;
+    }
+    const startAt = nextTime;
+    const dur = abuf.duration;
+    // The next chunk will begin exactly where this one ends.
+    nextTime = startAt + dur;
+    src.start(startAt);
+    scheduled.push({ src, when: startAt, dur });  // tracked so the onended handler below can drop it from `scheduled`
+    updateQueueUI();
+    src.onended = () => { scheduled = scheduled.filter(s => s.src !== src); updateQueueUI(); };
   }
+  function beginPlaybackFromPending() {
+    if (playing) return;  // playback already started; nothing to do
+    playing = true;
+    nextTime = ctx.currentTime + START_CUSHION;  // first chunk starts START_CUSHION seconds from now
+    while (pending.length) {  // drain `pending` front to back, scheduling each buffer in turn
+      const abuf = pending.shift();
+      scheduleAudioBuffer(abuf);
+    }
   }
   // Audio chain
   let AudioCtx = window.AudioContext || window.webkitAudioContext;