bgkb
/

bs_polarformer

@@ -18,11 +18,6 @@ import torch
 import torch.nn as nn
 import yaml
-sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'msst'))
-from models.bs_roformer.bs_roformer import BSRoformer
 # ---------------------------------------------------------------------------
 # Wrapper that exposes only the exportable "core" of BSRoformer
 # ---------------------------------------------------------------------------
@@ -34,7 +29,7 @@ class BSRoformerCore(nn.Module):
     Output: mask           – (B, num_stems, F, T, 2) complex mask as real tensor
     """
-    def __init__(self, model: BSRoformer):
         super().__init__()
         self.band_split = model.band_split
         self.layers = model.layers
@@ -93,7 +88,7 @@ class BSRoformerCore(nn.Module):
 # Helper: replace PoPE attention with standard attention for ONNX export
 # ---------------------------------------------------------------------------
-def replace_pope_with_standard_attn(model: BSRoformer):
     """Replace PoPE-based flash attention with equivalent standard attention.
     PoPE attention applies: q, k = softplus(q), softplus(k) then rotates by
@@ -209,6 +204,11 @@ def replace_pope_with_standard_attn(model: BSRoformer):
 def load_model(config_path, checkpoint_path):
     """Load BS PolarFormer model from config and checkpoint."""
     with open(config_path, 'r') as f:
         config = yaml.full_load(f)
@@ -503,6 +503,7 @@ def cascade_large_ops(onnx_path, out_path, max_bindings=7):
     """
     import onnx
     from onnx import helper, TensorProto, numpy_helper
     print(f"\n=== Cascading large Split/Concat ops (max {max_bindings} bindings) ===")
     model = onnx.load(onnx_path)
@@ -514,27 +515,38 @@ def cascade_large_ops(onnx_path, out_path, max_bindings=7):
         type_map[vi.name] = vi
     def _get_shape(tensor_name):  # look up a tensor's recorded shape; returns None when it is unavailable
-        """Get the shape of a tensor as a list of (dim_value, dim_param) tuples."""
         vi = type_map.get(tensor_name)  # value-info entry stored in type_map under this tensor name
         if vi is None:
             return None  # nothing recorded for this tensor
         try:
             dims = vi.type.tensor_type.shape.dim
-            return [(d.dim_value, d.dim_param) for d in dims]  # NOTE(review): an unset dim_value reads as 0, so an unknown dim looks like a concrete zero here
         except Exception:
             return None  # any failure while reading the shape is reported as "no shape"
     def _make_value_info(name, shape_dims, elem_type=1):  # build a value-info entry whose dims mirror shape_dims
         """Create a TensorValueInfoProto with the given shape dimensions.
-        shape_dims is a list of (dim_value, dim_param) tuples.
         elem_type=1 means FLOAT."""
         vi = helper.make_tensor_value_info(name, elem_type, [None] * len(shape_dims))  # placeholder dims, one per entry; each is overwritten below
-        for i, (dv, dp) in enumerate(shape_dims):
             dim = vi.type.tensor_type.shape.dim[i]
             dim.Clear()  # reset the placeholder before setting dim_param or dim_value
             if dp:
                 dim.dim_param = dp  # symbolic (named) dimension takes precedence
-            else:
                 dim.dim_value = dv  # otherwise write dv as a concrete size, whatever its value
         return vi
@@ -559,7 +571,7 @@ def cascade_large_ops(onnx_path, out_path, max_bindings=7):
             for oi, oname in enumerate(output_names):
                 if oname not in type_map:
                     shape = list(input_shape)
-                    shape[a] = (sizes[oi], '')
                     vi = _make_value_info(oname, shape)
                     new_value_infos.append(vi)
                     type_map[oname] = vi
@@ -592,7 +604,7 @@ def cascade_large_ops(onnx_path, out_path, max_bindings=7):
                 shape = list(input_shape)
                 # Replace the split axis dimension with the group's concrete size
                 a = axis if axis >= 0 else len(shape) + axis
-                shape[a] = (group_sizes[gi], '')  # concrete dim_value, no dim_param
                 vi = _make_value_info(gout, shape)
                 new_value_infos.append(vi)
                 type_map[gout] = vi
@@ -658,6 +670,28 @@ def cascade_large_ops(onnx_path, out_path, max_bindings=7):
         inputs = list(node.input)
         # Group inputs into chunks of max_bindings
         groups = []
         for i in range(0, n_in, max_bindings):
@@ -673,13 +707,13 @@ def cascade_large_ops(onnx_path, out_path, max_bindings=7):
             total = 0
             all_concrete = True
             for s in shapes:
-                dv, dp = s[a]
-                if dp or dv == 0:
                     all_concrete = False
                     break
                 total += dv
             if all_concrete:
-                base[a] = (total, '')
                 return base
             return None
@@ -745,6 +779,50 @@ def cascade_large_ops(onnx_path, out_path, max_bindings=7):
         graph.node.remove(node)
     graph.node.extend(nodes_to_add)
     # Add explicit value_info entries for cascade intermediates (as hints).
     for vi in new_value_infos:
         graph.value_info.append(vi)
@@ -800,6 +878,59 @@ def cascade_large_ops(onnx_path, out_path, max_bindings=7):
     else:
         print(f"  All intermediate tensors have proper shape annotations")
     return out_path

 import torch.nn as nn
 import yaml
 # ---------------------------------------------------------------------------
 # Wrapper that exposes only the exportable "core" of BSRoformer
 # ---------------------------------------------------------------------------
     Output: mask           – (B, num_stems, F, T, 2) complex mask as real tensor
     """
+    def __init__(self, model: nn.Module):
         super().__init__()
         self.band_split = model.band_split
         self.layers = model.layers
 # Helper: replace PoPE attention with standard attention for ONNX export
 # ---------------------------------------------------------------------------
+def replace_pope_with_standard_attn(model: nn.Module):
     """Replace PoPE-based flash attention with equivalent standard attention.
     PoPE attention applies: q, k = softplus(q), softplus(k) then rotates by
 def load_model(config_path, checkpoint_path):
     """Load BS PolarFormer model from config and checkpoint."""
+    msst_root = os.path.join(os.path.dirname(__file__), '..', 'msst')
+    if msst_root not in sys.path:
+        sys.path.insert(0, msst_root)
+    from models.bs_roformer.bs_roformer import BSRoformer
     with open(config_path, 'r') as f:
         config = yaml.full_load(f)
     """
     import onnx
     from onnx import helper, TensorProto, numpy_helper
+    from collections import defaultdict, deque
     print(f"\n=== Cascading large Split/Concat ops (max {max_bindings} bindings) ===")
     model = onnx.load(onnx_path)
         type_map[vi.name] = vi
     def _get_shape(tensor_name):  # look up a tensor's recorded shape; returns None when it is unavailable
+        """Get shape dims as (dim_value, dim_param, has_dim_value).
+        Important: in ONNX protobuf, dim_value defaults to 0 when unset.
+        We must track whether dim_value is explicitly set, otherwise unknown
+        dimensions can be misread as concrete zeros.
+        """
         vi = type_map.get(tensor_name)  # value-info entry stored in type_map under this tensor name
         if vi is None:
             return None  # nothing recorded for this tensor
         try:
             dims = vi.type.tensor_type.shape.dim
+            shape = []  # one (dim_value, dim_param, has_dim_value) tuple per dimension
+            for d in dims:
+                has_dim_value = d.HasField('dim_value')  # True only when dim_value was explicitly set
+                dv = d.dim_value if has_dim_value else None  # None (not 0) marks "no concrete size"
+                dp = d.dim_param if d.HasField('dim_param') else ''  # '' marks "no symbolic name"
+                shape.append((dv, dp, has_dim_value))
+            return shape
         except Exception:
             return None  # any failure while reading the shape is reported as "no shape"
     def _make_value_info(name, shape_dims, elem_type=1):  # build a value-info entry whose dims mirror shape_dims
         """Create a TensorValueInfoProto with the given shape dimensions.
+        shape_dims is a list of (dim_value, dim_param, has_dim_value) tuples.
         elem_type=1 means FLOAT."""
         vi = helper.make_tensor_value_info(name, elem_type, [None] * len(shape_dims))  # placeholder dims, one per entry; each is rewritten below
+        for i, (dv, dp, has_dim_value) in enumerate(shape_dims):
             dim = vi.type.tensor_type.shape.dim[i]
             dim.Clear()  # reset the placeholder; it stays cleared if neither branch below fires
             if dp:
                 dim.dim_param = dp  # symbolic (named) dimension takes precedence
+            elif has_dim_value and dv is not None:  # only write a size that was explicitly known
                 dim.dim_value = dv
         return vi
             for oi, oname in enumerate(output_names):
                 if oname not in type_map:
                     shape = list(input_shape)
+                    shape[a] = (sizes[oi], '', True)
                     vi = _make_value_info(oname, shape)
                     new_value_infos.append(vi)
                     type_map[oname] = vi
                 shape = list(input_shape)
                 # Replace the split axis dimension with the group's concrete size
                 a = axis if axis >= 0 else len(shape) + axis
+                shape[a] = (group_sizes[gi], '', True)  # concrete dim_value, no dim_param
                 vi = _make_value_info(gout, shape)
                 new_value_infos.append(vi)
                 type_map[gout] = vi
         inputs = list(node.input)
+        # Filter out 0-sized inputs to avoid creating Concat nodes with ONLY 0-sized inputs
+        # (which crashes WebGPU's WGSL compiler).
+        valid_inputs = []
+        for inp in inputs:
+            shape = _get_shape(inp)
+            is_zero = False
+            if shape:
+                a = axis if axis >= 0 else len(shape) + axis
+                if a < len(shape):
+                    dv, dp, has_dv = shape[a]
+                    if has_dv and dv == 0 and not dp:
+                        is_zero = True
+            if not is_zero:
+                valid_inputs.append(inp)
+        # If all inputs were 0-sized, keep at least one so the Concat node is valid
+        if not valid_inputs and inputs:
+            valid_inputs = [inputs[0]]
+        inputs = valid_inputs
+        n_in = len(inputs)
         # Group inputs into chunks of max_bindings
         groups = []
         for i in range(0, n_in, max_bindings):
             total = 0
             all_concrete = True
             for s in shapes:
+                dv, dp, has_dv = s[a]
+                if dp or not has_dv:
                     all_concrete = False
                     break
                 total += dv
             if all_concrete:
+                base[a] = (total, '', True)
                 return base
             return None
         graph.node.remove(node)
     graph.node.extend(nodes_to_add)
+    # Re-establish topological ordering after graph rewrites.
+    # Appending new nodes at the end can violate node dependency order.
+    def _toposort_nodes(g):  # reorder g.node in place so each producer precedes its consumers (Kahn's algorithm); raises RuntimeError if that is impossible
+        producers = {}  # tensor name -> index of the node that outputs it
+        for i, node in enumerate(g.node):
+            for out in node.output:
+                if out:  # ignore empty output names
+                    producers[out] = i
+        deps = defaultdict(set)  # node index -> indices of the nodes it reads from
+        users = defaultdict(set)  # node index -> indices of the nodes reading its outputs
+        indegree = [0] * len(g.node)  # number of distinct producer nodes still unplaced, per node
+        for i, node in enumerate(g.node):
+            for inp in node.input:
+                if not inp:  # ignore empty input names
+                    continue
+                p = producers.get(inp)
+                if p is None or p == i:  # not produced by any node in g, or the node reads its own output
+                    continue
+                if p not in deps[i]:  # count each producer once, even if several inputs come from it
+                    deps[i].add(p)
+                    users[p].add(i)
+                    indegree[i] += 1
+        q = deque([i for i, d in enumerate(indegree) if d == 0])  # seed with nodes that depend on no other node
+        order = []  # node indices in the order they become ready
+        while q:
+            cur = q.popleft()
+            order.append(cur)
+            for nxt in users[cur]:
+                indegree[nxt] -= 1
+                if indegree[nxt] == 0:  # all of nxt's producers have been placed
+                    q.append(nxt)
+        if len(order) != len(g.node):  # some node never became ready
+            raise RuntimeError("Failed to topologically sort rewritten ONNX graph (cycle or missing dependency).")
+        sorted_nodes = [g.node[i] for i in order]  # collect the nodes in sorted order before clearing the field
+        del g.node[:]
+        g.node.extend(sorted_nodes)
+    _toposort_nodes(graph)
     # Add explicit value_info entries for cascade intermediates (as hints).
     for vi in new_value_infos:
         graph.value_info.append(vi)
     else:
         print(f"  All intermediate tensors have proper shape annotations")
+    # Extra safety check: reject models where a Concat would end up with only
+    # statically zero-sized inputs on its concat axis (known to break WebGPU WGSL).
+    vi_map2 = {}
+    for vi in list(model2.graph.value_info) + list(model2.graph.input) + list(model2.graph.output):
+        vi_map2[vi.name] = vi
+    def _shape_from_vi(vi):  # read a value-info entry's dims as (dim_value, dim_param, has_dim_value) tuples; None on failure
+        try:
+            dims = vi.type.tensor_type.shape.dim
+            out = []  # NOTE(review): same tuple layout and logic as _get_shape, minus the type_map lookup
+            for d in dims:
+                has_dv = d.HasField('dim_value')  # True only when dim_value was explicitly set
+                dv = d.dim_value if has_dv else None  # None (not 0) marks "no concrete size"
+                dp = d.dim_param if d.HasField('dim_param') else ''  # '' marks "no symbolic name"
+                out.append((dv, dp, has_dv))
+            return out
+        except Exception:
+            return None  # any failure while reading the shape is reported as "no shape"
+    bad_concat = 0
+    for node in model2.graph.node:
+        if node.op_type != 'Concat':
+            continue
+        axis = 0
+        for attr in node.attribute:
+            if attr.name == 'axis':
+                axis = attr.i
+        all_zero = True
+        for inp in node.input:
+            vi = vi_map2.get(inp)
+            shape = _shape_from_vi(vi) if vi is not None else None
+            if not shape:
+                all_zero = False
+                continue
+            a = axis if axis >= 0 else len(shape) + axis
+            if a < 0 or a >= len(shape):
+                all_zero = False
+                continue
+            dv, dp, has_dv = shape[a]
+            if dp or not has_dv:
+                all_zero = False
+            elif dv > 0:
+                all_zero = False
+        if all_zero or not node.input:
+            bad_concat += 1
+            print(f"  ERROR: problematic Concat '{node.name}' may produce invalid WebGPU shader")
+    if bad_concat:
+        raise RuntimeError(
+            f"Detected {bad_concat} problematic Concat nodes after cascading; "
+            "refuse to output potentially invalid WebGPU model."
+        )
     return out_path