diff --git a/build/torch210-cxx11-cu126-x86_64-linux/__init__.py b/build/torch210-cxx11-cu126-x86_64-linux/__init__.py
deleted file mode 100644
index 946160f16b9dc91fefeea037fb7ac84fd6afd802..0000000000000000000000000000000000000000
--- a/build/torch210-cxx11-cu126-x86_64-linux/__init__.py
+++ /dev/null
@@ -1,26 +0,0 @@
-import torch
-import torch.nn as nn
-
-from ._ops import ops
-
-from . import layers
-
-def dropout_add_ln_fwd(input, gamma, beta, rowscale, colscale, x0_subset, z_subset, dropout_p, epsilon, rowscale_const, z_numrows, gen, residual_in_fp32, is_rms_norm):
-    return ops.dropout_add_ln_fwd(input, gamma, beta, rowscale, colscale, x0_subset, z_subset, dropout_p, epsilon, rowscale_const, z_numrows, gen, residual_in_fp32, is_rms_norm)
-
-def dropout_add_ln_bwd(dz, dx, x, mu, rsigma, gamma, rowscale, colscale, x0_subset, z_subset, dropout_p, rowscale_const, x0_numrows, has_residual, is_rms_norm):
-    return ops.dropout_add_ln_bwd(dz, dx, x, mu, rsigma, gamma, rowscale, colscale, x0_subset, z_subset, dropout_p, rowscale_const, x0_numrows, has_residual, is_rms_norm)
-
-def dropout_add_ln_parallel_residual_fwd(input, gamma0, beta0, gamma1, beta1, dropout_p, epsilon, gen, residual_in_fp32, is_rms_norm):
-    return ops.dropout_add_ln_parallel_residual_fwd(input, gamma0, beta0, gamma1, beta1, dropout_p, epsilon, gen, residual_in_fp32, is_rms_norm)
-
-def dropout_add_ln_parallel_residual_bwd(dz0, dz1, dx, x, mu, rsigma, gamma0, gamma1, dropout_p, has_x1, has_residual, is_rms_norm):
-    return ops.dropout_add_ln_parallel_residual_bwd(dz0, dz1, dx, x, mu, rsigma, gamma0, gamma1, dropout_p, has_x1, has_residual, is_rms_norm)
-
-__all__ = [
-    "layers",
-    "dropout_add_ln_fwd",
-    "dropout_add_ln_bwd",
-    "dropout_add_ln_parallel_residual_fwd",
-    "dropout_add_ln_parallel_residual_bwd",
-]
\ No newline at end of file
diff --git a/build/torch210-cxx11-cu126-x86_64-linux/_layer_norm_fd07706.abi3.so b/build/torch210-cxx11-cu126-x86_64-linux/_layer_norm_fd07706.abi3.so
deleted file mode 100644
index 3e1b316855495addd39ea7ef89e2a628788b4478..0000000000000000000000000000000000000000
--- a/build/torch210-cxx11-cu126-x86_64-linux/_layer_norm_fd07706.abi3.so
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:49fd317d18b8b13367c70f037d1e8e3077aad8318d6dc40cd3050ab6f4e1d091
-size 712114272
diff --git a/build/torch210-cxx11-cu126-x86_64-linux/_ops.py b/build/torch210-cxx11-cu126-x86_64-linux/_ops.py
deleted file mode 100644
index 58eafd610133ab10f4d6afed289b76a069f413c9..0000000000000000000000000000000000000000
--- a/build/torch210-cxx11-cu126-x86_64-linux/_ops.py
+++ /dev/null
@@ -1,9 +0,0 @@
-import torch
-from . import _layer_norm_fd07706
-ops = torch.ops._layer_norm_fd07706
-
-def add_op_namespace_prefix(op_name: str):
-    """
-    Prefix op by namespace.
-    """
-    return f"_layer_norm_fd07706::{op_name}"
\ No newline at end of file
diff --git a/build/torch210-cxx11-cu126-x86_64-linux/layer_norm/__init__.py b/build/torch210-cxx11-cu126-x86_64-linux/layer_norm/__init__.py
deleted file mode 100644
index 03dbc1afe1cf156661a2b1b22003cd5f599a0309..0000000000000000000000000000000000000000
--- a/build/torch210-cxx11-cu126-x86_64-linux/layer_norm/__init__.py
+++ /dev/null
@@ -1,26 +0,0 @@
-import ctypes
-import sys
-
-import importlib
-from pathlib import Path
-from types import ModuleType
-
-def _import_from_path(file_path: Path) -> ModuleType:
-    # We cannot use the module name as-is, after adding it to `sys.modules`,
-    # it would also be used for other imports. So, we make a module name that
-    # depends on the path for it to be unique using the hex-encoded hash of
-    # the path.
-    path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value)
-    module_name = path_hash
-    spec = importlib.util.spec_from_file_location(module_name, file_path)
-    if spec is None:
-        raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
-    module = importlib.util.module_from_spec(spec)
-    if module is None:
-        raise ImportError(f"Cannot load module {module_name} from spec")
-    sys.modules[module_name] = module
-    spec.loader.exec_module(module)  # type: ignore
-    return module
-
-
-globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))
diff --git a/build/torch210-cxx11-cu126-x86_64-linux/layers.py b/build/torch210-cxx11-cu126-x86_64-linux/layers.py
deleted file mode 100644
index 7ed883f42ead452f8b60f498ec11302c53d3cf74..0000000000000000000000000000000000000000
--- a/build/torch210-cxx11-cu126-x86_64-linux/layers.py
+++ /dev/null
@@ -1,51 +0,0 @@
-import torch
-import torch.nn as nn
-
-from ._ops import ops
-
-
-class LayerNorm(nn.Module):
-    weight: torch.Tensor
-    variance_epsilon: float
-
-    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
-        output = ops.dropout_add_ln_fwd(
-            hidden_states.view(-1, hidden_states.shape[-1]),
-            gamma = self.weight,
-            beta = None,
-            rowscale = None,
-            colscale = None,
-            x0_subset = None,
-            z_subset = None,
-            dropout_p = 0,
-            epsilon = self.variance_epsilon,
-            rowscale_const = 1.0,
-            z_numrows = hidden_states.shape[1],
-            gen = None,
-            residual_in_fp32 = False,
-            is_rms_norm = False,
-        )
-        return output[0].view(hidden_states.shape)
-
-class LlamaRMSNorm(nn.Module):
-    weight: torch.Tensor
-    variance_epsilon: float
-
-    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
-        output = ops.dropout_add_ln_fwd(
-            hidden_states.view(-1, hidden_states.shape[-1]),
-            gamma = self.weight,
-            beta = None,
-            rowscale = None,
-            colscale = None,
-            x0_subset = None,
-            z_subset = None,
-            dropout_p = 0,
-            epsilon = self.variance_epsilon,
-            rowscale_const = 1.0,
-            z_numrows = hidden_states.shape[1],
-            gen = None,
-            residual_in_fp32 = False,
-            is_rms_norm = True,
-        )
-        return output[0].view(hidden_states.shape)
\ No newline at end of file
diff --git a/build/torch210-cxx11-cu126-x86_64-linux/metadata.json b/build/torch210-cxx11-cu126-x86_64-linux/metadata.json
deleted file mode 100644
index 76bafa5f33b6818aa6bb4cab04be811b87519b44..0000000000000000000000000000000000000000
--- a/build/torch210-cxx11-cu126-x86_64-linux/metadata.json
+++ /dev/null
@@ -1 +0,0 @@
-{"python-depends":[]}
\ No newline at end of file
diff --git a/build/torch210-cxx11-cu128-x86_64-linux/__init__.py b/build/torch210-cxx11-cu128-x86_64-linux/__init__.py
deleted file mode 100644
index 946160f16b9dc91fefeea037fb7ac84fd6afd802..0000000000000000000000000000000000000000
--- a/build/torch210-cxx11-cu128-x86_64-linux/__init__.py
+++ /dev/null
@@ -1,26 +0,0 @@
-import torch
-import torch.nn as nn
-
-from ._ops import ops
-
-from . import layers
-
-def dropout_add_ln_fwd(input, gamma, beta, rowscale, colscale, x0_subset, z_subset, dropout_p, epsilon, rowscale_const, z_numrows, gen, residual_in_fp32, is_rms_norm):
-    return ops.dropout_add_ln_fwd(input, gamma, beta, rowscale, colscale, x0_subset, z_subset, dropout_p, epsilon, rowscale_const, z_numrows, gen, residual_in_fp32, is_rms_norm)
-
-def dropout_add_ln_bwd(dz, dx, x, mu, rsigma, gamma, rowscale, colscale, x0_subset, z_subset, dropout_p, rowscale_const, x0_numrows, has_residual, is_rms_norm):
-    return ops.dropout_add_ln_bwd(dz, dx, x, mu, rsigma, gamma, rowscale, colscale, x0_subset, z_subset, dropout_p, rowscale_const, x0_numrows, has_residual, is_rms_norm)
-
-def dropout_add_ln_parallel_residual_fwd(input, gamma0, beta0, gamma1, beta1, dropout_p, epsilon, gen, residual_in_fp32, is_rms_norm):
-    return ops.dropout_add_ln_parallel_residual_fwd(input, gamma0, beta0, gamma1, beta1, dropout_p, epsilon, gen, residual_in_fp32, is_rms_norm)
-
-def dropout_add_ln_parallel_residual_bwd(dz0, dz1, dx, x, mu, rsigma, gamma0, gamma1, dropout_p, has_x1, has_residual, is_rms_norm):
-    return ops.dropout_add_ln_parallel_residual_bwd(dz0, dz1, dx, x, mu, rsigma, gamma0, gamma1, dropout_p, has_x1, has_residual, is_rms_norm)
-
-__all__ = [
-    "layers",
-    "dropout_add_ln_fwd",
-    "dropout_add_ln_bwd",
-    "dropout_add_ln_parallel_residual_fwd",
-    "dropout_add_ln_parallel_residual_bwd",
-]
\ No newline at end of file
diff --git a/build/torch210-cxx11-cu128-x86_64-linux/_layer_norm_fd07706.abi3.so b/build/torch210-cxx11-cu128-x86_64-linux/_layer_norm_fd07706.abi3.so
deleted file mode 100644
index bd5edf6c104e59f60b0b7984d42f3734f7e04e69..0000000000000000000000000000000000000000
--- a/build/torch210-cxx11-cu128-x86_64-linux/_layer_norm_fd07706.abi3.so
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:a8f9c486fa147def1328121949fe502ba856d73e599a00844acf78faa8129cee
-size 1231439976
diff --git a/build/torch210-cxx11-cu128-x86_64-linux/_ops.py b/build/torch210-cxx11-cu128-x86_64-linux/_ops.py
deleted file mode 100644
index 58eafd610133ab10f4d6afed289b76a069f413c9..0000000000000000000000000000000000000000
--- a/build/torch210-cxx11-cu128-x86_64-linux/_ops.py
+++ /dev/null
@@ -1,9 +0,0 @@
-import torch
-from . import _layer_norm_fd07706
-ops = torch.ops._layer_norm_fd07706
-
-def add_op_namespace_prefix(op_name: str):
-    """
-    Prefix op by namespace.
-    """
-    return f"_layer_norm_fd07706::{op_name}"
\ No newline at end of file
diff --git a/build/torch210-cxx11-cu128-x86_64-linux/layer_norm/__init__.py b/build/torch210-cxx11-cu128-x86_64-linux/layer_norm/__init__.py
deleted file mode 100644
index 03dbc1afe1cf156661a2b1b22003cd5f599a0309..0000000000000000000000000000000000000000
--- a/build/torch210-cxx11-cu128-x86_64-linux/layer_norm/__init__.py
+++ /dev/null
@@ -1,26 +0,0 @@
-import ctypes
-import sys
-
-import importlib
-from pathlib import Path
-from types import ModuleType
-
-def _import_from_path(file_path: Path) -> ModuleType:
-    # We cannot use the module name as-is, after adding it to `sys.modules`,
-    # it would also be used for other imports. So, we make a module name that
-    # depends on the path for it to be unique using the hex-encoded hash of
-    # the path.
-    path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value)
-    module_name = path_hash
-    spec = importlib.util.spec_from_file_location(module_name, file_path)
-    if spec is None:
-        raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
-    module = importlib.util.module_from_spec(spec)
-    if module is None:
-        raise ImportError(f"Cannot load module {module_name} from spec")
-    sys.modules[module_name] = module
-    spec.loader.exec_module(module)  # type: ignore
-    return module
-
-
-globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))
diff --git a/build/torch210-cxx11-cu128-x86_64-linux/layers.py b/build/torch210-cxx11-cu128-x86_64-linux/layers.py
deleted file mode 100644
index 7ed883f42ead452f8b60f498ec11302c53d3cf74..0000000000000000000000000000000000000000
--- a/build/torch210-cxx11-cu128-x86_64-linux/layers.py
+++ /dev/null
@@ -1,51 +0,0 @@
-import torch
-import torch.nn as nn
-
-from ._ops import ops
-
-
-class LayerNorm(nn.Module):
-    weight: torch.Tensor
-    variance_epsilon: float
-
-    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
-        output = ops.dropout_add_ln_fwd(
-            hidden_states.view(-1, hidden_states.shape[-1]),
-            gamma = self.weight,
-            beta = None,
-            rowscale = None,
-            colscale = None,
-            x0_subset = None,
-            z_subset = None,
-            dropout_p = 0,
-            epsilon = self.variance_epsilon,
-            rowscale_const = 1.0,
-            z_numrows = hidden_states.shape[1],
-            gen = None,
-            residual_in_fp32 = False,
-            is_rms_norm = False,
-        )
-        return output[0].view(hidden_states.shape)
-
-class LlamaRMSNorm(nn.Module):
-    weight: torch.Tensor
-    variance_epsilon: float
-
-    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
-        output = ops.dropout_add_ln_fwd(
-            hidden_states.view(-1, hidden_states.shape[-1]),
-            gamma = self.weight,
-            beta = None,
-            rowscale = None,
-            colscale = None,
-            x0_subset = None,
-            z_subset = None,
-            dropout_p = 0,
-            epsilon = self.variance_epsilon,
-            rowscale_const = 1.0,
-            z_numrows = hidden_states.shape[1],
-            gen = None,
-            residual_in_fp32 = False,
-            is_rms_norm = True,
-        )
-        return output[0].view(hidden_states.shape)
\ No newline at end of file
diff --git a/build/torch210-cxx11-cu128-x86_64-linux/metadata.json b/build/torch210-cxx11-cu128-x86_64-linux/metadata.json
deleted file mode 100644
index 76bafa5f33b6818aa6bb4cab04be811b87519b44..0000000000000000000000000000000000000000
--- a/build/torch210-cxx11-cu128-x86_64-linux/metadata.json
+++ /dev/null
@@ -1 +0,0 @@
-{"python-depends":[]}
\ No newline at end of file
diff --git a/build/torch210-cxx11-cu130-x86_64-linux/__init__.py b/build/torch210-cxx11-cu130-x86_64-linux/__init__.py
deleted file mode 100644
index 946160f16b9dc91fefeea037fb7ac84fd6afd802..0000000000000000000000000000000000000000
--- a/build/torch210-cxx11-cu130-x86_64-linux/__init__.py
+++ /dev/null
@@ -1,26 +0,0 @@
-import torch
-import torch.nn as nn
-
-from ._ops import ops
-
-from . import layers
-
-def dropout_add_ln_fwd(input, gamma, beta, rowscale, colscale, x0_subset, z_subset, dropout_p, epsilon, rowscale_const, z_numrows, gen, residual_in_fp32, is_rms_norm):
-    return ops.dropout_add_ln_fwd(input, gamma, beta, rowscale, colscale, x0_subset, z_subset, dropout_p, epsilon, rowscale_const, z_numrows, gen, residual_in_fp32, is_rms_norm)
-
-def dropout_add_ln_bwd(dz, dx, x, mu, rsigma, gamma, rowscale, colscale, x0_subset, z_subset, dropout_p, rowscale_const, x0_numrows, has_residual, is_rms_norm):
-    return ops.dropout_add_ln_bwd(dz, dx, x, mu, rsigma, gamma, rowscale, colscale, x0_subset, z_subset, dropout_p, rowscale_const, x0_numrows, has_residual, is_rms_norm)
-
-def dropout_add_ln_parallel_residual_fwd(input, gamma0, beta0, gamma1, beta1, dropout_p, epsilon, gen, residual_in_fp32, is_rms_norm):
-    return ops.dropout_add_ln_parallel_residual_fwd(input, gamma0, beta0, gamma1, beta1, dropout_p, epsilon, gen, residual_in_fp32, is_rms_norm)
-
-def dropout_add_ln_parallel_residual_bwd(dz0, dz1, dx, x, mu, rsigma, gamma0, gamma1, dropout_p, has_x1, has_residual, is_rms_norm):
-    return ops.dropout_add_ln_parallel_residual_bwd(dz0, dz1, dx, x, mu, rsigma, gamma0, gamma1, dropout_p, has_x1, has_residual, is_rms_norm)
-
-__all__ = [
-    "layers",
-    "dropout_add_ln_fwd",
-    "dropout_add_ln_bwd",
-    "dropout_add_ln_parallel_residual_fwd",
-    "dropout_add_ln_parallel_residual_bwd",
-]
\ No newline at end of file
diff --git a/build/torch210-cxx11-cu130-x86_64-linux/_layer_norm_fd07706.abi3.so b/build/torch210-cxx11-cu130-x86_64-linux/_layer_norm_fd07706.abi3.so
deleted file mode 100644
index 74da187913202ae33734c1557bd953a88f3abfae..0000000000000000000000000000000000000000
--- a/build/torch210-cxx11-cu130-x86_64-linux/_layer_norm_fd07706.abi3.so
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:657b35fbbd096c4e34b804790484286941b781ef936fb920f9f1d10f7b0d4281
-size 1238357112
diff --git a/build/torch210-cxx11-cu130-x86_64-linux/_ops.py b/build/torch210-cxx11-cu130-x86_64-linux/_ops.py
deleted file mode 100644
index 58eafd610133ab10f4d6afed289b76a069f413c9..0000000000000000000000000000000000000000
--- a/build/torch210-cxx11-cu130-x86_64-linux/_ops.py
+++ /dev/null
@@ -1,9 +0,0 @@
-import torch
-from . import _layer_norm_fd07706
-ops = torch.ops._layer_norm_fd07706
-
-def add_op_namespace_prefix(op_name: str):
-    """
-    Prefix op by namespace.
-    """
-    return f"_layer_norm_fd07706::{op_name}"
\ No newline at end of file
diff --git a/build/torch210-cxx11-cu130-x86_64-linux/layer_norm/__init__.py b/build/torch210-cxx11-cu130-x86_64-linux/layer_norm/__init__.py
deleted file mode 100644
index 03dbc1afe1cf156661a2b1b22003cd5f599a0309..0000000000000000000000000000000000000000
--- a/build/torch210-cxx11-cu130-x86_64-linux/layer_norm/__init__.py
+++ /dev/null
@@ -1,26 +0,0 @@
-import ctypes
-import sys
-
-import importlib
-from pathlib import Path
-from types import ModuleType
-
-def _import_from_path(file_path: Path) -> ModuleType:
-    # We cannot use the module name as-is, after adding it to `sys.modules`,
-    # it would also be used for other imports. So, we make a module name that
-    # depends on the path for it to be unique using the hex-encoded hash of
-    # the path.
-    path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value)
-    module_name = path_hash
-    spec = importlib.util.spec_from_file_location(module_name, file_path)
-    if spec is None:
-        raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
-    module = importlib.util.module_from_spec(spec)
-    if module is None:
-        raise ImportError(f"Cannot load module {module_name} from spec")
-    sys.modules[module_name] = module
-    spec.loader.exec_module(module)  # type: ignore
-    return module
-
-
-globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))
diff --git a/build/torch210-cxx11-cu130-x86_64-linux/layers.py b/build/torch210-cxx11-cu130-x86_64-linux/layers.py
deleted file mode 100644
index 7ed883f42ead452f8b60f498ec11302c53d3cf74..0000000000000000000000000000000000000000
--- a/build/torch210-cxx11-cu130-x86_64-linux/layers.py
+++ /dev/null
@@ -1,51 +0,0 @@
-import torch
-import torch.nn as nn
-
-from ._ops import ops
-
-
-class LayerNorm(nn.Module):
-    weight: torch.Tensor
-    variance_epsilon: float
-
-    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
-        output = ops.dropout_add_ln_fwd(
-            hidden_states.view(-1, hidden_states.shape[-1]),
-            gamma = self.weight,
-            beta = None,
-            rowscale = None,
-            colscale = None,
-            x0_subset = None,
-            z_subset = None,
-            dropout_p = 0,
-            epsilon = self.variance_epsilon,
-            rowscale_const = 1.0,
-            z_numrows = hidden_states.shape[1],
-            gen = None,
-            residual_in_fp32 = False,
-            is_rms_norm = False,
-        )
-        return output[0].view(hidden_states.shape)
-
-class LlamaRMSNorm(nn.Module):
-    weight: torch.Tensor
-    variance_epsilon: float
-
-    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
-        output = ops.dropout_add_ln_fwd(
-            hidden_states.view(-1, hidden_states.shape[-1]),
-            gamma = self.weight,
-            beta = None,
-            rowscale = None,
-            colscale = None,
-            x0_subset = None,
-            z_subset = None,
-            dropout_p = 0,
-            epsilon = self.variance_epsilon,
-            rowscale_const = 1.0,
-            z_numrows = hidden_states.shape[1],
-            gen = None,
-            residual_in_fp32 = False,
-            is_rms_norm = True,
-        )
-        return output[0].view(hidden_states.shape)
\ No newline at end of file
diff --git a/build/torch210-cxx11-cu130-x86_64-linux/metadata.json b/build/torch210-cxx11-cu130-x86_64-linux/metadata.json
deleted file mode 100644
index 76bafa5f33b6818aa6bb4cab04be811b87519b44..0000000000000000000000000000000000000000
--- a/build/torch210-cxx11-cu130-x86_64-linux/metadata.json
+++ /dev/null
@@ -1 +0,0 @@
-{"python-depends":[]}
\ No newline at end of file
diff --git a/build/torch28-cxx11-cu126-x86_64-linux/__init__.py b/build/torch28-cxx11-cu126-x86_64-linux/__init__.py
deleted file mode 100644
index 946160f16b9dc91fefeea037fb7ac84fd6afd802..0000000000000000000000000000000000000000
--- a/build/torch28-cxx11-cu126-x86_64-linux/__init__.py
+++ /dev/null
@@ -1,26 +0,0 @@
-import torch
-import torch.nn as nn
-
-from ._ops import ops
-
-from . import layers
-
-def dropout_add_ln_fwd(input, gamma, beta, rowscale, colscale, x0_subset, z_subset, dropout_p, epsilon, rowscale_const, z_numrows, gen, residual_in_fp32, is_rms_norm):
-    return ops.dropout_add_ln_fwd(input, gamma, beta, rowscale, colscale, x0_subset, z_subset, dropout_p, epsilon, rowscale_const, z_numrows, gen, residual_in_fp32, is_rms_norm)
-
-def dropout_add_ln_bwd(dz, dx, x, mu, rsigma, gamma, rowscale, colscale, x0_subset, z_subset, dropout_p, rowscale_const, x0_numrows, has_residual, is_rms_norm):
-    return ops.dropout_add_ln_bwd(dz, dx, x, mu, rsigma, gamma, rowscale, colscale, x0_subset, z_subset, dropout_p, rowscale_const, x0_numrows, has_residual, is_rms_norm)
-
-def dropout_add_ln_parallel_residual_fwd(input, gamma0, beta0, gamma1, beta1, dropout_p, epsilon, gen, residual_in_fp32, is_rms_norm):
-    return ops.dropout_add_ln_parallel_residual_fwd(input, gamma0, beta0, gamma1, beta1, dropout_p, epsilon, gen, residual_in_fp32, is_rms_norm)
-
-def dropout_add_ln_parallel_residual_bwd(dz0, dz1, dx, x, mu, rsigma, gamma0, gamma1, dropout_p, has_x1, has_residual, is_rms_norm):
-    return ops.dropout_add_ln_parallel_residual_bwd(dz0, dz1, dx, x, mu, rsigma, gamma0, gamma1, dropout_p, has_x1, has_residual, is_rms_norm)
-
-__all__ = [
-    "layers",
-    "dropout_add_ln_fwd",
-    "dropout_add_ln_bwd",
-    "dropout_add_ln_parallel_residual_fwd",
-    "dropout_add_ln_parallel_residual_bwd",
-]
\ No newline at end of file
diff --git a/build/torch28-cxx11-cu126-x86_64-linux/_layer_norm_fd07706.abi3.so b/build/torch28-cxx11-cu126-x86_64-linux/_layer_norm_fd07706.abi3.so
deleted file mode 100644
index 998d160f15c98c3d90fe4a5c6a8c229c28ec5349..0000000000000000000000000000000000000000
--- a/build/torch28-cxx11-cu126-x86_64-linux/_layer_norm_fd07706.abi3.so
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:f4c4fce45ad6f08cfa1a3e2c7851c0964524975543a3e16b72406b6c8187bba4
-size 712034088
diff --git a/build/torch28-cxx11-cu126-x86_64-linux/_ops.py b/build/torch28-cxx11-cu126-x86_64-linux/_ops.py
deleted file mode 100644
index 58eafd610133ab10f4d6afed289b76a069f413c9..0000000000000000000000000000000000000000
--- a/build/torch28-cxx11-cu126-x86_64-linux/_ops.py
+++ /dev/null
@@ -1,9 +0,0 @@
-import torch
-from . import _layer_norm_fd07706
-ops = torch.ops._layer_norm_fd07706
-
-def add_op_namespace_prefix(op_name: str):
-    """
-    Prefix op by namespace.
-    """
-    return f"_layer_norm_fd07706::{op_name}"
\ No newline at end of file
diff --git a/build/torch28-cxx11-cu126-x86_64-linux/layer_norm/__init__.py b/build/torch28-cxx11-cu126-x86_64-linux/layer_norm/__init__.py
deleted file mode 100644
index 03dbc1afe1cf156661a2b1b22003cd5f599a0309..0000000000000000000000000000000000000000
--- a/build/torch28-cxx11-cu126-x86_64-linux/layer_norm/__init__.py
+++ /dev/null
@@ -1,26 +0,0 @@
-import ctypes
-import sys
-
-import importlib
-from pathlib import Path
-from types import ModuleType
-
-def _import_from_path(file_path: Path) -> ModuleType:
-    # We cannot use the module name as-is, after adding it to `sys.modules`,
-    # it would also be used for other imports. So, we make a module name that
-    # depends on the path for it to be unique using the hex-encoded hash of
-    # the path.
-    path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value)
-    module_name = path_hash
-    spec = importlib.util.spec_from_file_location(module_name, file_path)
-    if spec is None:
-        raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
-    module = importlib.util.module_from_spec(spec)
-    if module is None:
-        raise ImportError(f"Cannot load module {module_name} from spec")
-    sys.modules[module_name] = module
-    spec.loader.exec_module(module)  # type: ignore
-    return module
-
-
-globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))
diff --git a/build/torch28-cxx11-cu126-x86_64-linux/layers.py b/build/torch28-cxx11-cu126-x86_64-linux/layers.py
deleted file mode 100644
index 7ed883f42ead452f8b60f498ec11302c53d3cf74..0000000000000000000000000000000000000000
--- a/build/torch28-cxx11-cu126-x86_64-linux/layers.py
+++ /dev/null
@@ -1,51 +0,0 @@
-import torch
-import torch.nn as nn
-
-from ._ops import ops
-
-
-class LayerNorm(nn.Module):
-    weight: torch.Tensor
-    variance_epsilon: float
-
-    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
-        output = ops.dropout_add_ln_fwd(
-            hidden_states.view(-1, hidden_states.shape[-1]),
-            gamma = self.weight,
-            beta = None,
-            rowscale = None,
-            colscale = None,
-            x0_subset = None,
-            z_subset = None,
-            dropout_p = 0,
-            epsilon = self.variance_epsilon,
-            rowscale_const = 1.0,
-            z_numrows = hidden_states.shape[1],
-            gen = None,
-            residual_in_fp32 = False,
-            is_rms_norm = False,
-        )
-        return output[0].view(hidden_states.shape)
-
-class LlamaRMSNorm(nn.Module):
-    weight: torch.Tensor
-    variance_epsilon: float
-
-    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
-        output = ops.dropout_add_ln_fwd(
-            hidden_states.view(-1, hidden_states.shape[-1]),
-            gamma = self.weight,
-            beta = None,
-            rowscale = None,
-            colscale = None,
-            x0_subset = None,
-            z_subset = None,
-            dropout_p = 0,
-            epsilon = self.variance_epsilon,
-            rowscale_const = 1.0,
-            z_numrows = hidden_states.shape[1],
-            gen = None,
-            residual_in_fp32 = False,
-            is_rms_norm = True,
-        )
-        return output[0].view(hidden_states.shape)
\ No newline at end of file
diff --git a/build/torch28-cxx11-cu126-x86_64-linux/metadata.json b/build/torch28-cxx11-cu126-x86_64-linux/metadata.json
deleted file mode 100644
index 76bafa5f33b6818aa6bb4cab04be811b87519b44..0000000000000000000000000000000000000000
--- a/build/torch28-cxx11-cu126-x86_64-linux/metadata.json
+++ /dev/null
@@ -1 +0,0 @@
-{"python-depends":[]}
\ No newline at end of file
diff --git a/build/torch28-cxx11-cu128-x86_64-linux/__init__.py b/build/torch28-cxx11-cu128-x86_64-linux/__init__.py
deleted file mode 100644
index 946160f16b9dc91fefeea037fb7ac84fd6afd802..0000000000000000000000000000000000000000
--- a/build/torch28-cxx11-cu128-x86_64-linux/__init__.py
+++ /dev/null
@@ -1,26 +0,0 @@
-import torch
-import torch.nn as nn
-
-from ._ops import ops
-
-from . import layers
-
-def dropout_add_ln_fwd(input, gamma, beta, rowscale, colscale, x0_subset, z_subset, dropout_p, epsilon, rowscale_const, z_numrows, gen, residual_in_fp32, is_rms_norm):
-    return ops.dropout_add_ln_fwd(input, gamma, beta, rowscale, colscale, x0_subset, z_subset, dropout_p, epsilon, rowscale_const, z_numrows, gen, residual_in_fp32, is_rms_norm)
-
-def dropout_add_ln_bwd(dz, dx, x, mu, rsigma, gamma, rowscale, colscale, x0_subset, z_subset, dropout_p, rowscale_const, x0_numrows, has_residual, is_rms_norm):
-    return ops.dropout_add_ln_bwd(dz, dx, x, mu, rsigma, gamma, rowscale, colscale, x0_subset, z_subset, dropout_p, rowscale_const, x0_numrows, has_residual, is_rms_norm)
-
-def dropout_add_ln_parallel_residual_fwd(input, gamma0, beta0, gamma1, beta1, dropout_p, epsilon, gen, residual_in_fp32, is_rms_norm):
-    return ops.dropout_add_ln_parallel_residual_fwd(input, gamma0, beta0, gamma1, beta1, dropout_p, epsilon, gen, residual_in_fp32, is_rms_norm)
-
-def dropout_add_ln_parallel_residual_bwd(dz0, dz1, dx, x, mu, rsigma, gamma0, gamma1, dropout_p, has_x1, has_residual, is_rms_norm):
-    return ops.dropout_add_ln_parallel_residual_bwd(dz0, dz1, dx, x, mu, rsigma, gamma0, gamma1, dropout_p, has_x1, has_residual, is_rms_norm)
-
-__all__ = [
-    "layers",
-    "dropout_add_ln_fwd",
-    "dropout_add_ln_bwd",
-    "dropout_add_ln_parallel_residual_fwd",
-    "dropout_add_ln_parallel_residual_bwd",
-]
\ No newline at end of file
diff --git a/build/torch28-cxx11-cu128-x86_64-linux/_layer_norm_fd07706.abi3.so b/build/torch28-cxx11-cu128-x86_64-linux/_layer_norm_fd07706.abi3.so
deleted file mode 100644
index 76d9391c84580a3423af8099fbc29253cef21975..0000000000000000000000000000000000000000
--- a/build/torch28-cxx11-cu128-x86_64-linux/_layer_norm_fd07706.abi3.so
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:5821346938e86e0308c60fd072d54b57aba427aac75e354d3132dddc755ba125
-size 1231343024
diff --git a/build/torch28-cxx11-cu128-x86_64-linux/_ops.py b/build/torch28-cxx11-cu128-x86_64-linux/_ops.py
deleted file mode 100644
index 58eafd610133ab10f4d6afed289b76a069f413c9..0000000000000000000000000000000000000000
--- a/build/torch28-cxx11-cu128-x86_64-linux/_ops.py
+++ /dev/null
@@ -1,9 +0,0 @@
-import torch
-from . import _layer_norm_fd07706
-ops = torch.ops._layer_norm_fd07706
-
-def add_op_namespace_prefix(op_name: str):
-    """
-    Prefix op by namespace.
-    """
-    return f"_layer_norm_fd07706::{op_name}"
\ No newline at end of file
diff --git a/build/torch28-cxx11-cu128-x86_64-linux/layer_norm/__init__.py b/build/torch28-cxx11-cu128-x86_64-linux/layer_norm/__init__.py
deleted file mode 100644
index 03dbc1afe1cf156661a2b1b22003cd5f599a0309..0000000000000000000000000000000000000000
--- a/build/torch28-cxx11-cu128-x86_64-linux/layer_norm/__init__.py
+++ /dev/null
@@ -1,26 +0,0 @@
-import ctypes
-import sys
-
-import importlib
-from pathlib import Path
-from types import ModuleType
-
-def _import_from_path(file_path: Path) -> ModuleType:
-    # We cannot use the module name as-is, after adding it to `sys.modules`,
-    # it would also be used for other imports. So, we make a module name that
-    # depends on the path for it to be unique using the hex-encoded hash of
-    # the path.
-    path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value)
-    module_name = path_hash
-    spec = importlib.util.spec_from_file_location(module_name, file_path)
-    if spec is None:
-        raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
-    module = importlib.util.module_from_spec(spec)
-    if module is None:
-        raise ImportError(f"Cannot load module {module_name} from spec")
-    sys.modules[module_name] = module
-    spec.loader.exec_module(module)  # type: ignore
-    return module
-
-
-globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))
diff --git a/build/torch28-cxx11-cu128-x86_64-linux/layers.py b/build/torch28-cxx11-cu128-x86_64-linux/layers.py
deleted file mode 100644
index 7ed883f42ead452f8b60f498ec11302c53d3cf74..0000000000000000000000000000000000000000
--- a/build/torch28-cxx11-cu128-x86_64-linux/layers.py
+++ /dev/null
@@ -1,51 +0,0 @@
-import torch
-import torch.nn as nn
-
-from ._ops import ops
-
-
-class LayerNorm(nn.Module):
-    weight: torch.Tensor
-    variance_epsilon: float
-
-    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
-        output = ops.dropout_add_ln_fwd(
-            hidden_states.view(-1, hidden_states.shape[-1]),
-            gamma = self.weight,
-            beta = None,
-            rowscale = None,
-            colscale = None,
-            x0_subset = None,
-            z_subset = None,
-            dropout_p = 0,
-            epsilon = self.variance_epsilon,
-            rowscale_const = 1.0,
-            z_numrows = hidden_states.shape[1],
-            gen = None,
-            residual_in_fp32 = False,
-            is_rms_norm = False,
-        )
-        return output[0].view(hidden_states.shape)
-
-class LlamaRMSNorm(nn.Module):
-    weight: torch.Tensor
-    variance_epsilon: float
-
-    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
-        output = ops.dropout_add_ln_fwd(
-            hidden_states.view(-1, hidden_states.shape[-1]),
-            gamma = self.weight,
-            beta = None,
-            rowscale = None,
-            colscale = None,
-            x0_subset = None,
-            z_subset = None,
-            dropout_p = 0,
-            epsilon = self.variance_epsilon,
-            rowscale_const = 1.0,
-            z_numrows = hidden_states.shape[1],
-            gen = None,
-            residual_in_fp32 = False,
-            is_rms_norm = True,
-        )
-        return output[0].view(hidden_states.shape)
\ No newline at end of file
diff --git a/build/torch28-cxx11-cu128-x86_64-linux/metadata.json b/build/torch28-cxx11-cu128-x86_64-linux/metadata.json
deleted file mode 100644
index 76bafa5f33b6818aa6bb4cab04be811b87519b44..0000000000000000000000000000000000000000
--- a/build/torch28-cxx11-cu128-x86_64-linux/metadata.json
+++ /dev/null
@@ -1 +0,0 @@
-{"python-depends":[]}
\ No newline at end of file
diff --git a/build/torch28-cxx11-cu129-x86_64-linux/__init__.py b/build/torch28-cxx11-cu129-x86_64-linux/__init__.py
deleted file mode 100644
index 946160f16b9dc91fefeea037fb7ac84fd6afd802..0000000000000000000000000000000000000000
--- a/build/torch28-cxx11-cu129-x86_64-linux/__init__.py
+++ /dev/null
@@ -1,26 +0,0 @@
-import torch
-import torch.nn as nn
-
-from ._ops import ops
-
-from . import layers
-
-def dropout_add_ln_fwd(input, gamma, beta, rowscale, colscale, x0_subset, z_subset, dropout_p, epsilon, rowscale_const, z_numrows, gen, residual_in_fp32, is_rms_norm):
-    return ops.dropout_add_ln_fwd(input, gamma, beta, rowscale, colscale, x0_subset, z_subset, dropout_p, epsilon, rowscale_const, z_numrows, gen, residual_in_fp32, is_rms_norm)
-
-def dropout_add_ln_bwd(dz, dx, x, mu, rsigma, gamma, rowscale, colscale, x0_subset, z_subset, dropout_p, rowscale_const, x0_numrows, has_residual, is_rms_norm):
-    return ops.dropout_add_ln_bwd(dz, dx, x, mu, rsigma, gamma, rowscale, colscale, x0_subset, z_subset, dropout_p, rowscale_const, x0_numrows, has_residual, is_rms_norm)
-
-def dropout_add_ln_parallel_residual_fwd(input, gamma0, beta0, gamma1, beta1, dropout_p, epsilon, gen, residual_in_fp32, is_rms_norm):
-    return ops.dropout_add_ln_parallel_residual_fwd(input, gamma0, beta0, gamma1, beta1, dropout_p, epsilon, gen, residual_in_fp32, is_rms_norm)
-
-def dropout_add_ln_parallel_residual_bwd(dz0, dz1, dx, x, mu, rsigma, gamma0, gamma1, dropout_p, has_x1, has_residual, is_rms_norm):
-    return ops.dropout_add_ln_parallel_residual_bwd(dz0, dz1, dx, x, mu, rsigma, gamma0, gamma1, dropout_p, has_x1, has_residual, is_rms_norm)
-
-__all__ = [
-    "layers",
-    "dropout_add_ln_fwd",
-    "dropout_add_ln_bwd",
-    "dropout_add_ln_parallel_residual_fwd",
-    "dropout_add_ln_parallel_residual_bwd",
-]
\ No newline at end of file
diff --git a/build/torch28-cxx11-cu129-x86_64-linux/_layer_norm_fd07706.abi3.so b/build/torch28-cxx11-cu129-x86_64-linux/_layer_norm_fd07706.abi3.so
deleted file mode 100644
index 6de541efe55ef1a858397aa4f3ab8f239f3e7c70..0000000000000000000000000000000000000000
--- a/build/torch28-cxx11-cu129-x86_64-linux/_layer_norm_fd07706.abi3.so
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:43c278069ef7e766a8eae76c27b4c91a3e84065c4714f7d9e0d6ff8413732e7a
-size 1283038336
diff --git a/build/torch28-cxx11-cu129-x86_64-linux/_ops.py b/build/torch28-cxx11-cu129-x86_64-linux/_ops.py
deleted file mode 100644
index 58eafd610133ab10f4d6afed289b76a069f413c9..0000000000000000000000000000000000000000
--- a/build/torch28-cxx11-cu129-x86_64-linux/_ops.py
+++ /dev/null
@@ -1,9 +0,0 @@
-import torch
-from . import _layer_norm_fd07706
-ops = torch.ops._layer_norm_fd07706
-
-def add_op_namespace_prefix(op_name: str):
-    """
-    Prefix op by namespace.
-    """
-    return f"_layer_norm_fd07706::{op_name}"
\ No newline at end of file
diff --git a/build/torch28-cxx11-cu129-x86_64-linux/layer_norm/__init__.py b/build/torch28-cxx11-cu129-x86_64-linux/layer_norm/__init__.py
deleted file mode 100644
index 03dbc1afe1cf156661a2b1b22003cd5f599a0309..0000000000000000000000000000000000000000
--- a/build/torch28-cxx11-cu129-x86_64-linux/layer_norm/__init__.py
+++ /dev/null
@@ -1,26 +0,0 @@
-import ctypes
-import sys
-
-import importlib
-from pathlib import Path
-from types import ModuleType
-
-def _import_from_path(file_path: Path) -> ModuleType:
-    # We cannot use the module name as-is, after adding it to `sys.modules`,
-    # it would also be used for other imports. So, we make a module name that
-    # depends on the path for it to be unique using the hex-encoded hash of
-    # the path.
-    path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value)
-    module_name = path_hash
-    spec = importlib.util.spec_from_file_location(module_name, file_path)
-    if spec is None:
-        raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
-    module = importlib.util.module_from_spec(spec)
-    if module is None:
-        raise ImportError(f"Cannot load module {module_name} from spec")
-    sys.modules[module_name] = module
-    spec.loader.exec_module(module)  # type: ignore
-    return module
-
-
-globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))
diff --git a/build/torch28-cxx11-cu129-x86_64-linux/layers.py b/build/torch28-cxx11-cu129-x86_64-linux/layers.py
deleted file mode 100644
index 7ed883f42ead452f8b60f498ec11302c53d3cf74..0000000000000000000000000000000000000000
--- a/build/torch28-cxx11-cu129-x86_64-linux/layers.py
+++ /dev/null
@@ -1,51 +0,0 @@
-import torch
-import torch.nn as nn
-
-from ._ops import ops
-
-
-class LayerNorm(nn.Module):
-    weight: torch.Tensor
-    variance_epsilon: float
-
-    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
-        output = ops.dropout_add_ln_fwd(
-            hidden_states.view(-1, hidden_states.shape[-1]),
-            gamma = self.weight,
-            beta = None,
-            rowscale = None,
-            colscale = None,
-            x0_subset = None,
-            z_subset = None,
-            dropout_p = 0,
-            epsilon = self.variance_epsilon,
-            rowscale_const = 1.0,
-            z_numrows = hidden_states.shape[1],
-            gen = None,
-            residual_in_fp32 = False,
-            is_rms_norm = False,
-        )
-        return output[0].view(hidden_states.shape)
-
-class LlamaRMSNorm(nn.Module):
-    weight: torch.Tensor
-    variance_epsilon: float
-
-    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
-        output = ops.dropout_add_ln_fwd(
-            hidden_states.view(-1, hidden_states.shape[-1]),
-            gamma = self.weight,
-            beta = None,
-            rowscale = None,
-            colscale = None,
-            x0_subset = None,
-            z_subset = None,
-            dropout_p = 0,
-            epsilon = self.variance_epsilon,
-            rowscale_const = 1.0,
-            z_numrows = hidden_states.shape[1],
-            gen = None,
-            residual_in_fp32 = False,
-            is_rms_norm = True,
-        )
-        return output[0].view(hidden_states.shape)
\ No newline at end of file
diff --git a/build/torch28-cxx11-cu129-x86_64-linux/metadata.json b/build/torch28-cxx11-cu129-x86_64-linux/metadata.json
deleted file mode 100644
index 76bafa5f33b6818aa6bb4cab04be811b87519b44..0000000000000000000000000000000000000000
--- a/build/torch28-cxx11-cu129-x86_64-linux/metadata.json
+++ /dev/null
@@ -1 +0,0 @@
-{"python-depends":[]}
\ No newline at end of file
diff --git a/build/torch29-cxx11-cu126-x86_64-linux/__init__.py b/build/torch29-cxx11-cu126-x86_64-linux/__init__.py
deleted file mode 100644
index 946160f16b9dc91fefeea037fb7ac84fd6afd802..0000000000000000000000000000000000000000
--- a/build/torch29-cxx11-cu126-x86_64-linux/__init__.py
+++ /dev/null
@@ -1,26 +0,0 @@
-import torch
-import torch.nn as nn
-
-from ._ops import ops
-
-from . import layers
-
-def dropout_add_ln_fwd(input, gamma, beta, rowscale, colscale, x0_subset, z_subset, dropout_p, epsilon, rowscale_const, z_numrows, gen, residual_in_fp32, is_rms_norm):
-    return ops.dropout_add_ln_fwd(input, gamma, beta, rowscale, colscale, x0_subset, z_subset, dropout_p, epsilon, rowscale_const, z_numrows, gen, residual_in_fp32, is_rms_norm)
-
-def dropout_add_ln_bwd(dz, dx, x, mu, rsigma, gamma, rowscale, colscale, x0_subset, z_subset, dropout_p, rowscale_const, x0_numrows, has_residual, is_rms_norm):
-    return ops.dropout_add_ln_bwd(dz, dx, x, mu, rsigma, gamma, rowscale, colscale, x0_subset, z_subset, dropout_p, rowscale_const, x0_numrows, has_residual, is_rms_norm)
-
-def dropout_add_ln_parallel_residual_fwd(input, gamma0, beta0, gamma1, beta1, dropout_p, epsilon, gen, residual_in_fp32, is_rms_norm):
-    return ops.dropout_add_ln_parallel_residual_fwd(input, gamma0, beta0, gamma1, beta1, dropout_p, epsilon, gen, residual_in_fp32, is_rms_norm)
-
-def dropout_add_ln_parallel_residual_bwd(dz0, dz1, dx, x, mu, rsigma, gamma0, gamma1, dropout_p, has_x1, has_residual, is_rms_norm):
-    return ops.dropout_add_ln_parallel_residual_bwd(dz0, dz1, dx, x, mu, rsigma, gamma0, gamma1, dropout_p, has_x1, has_residual, is_rms_norm)
-
-__all__ = [
-    "layers",
-    "dropout_add_ln_fwd",
-    "dropout_add_ln_bwd",
-    "dropout_add_ln_parallel_residual_fwd",
-    "dropout_add_ln_parallel_residual_bwd",
-]
\ No newline at end of file
diff --git a/build/torch29-cxx11-cu126-x86_64-linux/_layer_norm_fd07706.abi3.so b/build/torch29-cxx11-cu126-x86_64-linux/_layer_norm_fd07706.abi3.so
deleted file mode 100644
index dec1e0b9a762965582fbf18968fb76454b839f2e..0000000000000000000000000000000000000000
--- a/build/torch29-cxx11-cu126-x86_64-linux/_layer_norm_fd07706.abi3.so
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:bc404a5e076466f49a0be4fa53652f2a7b40f1c611478ba8d1c4ef07c524815a
-size 712034248
diff --git a/build/torch29-cxx11-cu126-x86_64-linux/_ops.py b/build/torch29-cxx11-cu126-x86_64-linux/_ops.py
deleted file mode 100644
index 58eafd610133ab10f4d6afed289b76a069f413c9..0000000000000000000000000000000000000000
--- a/build/torch29-cxx11-cu126-x86_64-linux/_ops.py
+++ /dev/null
@@ -1,9 +0,0 @@
-import torch
-from . import _layer_norm_fd07706
-ops = torch.ops._layer_norm_fd07706
-
-def add_op_namespace_prefix(op_name: str):
-    """
-    Prefix op by namespace.
-    """
-    return f"_layer_norm_fd07706::{op_name}"
\ No newline at end of file
diff --git a/build/torch29-cxx11-cu126-x86_64-linux/layer_norm/__init__.py b/build/torch29-cxx11-cu126-x86_64-linux/layer_norm/__init__.py
deleted file mode 100644
index 03dbc1afe1cf156661a2b1b22003cd5f599a0309..0000000000000000000000000000000000000000
--- a/build/torch29-cxx11-cu126-x86_64-linux/layer_norm/__init__.py
+++ /dev/null
@@ -1,26 +0,0 @@
-import ctypes
-import sys
-
-import importlib
-from pathlib import Path
-from types import ModuleType
-
-def _import_from_path(file_path: Path) -> ModuleType:
-    # We cannot use the module name as-is, after adding it to `sys.modules`,
-    # it would also be used for other imports. So, we make a module name that
-    # depends on the path for it to be unique using the hex-encoded hash of
-    # the path.
-    path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value)
-    module_name = path_hash
-    spec = importlib.util.spec_from_file_location(module_name, file_path)
-    if spec is None:
-        raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
-    module = importlib.util.module_from_spec(spec)
-    if module is None:
-        raise ImportError(f"Cannot load module {module_name} from spec")
-    sys.modules[module_name] = module
-    spec.loader.exec_module(module)  # type: ignore
-    return module
-
-
-globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))
diff --git a/build/torch29-cxx11-cu126-x86_64-linux/layers.py b/build/torch29-cxx11-cu126-x86_64-linux/layers.py
deleted file mode 100644
index 7ed883f42ead452f8b60f498ec11302c53d3cf74..0000000000000000000000000000000000000000
--- a/build/torch29-cxx11-cu126-x86_64-linux/layers.py
+++ /dev/null
@@ -1,51 +0,0 @@
-import torch
-import torch.nn as nn
-
-from ._ops import ops
-
-
-class LayerNorm(nn.Module):
-    weight: torch.Tensor
-    variance_epsilon: float
-
-    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
-        output = ops.dropout_add_ln_fwd(
-            hidden_states.view(-1, hidden_states.shape[-1]),
-            gamma = self.weight,
-            beta = None,
-            rowscale = None,
-            colscale = None,
-            x0_subset = None,
-            z_subset = None,
-            dropout_p = 0,
-            epsilon = self.variance_epsilon,
-            rowscale_const = 1.0,
-            z_numrows = hidden_states.shape[1],
-            gen = None,
-            residual_in_fp32 = False,
-            is_rms_norm = False,
-        )
-        return output[0].view(hidden_states.shape)
-
-class LlamaRMSNorm(nn.Module):
-    weight: torch.Tensor
-    variance_epsilon: float
-
-    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
-        output = ops.dropout_add_ln_fwd(
-            hidden_states.view(-1, hidden_states.shape[-1]),
-            gamma = self.weight,
-            beta = None,
-            rowscale = None,
-            colscale = None,
-            x0_subset = None,
-            z_subset = None,
-            dropout_p = 0,
-            epsilon = self.variance_epsilon,
-            rowscale_const = 1.0,
-            z_numrows = hidden_states.shape[1],
-            gen = None,
-            residual_in_fp32 = False,
-            is_rms_norm = True,
-        )
-        return output[0].view(hidden_states.shape)
\ No newline at end of file
diff --git a/build/torch29-cxx11-cu126-x86_64-linux/metadata.json b/build/torch29-cxx11-cu126-x86_64-linux/metadata.json
deleted file mode 100644
index 76bafa5f33b6818aa6bb4cab04be811b87519b44..0000000000000000000000000000000000000000
--- a/build/torch29-cxx11-cu126-x86_64-linux/metadata.json
+++ /dev/null
@@ -1 +0,0 @@
-{"python-depends":[]}
\ No newline at end of file
diff --git a/build/torch29-cxx11-cu128-x86_64-linux/__init__.py b/build/torch29-cxx11-cu128-x86_64-linux/__init__.py
deleted file mode 100644
index 946160f16b9dc91fefeea037fb7ac84fd6afd802..0000000000000000000000000000000000000000
--- a/build/torch29-cxx11-cu128-x86_64-linux/__init__.py
+++ /dev/null
@@ -1,26 +0,0 @@
-import torch
-import torch.nn as nn
-
-from ._ops import ops
-
-from . import layers
-
-def dropout_add_ln_fwd(input, gamma, beta, rowscale, colscale, x0_subset, z_subset, dropout_p, epsilon, rowscale_const, z_numrows, gen, residual_in_fp32, is_rms_norm):
-    return ops.dropout_add_ln_fwd(input, gamma, beta, rowscale, colscale, x0_subset, z_subset, dropout_p, epsilon, rowscale_const, z_numrows, gen, residual_in_fp32, is_rms_norm)
-
-def dropout_add_ln_bwd(dz, dx, x, mu, rsigma, gamma, rowscale, colscale, x0_subset, z_subset, dropout_p, rowscale_const, x0_numrows, has_residual, is_rms_norm):
-    return ops.dropout_add_ln_bwd(dz, dx, x, mu, rsigma, gamma, rowscale, colscale, x0_subset, z_subset, dropout_p, rowscale_const, x0_numrows, has_residual, is_rms_norm)
-
-def dropout_add_ln_parallel_residual_fwd(input, gamma0, beta0, gamma1, beta1, dropout_p, epsilon, gen, residual_in_fp32, is_rms_norm):
-    return ops.dropout_add_ln_parallel_residual_fwd(input, gamma0, beta0, gamma1, beta1, dropout_p, epsilon, gen, residual_in_fp32, is_rms_norm)
-
-def dropout_add_ln_parallel_residual_bwd(dz0, dz1, dx, x, mu, rsigma, gamma0, gamma1, dropout_p, has_x1, has_residual, is_rms_norm):
-    return ops.dropout_add_ln_parallel_residual_bwd(dz0, dz1, dx, x, mu, rsigma, gamma0, gamma1, dropout_p, has_x1, has_residual, is_rms_norm)
-
-__all__ = [
-    "layers",
-    "dropout_add_ln_fwd",
-    "dropout_add_ln_bwd",
-    "dropout_add_ln_parallel_residual_fwd",
-    "dropout_add_ln_parallel_residual_bwd",
-]
\ No newline at end of file
diff --git a/build/torch29-cxx11-cu128-x86_64-linux/_layer_norm_fd07706.abi3.so b/build/torch29-cxx11-cu128-x86_64-linux/_layer_norm_fd07706.abi3.so
deleted file mode 100644
index 09248c23d30665178d409dcb53bfcd274d522aaf..0000000000000000000000000000000000000000
--- a/build/torch29-cxx11-cu128-x86_64-linux/_layer_norm_fd07706.abi3.so
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:8da63d5fa4aeca09b5b5f1b3355c401fc516a15622637a2c65a03081fc55fdb3
-size 1231343160
diff --git a/build/torch29-cxx11-cu128-x86_64-linux/_ops.py b/build/torch29-cxx11-cu128-x86_64-linux/_ops.py
deleted file mode 100644
index 58eafd610133ab10f4d6afed289b76a069f413c9..0000000000000000000000000000000000000000
--- a/build/torch29-cxx11-cu128-x86_64-linux/_ops.py
+++ /dev/null
@@ -1,9 +0,0 @@
-import torch
-from . import _layer_norm_fd07706
-ops = torch.ops._layer_norm_fd07706
-
-def add_op_namespace_prefix(op_name: str):
-    """
-    Prefix op by namespace.
-    """
-    return f"_layer_norm_fd07706::{op_name}"
\ No newline at end of file
diff --git a/build/torch29-cxx11-cu128-x86_64-linux/layer_norm/__init__.py b/build/torch29-cxx11-cu128-x86_64-linux/layer_norm/__init__.py
deleted file mode 100644
index 03dbc1afe1cf156661a2b1b22003cd5f599a0309..0000000000000000000000000000000000000000
--- a/build/torch29-cxx11-cu128-x86_64-linux/layer_norm/__init__.py
+++ /dev/null
@@ -1,26 +0,0 @@
-import ctypes
-import sys
-
-import importlib
-from pathlib import Path
-from types import ModuleType
-
-def _import_from_path(file_path: Path) -> ModuleType:
-    # We cannot use the module name as-is, after adding it to `sys.modules`,
-    # it would also be used for other imports. So, we make a module name that
-    # depends on the path for it to be unique using the hex-encoded hash of
-    # the path.
-    path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value)
-    module_name = path_hash
-    spec = importlib.util.spec_from_file_location(module_name, file_path)
-    if spec is None:
-        raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
-    module = importlib.util.module_from_spec(spec)
-    if module is None:
-        raise ImportError(f"Cannot load module {module_name} from spec")
-    sys.modules[module_name] = module
-    spec.loader.exec_module(module)  # type: ignore
-    return module
-
-
-globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))
diff --git a/build/torch29-cxx11-cu128-x86_64-linux/layers.py b/build/torch29-cxx11-cu128-x86_64-linux/layers.py
deleted file mode 100644
index 7ed883f42ead452f8b60f498ec11302c53d3cf74..0000000000000000000000000000000000000000
--- a/build/torch29-cxx11-cu128-x86_64-linux/layers.py
+++ /dev/null
@@ -1,51 +0,0 @@
-import torch
-import torch.nn as nn
-
-from ._ops import ops
-
-
-class LayerNorm(nn.Module):
-    weight: torch.Tensor
-    variance_epsilon: float
-
-    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
-        output = ops.dropout_add_ln_fwd(
-            hidden_states.view(-1, hidden_states.shape[-1]),
-            gamma = self.weight,
-            beta = None,
-            rowscale = None,
-            colscale = None,
-            x0_subset = None,
-            z_subset = None,
-            dropout_p = 0,
-            epsilon = self.variance_epsilon,
-            rowscale_const = 1.0,
-            z_numrows = hidden_states.shape[1],
-            gen = None,
-            residual_in_fp32 = False,
-            is_rms_norm = False,
-        )
-        return output[0].view(hidden_states.shape)
-
-class LlamaRMSNorm(nn.Module):
-    weight: torch.Tensor
-    variance_epsilon: float
-
-    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
-        output = ops.dropout_add_ln_fwd(
-            hidden_states.view(-1, hidden_states.shape[-1]),
-            gamma = self.weight,
-            beta = None,
-            rowscale = None,
-            colscale = None,
-            x0_subset = None,
-            z_subset = None,
-            dropout_p = 0,
-            epsilon = self.variance_epsilon,
-            rowscale_const = 1.0,
-            z_numrows = hidden_states.shape[1],
-            gen = None,
-            residual_in_fp32 = False,
-            is_rms_norm = True,
-        )
-        return output[0].view(hidden_states.shape)
\ No newline at end of file
diff --git a/build/torch29-cxx11-cu128-x86_64-linux/metadata.json b/build/torch29-cxx11-cu128-x86_64-linux/metadata.json
deleted file mode 100644
index 76bafa5f33b6818aa6bb4cab04be811b87519b44..0000000000000000000000000000000000000000
--- a/build/torch29-cxx11-cu128-x86_64-linux/metadata.json
+++ /dev/null
@@ -1 +0,0 @@
-{"python-depends":[]}
\ No newline at end of file
diff --git a/build/torch29-cxx11-cu130-x86_64-linux/__init__.py b/build/torch29-cxx11-cu130-x86_64-linux/__init__.py
deleted file mode 100644
index 946160f16b9dc91fefeea037fb7ac84fd6afd802..0000000000000000000000000000000000000000
--- a/build/torch29-cxx11-cu130-x86_64-linux/__init__.py
+++ /dev/null
@@ -1,26 +0,0 @@
-import torch
-import torch.nn as nn
-
-from ._ops import ops
-
-from . import layers
-
-def dropout_add_ln_fwd(input, gamma, beta, rowscale, colscale, x0_subset, z_subset, dropout_p, epsilon, rowscale_const, z_numrows, gen, residual_in_fp32, is_rms_norm):
-    return ops.dropout_add_ln_fwd(input, gamma, beta, rowscale, colscale, x0_subset, z_subset, dropout_p, epsilon, rowscale_const, z_numrows, gen, residual_in_fp32, is_rms_norm)
-
-def dropout_add_ln_bwd(dz, dx, x, mu, rsigma, gamma, rowscale, colscale, x0_subset, z_subset, dropout_p, rowscale_const, x0_numrows, has_residual, is_rms_norm):
-    return ops.dropout_add_ln_bwd(dz, dx, x, mu, rsigma, gamma, rowscale, colscale, x0_subset, z_subset, dropout_p, rowscale_const, x0_numrows, has_residual, is_rms_norm)
-
-def dropout_add_ln_parallel_residual_fwd(input, gamma0, beta0, gamma1, beta1, dropout_p, epsilon, gen, residual_in_fp32, is_rms_norm):
-    return ops.dropout_add_ln_parallel_residual_fwd(input, gamma0, beta0, gamma1, beta1, dropout_p, epsilon, gen, residual_in_fp32, is_rms_norm)
-
-def dropout_add_ln_parallel_residual_bwd(dz0, dz1, dx, x, mu, rsigma, gamma0, gamma1, dropout_p, has_x1, has_residual, is_rms_norm):
-    return ops.dropout_add_ln_parallel_residual_bwd(dz0, dz1, dx, x, mu, rsigma, gamma0, gamma1, dropout_p, has_x1, has_residual, is_rms_norm)
-
-__all__ = [
-    "layers",
-    "dropout_add_ln_fwd",
-    "dropout_add_ln_bwd",
-    "dropout_add_ln_parallel_residual_fwd",
-    "dropout_add_ln_parallel_residual_bwd",
-]
\ No newline at end of file
diff --git a/build/torch29-cxx11-cu130-x86_64-linux/_layer_norm_fd07706.abi3.so b/build/torch29-cxx11-cu130-x86_64-linux/_layer_norm_fd07706.abi3.so
deleted file mode 100644
index 065ce5210251c015968eccb0e361a66d19a2fe31..0000000000000000000000000000000000000000
--- a/build/torch29-cxx11-cu130-x86_64-linux/_layer_norm_fd07706.abi3.so
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:7bf6e51b89bda807e770de087312693e67a4f215e8b036c39e92b6bd7de12ebb
-size 1238272584
diff --git a/build/torch29-cxx11-cu130-x86_64-linux/_ops.py b/build/torch29-cxx11-cu130-x86_64-linux/_ops.py
deleted file mode 100644
index 58eafd610133ab10f4d6afed289b76a069f413c9..0000000000000000000000000000000000000000
--- a/build/torch29-cxx11-cu130-x86_64-linux/_ops.py
+++ /dev/null
@@ -1,9 +0,0 @@
-import torch
-from . import _layer_norm_fd07706
-ops = torch.ops._layer_norm_fd07706
-
-def add_op_namespace_prefix(op_name: str):
-    """
-    Prefix op by namespace.
-    """
-    return f"_layer_norm_fd07706::{op_name}"
\ No newline at end of file
diff --git a/build/torch29-cxx11-cu130-x86_64-linux/layer_norm/__init__.py b/build/torch29-cxx11-cu130-x86_64-linux/layer_norm/__init__.py
deleted file mode 100644
index 03dbc1afe1cf156661a2b1b22003cd5f599a0309..0000000000000000000000000000000000000000
--- a/build/torch29-cxx11-cu130-x86_64-linux/layer_norm/__init__.py
+++ /dev/null
@@ -1,26 +0,0 @@
-import ctypes
-import sys
-
-import importlib
-from pathlib import Path
-from types import ModuleType
-
-def _import_from_path(file_path: Path) -> ModuleType:
-    # We cannot use the module name as-is, after adding it to `sys.modules`,
-    # it would also be used for other imports. So, we make a module name that
-    # depends on the path for it to be unique using the hex-encoded hash of
-    # the path.
-    path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value)
-    module_name = path_hash
-    spec = importlib.util.spec_from_file_location(module_name, file_path)
-    if spec is None:
-        raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
-    module = importlib.util.module_from_spec(spec)
-    if module is None:
-        raise ImportError(f"Cannot load module {module_name} from spec")
-    sys.modules[module_name] = module
-    spec.loader.exec_module(module)  # type: ignore
-    return module
-
-
-globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))
diff --git a/build/torch29-cxx11-cu130-x86_64-linux/layers.py b/build/torch29-cxx11-cu130-x86_64-linux/layers.py
deleted file mode 100644
index 7ed883f42ead452f8b60f498ec11302c53d3cf74..0000000000000000000000000000000000000000
--- a/build/torch29-cxx11-cu130-x86_64-linux/layers.py
+++ /dev/null
@@ -1,51 +0,0 @@
-import torch
-import torch.nn as nn
-
-from ._ops import ops
-
-
-class LayerNorm(nn.Module):
-    weight: torch.Tensor
-    variance_epsilon: float
-
-    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
-        output = ops.dropout_add_ln_fwd(
-            hidden_states.view(-1, hidden_states.shape[-1]),
-            gamma = self.weight,
-            beta = None,
-            rowscale = None,
-            colscale = None,
-            x0_subset = None,
-            z_subset = None,
-            dropout_p = 0,
-            epsilon = self.variance_epsilon,
-            rowscale_const = 1.0,
-            z_numrows = hidden_states.shape[1],
-            gen = None,
-            residual_in_fp32 = False,
-            is_rms_norm = False,
-        )
-        return output[0].view(hidden_states.shape)
-
-class LlamaRMSNorm(nn.Module):
-    weight: torch.Tensor
-    variance_epsilon: float
-
-    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
-        output = ops.dropout_add_ln_fwd(
-            hidden_states.view(-1, hidden_states.shape[-1]),
-            gamma = self.weight,
-            beta = None,
-            rowscale = None,
-            colscale = None,
-            x0_subset = None,
-            z_subset = None,
-            dropout_p = 0,
-            epsilon = self.variance_epsilon,
-            rowscale_const = 1.0,
-            z_numrows = hidden_states.shape[1],
-            gen = None,
-            residual_in_fp32 = False,
-            is_rms_norm = True,
-        )
-        return output[0].view(hidden_states.shape)
\ No newline at end of file
diff --git a/build/torch29-cxx11-cu130-x86_64-linux/metadata.json b/build/torch29-cxx11-cu130-x86_64-linux/metadata.json
deleted file mode 100644
index 76bafa5f33b6818aa6bb4cab04be811b87519b44..0000000000000000000000000000000000000000
--- a/build/torch29-cxx11-cu130-x86_64-linux/metadata.json
+++ /dev/null
@@ -1 +0,0 @@
-{"python-depends":[]}
\ No newline at end of file