diff --git a/benchmarks/README.md b/benchmarks/README.md
index 3044b37a9bf811952268da76ce91edd56a1d3ff6..e6ad09a2b1b28b342aac1ee2c58f2d12ac6d01ff 100644
--- a/benchmarks/README.md
+++ b/benchmarks/README.md
@@ -15,7 +15,7 @@ Results can be saved as **CSV files** or **plots**.
 ## Usage
 
 ```bash
-python run_cases.py --case <CASE> [--plot] [--save-path <DIR>]
+python main.py --case <CASE> [--plot] [--save-path <DIR>]
 ```
 
 - `--case` (required): one of `rms`, `add_rms`, `poly`, `mul_poly`
@@ -25,8 +25,8 @@ python run_cases.py --case <CASE> [--plot] [--save-path <DIR>]
 ## Examples
 
 ```bash
-python run_cases.py --case add_rms --save-path ./results/
-python run_cases.py --case poly --plot --save-path ./plots/
+python main.py --case add_rms --save-path ./results/
+python main.py --case poly --plot --save-path ./plots/
 ```
 
 ## Output
diff --git a/build/torch27-cxx11-cu118-x86_64-linux/activation/_activation_20250907180255.abi3.so b/build/torch27-cxx11-cu118-x86_64-linux/activation/_activation_20250907180255.abi3.so
new file mode 100644
index 0000000000000000000000000000000000000000..1b3674d54c044dddf2d037c1d3bac522bc19440c
--- /dev/null
+++ b/build/torch27-cxx11-cu118-x86_64-linux/activation/_activation_20250907180255.abi3.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d21a85bf21aa74f1281541e658acfd4f4326d902efe3578b059eccf054443284
+size 8089696
diff --git a/build/torch27-cxx11-cu118-x86_64-linux/activation/_activation_2f66548_dirty.abi3.so b/build/torch27-cxx11-cu118-x86_64-linux/activation/_activation_2f66548_dirty.abi3.so
deleted file mode 100755
index caeb9ed368cfd01c6c05dc434df4a81c3dcf172b..0000000000000000000000000000000000000000
--- a/build/torch27-cxx11-cu118-x86_64-linux/activation/_activation_2f66548_dirty.abi3.so
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:eee7aed4f37c13c6accf42db53acad998b46dfc3c6fd70e976b552482a08118e
-size 8600336
diff --git a/build/torch27-cxx11-cu118-x86_64-linux/activation/_activation_e5e2eeb_dirty.abi3.so b/build/torch27-cxx11-cu118-x86_64-linux/activation/_activation_e5e2eeb_dirty.abi3.so
new file mode 100644
index 0000000000000000000000000000000000000000..5a1e5a3587679a157ba7b067d28d762c6577fb8f
--- /dev/null
+++ b/build/torch27-cxx11-cu118-x86_64-linux/activation/_activation_e5e2eeb_dirty.abi3.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ec9ea7edc8b27f7983e20d615ab470cef6b82975afc214becfddfd05a867a839
+size 8600336
diff --git a/build/torch27-cxx11-cu118-x86_64-linux/activation/_activation_f517c97_dirty.abi3.so b/build/torch27-cxx11-cu118-x86_64-linux/activation/_activation_f517c97_dirty.abi3.so
new file mode 100644
index 0000000000000000000000000000000000000000..f3a874e78aac8a38f35e3d3aa4d26c892c9a0d66
--- /dev/null
+++ b/build/torch27-cxx11-cu118-x86_64-linux/activation/_activation_f517c97_dirty.abi3.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bd84c828d4c15e96d65d6c8f0eb7a945ee8167d92e978b2ebce03eeaf41e7fce
+size 4405112
diff --git a/build/torch27-cxx11-cu118-x86_64-linux/activation/_ops.py b/build/torch27-cxx11-cu118-x86_64-linux/activation/_ops.py
index 43d5009fa5c18b4b7de8ca60b6c76352542ac995..fa68616c13166de47619ed052ed1eba664998b82 100644
--- a/build/torch27-cxx11-cu118-x86_64-linux/activation/_ops.py
+++ b/build/torch27-cxx11-cu118-x86_64-linux/activation/_ops.py
@@ -1,9 +1,9 @@
 import torch
-from . import _activation_2f66548_dirty
-ops = torch.ops._activation_2f66548_dirty
+from . import _activation_e5e2eeb_dirty
+ops = torch.ops._activation_e5e2eeb_dirty
 
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"_activation_2f66548_dirty::{op_name}"
\ No newline at end of file
+    return f"_activation_e5e2eeb_dirty::{op_name}"
\ No newline at end of file
diff --git a/build/torch27-cxx11-cu118-x86_64-linux/activation/rms_norm.py b/build/torch27-cxx11-cu118-x86_64-linux/activation/rms_norm.py
index 7f9a470d9bb3833083cfa711e9d16c336b73238d..0e2c29e955b87025e63f4795d58a14104318f736 100644
--- a/build/torch27-cxx11-cu118-x86_64-linux/activation/rms_norm.py
+++ b/build/torch27-cxx11-cu118-x86_64-linux/activation/rms_norm.py
@@ -70,9 +70,8 @@ class FusedAddRMSNormFunction(torch.autograd.Function):
         weight_grad = torch.empty_like(
             weight) if ctx.needs_input_grad[2] else None
 
-        ops.fused_add_rms_norm_backward(grad, weight_grad, output_grad,
-                                        add_output_grad, add_output, weight,
-                                        eps)
+        ops.fused_add_rms_norm_backward(grad, weight_grad, output_grad, add_output_grad, add_output,
+                              weight, eps)
         input_grad = grad if need_in else None
         residual_grad = grad if need_res else None
 
diff --git a/build/torch27-cxx11-cu126-x86_64-linux/activation/_activation_20250907180255.abi3.so b/build/torch27-cxx11-cu126-x86_64-linux/activation/_activation_20250907180255.abi3.so
new file mode 100644
index 0000000000000000000000000000000000000000..df3c3ae7785a3c30c36d900923c1dd7a349448db
--- /dev/null
+++ b/build/torch27-cxx11-cu126-x86_64-linux/activation/_activation_20250907180255.abi3.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:74d4955271509451b946495da75f69a0f978e7258b8303fe3c077e585c0d3e6a
+size 8272456
diff --git a/build/torch27-cxx11-cu126-x86_64-linux/activation/_activation_2f66548_dirty.abi3.so b/build/torch27-cxx11-cu126-x86_64-linux/activation/_activation_2f66548_dirty.abi3.so
deleted file mode 100755
index 799c8a7d9606654194f8dbe533494633bc021ce0..0000000000000000000000000000000000000000
--- a/build/torch27-cxx11-cu126-x86_64-linux/activation/_activation_2f66548_dirty.abi3.so
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:06901740688c1d1eed4d91275d28864866ac4a768ea6b97d4894f15869f5cedd
-size 8779000
diff --git a/build/torch27-cxx11-cu126-x86_64-linux/activation/_activation_e5e2eeb_dirty.abi3.so b/build/torch27-cxx11-cu126-x86_64-linux/activation/_activation_e5e2eeb_dirty.abi3.so
new file mode 100644
index 0000000000000000000000000000000000000000..30ab86df7c79038bc40bcd1292a2fa606b44ebc1
--- /dev/null
+++ b/build/torch27-cxx11-cu126-x86_64-linux/activation/_activation_e5e2eeb_dirty.abi3.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5d3511410cdc288d2fafc500223ed2e625e360f50fa341809cf892fb2c822924
+size 8779000
diff --git a/build/torch27-cxx11-cu126-x86_64-linux/activation/_activation_f517c97_dirty.abi3.so b/build/torch27-cxx11-cu126-x86_64-linux/activation/_activation_f517c97_dirty.abi3.so
new file mode 100644
index 0000000000000000000000000000000000000000..689760116de97c954865cd824732f04d2f746728
--- /dev/null
+++ b/build/torch27-cxx11-cu126-x86_64-linux/activation/_activation_f517c97_dirty.abi3.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:caffcadbb99fbaa27e8a81d5ef508f2e1a798e7626d618c3cf5b0d387d2c8686
+size 4618624
diff --git a/build/torch27-cxx11-cu126-x86_64-linux/activation/_ops.py b/build/torch27-cxx11-cu126-x86_64-linux/activation/_ops.py
index 43d5009fa5c18b4b7de8ca60b6c76352542ac995..fa68616c13166de47619ed052ed1eba664998b82 100644
--- a/build/torch27-cxx11-cu126-x86_64-linux/activation/_ops.py
+++ b/build/torch27-cxx11-cu126-x86_64-linux/activation/_ops.py
@@ -1,9 +1,9 @@
 import torch
-from . import _activation_2f66548_dirty
-ops = torch.ops._activation_2f66548_dirty
+from . import _activation_e5e2eeb_dirty
+ops = torch.ops._activation_e5e2eeb_dirty
 
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"_activation_2f66548_dirty::{op_name}"
\ No newline at end of file
+    return f"_activation_e5e2eeb_dirty::{op_name}"
\ No newline at end of file
diff --git a/build/torch27-cxx11-cu126-x86_64-linux/activation/rms_norm.py b/build/torch27-cxx11-cu126-x86_64-linux/activation/rms_norm.py
index 7f9a470d9bb3833083cfa711e9d16c336b73238d..0e2c29e955b87025e63f4795d58a14104318f736 100644
--- a/build/torch27-cxx11-cu126-x86_64-linux/activation/rms_norm.py
+++ b/build/torch27-cxx11-cu126-x86_64-linux/activation/rms_norm.py
@@ -70,9 +70,8 @@ class FusedAddRMSNormFunction(torch.autograd.Function):
         weight_grad = torch.empty_like(
             weight) if ctx.needs_input_grad[2] else None
 
-        ops.fused_add_rms_norm_backward(grad, weight_grad, output_grad,
-                                        add_output_grad, add_output, weight,
-                                        eps)
+        ops.fused_add_rms_norm_backward(grad, weight_grad, output_grad, add_output_grad, add_output,
+                              weight, eps)
         input_grad = grad if need_in else None
         residual_grad = grad if need_res else None
 
diff --git a/build/torch27-cxx11-cu128-x86_64-linux/activation/_activation_20250907180255.abi3.so b/build/torch27-cxx11-cu128-x86_64-linux/activation/_activation_20250907180255.abi3.so
new file mode 100644
index 0000000000000000000000000000000000000000..0de3488964fc7207148b7b9b62cc4db838e64c7b
--- /dev/null
+++ b/build/torch27-cxx11-cu128-x86_64-linux/activation/_activation_20250907180255.abi3.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0bf0d2ab5ff5520704e0b0c959b61d0043d360cfd4335950e69677873a87e436
+size 12792112
diff --git a/build/torch27-cxx11-cu128-x86_64-linux/activation/_activation_2f66548_dirty.abi3.so b/build/torch27-cxx11-cu128-x86_64-linux/activation/_activation_2f66548_dirty.abi3.so
deleted file mode 100755
index ea4af2d0ff84587f8e4116903fec5718b04a66ac..0000000000000000000000000000000000000000
--- a/build/torch27-cxx11-cu128-x86_64-linux/activation/_activation_2f66548_dirty.abi3.so
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:2b85e3e1f218392802dc8642b037f06ad7ac388d597ca9d1fb2b7314e9af5f30
-size 13818872
diff --git a/build/torch27-cxx11-cu128-x86_64-linux/activation/_activation_e5e2eeb_dirty.abi3.so b/build/torch27-cxx11-cu128-x86_64-linux/activation/_activation_e5e2eeb_dirty.abi3.so
new file mode 100644
index 0000000000000000000000000000000000000000..b57174622d44e91556d4646cc225ce02ae186236
--- /dev/null
+++ b/build/torch27-cxx11-cu128-x86_64-linux/activation/_activation_e5e2eeb_dirty.abi3.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:25efc9c32e4bd6609a8326025aad861cbf79b544893755fe44519c9df7224c40
+size 13818872
diff --git a/build/torch27-cxx11-cu128-x86_64-linux/activation/_activation_f517c97_dirty.abi3.so b/build/torch27-cxx11-cu128-x86_64-linux/activation/_activation_f517c97_dirty.abi3.so
new file mode 100644
index 0000000000000000000000000000000000000000..45881f2bf18843120634173e5a0974ebdcbe07c6
--- /dev/null
+++ b/build/torch27-cxx11-cu128-x86_64-linux/activation/_activation_f517c97_dirty.abi3.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3b7c6ece8e8d316c4cc5fe46b1cec4422b2f61e9bb7240af71a2b4a35975d8e6
+size 6676528
diff --git a/build/torch27-cxx11-cu128-x86_64-linux/activation/_ops.py b/build/torch27-cxx11-cu128-x86_64-linux/activation/_ops.py
index 43d5009fa5c18b4b7de8ca60b6c76352542ac995..fa68616c13166de47619ed052ed1eba664998b82 100644
--- a/build/torch27-cxx11-cu128-x86_64-linux/activation/_ops.py
+++ b/build/torch27-cxx11-cu128-x86_64-linux/activation/_ops.py
@@ -1,9 +1,9 @@
 import torch
-from . import _activation_2f66548_dirty
-ops = torch.ops._activation_2f66548_dirty
+from . import _activation_e5e2eeb_dirty
+ops = torch.ops._activation_e5e2eeb_dirty
 
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"_activation_2f66548_dirty::{op_name}"
\ No newline at end of file
+    return f"_activation_e5e2eeb_dirty::{op_name}"
\ No newline at end of file
diff --git a/build/torch27-cxx11-cu128-x86_64-linux/activation/rms_norm.py b/build/torch27-cxx11-cu128-x86_64-linux/activation/rms_norm.py
index 7f9a470d9bb3833083cfa711e9d16c336b73238d..0e2c29e955b87025e63f4795d58a14104318f736 100644
--- a/build/torch27-cxx11-cu128-x86_64-linux/activation/rms_norm.py
+++ b/build/torch27-cxx11-cu128-x86_64-linux/activation/rms_norm.py
@@ -70,9 +70,8 @@ class FusedAddRMSNormFunction(torch.autograd.Function):
         weight_grad = torch.empty_like(
             weight) if ctx.needs_input_grad[2] else None
 
-        ops.fused_add_rms_norm_backward(grad, weight_grad, output_grad,
-                                        add_output_grad, add_output, weight,
-                                        eps)
+        ops.fused_add_rms_norm_backward(grad, weight_grad, output_grad, add_output_grad, add_output,
+                              weight, eps)
         input_grad = grad if need_in else None
         residual_grad = grad if need_res else None
 
diff --git a/build/torch27-cxx11-rocm63-x86_64-linux/activation/_activation_20250907180255.abi3.so b/build/torch27-cxx11-rocm63-x86_64-linux/activation/_activation_20250907180255.abi3.so
new file mode 100644
index 0000000000000000000000000000000000000000..57361102c13046a6a1aab2f7125193ece35b21da
--- /dev/null
+++ b/build/torch27-cxx11-rocm63-x86_64-linux/activation/_activation_20250907180255.abi3.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:640322a8fac8fd9d8e9f195a3034c4ee0f81ee1acf897fd7c482a84ce47a1bec
+size 4160688
diff --git a/build/torch27-cxx11-rocm63-x86_64-linux/activation/_activation_2f66548_dirty.abi3.so b/build/torch27-cxx11-rocm63-x86_64-linux/activation/_activation_2f66548_dirty.abi3.so
deleted file mode 100755
index 4791f94688206bd483d67f8df007bc76531288f0..0000000000000000000000000000000000000000
--- a/build/torch27-cxx11-rocm63-x86_64-linux/activation/_activation_2f66548_dirty.abi3.so
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:525a7d6eebafd2c18e2a15c17162b8328c5ade11f87fe9032a136e92c182e888
-size 2764584
diff --git a/build/torch27-cxx11-rocm63-x86_64-linux/activation/_activation_e5e2eeb_dirty.abi3.so b/build/torch27-cxx11-rocm63-x86_64-linux/activation/_activation_e5e2eeb_dirty.abi3.so
new file mode 100644
index 0000000000000000000000000000000000000000..c0069ea9e4f962208b869f671b23aa15f728cb92
--- /dev/null
+++ b/build/torch27-cxx11-rocm63-x86_64-linux/activation/_activation_e5e2eeb_dirty.abi3.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c80d05690547f2842d416ebb85c9f830370373bc7e6c54ba08eec61b3690280f
+size 4386744
diff --git a/build/torch27-cxx11-rocm63-x86_64-linux/activation/_activation_f517c97_dirty.abi3.so b/build/torch27-cxx11-rocm63-x86_64-linux/activation/_activation_f517c97_dirty.abi3.so
new file mode 100644
index 0000000000000000000000000000000000000000..6e05f5b3045576c970e67481e0182f9aaf5a88d2
--- /dev/null
+++ b/build/torch27-cxx11-rocm63-x86_64-linux/activation/_activation_f517c97_dirty.abi3.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4be173820e2a4bf4b6b8de6b63faf6544b599d9b0583f650a940adaef4a048b3
+size 2899184
diff --git a/build/torch27-cxx11-rocm63-x86_64-linux/activation/_ops.py b/build/torch27-cxx11-rocm63-x86_64-linux/activation/_ops.py
index 43d5009fa5c18b4b7de8ca60b6c76352542ac995..fa68616c13166de47619ed052ed1eba664998b82 100644
--- a/build/torch27-cxx11-rocm63-x86_64-linux/activation/_ops.py
+++ b/build/torch27-cxx11-rocm63-x86_64-linux/activation/_ops.py
@@ -1,9 +1,9 @@
 import torch
-from . import _activation_2f66548_dirty
-ops = torch.ops._activation_2f66548_dirty
+from . import _activation_e5e2eeb_dirty
+ops = torch.ops._activation_e5e2eeb_dirty
 
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"_activation_2f66548_dirty::{op_name}"
\ No newline at end of file
+    return f"_activation_e5e2eeb_dirty::{op_name}"
\ No newline at end of file
diff --git a/build/torch27-cxx11-rocm63-x86_64-linux/activation/rms_norm.py b/build/torch27-cxx11-rocm63-x86_64-linux/activation/rms_norm.py
index 7f9a470d9bb3833083cfa711e9d16c336b73238d..0e2c29e955b87025e63f4795d58a14104318f736 100644
--- a/build/torch27-cxx11-rocm63-x86_64-linux/activation/rms_norm.py
+++ b/build/torch27-cxx11-rocm63-x86_64-linux/activation/rms_norm.py
@@ -70,9 +70,8 @@ class FusedAddRMSNormFunction(torch.autograd.Function):
         weight_grad = torch.empty_like(
             weight) if ctx.needs_input_grad[2] else None
 
-        ops.fused_add_rms_norm_backward(grad, weight_grad, output_grad,
-                                        add_output_grad, add_output, weight,
-                                        eps)
+        ops.fused_add_rms_norm_backward(grad, weight_grad, output_grad, add_output_grad, add_output,
+                              weight, eps)
         input_grad = grad if need_in else None
         residual_grad = grad if need_res else None
 
diff --git a/build/torch28-cxx11-cu126-x86_64-linux/activation/_activation_20250907180255.abi3.so b/build/torch28-cxx11-cu126-x86_64-linux/activation/_activation_20250907180255.abi3.so
new file mode 100644
index 0000000000000000000000000000000000000000..c703b3b19594e8b20ee5b4dc7692fbdad8079365
--- /dev/null
+++ b/build/torch28-cxx11-cu126-x86_64-linux/activation/_activation_20250907180255.abi3.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1768d8d5072ac06d937cb5332988c6b3bfaa191f72d1369a22d2c577e9a3bca2
+size 8215280
diff --git a/build/torch28-cxx11-cu126-x86_64-linux/activation/_activation_2f66548_dirty.abi3.so b/build/torch28-cxx11-cu126-x86_64-linux/activation/_activation_2f66548_dirty.abi3.so
deleted file mode 100755
index 90420bc46ff1e7da4b82a5f954e0bf1d040058a3..0000000000000000000000000000000000000000
--- a/build/torch28-cxx11-cu126-x86_64-linux/activation/_activation_2f66548_dirty.abi3.so
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:b0d8ee61b8e62210df21e40e31560b84c4e985ba448917a8e95bf44167cb9505
-size 8730200
diff --git a/build/torch28-cxx11-cu126-x86_64-linux/activation/_activation_e5e2eeb_dirty.abi3.so b/build/torch28-cxx11-cu126-x86_64-linux/activation/_activation_e5e2eeb_dirty.abi3.so
new file mode 100644
index 0000000000000000000000000000000000000000..a50764fa05ea1e21294f84d922050f5d70f7db93
--- /dev/null
+++ b/build/torch28-cxx11-cu126-x86_64-linux/activation/_activation_e5e2eeb_dirty.abi3.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:440f5c17a7ddaf73c506bbc84fd1405e2e188b8ceaf4977910608be6b91e89bf
+size 8730200
diff --git a/build/torch28-cxx11-cu126-x86_64-linux/activation/_activation_f517c97_dirty.abi3.so b/build/torch28-cxx11-cu126-x86_64-linux/activation/_activation_f517c97_dirty.abi3.so
new file mode 100644
index 0000000000000000000000000000000000000000..6c12e8b587a01fe10f4e73cca22a5a27fd2e794a
--- /dev/null
+++ b/build/torch28-cxx11-cu126-x86_64-linux/activation/_activation_f517c97_dirty.abi3.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cb222449350310f90f7271f34fcf9052c9eec28021fee0348130a8f239a97bf4
+size 4571976
diff --git a/build/torch28-cxx11-cu126-x86_64-linux/activation/_ops.py b/build/torch28-cxx11-cu126-x86_64-linux/activation/_ops.py
index 43d5009fa5c18b4b7de8ca60b6c76352542ac995..fa68616c13166de47619ed052ed1eba664998b82 100644
--- a/build/torch28-cxx11-cu126-x86_64-linux/activation/_ops.py
+++ b/build/torch28-cxx11-cu126-x86_64-linux/activation/_ops.py
@@ -1,9 +1,9 @@
 import torch
-from . import _activation_2f66548_dirty
-ops = torch.ops._activation_2f66548_dirty
+from . import _activation_e5e2eeb_dirty
+ops = torch.ops._activation_e5e2eeb_dirty
 
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"_activation_2f66548_dirty::{op_name}"
\ No newline at end of file
+    return f"_activation_e5e2eeb_dirty::{op_name}"
\ No newline at end of file
diff --git a/build/torch28-cxx11-cu126-x86_64-linux/activation/rms_norm.py b/build/torch28-cxx11-cu126-x86_64-linux/activation/rms_norm.py
index 7f9a470d9bb3833083cfa711e9d16c336b73238d..0e2c29e955b87025e63f4795d58a14104318f736 100644
--- a/build/torch28-cxx11-cu126-x86_64-linux/activation/rms_norm.py
+++ b/build/torch28-cxx11-cu126-x86_64-linux/activation/rms_norm.py
@@ -70,9 +70,8 @@ class FusedAddRMSNormFunction(torch.autograd.Function):
         weight_grad = torch.empty_like(
             weight) if ctx.needs_input_grad[2] else None
 
-        ops.fused_add_rms_norm_backward(grad, weight_grad, output_grad,
-                                        add_output_grad, add_output, weight,
-                                        eps)
+        ops.fused_add_rms_norm_backward(grad, weight_grad, output_grad, add_output_grad, add_output,
+                              weight, eps)
         input_grad = grad if need_in else None
         residual_grad = grad if need_res else None
 
diff --git a/build/torch28-cxx11-cu128-x86_64-linux/activation/_activation_20250907180255.abi3.so b/build/torch28-cxx11-cu128-x86_64-linux/activation/_activation_20250907180255.abi3.so
new file mode 100644
index 0000000000000000000000000000000000000000..ecdc467a674247fe3898453418ce88a9983d08c5
--- /dev/null
+++ b/build/torch28-cxx11-cu128-x86_64-linux/activation/_activation_20250907180255.abi3.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:37a572bd877980ab8c0331ca5682191cb5a2b1f05bc69ea493a9e24f7728ba3f
+size 12730840
diff --git a/build/torch28-cxx11-cu128-x86_64-linux/activation/_activation_2f66548_dirty.abi3.so b/build/torch28-cxx11-cu128-x86_64-linux/activation/_activation_2f66548_dirty.abi3.so
deleted file mode 100755
index 0950172a10c93c133f412032c1699330d373c3fe..0000000000000000000000000000000000000000
--- a/build/torch28-cxx11-cu128-x86_64-linux/activation/_activation_2f66548_dirty.abi3.so
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:cad1446a247f173104e280c0e38fef98cdb68aa005f76a555192b73397fdda7a
-size 13770064
diff --git a/build/torch28-cxx11-cu128-x86_64-linux/activation/_activation_e5e2eeb_dirty.abi3.so b/build/torch28-cxx11-cu128-x86_64-linux/activation/_activation_e5e2eeb_dirty.abi3.so
new file mode 100644
index 0000000000000000000000000000000000000000..d3e4416a52e04ff527f48c721c6c4f1fa16059ed
--- /dev/null
+++ b/build/torch28-cxx11-cu128-x86_64-linux/activation/_activation_e5e2eeb_dirty.abi3.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1dfb6d468f9cef0239d4ea47f0a247fa721befc5b8db86e1cddfc25f1814b67a
+size 13770064
diff --git a/build/torch28-cxx11-cu128-x86_64-linux/activation/_activation_f517c97_dirty.abi3.so b/build/torch28-cxx11-cu128-x86_64-linux/activation/_activation_f517c97_dirty.abi3.so
new file mode 100644
index 0000000000000000000000000000000000000000..ff5ceef3b840a9957dab36434074fa21417f6711
--- /dev/null
+++ b/build/torch28-cxx11-cu128-x86_64-linux/activation/_activation_f517c97_dirty.abi3.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:79be6527f579de1133e50a66310d7d0690649dcac63009a54b5e68809408f12a
+size 6634208
diff --git a/build/torch28-cxx11-cu128-x86_64-linux/activation/_ops.py b/build/torch28-cxx11-cu128-x86_64-linux/activation/_ops.py
index 43d5009fa5c18b4b7de8ca60b6c76352542ac995..fa68616c13166de47619ed052ed1eba664998b82 100644
--- a/build/torch28-cxx11-cu128-x86_64-linux/activation/_ops.py
+++ b/build/torch28-cxx11-cu128-x86_64-linux/activation/_ops.py
@@ -1,9 +1,9 @@
 import torch
-from . import _activation_2f66548_dirty
-ops = torch.ops._activation_2f66548_dirty
+from . import _activation_e5e2eeb_dirty
+ops = torch.ops._activation_e5e2eeb_dirty
 
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"_activation_2f66548_dirty::{op_name}"
\ No newline at end of file
+    return f"_activation_e5e2eeb_dirty::{op_name}"
\ No newline at end of file
diff --git a/build/torch28-cxx11-cu128-x86_64-linux/activation/rms_norm.py b/build/torch28-cxx11-cu128-x86_64-linux/activation/rms_norm.py
index 7f9a470d9bb3833083cfa711e9d16c336b73238d..0e2c29e955b87025e63f4795d58a14104318f736 100644
--- a/build/torch28-cxx11-cu128-x86_64-linux/activation/rms_norm.py
+++ b/build/torch28-cxx11-cu128-x86_64-linux/activation/rms_norm.py
@@ -70,9 +70,8 @@ class FusedAddRMSNormFunction(torch.autograd.Function):
         weight_grad = torch.empty_like(
             weight) if ctx.needs_input_grad[2] else None
 
-        ops.fused_add_rms_norm_backward(grad, weight_grad, output_grad,
-                                        add_output_grad, add_output, weight,
-                                        eps)
+        ops.fused_add_rms_norm_backward(grad, weight_grad, output_grad, add_output_grad, add_output,
+                              weight, eps)
         input_grad = grad if need_in else None
         residual_grad = grad if need_res else None
 
diff --git a/build/torch28-cxx11-cu129-x86_64-linux/activation/_activation_20250907180255.abi3.so b/build/torch28-cxx11-cu129-x86_64-linux/activation/_activation_20250907180255.abi3.so
new file mode 100644
index 0000000000000000000000000000000000000000..d6c8a74ea050b78cf9dcd4c43ac618094b0ca303
--- /dev/null
+++ b/build/torch28-cxx11-cu129-x86_64-linux/activation/_activation_20250907180255.abi3.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3f15919c4cac697cde550af16256e338472400e50df751e93622350c7f626bc8
+size 12726208
diff --git a/build/torch28-cxx11-cu129-x86_64-linux/activation/_activation_2f66548_dirty.abi3.so b/build/torch28-cxx11-cu129-x86_64-linux/activation/_activation_2f66548_dirty.abi3.so
deleted file mode 100755
index 255fb1a1d5b663ec3554ed4dce88512498760b61..0000000000000000000000000000000000000000
--- a/build/torch28-cxx11-cu129-x86_64-linux/activation/_activation_2f66548_dirty.abi3.so
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:9fddc0b356a5bae74d48638c35d3d0d566812a152d249534371c5e733c3d0dce
-size 13753152
diff --git a/build/torch28-cxx11-cu129-x86_64-linux/activation/_activation_e5e2eeb_dirty.abi3.so b/build/torch28-cxx11-cu129-x86_64-linux/activation/_activation_e5e2eeb_dirty.abi3.so
new file mode 100644
index 0000000000000000000000000000000000000000..ebdc9108aad1a1dfd16dc0d8baebf827bc0476f4
--- /dev/null
+++ b/build/torch28-cxx11-cu129-x86_64-linux/activation/_activation_e5e2eeb_dirty.abi3.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0815a50e61497b357b2b90fc28602b3f53a25da1161edd2cb0b0fbebc7c62bf6
+size 13757248
diff --git a/build/torch28-cxx11-cu129-x86_64-linux/activation/_activation_f517c97_dirty.abi3.so b/build/torch28-cxx11-cu129-x86_64-linux/activation/_activation_f517c97_dirty.abi3.so
new file mode 100644
index 0000000000000000000000000000000000000000..f7ab393218a3d825e10b9e1e838440d8a543ce19
--- /dev/null
+++ b/build/torch28-cxx11-cu129-x86_64-linux/activation/_activation_f517c97_dirty.abi3.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8d95e4491d35cb022a6eaa2febbc555f203893f989a4fb1cc483b2632f141869
+size 6687456
diff --git a/build/torch28-cxx11-cu129-x86_64-linux/activation/_ops.py b/build/torch28-cxx11-cu129-x86_64-linux/activation/_ops.py
index 43d5009fa5c18b4b7de8ca60b6c76352542ac995..fa68616c13166de47619ed052ed1eba664998b82 100644
--- a/build/torch28-cxx11-cu129-x86_64-linux/activation/_ops.py
+++ b/build/torch28-cxx11-cu129-x86_64-linux/activation/_ops.py
@@ -1,9 +1,9 @@
 import torch
-from . import _activation_2f66548_dirty
-ops = torch.ops._activation_2f66548_dirty
+from . import _activation_e5e2eeb_dirty
+ops = torch.ops._activation_e5e2eeb_dirty
 
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"_activation_2f66548_dirty::{op_name}"
\ No newline at end of file
+    return f"_activation_e5e2eeb_dirty::{op_name}"
\ No newline at end of file
diff --git a/build/torch28-cxx11-cu129-x86_64-linux/activation/rms_norm.py b/build/torch28-cxx11-cu129-x86_64-linux/activation/rms_norm.py
index 7f9a470d9bb3833083cfa711e9d16c336b73238d..0e2c29e955b87025e63f4795d58a14104318f736 100644
--- a/build/torch28-cxx11-cu129-x86_64-linux/activation/rms_norm.py
+++ b/build/torch28-cxx11-cu129-x86_64-linux/activation/rms_norm.py
@@ -70,9 +70,8 @@ class FusedAddRMSNormFunction(torch.autograd.Function):
         weight_grad = torch.empty_like(
             weight) if ctx.needs_input_grad[2] else None
 
-        ops.fused_add_rms_norm_backward(grad, weight_grad, output_grad,
-                                        add_output_grad, add_output, weight,
-                                        eps)
+        ops.fused_add_rms_norm_backward(grad, weight_grad, output_grad, add_output_grad, add_output,
+                              weight, eps)
         input_grad = grad if need_in else None
         residual_grad = grad if need_res else None
 
diff --git a/build/torch28-cxx11-rocm63-x86_64-linux/activation/_activation_20250907180255.abi3.so b/build/torch28-cxx11-rocm63-x86_64-linux/activation/_activation_20250907180255.abi3.so
new file mode 100644
index 0000000000000000000000000000000000000000..670a8291fdc208c690447600ee77449e1fac9929
--- /dev/null
+++ b/build/torch28-cxx11-rocm63-x86_64-linux/activation/_activation_20250907180255.abi3.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e72d4bb4459a5da96ca5eda1d305237a361140f0e25360e3d20326a22f1b6d47
+size 4165584
diff --git a/build/torch28-cxx11-rocm63-x86_64-linux/activation/_activation_2f66548_dirty.abi3.so b/build/torch28-cxx11-rocm63-x86_64-linux/activation/_activation_2f66548_dirty.abi3.so
deleted file mode 100755
index bdaa3de514c22703cd0327624181eeedbb39c6a7..0000000000000000000000000000000000000000
--- a/build/torch28-cxx11-rocm63-x86_64-linux/activation/_activation_2f66548_dirty.abi3.so
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:3bebd1aa6a3414be5d77c1306c50310d05df8d75a7ea6c3dbd2bb6bb41a98685
-size 2765376
diff --git a/build/torch28-cxx11-rocm63-x86_64-linux/activation/_activation_e5e2eeb_dirty.abi3.so b/build/torch28-cxx11-rocm63-x86_64-linux/activation/_activation_e5e2eeb_dirty.abi3.so
new file mode 100644
index 0000000000000000000000000000000000000000..a7e8ec3a1957ec7fa888600e141e2d6acdb1d4be
--- /dev/null
+++ b/build/torch28-cxx11-rocm63-x86_64-linux/activation/_activation_e5e2eeb_dirty.abi3.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4d404c88b72f1b6da551a64b3373395e80403a52ccff14fc401be3e8ee184d83
+size 4387536
diff --git a/build/torch28-cxx11-rocm63-x86_64-linux/activation/_activation_f517c97_dirty.abi3.so b/build/torch28-cxx11-rocm63-x86_64-linux/activation/_activation_f517c97_dirty.abi3.so
new file mode 100644
index 0000000000000000000000000000000000000000..1843d54d5917206c0947de8effc1cf347ea9e853
--- /dev/null
+++ b/build/torch28-cxx11-rocm63-x86_64-linux/activation/_activation_f517c97_dirty.abi3.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:58116124bb2b5d11de2753dd0c30a1e4c84759f18599da7016c791bad37528e9
+size 2899984
diff --git a/build/torch28-cxx11-rocm63-x86_64-linux/activation/_ops.py b/build/torch28-cxx11-rocm63-x86_64-linux/activation/_ops.py
index 43d5009fa5c18b4b7de8ca60b6c76352542ac995..fa68616c13166de47619ed052ed1eba664998b82 100644
--- a/build/torch28-cxx11-rocm63-x86_64-linux/activation/_ops.py
+++ b/build/torch28-cxx11-rocm63-x86_64-linux/activation/_ops.py
@@ -1,9 +1,9 @@
 import torch
-from . import _activation_2f66548_dirty
-ops = torch.ops._activation_2f66548_dirty
+from . import _activation_e5e2eeb_dirty
+ops = torch.ops._activation_e5e2eeb_dirty
 
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"_activation_2f66548_dirty::{op_name}"
\ No newline at end of file
+    return f"_activation_e5e2eeb_dirty::{op_name}"
\ No newline at end of file
diff --git a/build/torch28-cxx11-rocm63-x86_64-linux/activation/rms_norm.py b/build/torch28-cxx11-rocm63-x86_64-linux/activation/rms_norm.py
index 7f9a470d9bb3833083cfa711e9d16c336b73238d..0e2c29e955b87025e63f4795d58a14104318f736 100644
--- a/build/torch28-cxx11-rocm63-x86_64-linux/activation/rms_norm.py
+++ b/build/torch28-cxx11-rocm63-x86_64-linux/activation/rms_norm.py
@@ -70,9 +70,8 @@ class FusedAddRMSNormFunction(torch.autograd.Function):
         weight_grad = torch.empty_like(
             weight) if ctx.needs_input_grad[2] else None
 
-        ops.fused_add_rms_norm_backward(grad, weight_grad, output_grad,
-                                        add_output_grad, add_output, weight,
-                                        eps)
+        ops.fused_add_rms_norm_backward(grad, weight_grad, output_grad, add_output_grad, add_output,
+                              weight, eps)
         input_grad = grad if need_in else None
         residual_grad = grad if need_res else None
 
diff --git a/build/torch28-cxx11-rocm64-x86_64-linux/activation/_activation_20250907180255.abi3.so b/build/torch28-cxx11-rocm64-x86_64-linux/activation/_activation_20250907180255.abi3.so
new file mode 100644
index 0000000000000000000000000000000000000000..c8f702b9ecfdc1c01dcdd2880d088458c4f11c2d
--- /dev/null
+++ b/build/torch28-cxx11-rocm64-x86_64-linux/activation/_activation_20250907180255.abi3.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c3325c2748cf7a070383068995078f93f440cc95fbed491d00bd414cdd851376
+size 4171472
diff --git a/build/torch28-cxx11-rocm64-x86_64-linux/activation/_activation_2f66548_dirty.abi3.so b/build/torch28-cxx11-rocm64-x86_64-linux/activation/_activation_2f66548_dirty.abi3.so
deleted file mode 100755
index cbee03a444abf08c9a9a745138ab65ccfc306dd2..0000000000000000000000000000000000000000
--- a/build/torch28-cxx11-rocm64-x86_64-linux/activation/_activation_2f66548_dirty.abi3.so
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:49b998e99c3b26775a59f8bdfe2033fd7871ce4743f8234a4d8360c9867afdaf
-size 2771080
diff --git a/build/torch28-cxx11-rocm64-x86_64-linux/activation/_activation_e5e2eeb_dirty.abi3.so b/build/torch28-cxx11-rocm64-x86_64-linux/activation/_activation_e5e2eeb_dirty.abi3.so
new file mode 100644
index 0000000000000000000000000000000000000000..dafb119147ed94f04203dd8c8a366ef9a6ed7680
--- /dev/null
+++ b/build/torch28-cxx11-rocm64-x86_64-linux/activation/_activation_e5e2eeb_dirty.abi3.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b8d52dee20ba3c4619f7c614984f656f34f32dd74ba6cf866cf80f32245117cf
+size 4393240
diff --git a/build/torch28-cxx11-rocm64-x86_64-linux/activation/_activation_f517c97_dirty.abi3.so b/build/torch28-cxx11-rocm64-x86_64-linux/activation/_activation_f517c97_dirty.abi3.so
new file mode 100644
index 0000000000000000000000000000000000000000..86ae5f11c05134ad7347aca293b13aeff2caf4c1
--- /dev/null
+++ b/build/torch28-cxx11-rocm64-x86_64-linux/activation/_activation_f517c97_dirty.abi3.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:65319d3d93ac3bf0f2939fa4e53ddfc8cd633b9e396cde3a97d63b9041ba03a7
+size 2885344
diff --git a/build/torch28-cxx11-rocm64-x86_64-linux/activation/_ops.py b/build/torch28-cxx11-rocm64-x86_64-linux/activation/_ops.py
index 43d5009fa5c18b4b7de8ca60b6c76352542ac995..fa68616c13166de47619ed052ed1eba664998b82 100644
--- a/build/torch28-cxx11-rocm64-x86_64-linux/activation/_ops.py
+++ b/build/torch28-cxx11-rocm64-x86_64-linux/activation/_ops.py
@@ -1,9 +1,9 @@
 import torch
-from . import _activation_2f66548_dirty
-ops = torch.ops._activation_2f66548_dirty
+from . import _activation_e5e2eeb_dirty
+ops = torch.ops._activation_e5e2eeb_dirty
 
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"_activation_2f66548_dirty::{op_name}"
\ No newline at end of file
+    return f"_activation_e5e2eeb_dirty::{op_name}"
\ No newline at end of file
diff --git a/build/torch28-cxx11-rocm64-x86_64-linux/activation/rms_norm.py b/build/torch28-cxx11-rocm64-x86_64-linux/activation/rms_norm.py
index 7f9a470d9bb3833083cfa711e9d16c336b73238d..0e2c29e955b87025e63f4795d58a14104318f736 100644
--- a/build/torch28-cxx11-rocm64-x86_64-linux/activation/rms_norm.py
+++ b/build/torch28-cxx11-rocm64-x86_64-linux/activation/rms_norm.py
@@ -70,9 +70,8 @@ class FusedAddRMSNormFunction(torch.autograd.Function):
         weight_grad = torch.empty_like(
             weight) if ctx.needs_input_grad[2] else None
 
-        ops.fused_add_rms_norm_backward(grad, weight_grad, output_grad,
-                                        add_output_grad, add_output, weight,
-                                        eps)
+        ops.fused_add_rms_norm_backward(grad, weight_grad, output_grad, add_output_grad, add_output,
+                              weight, eps)
         input_grad = grad if need_in else None
         residual_grad = grad if need_res else None
 
diff --git a/build/torch29-cxx11-cu126-x86_64-linux/activation/__init__.py b/build/torch29-cxx11-cu126-x86_64-linux/activation/__init__.py
deleted file mode 100644
index 938feeff791794d011fec65cf86df957e2c4da2f..0000000000000000000000000000000000000000
--- a/build/torch29-cxx11-cu126-x86_64-linux/activation/__init__.py
+++ /dev/null
@@ -1,52 +0,0 @@
-import torch
-
-from . import layers
-from ._ops import ops
-from .poly_norm import FusedMulPolyNormFunction, PolyNormFunction
-from .rms_norm import FusedAddRMSNormFunction, RMSNormFunction
-
-
-def poly_norm(
-    x: torch.Tensor,
-    weight: torch.Tensor,
-    bias: torch.Tensor,
-    eps: float = 1e-6,
-) -> None:
-    return PolyNormFunction.apply(x, weight, bias, eps)
-
-
-def fused_mul_poly_norm(
-    x: torch.Tensor,
-    mul: torch.Tensor,
-    weight: torch.Tensor,
-    bias: torch.Tensor,
-    eps: float = 1e-6,
-) -> None:
-    return FusedMulPolyNormFunction.apply(x, mul, weight, bias, eps)
-
-
-def rms_norm(
-    x: torch.Tensor,
-    weight: torch.Tensor,
-    eps: float = 1e-6,
-) -> None:
-    return RMSNormFunction.apply(x, weight, eps)
-
-
-def fused_add_rms_norm(
-    x: torch.Tensor,
-    residual: torch.Tensor,
-    weight: torch.Tensor,
-    eps: float = 1e-6,
-) -> None:
-    return FusedAddRMSNormFunction.apply(x, residual, weight, eps)
-
-
-__all__ = [
-    "poly_norm",
-    "fused_mul_poly_norm",
-    "rms_norm",
-    "fused_add_rms_norm",
-    "layers",
-    "ops",
-]
diff --git a/build/torch29-cxx11-cu126-x86_64-linux/activation/_activation_2f66548_dirty.abi3.so b/build/torch29-cxx11-cu126-x86_64-linux/activation/_activation_2f66548_dirty.abi3.so
deleted file mode 100755
index e34aea1348f452eb4be5107678a0087b89063228..0000000000000000000000000000000000000000
--- a/build/torch29-cxx11-cu126-x86_64-linux/activation/_activation_2f66548_dirty.abi3.so
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:2fd8313bde2684cf629375637613278a7a0575272924a30a4a290fdbb339910c
-size 8730176
diff --git a/build/torch29-cxx11-cu126-x86_64-linux/activation/_ops.py b/build/torch29-cxx11-cu126-x86_64-linux/activation/_ops.py
deleted file mode 100644
index 43d5009fa5c18b4b7de8ca60b6c76352542ac995..0000000000000000000000000000000000000000
--- a/build/torch29-cxx11-cu126-x86_64-linux/activation/_ops.py
+++ /dev/null
@@ -1,9 +0,0 @@
-import torch
-from . import _activation_2f66548_dirty
-ops = torch.ops._activation_2f66548_dirty
-
-def add_op_namespace_prefix(op_name: str):
-    """
-    Prefix op by namespace.
-    """
-    return f"_activation_2f66548_dirty::{op_name}"
\ No newline at end of file
diff --git a/build/torch29-cxx11-cu126-x86_64-linux/activation/layers.py b/build/torch29-cxx11-cu126-x86_64-linux/activation/layers.py
deleted file mode 100644
index b1880bdbe8dd73ac76d7d4561cf60f9765097ca9..0000000000000000000000000000000000000000
--- a/build/torch29-cxx11-cu126-x86_64-linux/activation/layers.py
+++ /dev/null
@@ -1,94 +0,0 @@
-import torch
-import torch.nn as nn
-from torch.nn import init
-
-from .poly_norm import FusedMulPolyNormFunction, PolyNormFunction
-from .rms_norm import FusedAddRMSNormFunction, RMSNormFunction
-
-
-class PolyNorm(nn.Module):
-
-    def __init__(self, eps=1e-6, dtype: torch.dtype = torch.float32):
-        super().__init__()
-        self.weight = torch.nn.Parameter(torch.ones(3, dtype=dtype) / 3)
-        self.bias = torch.nn.Parameter(torch.zeros(1, dtype=dtype))
-        self.eps = eps
-
-    def forward(
-        self,
-        x: torch.Tensor,
-    ):
-        return PolyNormFunction.apply(x, self.weight, self.bias, self.eps)
-
-    def reset_parameters(self) -> None:
-        """
-        Resets parameters based on their initialization used in __init__.
-        """
-        init.ones_(self.weight)
-        init.zeros_(self.bias)
-
-
-class FusedMulPolyNorm(nn.Module):
-
-    def __init__(self, eps=1e-6, dtype: torch.dtype = torch.float32):
-        super().__init__()
-        self.weight = torch.nn.Parameter(torch.ones(3, dtype=dtype) / 3)
-        self.bias = torch.nn.Parameter(torch.zeros(1, dtype=dtype))
-        self.eps = eps
-
-    def forward(
-        self,
-        x: torch.Tensor,
-        mul: torch.Tensor,
-    ):
-        return FusedMulPolyNormFunction.apply(x, mul, self.weight, self.bias,
-                                              self.eps)
-
-    def reset_parameters(self) -> None:
-        """
-        Resets parameters based on their initialization used in __init__.
-        """
-        init.ones_(self.weight)
-        init.zeros_(self.bias)
-
-
-class RMSNorm(nn.Module):
-
-    def __init__(self, dim: int, eps=1e-6, dtype: torch.dtype = torch.float32):
-        super().__init__()
-        self.weight = torch.nn.Parameter(torch.ones(dim, dtype=dtype))
-        self.eps = eps
-
-    def forward(
-        self,
-        x: torch.Tensor,
-    ):
-        return RMSNormFunction.apply(x, self.weight, self.eps)
-
-    def reset_parameters(self) -> None:
-        """
-        Resets parameters based on their initialization used in __init__.
-        """
-        init.ones_(self.weight)
-
-
-class FusedAddRMSNorm(nn.Module):
-
-    def __init__(self, dim: int, eps=1e-6, dtype: torch.dtype = torch.float32):
-        super().__init__()
-        self.weight = torch.nn.Parameter(torch.ones(dim, dtype=dtype))
-        self.eps = eps
-
-    def forward(
-        self,
-        x: torch.Tensor,
-        residual: torch.Tensor,
-    ):
-        return FusedAddRMSNormFunction.apply(x, residual, self.weight,
-                                             self.eps)
-
-    def reset_parameters(self) -> None:
-        """
-        Resets parameters based on their initialization used in __init__.
-        """
-        init.ones_(self.weight)
diff --git a/build/torch29-cxx11-cu126-x86_64-linux/activation/poly_norm.py b/build/torch29-cxx11-cu126-x86_64-linux/activation/poly_norm.py
deleted file mode 100644
index 8a0fd85f1835e02a36eb9184874d77dcad8221f9..0000000000000000000000000000000000000000
--- a/build/torch29-cxx11-cu126-x86_64-linux/activation/poly_norm.py
+++ /dev/null
@@ -1,76 +0,0 @@
-import torch
-
-from ._ops import ops
-
-
-# Inherit from Function
-class PolyNormFunction(torch.autograd.Function):
-    # Note that forward, setup_context, and backward are @staticmethods
-    @staticmethod
-    def forward(input, weight, bias, eps):
-        output = torch.empty_like(input)
-        ops.poly_norm(output, input, weight, bias, eps)
-        return output
-
-    @staticmethod
-    # inputs is a Tuple of all of the inputs passed to forward.
-    # output is the output of the forward().
-    def setup_context(ctx, inputs, output):
-        input, weight, bias, eps = inputs
-        ctx.save_for_backward(input, weight)
-        ctx.eps = eps
-
-    # This function has only a single output, so it gets only one gradient
-    @staticmethod
-    def backward(ctx, output_grad):
-        input, weight = ctx.saved_tensors
-        eps = ctx.eps
-
-        input_grad = torch.empty_like(
-            input) if ctx.needs_input_grad[0] else None
-        weight_grad = torch.empty_like(
-            weight) if ctx.needs_input_grad[1] else None
-        bias_grad = (torch.empty(1, dtype=weight.dtype, device=weight.device)
-                     if ctx.needs_input_grad[2] else None)
-
-        ops.poly_norm_backward(input_grad, weight_grad, bias_grad, output_grad,
-                               input, weight, eps)
-
-        return input_grad, weight_grad, bias_grad, None
-
-
-class FusedMulPolyNormFunction(torch.autograd.Function):
-    # Note that forward, setup_context, and backward are @staticmethods
-    @staticmethod
-    def forward(input, mul, weight, bias, eps):
-        output = torch.empty_like(input)
-        ops.fused_mul_poly_norm(output, input, mul, weight, bias, eps)
-        return output
-
-    @staticmethod
-    # inputs is a Tuple of all of the inputs passed to forward.
-    # output is the output of the forward().
-    def setup_context(ctx, inputs, output):
-        input, mul, weight, bias, eps = inputs
-        ctx.save_for_backward(input, mul, weight, bias)
-        ctx.eps = eps
-
-    # This function has only a single output, so it gets only one gradient
-    @staticmethod
-    def backward(ctx, output_grad):
-        input, mul, weight, bias = ctx.saved_tensors
-        eps = ctx.eps
-
-        input_grad = torch.empty_like(
-            input) if ctx.needs_input_grad[0] else None
-        mul_grad = torch.empty_like(mul) if ctx.needs_input_grad[1] else None
-        weight_grad = torch.empty_like(
-            weight) if ctx.needs_input_grad[2] else None
-        bias_grad = (torch.empty(1, dtype=weight.dtype, device=weight.device)
-                     if ctx.needs_input_grad[3] else None)
-
-        ops.fused_mul_poly_norm_backward(input_grad, mul_grad, weight_grad,
-                                         bias_grad, output_grad, input, mul,
-                                         weight, bias, eps)
-
-        return input_grad, mul_grad, weight_grad, bias_grad, None
diff --git a/build/torch29-cxx11-cu126-x86_64-linux/activation/rms_norm.py b/build/torch29-cxx11-cu126-x86_64-linux/activation/rms_norm.py
deleted file mode 100644
index 7f9a470d9bb3833083cfa711e9d16c336b73238d..0000000000000000000000000000000000000000
--- a/build/torch29-cxx11-cu126-x86_64-linux/activation/rms_norm.py
+++ /dev/null
@@ -1,79 +0,0 @@
-import torch
-
-from ._ops import ops
-
-
-# Inherit from Function
-class RMSNormFunction(torch.autograd.Function):
-    # Note that forward, setup_context, and backward are @staticmethods
-    @staticmethod
-    def forward(input, weight, eps):
-        output = torch.empty_like(input)
-        ops.rms_norm(output, input, weight, eps)
-        return output
-
-    @staticmethod
-    # inputs is a Tuple of all of the inputs passed to forward.
-    # output is the output of the forward().
-    def setup_context(ctx, inputs, output):
-        input, weight, eps = inputs
-        ctx.save_for_backward(input, weight)
-        ctx.eps = eps
-
-    # This function has only a single output, so it gets only one gradient
-    @staticmethod
-    def backward(ctx, output_grad):
-        input, weight = ctx.saved_tensors
-        eps = ctx.eps
-
-        input_grad = torch.empty_like(
-            input) if ctx.needs_input_grad[0] else None
-        weight_grad = torch.empty_like(
-            weight) if ctx.needs_input_grad[1] else None
-
-        ops.rms_norm_backward(input_grad, weight_grad, output_grad, input,
-                              weight, eps)
-
-        return input_grad, weight_grad, None
-
-
-# Inherit from Function
-class FusedAddRMSNormFunction(torch.autograd.Function):
-    # Note that forward, setup_context, and backward are @staticmethods
-    @staticmethod
-    def forward(input, residual, weight, eps):
-        output = torch.empty_like(input)
-        add_output = torch.empty_like(input)
-        ops.fused_add_rms_norm(output, add_output, input, residual, weight,
-                               eps)
-        return output, add_output
-
-    @staticmethod
-    # inputs is a Tuple of all of the inputs passed to forward.
-    # output is the output of the forward().
-    def setup_context(ctx, inputs, outputs):
-        _, _, weight, eps = inputs
-        _, add_output = outputs
-        ctx.save_for_backward(weight, add_output)
-        ctx.eps = eps
-
-    @staticmethod
-    def backward(ctx, output_grad, add_output_grad):
-        weight, add_output = ctx.saved_tensors
-        eps = ctx.eps
-
-        need_in = ctx.needs_input_grad[0]
-        need_res = ctx.needs_input_grad[1]
-
-        grad = torch.empty_like(output_grad) if need_in or need_res else None
-
-        weight_grad = torch.empty_like(
-            weight) if ctx.needs_input_grad[2] else None
-
-        ops.fused_add_rms_norm_backward(grad, weight_grad, output_grad,
-                                        add_output_grad, add_output, weight,
-                                        eps)
-        input_grad = grad if need_in else None
-        residual_grad = grad if need_res else None
-
-        return input_grad, residual_grad, weight_grad, None
diff --git a/build/torch29-cxx11-cu128-x86_64-linux/activation/__init__.py b/build/torch29-cxx11-cu128-x86_64-linux/activation/__init__.py
deleted file mode 100644
index 938feeff791794d011fec65cf86df957e2c4da2f..0000000000000000000000000000000000000000
--- a/build/torch29-cxx11-cu128-x86_64-linux/activation/__init__.py
+++ /dev/null
@@ -1,52 +0,0 @@
-import torch
-
-from . import layers
-from ._ops import ops
-from .poly_norm import FusedMulPolyNormFunction, PolyNormFunction
-from .rms_norm import FusedAddRMSNormFunction, RMSNormFunction
-
-
-def poly_norm(
-    x: torch.Tensor,
-    weight: torch.Tensor,
-    bias: torch.Tensor,
-    eps: float = 1e-6,
-) -> None:
-    return PolyNormFunction.apply(x, weight, bias, eps)
-
-
-def fused_mul_poly_norm(
-    x: torch.Tensor,
-    mul: torch.Tensor,
-    weight: torch.Tensor,
-    bias: torch.Tensor,
-    eps: float = 1e-6,
-) -> None:
-    return FusedMulPolyNormFunction.apply(x, mul, weight, bias, eps)
-
-
-def rms_norm(
-    x: torch.Tensor,
-    weight: torch.Tensor,
-    eps: float = 1e-6,
-) -> None:
-    return RMSNormFunction.apply(x, weight, eps)
-
-
-def fused_add_rms_norm(
-    x: torch.Tensor,
-    residual: torch.Tensor,
-    weight: torch.Tensor,
-    eps: float = 1e-6,
-) -> None:
-    return FusedAddRMSNormFunction.apply(x, residual, weight, eps)
-
-
-__all__ = [
-    "poly_norm",
-    "fused_mul_poly_norm",
-    "rms_norm",
-    "fused_add_rms_norm",
-    "layers",
-    "ops",
-]
diff --git a/build/torch29-cxx11-cu128-x86_64-linux/activation/_activation_2f66548_dirty.abi3.so b/build/torch29-cxx11-cu128-x86_64-linux/activation/_activation_2f66548_dirty.abi3.so
deleted file mode 100755
index 5f1bfdf80b705c2a5d280c909af62979ea0a11ed..0000000000000000000000000000000000000000
--- a/build/torch29-cxx11-cu128-x86_64-linux/activation/_activation_2f66548_dirty.abi3.so
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:0d117c064c8747f7d05eadf25fc0a99281dc09af1d4567312711c785db9ea503
-size 13770040
diff --git a/build/torch29-cxx11-cu128-x86_64-linux/activation/_ops.py b/build/torch29-cxx11-cu128-x86_64-linux/activation/_ops.py
deleted file mode 100644
index 43d5009fa5c18b4b7de8ca60b6c76352542ac995..0000000000000000000000000000000000000000
--- a/build/torch29-cxx11-cu128-x86_64-linux/activation/_ops.py
+++ /dev/null
@@ -1,9 +0,0 @@
-import torch
-from . import _activation_2f66548_dirty
-ops = torch.ops._activation_2f66548_dirty
-
-def add_op_namespace_prefix(op_name: str):
-    """
-    Prefix op by namespace.
-    """
-    return f"_activation_2f66548_dirty::{op_name}"
\ No newline at end of file
diff --git a/build/torch29-cxx11-cu128-x86_64-linux/activation/layers.py b/build/torch29-cxx11-cu128-x86_64-linux/activation/layers.py
deleted file mode 100644
index b1880bdbe8dd73ac76d7d4561cf60f9765097ca9..0000000000000000000000000000000000000000
--- a/build/torch29-cxx11-cu128-x86_64-linux/activation/layers.py
+++ /dev/null
@@ -1,94 +0,0 @@
-import torch
-import torch.nn as nn
-from torch.nn import init
-
-from .poly_norm import FusedMulPolyNormFunction, PolyNormFunction
-from .rms_norm import FusedAddRMSNormFunction, RMSNormFunction
-
-
-class PolyNorm(nn.Module):
-
-    def __init__(self, eps=1e-6, dtype: torch.dtype = torch.float32):
-        super().__init__()
-        self.weight = torch.nn.Parameter(torch.ones(3, dtype=dtype) / 3)
-        self.bias = torch.nn.Parameter(torch.zeros(1, dtype=dtype))
-        self.eps = eps
-
-    def forward(
-        self,
-        x: torch.Tensor,
-    ):
-        return PolyNormFunction.apply(x, self.weight, self.bias, self.eps)
-
-    def reset_parameters(self) -> None:
-        """
-        Resets parameters based on their initialization used in __init__.
-        """
-        init.ones_(self.weight)
-        init.zeros_(self.bias)
-
-
-class FusedMulPolyNorm(nn.Module):
-
-    def __init__(self, eps=1e-6, dtype: torch.dtype = torch.float32):
-        super().__init__()
-        self.weight = torch.nn.Parameter(torch.ones(3, dtype=dtype) / 3)
-        self.bias = torch.nn.Parameter(torch.zeros(1, dtype=dtype))
-        self.eps = eps
-
-    def forward(
-        self,
-        x: torch.Tensor,
-        mul: torch.Tensor,
-    ):
-        return FusedMulPolyNormFunction.apply(x, mul, self.weight, self.bias,
-                                              self.eps)
-
-    def reset_parameters(self) -> None:
-        """
-        Resets parameters based on their initialization used in __init__.
-        """
-        init.ones_(self.weight)
-        init.zeros_(self.bias)
-
-
-class RMSNorm(nn.Module):
-
-    def __init__(self, dim: int, eps=1e-6, dtype: torch.dtype = torch.float32):
-        super().__init__()
-        self.weight = torch.nn.Parameter(torch.ones(dim, dtype=dtype))
-        self.eps = eps
-
-    def forward(
-        self,
-        x: torch.Tensor,
-    ):
-        return RMSNormFunction.apply(x, self.weight, self.eps)
-
-    def reset_parameters(self) -> None:
-        """
-        Resets parameters based on their initialization used in __init__.
-        """
-        init.ones_(self.weight)
-
-
-class FusedAddRMSNorm(nn.Module):
-
-    def __init__(self, dim: int, eps=1e-6, dtype: torch.dtype = torch.float32):
-        super().__init__()
-        self.weight = torch.nn.Parameter(torch.ones(dim, dtype=dtype))
-        self.eps = eps
-
-    def forward(
-        self,
-        x: torch.Tensor,
-        residual: torch.Tensor,
-    ):
-        return FusedAddRMSNormFunction.apply(x, residual, self.weight,
-                                             self.eps)
-
-    def reset_parameters(self) -> None:
-        """
-        Resets parameters based on their initialization used in __init__.
-        """
-        init.ones_(self.weight)
diff --git a/build/torch29-cxx11-cu128-x86_64-linux/activation/poly_norm.py b/build/torch29-cxx11-cu128-x86_64-linux/activation/poly_norm.py
deleted file mode 100644
index 8a0fd85f1835e02a36eb9184874d77dcad8221f9..0000000000000000000000000000000000000000
--- a/build/torch29-cxx11-cu128-x86_64-linux/activation/poly_norm.py
+++ /dev/null
@@ -1,76 +0,0 @@
-import torch
-
-from ._ops import ops
-
-
-# Inherit from Function
-class PolyNormFunction(torch.autograd.Function):
-    # Note that forward, setup_context, and backward are @staticmethods
-    @staticmethod
-    def forward(input, weight, bias, eps):
-        output = torch.empty_like(input)
-        ops.poly_norm(output, input, weight, bias, eps)
-        return output
-
-    @staticmethod
-    # inputs is a Tuple of all of the inputs passed to forward.
-    # output is the output of the forward().
-    def setup_context(ctx, inputs, output):
-        input, weight, bias, eps = inputs
-        ctx.save_for_backward(input, weight)
-        ctx.eps = eps
-
-    # This function has only a single output, so it gets only one gradient
-    @staticmethod
-    def backward(ctx, output_grad):
-        input, weight = ctx.saved_tensors
-        eps = ctx.eps
-
-        input_grad = torch.empty_like(
-            input) if ctx.needs_input_grad[0] else None
-        weight_grad = torch.empty_like(
-            weight) if ctx.needs_input_grad[1] else None
-        bias_grad = (torch.empty(1, dtype=weight.dtype, device=weight.device)
-                     if ctx.needs_input_grad[2] else None)
-
-        ops.poly_norm_backward(input_grad, weight_grad, bias_grad, output_grad,
-                               input, weight, eps)
-
-        return input_grad, weight_grad, bias_grad, None
-
-
-class FusedMulPolyNormFunction(torch.autograd.Function):
-    # Note that forward, setup_context, and backward are @staticmethods
-    @staticmethod
-    def forward(input, mul, weight, bias, eps):
-        output = torch.empty_like(input)
-        ops.fused_mul_poly_norm(output, input, mul, weight, bias, eps)
-        return output
-
-    @staticmethod
-    # inputs is a Tuple of all of the inputs passed to forward.
-    # output is the output of the forward().
-    def setup_context(ctx, inputs, output):
-        input, mul, weight, bias, eps = inputs
-        ctx.save_for_backward(input, mul, weight, bias)
-        ctx.eps = eps
-
-    # This function has only a single output, so it gets only one gradient
-    @staticmethod
-    def backward(ctx, output_grad):
-        input, mul, weight, bias = ctx.saved_tensors
-        eps = ctx.eps
-
-        input_grad = torch.empty_like(
-            input) if ctx.needs_input_grad[0] else None
-        mul_grad = torch.empty_like(mul) if ctx.needs_input_grad[1] else None
-        weight_grad = torch.empty_like(
-            weight) if ctx.needs_input_grad[2] else None
-        bias_grad = (torch.empty(1, dtype=weight.dtype, device=weight.device)
-                     if ctx.needs_input_grad[3] else None)
-
-        ops.fused_mul_poly_norm_backward(input_grad, mul_grad, weight_grad,
-                                         bias_grad, output_grad, input, mul,
-                                         weight, bias, eps)
-
-        return input_grad, mul_grad, weight_grad, bias_grad, None
diff --git a/build/torch29-cxx11-cu128-x86_64-linux/activation/rms_norm.py b/build/torch29-cxx11-cu128-x86_64-linux/activation/rms_norm.py
deleted file mode 100644
index 7f9a470d9bb3833083cfa711e9d16c336b73238d..0000000000000000000000000000000000000000
--- a/build/torch29-cxx11-cu128-x86_64-linux/activation/rms_norm.py
+++ /dev/null
@@ -1,79 +0,0 @@
-import torch
-
-from ._ops import ops
-
-
-# Inherit from Function
-class RMSNormFunction(torch.autograd.Function):
-    # Note that forward, setup_context, and backward are @staticmethods
-    @staticmethod
-    def forward(input, weight, eps):
-        output = torch.empty_like(input)
-        ops.rms_norm(output, input, weight, eps)
-        return output
-
-    @staticmethod
-    # inputs is a Tuple of all of the inputs passed to forward.
-    # output is the output of the forward().
-    def setup_context(ctx, inputs, output):
-        input, weight, eps = inputs
-        ctx.save_for_backward(input, weight)
-        ctx.eps = eps
-
-    # This function has only a single output, so it gets only one gradient
-    @staticmethod
-    def backward(ctx, output_grad):
-        input, weight = ctx.saved_tensors
-        eps = ctx.eps
-
-        input_grad = torch.empty_like(
-            input) if ctx.needs_input_grad[0] else None
-        weight_grad = torch.empty_like(
-            weight) if ctx.needs_input_grad[1] else None
-
-        ops.rms_norm_backward(input_grad, weight_grad, output_grad, input,
-                              weight, eps)
-
-        return input_grad, weight_grad, None
-
-
-# Inherit from Function
-class FusedAddRMSNormFunction(torch.autograd.Function):
-    # Note that forward, setup_context, and backward are @staticmethods
-    @staticmethod
-    def forward(input, residual, weight, eps):
-        output = torch.empty_like(input)
-        add_output = torch.empty_like(input)
-        ops.fused_add_rms_norm(output, add_output, input, residual, weight,
-                               eps)
-        return output, add_output
-
-    @staticmethod
-    # inputs is a Tuple of all of the inputs passed to forward.
-    # output is the output of the forward().
-    def setup_context(ctx, inputs, outputs):
-        _, _, weight, eps = inputs
-        _, add_output = outputs
-        ctx.save_for_backward(weight, add_output)
-        ctx.eps = eps
-
-    @staticmethod
-    def backward(ctx, output_grad, add_output_grad):
-        weight, add_output = ctx.saved_tensors
-        eps = ctx.eps
-
-        need_in = ctx.needs_input_grad[0]
-        need_res = ctx.needs_input_grad[1]
-
-        grad = torch.empty_like(output_grad) if need_in or need_res else None
-
-        weight_grad = torch.empty_like(
-            weight) if ctx.needs_input_grad[2] else None
-
-        ops.fused_add_rms_norm_backward(grad, weight_grad, output_grad,
-                                        add_output_grad, add_output, weight,
-                                        eps)
-        input_grad = grad if need_in else None
-        residual_grad = grad if need_res else None
-
-        return input_grad, residual_grad, weight_grad, None
diff --git a/build/torch29-cxx11-cu130-x86_64-linux/activation/__init__.py b/build/torch29-cxx11-cu130-x86_64-linux/activation/__init__.py
deleted file mode 100644
index 938feeff791794d011fec65cf86df957e2c4da2f..0000000000000000000000000000000000000000
--- a/build/torch29-cxx11-cu130-x86_64-linux/activation/__init__.py
+++ /dev/null
@@ -1,52 +0,0 @@
-import torch
-
-from . import layers
-from ._ops import ops
-from .poly_norm import FusedMulPolyNormFunction, PolyNormFunction
-from .rms_norm import FusedAddRMSNormFunction, RMSNormFunction
-
-
-def poly_norm(
-    x: torch.Tensor,
-    weight: torch.Tensor,
-    bias: torch.Tensor,
-    eps: float = 1e-6,
-) -> None:
-    return PolyNormFunction.apply(x, weight, bias, eps)
-
-
-def fused_mul_poly_norm(
-    x: torch.Tensor,
-    mul: torch.Tensor,
-    weight: torch.Tensor,
-    bias: torch.Tensor,
-    eps: float = 1e-6,
-) -> None:
-    return FusedMulPolyNormFunction.apply(x, mul, weight, bias, eps)
-
-
-def rms_norm(
-    x: torch.Tensor,
-    weight: torch.Tensor,
-    eps: float = 1e-6,
-) -> None:
-    return RMSNormFunction.apply(x, weight, eps)
-
-
-def fused_add_rms_norm(
-    x: torch.Tensor,
-    residual: torch.Tensor,
-    weight: torch.Tensor,
-    eps: float = 1e-6,
-) -> None:
-    return FusedAddRMSNormFunction.apply(x, residual, weight, eps)
-
-
-__all__ = [
-    "poly_norm",
-    "fused_mul_poly_norm",
-    "rms_norm",
-    "fused_add_rms_norm",
-    "layers",
-    "ops",
-]
diff --git a/build/torch29-cxx11-cu130-x86_64-linux/activation/_activation_2f66548_dirty.abi3.so b/build/torch29-cxx11-cu130-x86_64-linux/activation/_activation_2f66548_dirty.abi3.so
deleted file mode 100755
index 50672bfe0aa027777c4ad359b7a7054663a86daa..0000000000000000000000000000000000000000
--- a/build/torch29-cxx11-cu130-x86_64-linux/activation/_activation_2f66548_dirty.abi3.so
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:5f5ab76766e053b036b92b81ea832a1604d4d7558457748d17c5928edbdc8fe0
-size 12479496
diff --git a/build/torch29-cxx11-cu130-x86_64-linux/activation/_ops.py b/build/torch29-cxx11-cu130-x86_64-linux/activation/_ops.py
deleted file mode 100644
index 43d5009fa5c18b4b7de8ca60b6c76352542ac995..0000000000000000000000000000000000000000
--- a/build/torch29-cxx11-cu130-x86_64-linux/activation/_ops.py
+++ /dev/null
@@ -1,9 +0,0 @@
-import torch
-from . import _activation_2f66548_dirty
-ops = torch.ops._activation_2f66548_dirty
-
-def add_op_namespace_prefix(op_name: str):
-    """
-    Prefix op by namespace.
-    """
-    return f"_activation_2f66548_dirty::{op_name}"
\ No newline at end of file
diff --git a/build/torch29-cxx11-cu130-x86_64-linux/activation/layers.py b/build/torch29-cxx11-cu130-x86_64-linux/activation/layers.py
deleted file mode 100644
index b1880bdbe8dd73ac76d7d4561cf60f9765097ca9..0000000000000000000000000000000000000000
--- a/build/torch29-cxx11-cu130-x86_64-linux/activation/layers.py
+++ /dev/null
@@ -1,94 +0,0 @@
-import torch
-import torch.nn as nn
-from torch.nn import init
-
-from .poly_norm import FusedMulPolyNormFunction, PolyNormFunction
-from .rms_norm import FusedAddRMSNormFunction, RMSNormFunction
-
-
-class PolyNorm(nn.Module):
-
-    def __init__(self, eps=1e-6, dtype: torch.dtype = torch.float32):
-        super().__init__()
-        self.weight = torch.nn.Parameter(torch.ones(3, dtype=dtype) / 3)
-        self.bias = torch.nn.Parameter(torch.zeros(1, dtype=dtype))
-        self.eps = eps
-
-    def forward(
-        self,
-        x: torch.Tensor,
-    ):
-        return PolyNormFunction.apply(x, self.weight, self.bias, self.eps)
-
-    def reset_parameters(self) -> None:
-        """
-        Resets parameters based on their initialization used in __init__.
-        """
-        init.ones_(self.weight)
-        init.zeros_(self.bias)
-
-
-class FusedMulPolyNorm(nn.Module):
-
-    def __init__(self, eps=1e-6, dtype: torch.dtype = torch.float32):
-        super().__init__()
-        self.weight = torch.nn.Parameter(torch.ones(3, dtype=dtype) / 3)
-        self.bias = torch.nn.Parameter(torch.zeros(1, dtype=dtype))
-        self.eps = eps
-
-    def forward(
-        self,
-        x: torch.Tensor,
-        mul: torch.Tensor,
-    ):
-        return FusedMulPolyNormFunction.apply(x, mul, self.weight, self.bias,
-                                              self.eps)
-
-    def reset_parameters(self) -> None:
-        """
-        Resets parameters based on their initialization used in __init__.
-        """
-        init.ones_(self.weight)
-        init.zeros_(self.bias)
-
-
-class RMSNorm(nn.Module):
-
-    def __init__(self, dim: int, eps=1e-6, dtype: torch.dtype = torch.float32):
-        super().__init__()
-        self.weight = torch.nn.Parameter(torch.ones(dim, dtype=dtype))
-        self.eps = eps
-
-    def forward(
-        self,
-        x: torch.Tensor,
-    ):
-        return RMSNormFunction.apply(x, self.weight, self.eps)
-
-    def reset_parameters(self) -> None:
-        """
-        Resets parameters based on their initialization used in __init__.
-        """
-        init.ones_(self.weight)
-
-
-class FusedAddRMSNorm(nn.Module):
-
-    def __init__(self, dim: int, eps=1e-6, dtype: torch.dtype = torch.float32):
-        super().__init__()
-        self.weight = torch.nn.Parameter(torch.ones(dim, dtype=dtype))
-        self.eps = eps
-
-    def forward(
-        self,
-        x: torch.Tensor,
-        residual: torch.Tensor,
-    ):
-        return FusedAddRMSNormFunction.apply(x, residual, self.weight,
-                                             self.eps)
-
-    def reset_parameters(self) -> None:
-        """
-        Resets parameters based on their initialization used in __init__.
-        """
-        init.ones_(self.weight)
diff --git a/build/torch29-cxx11-cu130-x86_64-linux/activation/poly_norm.py b/build/torch29-cxx11-cu130-x86_64-linux/activation/poly_norm.py
deleted file mode 100644
index 8a0fd85f1835e02a36eb9184874d77dcad8221f9..0000000000000000000000000000000000000000
--- a/build/torch29-cxx11-cu130-x86_64-linux/activation/poly_norm.py
+++ /dev/null
@@ -1,76 +0,0 @@
-import torch
-
-from ._ops import ops
-
-
-# Inherit from Function
-class PolyNormFunction(torch.autograd.Function):
-    # Note that forward, setup_context, and backward are @staticmethods
-    @staticmethod
-    def forward(input, weight, bias, eps):
-        output = torch.empty_like(input)
-        ops.poly_norm(output, input, weight, bias, eps)
-        return output
-
-    @staticmethod
-    # inputs is a Tuple of all of the inputs passed to forward.
-    # output is the output of the forward().
-    def setup_context(ctx, inputs, output):
-        input, weight, bias, eps = inputs
-        ctx.save_for_backward(input, weight)
-        ctx.eps = eps
-
-    # This function has only a single output, so it gets only one gradient
-    @staticmethod
-    def backward(ctx, output_grad):
-        input, weight = ctx.saved_tensors
-        eps = ctx.eps
-
-        input_grad = torch.empty_like(
-            input) if ctx.needs_input_grad[0] else None
-        weight_grad = torch.empty_like(
-            weight) if ctx.needs_input_grad[1] else None
-        bias_grad = (torch.empty(1, dtype=weight.dtype, device=weight.device)
-                     if ctx.needs_input_grad[2] else None)
-
-        ops.poly_norm_backward(input_grad, weight_grad, bias_grad, output_grad,
-                               input, weight, eps)
-
-        return input_grad, weight_grad, bias_grad, None
-
-
-class FusedMulPolyNormFunction(torch.autograd.Function):
-    # Note that forward, setup_context, and backward are @staticmethods
-    @staticmethod
-    def forward(input, mul, weight, bias, eps):
-        output = torch.empty_like(input)
-        ops.fused_mul_poly_norm(output, input, mul, weight, bias, eps)
-        return output
-
-    @staticmethod
-    # inputs is a Tuple of all of the inputs passed to forward.
-    # output is the output of the forward().
-    def setup_context(ctx, inputs, output):
-        input, mul, weight, bias, eps = inputs
-        ctx.save_for_backward(input, mul, weight, bias)
-        ctx.eps = eps
-
-    # This function has only a single output, so it gets only one gradient
-    @staticmethod
-    def backward(ctx, output_grad):
-        input, mul, weight, bias = ctx.saved_tensors
-        eps = ctx.eps
-
-        input_grad = torch.empty_like(
-            input) if ctx.needs_input_grad[0] else None
-        mul_grad = torch.empty_like(mul) if ctx.needs_input_grad[1] else None
-        weight_grad = torch.empty_like(
-            weight) if ctx.needs_input_grad[2] else None
-        bias_grad = (torch.empty(1, dtype=weight.dtype, device=weight.device)
-                     if ctx.needs_input_grad[3] else None)
-
-        ops.fused_mul_poly_norm_backward(input_grad, mul_grad, weight_grad,
-                                         bias_grad, output_grad, input, mul,
-                                         weight, bias, eps)
-
-        return input_grad, mul_grad, weight_grad, bias_grad, None
diff --git a/build/torch29-cxx11-cu130-x86_64-linux/activation/rms_norm.py b/build/torch29-cxx11-cu130-x86_64-linux/activation/rms_norm.py
deleted file mode 100644
index 7f9a470d9bb3833083cfa711e9d16c336b73238d..0000000000000000000000000000000000000000
--- a/build/torch29-cxx11-cu130-x86_64-linux/activation/rms_norm.py
+++ /dev/null
@@ -1,79 +0,0 @@
-import torch
-
-from ._ops import ops
-
-
-# Inherit from Function
-class RMSNormFunction(torch.autograd.Function):
-    # Note that forward, setup_context, and backward are @staticmethods
-    @staticmethod
-    def forward(input, weight, eps):
-        output = torch.empty_like(input)
-        ops.rms_norm(output, input, weight, eps)
-        return output
-
-    @staticmethod
-    # inputs is a Tuple of all of the inputs passed to forward.
-    # output is the output of the forward().
-    def setup_context(ctx, inputs, output):
-        input, weight, eps = inputs
-        ctx.save_for_backward(input, weight)
-        ctx.eps = eps
-
-    # This function has only a single output, so it gets only one gradient
-    @staticmethod
-    def backward(ctx, output_grad):
-        input, weight = ctx.saved_tensors
-        eps = ctx.eps
-
-        input_grad = torch.empty_like(
-            input) if ctx.needs_input_grad[0] else None
-        weight_grad = torch.empty_like(
-            weight) if ctx.needs_input_grad[1] else None
-
-        ops.rms_norm_backward(input_grad, weight_grad, output_grad, input,
-                              weight, eps)
-
-        return input_grad, weight_grad, None
-
-
-# Inherit from Function
-class FusedAddRMSNormFunction(torch.autograd.Function):
-    # Note that forward, setup_context, and backward are @staticmethods
-    @staticmethod
-    def forward(input, residual, weight, eps):
-        output = torch.empty_like(input)
-        add_output = torch.empty_like(input)
-        ops.fused_add_rms_norm(output, add_output, input, residual, weight,
-                               eps)
-        return output, add_output
-
-    @staticmethod
-    # inputs is a Tuple of all of the inputs passed to forward.
-    # output is the output of the forward().
-    def setup_context(ctx, inputs, outputs):
-        _, _, weight, eps = inputs
-        _, add_output = outputs
-        ctx.save_for_backward(weight, add_output)
-        ctx.eps = eps
-
-    @staticmethod
-    def backward(ctx, output_grad, add_output_grad):
-        weight, add_output = ctx.saved_tensors
-        eps = ctx.eps
-
-        need_in = ctx.needs_input_grad[0]
-        need_res = ctx.needs_input_grad[1]
-
-        grad = torch.empty_like(output_grad) if need_in or need_res else None
-
-        weight_grad = torch.empty_like(
-            weight) if ctx.needs_input_grad[2] else None
-
-        ops.fused_add_rms_norm_backward(grad, weight_grad, output_grad,
-                                        add_output_grad, add_output, weight,
-                                        eps)
-        input_grad = grad if need_in else None
-        residual_grad = grad if need_res else None
-
-        return input_grad, residual_grad, weight_grad, None
diff --git a/build/torch29-cxx11-rocm63-x86_64-linux/activation/__init__.py b/build/torch29-cxx11-rocm63-x86_64-linux/activation/__init__.py
deleted file mode 100644
index 938feeff791794d011fec65cf86df957e2c4da2f..0000000000000000000000000000000000000000
--- a/build/torch29-cxx11-rocm63-x86_64-linux/activation/__init__.py
+++ /dev/null
@@ -1,52 +0,0 @@
-import torch
-
-from . import layers
-from ._ops import ops
-from .poly_norm import FusedMulPolyNormFunction, PolyNormFunction
-from .rms_norm import FusedAddRMSNormFunction, RMSNormFunction
-
-
-def poly_norm(
-    x: torch.Tensor,
-    weight: torch.Tensor,
-    bias: torch.Tensor,
-    eps: float = 1e-6,
-) -> None:
-    return PolyNormFunction.apply(x, weight, bias, eps)
-
-
-def fused_mul_poly_norm(
-    x: torch.Tensor,
-    mul: torch.Tensor,
-    weight: torch.Tensor,
-    bias: torch.Tensor,
-    eps: float = 1e-6,
-) -> None:
-    return FusedMulPolyNormFunction.apply(x, mul, weight, bias, eps)
-
-
-def rms_norm(
-    x: torch.Tensor,
-    weight: torch.Tensor,
-    eps: float = 1e-6,
-) -> None:
-    return RMSNormFunction.apply(x, weight, eps)
-
-
-def fused_add_rms_norm(
-    x: torch.Tensor,
-    residual: torch.Tensor,
-    weight: torch.Tensor,
-    eps: float = 1e-6,
-) -> None:
-    return FusedAddRMSNormFunction.apply(x, residual, weight, eps)
-
-
-__all__ = [
-    "poly_norm",
-    "fused_mul_poly_norm",
-    "rms_norm",
-    "fused_add_rms_norm",
-    "layers",
-    "ops",
-]
diff --git a/build/torch29-cxx11-rocm63-x86_64-linux/activation/_activation_2f66548_dirty.abi3.so b/build/torch29-cxx11-rocm63-x86_64-linux/activation/_activation_2f66548_dirty.abi3.so
deleted file mode 100755
index 080efed738787f3d76c0883a491d2b363224a892..0000000000000000000000000000000000000000
--- a/build/torch29-cxx11-rocm63-x86_64-linux/activation/_activation_2f66548_dirty.abi3.so
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:983cd8035d186be293e3a3e4ab9f6496e3f36f023836ff136a5f71a9c3deca21
-size 2765352
diff --git a/build/torch29-cxx11-rocm63-x86_64-linux/activation/_ops.py b/build/torch29-cxx11-rocm63-x86_64-linux/activation/_ops.py
deleted file mode 100644
index 43d5009fa5c18b4b7de8ca60b6c76352542ac995..0000000000000000000000000000000000000000
--- a/build/torch29-cxx11-rocm63-x86_64-linux/activation/_ops.py
+++ /dev/null
@@ -1,9 +0,0 @@
-import torch
-from . import _activation_2f66548_dirty
-ops = torch.ops._activation_2f66548_dirty
-
-def add_op_namespace_prefix(op_name: str):
-    """
-    Prefix op by namespace.
-    """
-    return f"_activation_2f66548_dirty::{op_name}"
\ No newline at end of file
diff --git a/build/torch29-cxx11-rocm63-x86_64-linux/activation/layers.py b/build/torch29-cxx11-rocm63-x86_64-linux/activation/layers.py
deleted file mode 100644
index b1880bdbe8dd73ac76d7d4561cf60f9765097ca9..0000000000000000000000000000000000000000
--- a/build/torch29-cxx11-rocm63-x86_64-linux/activation/layers.py
+++ /dev/null
@@ -1,94 +0,0 @@
-import torch
-import torch.nn as nn
-from torch.nn import init
-
-from .poly_norm import FusedMulPolyNormFunction, PolyNormFunction
-from .rms_norm import FusedAddRMSNormFunction, RMSNormFunction
-
-
-class PolyNorm(nn.Module):
-
-    def __init__(self, eps=1e-6, dtype: torch.dtype = torch.float32):
-        super().__init__()
-        self.weight = torch.nn.Parameter(torch.ones(3, dtype=dtype) / 3)
-        self.bias = torch.nn.Parameter(torch.zeros(1, dtype=dtype))
-        self.eps = eps
-
-    def forward(
-        self,
-        x: torch.Tensor,
-    ):
-        return PolyNormFunction.apply(x, self.weight, self.bias, self.eps)
-
-    def reset_parameters(self) -> None:
-        """
-        Resets parameters based on their initialization used in __init__.
-        """
-        init.ones_(self.weight)
-        init.zeros_(self.bias)
-
-
-class FusedMulPolyNorm(nn.Module):
-
-    def __init__(self, eps=1e-6, dtype: torch.dtype = torch.float32):
-        super().__init__()
-        self.weight = torch.nn.Parameter(torch.ones(3, dtype=dtype) / 3)
-        self.bias = torch.nn.Parameter(torch.zeros(1, dtype=dtype))
-        self.eps = eps
-
-    def forward(
-        self,
-        x: torch.Tensor,
-        mul: torch.Tensor,
-    ):
-        return FusedMulPolyNormFunction.apply(x, mul, self.weight, self.bias,
-                                              self.eps)
-
-    def reset_parameters(self) -> None:
-        """
-        Resets parameters based on their initialization used in __init__.
-        """
-        init.ones_(self.weight)
-        init.zeros_(self.bias)
-
-
-class RMSNorm(nn.Module):
-
-    def __init__(self, dim: int, eps=1e-6, dtype: torch.dtype = torch.float32):
-        super().__init__()
-        self.weight = torch.nn.Parameter(torch.ones(dim, dtype=dtype))
-        self.eps = eps
-
-    def forward(
-        self,
-        x: torch.Tensor,
-    ):
-        return RMSNormFunction.apply(x, self.weight, self.eps)
-
-    def reset_parameters(self) -> None:
-        """
-        Resets parameters based on their initialization used in __init__.
-        """
-        init.ones_(self.weight)
-
-
-class FusedAddRMSNorm(nn.Module):
-
-    def __init__(self, dim: int, eps=1e-6, dtype: torch.dtype = torch.float32):
-        super().__init__()
-        self.weight = torch.nn.Parameter(torch.ones(dim, dtype=dtype))
-        self.eps = eps
-
-    def forward(
-        self,
-        x: torch.Tensor,
-        residual: torch.Tensor,
-    ):
-        return FusedAddRMSNormFunction.apply(x, residual, self.weight,
-                                             self.eps)
-
-    def reset_parameters(self) -> None:
-        """
-        Resets parameters based on their initialization used in __init__.
-        """
-        init.ones_(self.weight)
diff --git a/build/torch29-cxx11-rocm63-x86_64-linux/activation/poly_norm.py b/build/torch29-cxx11-rocm63-x86_64-linux/activation/poly_norm.py
deleted file mode 100644
index 8a0fd85f1835e02a36eb9184874d77dcad8221f9..0000000000000000000000000000000000000000
--- a/build/torch29-cxx11-rocm63-x86_64-linux/activation/poly_norm.py
+++ /dev/null
@@ -1,76 +0,0 @@
-import torch
-
-from ._ops import ops
-
-
-# Inherit from Function
-class PolyNormFunction(torch.autograd.Function):
-    # Note that forward, setup_context, and backward are @staticmethods
-    @staticmethod
-    def forward(input, weight, bias, eps):
-        output = torch.empty_like(input)
-        ops.poly_norm(output, input, weight, bias, eps)
-        return output
-
-    @staticmethod
-    # inputs is a Tuple of all of the inputs passed to forward.
-    # output is the output of the forward().
-    def setup_context(ctx, inputs, output):
-        input, weight, bias, eps = inputs
-        ctx.save_for_backward(input, weight)
-        ctx.eps = eps
-
-    # This function has only a single output, so it gets only one gradient
-    @staticmethod
-    def backward(ctx, output_grad):
-        input, weight = ctx.saved_tensors
-        eps = ctx.eps
-
-        input_grad = torch.empty_like(
-            input) if ctx.needs_input_grad[0] else None
-        weight_grad = torch.empty_like(
-            weight) if ctx.needs_input_grad[1] else None
-        bias_grad = (torch.empty(1, dtype=weight.dtype, device=weight.device)
-                     if ctx.needs_input_grad[2] else None)
-
-        ops.poly_norm_backward(input_grad, weight_grad, bias_grad, output_grad,
-                               input, weight, eps)
-
-        return input_grad, weight_grad, bias_grad, None
-
-
-class FusedMulPolyNormFunction(torch.autograd.Function):
-    # Note that forward, setup_context, and backward are @staticmethods
-    @staticmethod
-    def forward(input, mul, weight, bias, eps):
-        output = torch.empty_like(input)
-        ops.fused_mul_poly_norm(output, input, mul, weight, bias, eps)
-        return output
-
-    @staticmethod
-    # inputs is a Tuple of all of the inputs passed to forward.
-    # output is the output of the forward().
-    def setup_context(ctx, inputs, output):
-        input, mul, weight, bias, eps = inputs
-        ctx.save_for_backward(input, mul, weight, bias)
-        ctx.eps = eps
-
-    # This function has only a single output, so it gets only one gradient
-    @staticmethod
-    def backward(ctx, output_grad):
-        input, mul, weight, bias = ctx.saved_tensors
-        eps = ctx.eps
-
-        input_grad = torch.empty_like(
-            input) if ctx.needs_input_grad[0] else None
-        mul_grad = torch.empty_like(mul) if ctx.needs_input_grad[1] else None
-        weight_grad = torch.empty_like(
-            weight) if ctx.needs_input_grad[2] else None
-        bias_grad = (torch.empty(1, dtype=weight.dtype, device=weight.device)
-                     if ctx.needs_input_grad[3] else None)
-
-        ops.fused_mul_poly_norm_backward(input_grad, mul_grad, weight_grad,
-                                         bias_grad, output_grad, input, mul,
-                                         weight, bias, eps)
-
-        return input_grad, mul_grad, weight_grad, bias_grad, None
diff --git a/build/torch29-cxx11-rocm63-x86_64-linux/activation/rms_norm.py b/build/torch29-cxx11-rocm63-x86_64-linux/activation/rms_norm.py
deleted file mode 100644
index 7f9a470d9bb3833083cfa711e9d16c336b73238d..0000000000000000000000000000000000000000
--- a/build/torch29-cxx11-rocm63-x86_64-linux/activation/rms_norm.py
+++ /dev/null
@@ -1,79 +0,0 @@
-import torch
-
-from ._ops import ops
-
-
-# Inherit from Function
-class RMSNormFunction(torch.autograd.Function):
-    # Note that forward, setup_context, and backward are @staticmethods
-    @staticmethod
-    def forward(input, weight, eps):
-        output = torch.empty_like(input)
-        ops.rms_norm(output, input, weight, eps)
-        return output
-
-    @staticmethod
-    # inputs is a Tuple of all of the inputs passed to forward.
-    # output is the output of the forward().
-    def setup_context(ctx, inputs, output):
-        input, weight, eps = inputs
-        ctx.save_for_backward(input, weight)
-        ctx.eps = eps
-
-    # This function has only a single output, so it gets only one gradient
-    @staticmethod
-    def backward(ctx, output_grad):
-        input, weight = ctx.saved_tensors
-        eps = ctx.eps
-
-        input_grad = torch.empty_like(
-            input) if ctx.needs_input_grad[0] else None
-        weight_grad = torch.empty_like(
-            weight) if ctx.needs_input_grad[1] else None
-
-        ops.rms_norm_backward(input_grad, weight_grad, output_grad, input,
-                              weight, eps)
-
-        return input_grad, weight_grad, None
-
-
-# Inherit from Function
-class FusedAddRMSNormFunction(torch.autograd.Function):
-    # Note that forward, setup_context, and backward are @staticmethods
-    @staticmethod
-    def forward(input, residual, weight, eps):
-        output = torch.empty_like(input)
-        add_output = torch.empty_like(input)
-        ops.fused_add_rms_norm(output, add_output, input, residual, weight,
-                               eps)
-        return output, add_output
-
-    @staticmethod
-    # inputs is a Tuple of all of the inputs passed to forward.
-    # output is the output of the forward().
-    def setup_context(ctx, inputs, outputs):
-        _, _, weight, eps = inputs
-        _, add_output = outputs
-        ctx.save_for_backward(weight, add_output)
-        ctx.eps = eps
-
-    @staticmethod
-    def backward(ctx, output_grad, add_output_grad):
-        weight, add_output = ctx.saved_tensors
-        eps = ctx.eps
-
-        need_in = ctx.needs_input_grad[0]
-        need_res = ctx.needs_input_grad[1]
-
-        grad = torch.empty_like(output_grad) if need_in or need_res else None
-
-        weight_grad = torch.empty_like(
-            weight) if ctx.needs_input_grad[2] else None
-
-        ops.fused_add_rms_norm_backward(grad, weight_grad, output_grad,
-                                        add_output_grad, add_output, weight,
-                                        eps)
-        input_grad = grad if need_in else None
-        residual_grad = grad if need_res else None
-
-        return input_grad, residual_grad, weight_grad, None
diff --git a/build/torch29-cxx11-rocm64-x86_64-linux/activation/__init__.py b/build/torch29-cxx11-rocm64-x86_64-linux/activation/__init__.py
deleted file mode 100644
index 938feeff791794d011fec65cf86df957e2c4da2f..0000000000000000000000000000000000000000
--- a/build/torch29-cxx11-rocm64-x86_64-linux/activation/__init__.py
+++ /dev/null
@@ -1,52 +0,0 @@
-import torch
-
-from . import layers
-from ._ops import ops
-from .poly_norm import FusedMulPolyNormFunction, PolyNormFunction
-from .rms_norm import FusedAddRMSNormFunction, RMSNormFunction
-
-
-def poly_norm(
-    x: torch.Tensor,
-    weight: torch.Tensor,
-    bias: torch.Tensor,
-    eps: float = 1e-6,
-) -> None:
-    return PolyNormFunction.apply(x, weight, bias, eps)
-
-
-def fused_mul_poly_norm(
-    x: torch.Tensor,
-    mul: torch.Tensor,
-    weight: torch.Tensor,
-    bias: torch.Tensor,
-    eps: float = 1e-6,
-) -> None:
-    return FusedMulPolyNormFunction.apply(x, mul, weight, bias, eps)
-
-
-def rms_norm(
-    x: torch.Tensor,
-    weight: torch.Tensor,
-    eps: float = 1e-6,
-) -> None:
-    return RMSNormFunction.apply(x, weight, eps)
-
-
-def fused_add_rms_norm(
-    x: torch.Tensor,
-    residual: torch.Tensor,
-    weight: torch.Tensor,
-    eps: float = 1e-6,
-) -> None:
-    return FusedAddRMSNormFunction.apply(x, residual, weight, eps)
-
-
-__all__ = [
-    "poly_norm",
-    "fused_mul_poly_norm",
-    "rms_norm",
-    "fused_add_rms_norm",
-    "layers",
-    "ops",
-]
diff --git a/build/torch29-cxx11-rocm64-x86_64-linux/activation/_activation_2f66548_dirty.abi3.so b/build/torch29-cxx11-rocm64-x86_64-linux/activation/_activation_2f66548_dirty.abi3.so
deleted file mode 100755
index f0d526869e51aeb490f9e1cb6c1e592c94130250..0000000000000000000000000000000000000000
--- a/build/torch29-cxx11-rocm64-x86_64-linux/activation/_activation_2f66548_dirty.abi3.so
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:dd5655e890b143732fccc63134b02699323959288483c68f7f9149f91f0b1c75
-size 2771056
diff --git a/build/torch29-cxx11-rocm64-x86_64-linux/activation/_ops.py b/build/torch29-cxx11-rocm64-x86_64-linux/activation/_ops.py
deleted file mode 100644
index 43d5009fa5c18b4b7de8ca60b6c76352542ac995..0000000000000000000000000000000000000000
--- a/build/torch29-cxx11-rocm64-x86_64-linux/activation/_ops.py
+++ /dev/null
@@ -1,9 +0,0 @@
-import torch
-from . import _activation_2f66548_dirty
-ops = torch.ops._activation_2f66548_dirty
-
-def add_op_namespace_prefix(op_name: str):
-    """
-    Prefix op by namespace.
-    """
-    return f"_activation_2f66548_dirty::{op_name}"
\ No newline at end of file
diff --git a/build/torch29-cxx11-rocm64-x86_64-linux/activation/layers.py b/build/torch29-cxx11-rocm64-x86_64-linux/activation/layers.py
deleted file mode 100644
index b1880bdbe8dd73ac76d7d4561cf60f9765097ca9..0000000000000000000000000000000000000000
--- a/build/torch29-cxx11-rocm64-x86_64-linux/activation/layers.py
+++ /dev/null
@@ -1,94 +0,0 @@
-import torch
-import torch.nn as nn
-from torch.nn import init
-
-from .poly_norm import FusedMulPolyNormFunction, PolyNormFunction
-from .rms_norm import FusedAddRMSNormFunction, RMSNormFunction
-
-
-class PolyNorm(nn.Module):
-
-    def __init__(self, eps=1e-6, dtype: torch.dtype = torch.float32):
-        super().__init__()
-        self.weight = torch.nn.Parameter(torch.ones(3, dtype=dtype) / 3)
-        self.bias = torch.nn.Parameter(torch.zeros(1, dtype=dtype))
-        self.eps = eps
-
-    def forward(
-        self,
-        x: torch.Tensor,
-    ):
-        return PolyNormFunction.apply(x, self.weight, self.bias, self.eps)
-
-    def reset_parameters(self) -> None:
-        """
-        Resets parameters based on their initialization used in __init__.
-        """
-        init.ones_(self.weight)
-        init.zeros_(self.bias)
-
-
-class FusedMulPolyNorm(nn.Module):
-
-    def __init__(self, eps=1e-6, dtype: torch.dtype = torch.float32):
-        super().__init__()
-        self.weight = torch.nn.Parameter(torch.ones(3, dtype=dtype) / 3)
-        self.bias = torch.nn.Parameter(torch.zeros(1, dtype=dtype))
-        self.eps = eps
-
-    def forward(
-        self,
-        x: torch.Tensor,
-        mul: torch.Tensor,
-    ):
-        return FusedMulPolyNormFunction.apply(x, mul, self.weight, self.bias,
-                                              self.eps)
-
-    def reset_parameters(self) -> None:
-        """
-        Resets parameters based on their initialization used in __init__.
-        """
-        init.ones_(self.weight)
-        init.zeros_(self.bias)
-
-
-class RMSNorm(nn.Module):
-
-    def __init__(self, dim: int, eps=1e-6, dtype: torch.dtype = torch.float32):
-        super().__init__()
-        self.weight = torch.nn.Parameter(torch.ones(dim, dtype=dtype))
-        self.eps = eps
-
-    def forward(
-        self,
-        x: torch.Tensor,
-    ):
-        return RMSNormFunction.apply(x, self.weight, self.eps)
-
-    def reset_parameters(self) -> None:
-        """
-        Resets parameters based on their initialization used in __init__.
-        """
-        init.ones_(self.weight)
-
-
-class FusedAddRMSNorm(nn.Module):
-
-    def __init__(self, dim: int, eps=1e-6, dtype: torch.dtype = torch.float32):
-        super().__init__()
-        self.weight = torch.nn.Parameter(torch.ones(dim, dtype=dtype))
-        self.eps = eps
-
-    def forward(
-        self,
-        x: torch.Tensor,
-        residual: torch.Tensor,
-    ):
-        return FusedAddRMSNormFunction.apply(x, residual, self.weight,
-                                             self.eps)
-
-    def reset_parameters(self) -> None:
-        """
-        Resets parameters based on their initialization used in __init__.
-        """
-        init.ones_(self.weight)
diff --git a/build/torch29-cxx11-rocm64-x86_64-linux/activation/poly_norm.py b/build/torch29-cxx11-rocm64-x86_64-linux/activation/poly_norm.py
deleted file mode 100644
index 8a0fd85f1835e02a36eb9184874d77dcad8221f9..0000000000000000000000000000000000000000
--- a/build/torch29-cxx11-rocm64-x86_64-linux/activation/poly_norm.py
+++ /dev/null
@@ -1,76 +0,0 @@
-import torch
-
-from ._ops import ops
-
-
-# Inherit from Function
-class PolyNormFunction(torch.autograd.Function):
-    # Note that forward, setup_context, and backward are @staticmethods
-    @staticmethod
-    def forward(input, weight, bias, eps):
-        output = torch.empty_like(input)
-        ops.poly_norm(output, input, weight, bias, eps)
-        return output
-
-    @staticmethod
-    # inputs is a Tuple of all of the inputs passed to forward.
-    # output is the output of the forward().
-    def setup_context(ctx, inputs, output):
-        input, weight, bias, eps = inputs
-        ctx.save_for_backward(input, weight)
-        ctx.eps = eps
-
-    # This function has only a single output, so it gets only one gradient
-    @staticmethod
-    def backward(ctx, output_grad):
-        input, weight = ctx.saved_tensors
-        eps = ctx.eps
-
-        input_grad = torch.empty_like(
-            input) if ctx.needs_input_grad[0] else None
-        weight_grad = torch.empty_like(
-            weight) if ctx.needs_input_grad[1] else None
-        bias_grad = (torch.empty(1, dtype=weight.dtype, device=weight.device)
-                     if ctx.needs_input_grad[2] else None)
-
-        ops.poly_norm_backward(input_grad, weight_grad, bias_grad, output_grad,
-                               input, weight, eps)
-
-        return input_grad, weight_grad, bias_grad, None
-
-
-class FusedMulPolyNormFunction(torch.autograd.Function):
-    # Note that forward, setup_context, and backward are @staticmethods
-    @staticmethod
-    def forward(input, mul, weight, bias, eps):
-        output = torch.empty_like(input)
-        ops.fused_mul_poly_norm(output, input, mul, weight, bias, eps)
-        return output
-
-    @staticmethod
-    # inputs is a Tuple of all of the inputs passed to forward.
-    # output is the output of the forward().
-    def setup_context(ctx, inputs, output):
-        input, mul, weight, bias, eps = inputs
-        ctx.save_for_backward(input, mul, weight, bias)
-        ctx.eps = eps
-
-    # This function has only a single output, so it gets only one gradient
-    @staticmethod
-    def backward(ctx, output_grad):
-        input, mul, weight, bias = ctx.saved_tensors
-        eps = ctx.eps
-
-        input_grad = torch.empty_like(
-            input) if ctx.needs_input_grad[0] else None
-        mul_grad = torch.empty_like(mul) if ctx.needs_input_grad[1] else None
-        weight_grad = torch.empty_like(
-            weight) if ctx.needs_input_grad[2] else None
-        bias_grad = (torch.empty(1, dtype=weight.dtype, device=weight.device)
-                     if ctx.needs_input_grad[3] else None)
-
-        ops.fused_mul_poly_norm_backward(input_grad, mul_grad, weight_grad,
-                                         bias_grad, output_grad, input, mul,
-                                         weight, bias, eps)
-
-        return input_grad, mul_grad, weight_grad, bias_grad, None
diff --git a/build/torch29-cxx11-rocm64-x86_64-linux/activation/rms_norm.py b/build/torch29-cxx11-rocm64-x86_64-linux/activation/rms_norm.py
deleted file mode 100644
index 7f9a470d9bb3833083cfa711e9d16c336b73238d..0000000000000000000000000000000000000000
--- a/build/torch29-cxx11-rocm64-x86_64-linux/activation/rms_norm.py
+++ /dev/null
@@ -1,79 +0,0 @@
-import torch
-
-from ._ops import ops
-
-
-# Inherit from Function
-class RMSNormFunction(torch.autograd.Function):
-    # Note that forward, setup_context, and backward are @staticmethods
-    @staticmethod
-    def forward(input, weight, eps):
-        output = torch.empty_like(input)
-        ops.rms_norm(output, input, weight, eps)
-        return output
-
-    @staticmethod
-    # inputs is a Tuple of all of the inputs passed to forward.
-    # output is the output of the forward().
-    def setup_context(ctx, inputs, output):
-        input, weight, eps = inputs
-        ctx.save_for_backward(input, weight)
-        ctx.eps = eps
-
-    # This function has only a single output, so it gets only one gradient
-    @staticmethod
-    def backward(ctx, output_grad):
-        input, weight = ctx.saved_tensors
-        eps = ctx.eps
-
-        input_grad = torch.empty_like(
-            input) if ctx.needs_input_grad[0] else None
-        weight_grad = torch.empty_like(
-            weight) if ctx.needs_input_grad[1] else None
-
-        ops.rms_norm_backward(input_grad, weight_grad, output_grad, input,
-                              weight, eps)
-
-        return input_grad, weight_grad, None
-
-
-# Inherit from Function
-class FusedAddRMSNormFunction(torch.autograd.Function):
-    # Note that forward, setup_context, and backward are @staticmethods
-    @staticmethod
-    def forward(input, residual, weight, eps):
-        output = torch.empty_like(input)
-        add_output = torch.empty_like(input)
-        ops.fused_add_rms_norm(output, add_output, input, residual, weight,
-                               eps)
-        return output, add_output
-
-    @staticmethod
-    # inputs is a Tuple of all of the inputs passed to forward.
-    # output is the output of the forward().
-    def setup_context(ctx, inputs, outputs):
-        _, _, weight, eps = inputs
-        _, add_output = outputs
-        ctx.save_for_backward(weight, add_output)
-        ctx.eps = eps
-
-    @staticmethod
-    def backward(ctx, output_grad, add_output_grad):
-        weight, add_output = ctx.saved_tensors
-        eps = ctx.eps
-
-        need_in = ctx.needs_input_grad[0]
-        need_res = ctx.needs_input_grad[1]
-
-        grad = torch.empty_like(output_grad) if need_in or need_res else None
-
-        weight_grad = torch.empty_like(
-            weight) if ctx.needs_input_grad[2] else None
-
-        ops.fused_add_rms_norm_backward(grad, weight_grad, output_grad,
-                                        add_output_grad, add_output, weight,
-                                        eps)
-        input_grad = grad if need_in else None
-        residual_grad = grad if need_res else None
-
-        return input_grad, residual_grad, weight_grad, None