Add Builds
- build.toml +5 -1
- build/torch27-cxx11-cu118-x86_64-linux/layer_norm/{_layer_norm_4e9c226_dirty.abi3.so → _layer_norm_f622ea1_dirty.abi3.so} +2 -2
- build/torch27-cxx11-cu118-x86_64-linux/layer_norm/_ops.py +3 -3
- build/torch27-cxx11-cu118-x86_64-linux/layer_norm/layers.py +7 -5
- build/torch27-cxx11-cu126-x86_64-linux/layer_norm/{_layer_norm_4e9c226_dirty.abi3.so → _layer_norm_f622ea1_dirty.abi3.so} +2 -2
- build/torch27-cxx11-cu126-x86_64-linux/layer_norm/_ops.py +3 -3
- build/torch27-cxx11-cu126-x86_64-linux/layer_norm/layers.py +7 -5
- build/{torch28-cxx11-cu126-x86_64-linux/layer_norm/_layer_norm_4e9c226_dirty.abi3.so → torch27-cxx11-cu128-x86_64-linux/layer_norm/_layer_norm_f622ea1_dirty.abi3.so} +2 -2
- build/torch27-cxx11-cu128-x86_64-linux/layer_norm/_ops.py +3 -3
- build/torch27-cxx11-cu128-x86_64-linux/layer_norm/layers.py +7 -5
- build/{torch27-cxx11-cu128-x86_64-linux/layer_norm/_layer_norm_4e9c226_dirty.abi3.so → torch28-cxx11-cu126-x86_64-linux/layer_norm/_layer_norm_f622ea1_dirty.abi3.so} +2 -2
- build/torch28-cxx11-cu126-x86_64-linux/layer_norm/_ops.py +3 -3
- build/torch28-cxx11-cu126-x86_64-linux/layer_norm/layers.py +7 -5
- build/torch28-cxx11-cu128-x86_64-linux/layer_norm/_layer_norm_4e9c226_dirty.abi3.so +0 -3
- build/torch28-cxx11-cu128-x86_64-linux/layer_norm/_layer_norm_f622ea1_dirty.abi3.so +3 -0
- build/torch28-cxx11-cu128-x86_64-linux/layer_norm/_ops.py +3 -3
- build/torch28-cxx11-cu128-x86_64-linux/layer_norm/layers.py +7 -5
- build/torch28-cxx11-cu129-x86_64-linux/layer_norm/_layer_norm_4e9c226_dirty.abi3.so +0 -3
- build/torch28-cxx11-cu129-x86_64-linux/layer_norm/_layer_norm_f622ea1_dirty.abi3.so +3 -0
- build/torch28-cxx11-cu129-x86_64-linux/layer_norm/_ops.py +3 -3
- build/torch28-cxx11-cu129-x86_64-linux/layer_norm/layers.py +7 -5
- torch-ext/layer_norm/_layer_norm_711aa42_dirty.abi3.so +0 -3
- torch-ext/layer_norm/_ops.py +0 -9
- torch-ext/layer_norm/layers.py +7 -5
- torch-ext/registration.h +0 -30

build.toml
CHANGED

@@ -12,7 +12,11 @@ src = [
depends = ["torch"]
backend = "cuda"
cuda-capabilities = [
-    "
+    "8.0",
+    "8.9",
+    "9.0",
+    "10.0",
+    "12.0",
]
include = ["."]
src = [
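
The new cuda-capabilities list pins the compute capabilities the CUDA kernels are compiled for (8.0 Ampere through 12.0 Blackwell). A quick hedged sketch for checking whether the local GPU falls in that list; the supported set is copied from build.toml above, the rest is standard PyTorch:

import torch

# Compare the local GPU's compute capability against the build list.
supported = {"8.0", "8.9", "9.0", "10.0", "12.0"}
major, minor = torch.cuda.get_device_capability()
cc = f"{major}.{minor}"
print(f"compute capability {cc}:",
      "in build list" if cc in supported else "not in build list")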

build/torch27-cxx11-cu118-x86_64-linux/layer_norm/{_layer_norm_4e9c226_dirty.abi3.so → _layer_norm_f622ea1_dirty.abi3.so}
RENAMED

@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:fe0515daaf1bbfd1246d18bd5c1a5cd6f366059090a8b6e402955d06caaa6392
+size 716945976
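
These .abi3.so binaries are stored as Git LFS pointers, so the diff shows only the pointer text: the oid is the SHA-256 of the real file and size is its byte length. A hedged sketch for verifying a downloaded artifact against its pointer (the path is illustrative):

import hashlib

def lfs_oid(path: str) -> str:
    # The LFS oid is just the SHA-256 of the file contents, streamed in chunks.
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    return h.hexdigest()

# Expected value taken from the pointer above.
assert lfs_oid("_layer_norm_f622ea1_dirty.abi3.so") == "fe0515daaf1bbfd1246d18bd5c1a5cd6f366059090a8b6e402955d06caaa6392"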

build/torch27-cxx11-cu118-x86_64-linux/layer_norm/_ops.py
CHANGED

@@ -1,9 +1,9 @@
import torch
-from . import _layer_norm_4e9c226_dirty
-ops = torch.ops._layer_norm_4e9c226_dirty
+from . import _layer_norm_f622ea1_dirty
+ops = torch.ops._layer_norm_f622ea1_dirty

def add_op_namespace_prefix(op_name: str):
    """
    Prefix op by namespace.
    """
-    return f"_layer_norm_4e9c226_dirty::{op_name}"
+    return f"_layer_norm_f622ea1_dirty::{op_name}"
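
For orientation, a hedged sketch of how this shim is consumed; the layer_norm import path is assumed from the repo layout, and dropout_add_ln_fwd is the op name used in layers.py below:

from layer_norm._ops import ops, add_op_namespace_prefix

# ops proxies torch.ops._layer_norm_f622ea1_dirty, so callers are insulated
# from the build-specific namespace changing between releases.
qualified = add_op_namespace_prefix("dropout_add_ln_fwd")
# qualified == "_layer_norm_f622ea1_dirty::dropout_add_ln_fwd"
fwd = ops.dropout_add_ln_fwd  # the registered CUDA op, ready to call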

build/torch27-cxx11-cu118-x86_64-linux/layer_norm/layers.py
CHANGED

@@ -9,8 +9,8 @@ class LayerNorm(nn.Module):
    variance_epsilon: float

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
-
-            hidden_states,
+        output = ops.dropout_add_ln_fwd(
+            hidden_states.view(hidden_states.shape[0], -1),
            gamma = self.weight,
            beta = None,
            rowscale = None,
@@ -25,14 +25,15 @@ class LayerNorm(nn.Module):
            residual_in_fp32 = False,
            is_rms_norm = False,
        )
+        return output[0].view(hidden_states.shape)

class LlamaRMSNorm(nn.Module):
    weight: torch.Tensor
    variance_epsilon: float

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
-
-            hidden_states,
+        output = ops.dropout_add_ln_fwd(
+            hidden_states.view(hidden_states.shape[0], -1),
            gamma = self.weight,
            beta = None,
            rowscale = None,
@@ -46,4 +47,5 @@ class LlamaRMSNorm(nn.Module):
            gen = None,
            residual_in_fp32 = False,
            is_rms_norm = True,
-        )
+        )
+        return output[0].view(hidden_states.shape)
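
A minimal usage sketch of the updated forward, assuming the package imports as layer_norm, a CUDA device, bare construction of the module, and a 2D activation so the view(shape[0], -1) flattening lines up with the weight; dtype and sizes are illustrative:

import torch
from layer_norm.layers import LlamaRMSNorm

norm = LlamaRMSNorm()
norm.weight = torch.ones(4096, device="cuda", dtype=torch.float16)
norm.variance_epsilon = 1e-6

x = torch.randn(8, 4096, device="cuda", dtype=torch.float16)
y = norm(x)  # forward flattens to 2D for the kernel, then restores x's shape
assert y.shape == x.shape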

build/torch27-cxx11-cu126-x86_64-linux/layer_norm/{_layer_norm_4e9c226_dirty.abi3.so → _layer_norm_f622ea1_dirty.abi3.so}
RENAMED

@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:04095de2e4bf9cd03f9ec481084d0c9e9e0baa0bab17a0ec9715f22f69bdfd33
+size 712024848

build/torch27-cxx11-cu126-x86_64-linux/layer_norm/_ops.py
CHANGED

@@ -1,9 +1,9 @@
import torch
-from . import _layer_norm_4e9c226_dirty
-ops = torch.ops._layer_norm_4e9c226_dirty
+from . import _layer_norm_f622ea1_dirty
+ops = torch.ops._layer_norm_f622ea1_dirty

def add_op_namespace_prefix(op_name: str):
    """
    Prefix op by namespace.
    """
-    return f"_layer_norm_4e9c226_dirty::{op_name}"
+    return f"_layer_norm_f622ea1_dirty::{op_name}"

build/torch27-cxx11-cu126-x86_64-linux/layer_norm/layers.py
CHANGED

@@ -9,8 +9,8 @@ class LayerNorm(nn.Module):
    variance_epsilon: float

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
-
-            hidden_states,
+        output = ops.dropout_add_ln_fwd(
+            hidden_states.view(hidden_states.shape[0], -1),
            gamma = self.weight,
            beta = None,
            rowscale = None,
@@ -25,14 +25,15 @@ class LayerNorm(nn.Module):
            residual_in_fp32 = False,
            is_rms_norm = False,
        )
+        return output[0].view(hidden_states.shape)

class LlamaRMSNorm(nn.Module):
    weight: torch.Tensor
    variance_epsilon: float

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
-
-            hidden_states,
+        output = ops.dropout_add_ln_fwd(
+            hidden_states.view(hidden_states.shape[0], -1),
            gamma = self.weight,
            beta = None,
            rowscale = None,
@@ -46,4 +47,5 @@ class LlamaRMSNorm(nn.Module):
            gen = None,
            residual_in_fp32 = False,
            is_rms_norm = True,
-        )
+        )
+        return output[0].view(hidden_states.shape)

build/{torch28-cxx11-cu126-x86_64-linux/layer_norm/_layer_norm_4e9c226_dirty.abi3.so → torch27-cxx11-cu128-x86_64-linux/layer_norm/_layer_norm_f622ea1_dirty.abi3.so}
RENAMED

@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:ae0d54be8ee4e3ae33f47f0b27243c9cbd5668ff7756b1dfb5dcd9e2430f5a35
+size 1231333392

build/torch27-cxx11-cu128-x86_64-linux/layer_norm/_ops.py
CHANGED

@@ -1,9 +1,9 @@
import torch
-from . import _layer_norm_4e9c226_dirty
-ops = torch.ops._layer_norm_4e9c226_dirty
+from . import _layer_norm_f622ea1_dirty
+ops = torch.ops._layer_norm_f622ea1_dirty

def add_op_namespace_prefix(op_name: str):
    """
    Prefix op by namespace.
    """
-    return f"_layer_norm_4e9c226_dirty::{op_name}"
+    return f"_layer_norm_f622ea1_dirty::{op_name}"

build/torch27-cxx11-cu128-x86_64-linux/layer_norm/layers.py
CHANGED

@@ -9,8 +9,8 @@ class LayerNorm(nn.Module):
    variance_epsilon: float

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
-
-            hidden_states,
+        output = ops.dropout_add_ln_fwd(
+            hidden_states.view(hidden_states.shape[0], -1),
            gamma = self.weight,
            beta = None,
            rowscale = None,
@@ -25,14 +25,15 @@ class LayerNorm(nn.Module):
            residual_in_fp32 = False,
            is_rms_norm = False,
        )
+        return output[0].view(hidden_states.shape)

class LlamaRMSNorm(nn.Module):
    weight: torch.Tensor
    variance_epsilon: float

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
-
-            hidden_states,
+        output = ops.dropout_add_ln_fwd(
+            hidden_states.view(hidden_states.shape[0], -1),
            gamma = self.weight,
            beta = None,
            rowscale = None,
@@ -46,4 +47,5 @@ class LlamaRMSNorm(nn.Module):
            gen = None,
            residual_in_fp32 = False,
            is_rms_norm = True,
-        )
+        )
+        return output[0].view(hidden_states.shape)

build/{torch27-cxx11-cu128-x86_64-linux/layer_norm/_layer_norm_4e9c226_dirty.abi3.so → torch28-cxx11-cu126-x86_64-linux/layer_norm/_layer_norm_f622ea1_dirty.abi3.so}
RENAMED

@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:12b6de6cef24c5ee7a390d91ee2ea7069533e66440cf78ae5df7ae3beff5c1ca
+size 712024936

build/torch28-cxx11-cu126-x86_64-linux/layer_norm/_ops.py
CHANGED

@@ -1,9 +1,9 @@
import torch
-from . import _layer_norm_4e9c226_dirty
-ops = torch.ops._layer_norm_4e9c226_dirty
+from . import _layer_norm_f622ea1_dirty
+ops = torch.ops._layer_norm_f622ea1_dirty

def add_op_namespace_prefix(op_name: str):
    """
    Prefix op by namespace.
    """
-    return f"_layer_norm_4e9c226_dirty::{op_name}"
+    return f"_layer_norm_f622ea1_dirty::{op_name}"

build/torch28-cxx11-cu126-x86_64-linux/layer_norm/layers.py
CHANGED

@@ -9,8 +9,8 @@ class LayerNorm(nn.Module):
    variance_epsilon: float

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
-
-            hidden_states,
+        output = ops.dropout_add_ln_fwd(
+            hidden_states.view(hidden_states.shape[0], -1),
            gamma = self.weight,
            beta = None,
            rowscale = None,
@@ -25,14 +25,15 @@ class LayerNorm(nn.Module):
            residual_in_fp32 = False,
            is_rms_norm = False,
        )
+        return output[0].view(hidden_states.shape)

class LlamaRMSNorm(nn.Module):
    weight: torch.Tensor
    variance_epsilon: float

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
-
-            hidden_states,
+        output = ops.dropout_add_ln_fwd(
+            hidden_states.view(hidden_states.shape[0], -1),
            gamma = self.weight,
            beta = None,
            rowscale = None,
@@ -46,4 +47,5 @@ class LlamaRMSNorm(nn.Module):
            gen = None,
            residual_in_fp32 = False,
            is_rms_norm = True,
-        )
+        )
+        return output[0].view(hidden_states.shape)

build/torch28-cxx11-cu128-x86_64-linux/layer_norm/_layer_norm_4e9c226_dirty.abi3.so
DELETED

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:69c897ea7e96a6988909ac3878f74baa2b598b0301a2ee3227f9f1c9804fb64d
-size 246756512

build/torch28-cxx11-cu128-x86_64-linux/layer_norm/_layer_norm_f622ea1_dirty.abi3.so
ADDED

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d51ec6b6da7095cf5fc18493eb4b0b1c20485f01dff4b38370979ea3d0a9dd60
+size 1231337968

build/torch28-cxx11-cu128-x86_64-linux/layer_norm/_ops.py
CHANGED

@@ -1,9 +1,9 @@
import torch
-from . import _layer_norm_4e9c226_dirty
-ops = torch.ops._layer_norm_4e9c226_dirty
+from . import _layer_norm_f622ea1_dirty
+ops = torch.ops._layer_norm_f622ea1_dirty

def add_op_namespace_prefix(op_name: str):
    """
    Prefix op by namespace.
    """
-    return f"_layer_norm_4e9c226_dirty::{op_name}"
+    return f"_layer_norm_f622ea1_dirty::{op_name}"

build/torch28-cxx11-cu128-x86_64-linux/layer_norm/layers.py
CHANGED

@@ -9,8 +9,8 @@ class LayerNorm(nn.Module):
    variance_epsilon: float

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
-
-            hidden_states,
+        output = ops.dropout_add_ln_fwd(
+            hidden_states.view(hidden_states.shape[0], -1),
            gamma = self.weight,
            beta = None,
            rowscale = None,
@@ -25,14 +25,15 @@ class LayerNorm(nn.Module):
            residual_in_fp32 = False,
            is_rms_norm = False,
        )
+        return output[0].view(hidden_states.shape)

class LlamaRMSNorm(nn.Module):
    weight: torch.Tensor
    variance_epsilon: float

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
-
-            hidden_states,
+        output = ops.dropout_add_ln_fwd(
+            hidden_states.view(hidden_states.shape[0], -1),
            gamma = self.weight,
            beta = None,
            rowscale = None,
@@ -46,4 +47,5 @@ class LlamaRMSNorm(nn.Module):
            gen = None,
            residual_in_fp32 = False,
            is_rms_norm = True,
-        )
+        )
+        return output[0].view(hidden_states.shape)

build/torch28-cxx11-cu129-x86_64-linux/layer_norm/_layer_norm_4e9c226_dirty.abi3.so
DELETED

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:594fd2ab65b273a4fee370bab7e03cb79cbc9c320eb37364466940a60ef154fa
-size 248443760

build/torch28-cxx11-cu129-x86_64-linux/layer_norm/_layer_norm_f622ea1_dirty.abi3.so
ADDED

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9080934ece3b5e09db6178b1baa15b8baf9f6873e234a951a2122071e1190fba
+size 1283037376

build/torch28-cxx11-cu129-x86_64-linux/layer_norm/_ops.py
CHANGED

@@ -1,9 +1,9 @@
import torch
-from . import _layer_norm_4e9c226_dirty
-ops = torch.ops._layer_norm_4e9c226_dirty
+from . import _layer_norm_f622ea1_dirty
+ops = torch.ops._layer_norm_f622ea1_dirty

def add_op_namespace_prefix(op_name: str):
    """
    Prefix op by namespace.
    """
-    return f"_layer_norm_4e9c226_dirty::{op_name}"
+    return f"_layer_norm_f622ea1_dirty::{op_name}"

build/torch28-cxx11-cu129-x86_64-linux/layer_norm/layers.py
CHANGED

@@ -9,8 +9,8 @@ class LayerNorm(nn.Module):
    variance_epsilon: float

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
-
-            hidden_states,
+        output = ops.dropout_add_ln_fwd(
+            hidden_states.view(hidden_states.shape[0], -1),
            gamma = self.weight,
            beta = None,
            rowscale = None,
@@ -25,14 +25,15 @@ class LayerNorm(nn.Module):
            residual_in_fp32 = False,
            is_rms_norm = False,
        )
+        return output[0].view(hidden_states.shape)

class LlamaRMSNorm(nn.Module):
    weight: torch.Tensor
    variance_epsilon: float

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
-
-            hidden_states,
+        output = ops.dropout_add_ln_fwd(
+            hidden_states.view(hidden_states.shape[0], -1),
            gamma = self.weight,
            beta = None,
            rowscale = None,
@@ -46,4 +47,5 @@ class LlamaRMSNorm(nn.Module):
            gen = None,
            residual_in_fp32 = False,
            is_rms_norm = True,
-        )
+        )
+        return output[0].view(hidden_states.shape)

torch-ext/layer_norm/_layer_norm_711aa42_dirty.abi3.so
DELETED

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:c824a0d2b400f4a89ccf293975ccfedc32733174dad4386a402149c440946674
-size 247782208

torch-ext/layer_norm/_ops.py
DELETED

@@ -1,9 +0,0 @@
-import torch
-from . import _layer_norm_711aa42_dirty
-ops = torch.ops._layer_norm_711aa42_dirty
-
-def add_op_namespace_prefix(op_name: str):
-    """
-    Prefix op by namespace.
-    """
-    return f"_layer_norm_711aa42_dirty::{op_name}"

torch-ext/layer_norm/layers.py
CHANGED

@@ -9,8 +9,8 @@ class LayerNorm(nn.Module):
    variance_epsilon: float

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
-
-            hidden_states,
+        output = ops.dropout_add_ln_fwd(
+            hidden_states.view(hidden_states.shape[0], -1),
            gamma = self.weight,
            beta = None,
            rowscale = None,
@@ -25,14 +25,15 @@ class LayerNorm(nn.Module):
            residual_in_fp32 = False,
            is_rms_norm = False,
        )
+        return output[0].view(hidden_states.shape)

class LlamaRMSNorm(nn.Module):
    weight: torch.Tensor
    variance_epsilon: float

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
-
-            hidden_states,
+        output = ops.dropout_add_ln_fwd(
+            hidden_states.view(hidden_states.shape[0], -1),
            gamma = self.weight,
            beta = None,
            rowscale = None,
@@ -46,4 +47,5 @@ class LlamaRMSNorm(nn.Module):
            gen = None,
            residual_in_fp32 = False,
            is_rms_norm = True,
-        )
+        )
+        return output[0].view(hidden_states.shape)

torch-ext/registration.h
DELETED

@@ -1,30 +0,0 @@
-// Registration macros from vLLM:
-// https://github.com/vllm-project/vllm/blob/main/csrc/core/registration.h
-
-#pragma once
-
-#include <Python.h>
-
-#define _CONCAT(A, B) A##B
-#define CONCAT(A, B) _CONCAT(A, B)
-
-#define _STRINGIFY(A) #A
-#define STRINGIFY(A) _STRINGIFY(A)
-
-// A version of the TORCH_LIBRARY macro that expands the NAME, i.e. so NAME
-// could be a macro instead of a literal token.
-#define TORCH_LIBRARY_EXPAND(NAME, MODULE) TORCH_LIBRARY(NAME, MODULE)
-
-// A version of the TORCH_LIBRARY_IMPL macro that expands the NAME, i.e. so NAME
-// could be a macro instead of a literal token.
-#define TORCH_LIBRARY_IMPL_EXPAND(NAME, DEVICE, MODULE) \
-  TORCH_LIBRARY_IMPL(NAME, DEVICE, MODULE)
-
-// REGISTER_EXTENSION allows the shared library to be loaded and initialized
-// via python's import statement.
-#define REGISTER_EXTENSION(NAME)                                               \
-  PyMODINIT_FUNC CONCAT(PyInit_, NAME)() {                                     \
-    static struct PyModuleDef module = {PyModuleDef_HEAD_INIT,                 \
-                                        STRINGIFY(NAME), nullptr, 0, nullptr}; \
-    return PyModule_Create(&module);                                           \
-  }
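
registration.h supplied the REGISTER_EXTENSION macro, which generates the PyInit_<NAME> entry point that makes each compiled .abi3.so importable as a Python module. A hedged sketch of the load path it enables; the layer_norm package path is an assumption, the module name matches this build:

import importlib

import torch

# Importing the shared object runs its TORCH_LIBRARY registration, which
# populates the torch.ops namespace that _ops.py reads from.
importlib.import_module("layer_norm._layer_norm_f622ea1_dirty")
ops = torch.ops._layer_norm_f622ea1_dirty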