First builds
Browse files- .gitattributes +1 -0
- build/torch27-cxx11-cu118-x86_64-linux/rmsnorm_kernel/__init__.py +21 -0
- build/torch27-cxx11-cu118-x86_64-linux/rmsnorm_kernel/__pycache__/__init__.cpython-313.pyc +0 -0
- build/torch27-cxx11-cu118-x86_64-linux/rmsnorm_kernel/__pycache__/_ops.cpython-313.pyc +0 -0
- build/torch27-cxx11-cu118-x86_64-linux/rmsnorm_kernel/_ops.py +9 -0
- build/torch27-cxx11-cu118-x86_64-linux/rmsnorm_kernel/_rmsnorm_kernel_538355f_dirty.abi3.so +3 -0
- build/torch27-cxx11-cu126-x86_64-linux/rmsnorm_kernel/__init__.py +21 -0
- build/torch27-cxx11-cu126-x86_64-linux/rmsnorm_kernel/__pycache__/__init__.cpython-313.pyc +0 -0
- build/torch27-cxx11-cu126-x86_64-linux/rmsnorm_kernel/__pycache__/_ops.cpython-313.pyc +0 -0
- build/torch27-cxx11-cu126-x86_64-linux/rmsnorm_kernel/_ops.py +9 -0
- build/torch27-cxx11-cu126-x86_64-linux/rmsnorm_kernel/_rmsnorm_kernel_538355f_dirty.abi3.so +3 -0
- build/torch27-cxx11-cu128-x86_64-linux/rmsnorm_kernel/__init__.py +21 -0
- build/torch27-cxx11-cu128-x86_64-linux/rmsnorm_kernel/__pycache__/__init__.cpython-313.pyc +0 -0
- build/torch27-cxx11-cu128-x86_64-linux/rmsnorm_kernel/__pycache__/_ops.cpython-313.pyc +0 -0
- build/torch27-cxx11-cu128-x86_64-linux/rmsnorm_kernel/_ops.py +9 -0
- build/torch27-cxx11-cu128-x86_64-linux/rmsnorm_kernel/_rmsnorm_kernel_538355f_dirty.abi3.so +3 -0
- build/torch28-cxx11-cu126-x86_64-linux/rmsnorm_kernel/__init__.py +21 -0
- build/torch28-cxx11-cu126-x86_64-linux/rmsnorm_kernel/__pycache__/__init__.cpython-313.pyc +0 -0
- build/torch28-cxx11-cu126-x86_64-linux/rmsnorm_kernel/__pycache__/_ops.cpython-313.pyc +0 -0
- build/torch28-cxx11-cu126-x86_64-linux/rmsnorm_kernel/_ops.py +9 -0
- build/torch28-cxx11-cu126-x86_64-linux/rmsnorm_kernel/_rmsnorm_kernel_538355f_dirty.abi3.so +3 -0
- build/torch28-cxx11-cu128-x86_64-linux/rmsnorm_kernel/__init__.py +21 -0
- build/torch28-cxx11-cu128-x86_64-linux/rmsnorm_kernel/__pycache__/__init__.cpython-313.pyc +0 -0
- build/torch28-cxx11-cu128-x86_64-linux/rmsnorm_kernel/__pycache__/_ops.cpython-313.pyc +0 -0
- build/torch28-cxx11-cu128-x86_64-linux/rmsnorm_kernel/_ops.py +9 -0
- build/torch28-cxx11-cu128-x86_64-linux/rmsnorm_kernel/_rmsnorm_kernel_538355f_dirty.abi3.so +3 -0
- build/torch28-cxx11-cu129-x86_64-linux/rmsnorm_kernel/__init__.py +21 -0
- build/torch28-cxx11-cu129-x86_64-linux/rmsnorm_kernel/__pycache__/__init__.cpython-313.pyc +0 -0
- build/torch28-cxx11-cu129-x86_64-linux/rmsnorm_kernel/__pycache__/_ops.cpython-313.pyc +0 -0
- build/torch28-cxx11-cu129-x86_64-linux/rmsnorm_kernel/_ops.py +9 -0
- build/torch28-cxx11-cu129-x86_64-linux/rmsnorm_kernel/_rmsnorm_kernel_538355f_dirty.abi3.so +3 -0
- rmsnorm_kernel/rmsnorm.cu +2 -2
- torch-ext/torch_binding.h +1 -1
.gitattributes
CHANGED
|
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
*.so filter=lfs diff=lfs merge=lfs -text
|
build/torch27-cxx11-cu118-x86_64-linux/rmsnorm_kernel/__init__.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
|
| 4 |
+
from ._ops import ops
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
class LlamaRMSNorm(nn.Module):
|
| 8 |
+
weight: torch.Tensor
|
| 9 |
+
variance_epsilon: float
|
| 10 |
+
|
| 11 |
+
def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
|
| 12 |
+
return ops.rmsnorm_forward(
|
| 13 |
+
hidden_states,
|
| 14 |
+
self.weight,
|
| 15 |
+
bias=None,
|
| 16 |
+
residual=None,
|
| 17 |
+
eps=self.variance_epsilon,
|
| 18 |
+
dropout_p=0.0,
|
| 19 |
+
prenorm=False,
|
| 20 |
+
residual_in_fp32=False,
|
| 21 |
+
)
|
build/torch27-cxx11-cu118-x86_64-linux/rmsnorm_kernel/__pycache__/__init__.cpython-313.pyc
ADDED
|
Binary file (1.13 kB). View file
|
|
|
build/torch27-cxx11-cu118-x86_64-linux/rmsnorm_kernel/__pycache__/_ops.cpython-313.pyc
ADDED
|
Binary file (550 Bytes). View file
|
|
|
build/torch27-cxx11-cu118-x86_64-linux/rmsnorm_kernel/_ops.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
from . import _rmsnorm_kernel_538355f_dirty
|
| 3 |
+
ops = torch.ops._rmsnorm_kernel_538355f_dirty
|
| 4 |
+
|
| 5 |
+
def add_op_namespace_prefix(op_name: str):
|
| 6 |
+
"""
|
| 7 |
+
Prefix op by namespace.
|
| 8 |
+
"""
|
| 9 |
+
return f"_rmsnorm_kernel_538355f_dirty::{op_name}"
|
build/torch27-cxx11-cu118-x86_64-linux/rmsnorm_kernel/_rmsnorm_kernel_538355f_dirty.abi3.so
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9f5117ccb9a81d1c1b5330385a72dd2d17eadd56c2f87d584d5b7259d02715cb
|
| 3 |
+
size 2111512
|
build/torch27-cxx11-cu126-x86_64-linux/rmsnorm_kernel/__init__.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
|
| 4 |
+
from ._ops import ops
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
class LlamaRMSNorm(nn.Module):
|
| 8 |
+
weight: torch.Tensor
|
| 9 |
+
variance_epsilon: float
|
| 10 |
+
|
| 11 |
+
def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
|
| 12 |
+
return ops.rmsnorm_forward(
|
| 13 |
+
hidden_states,
|
| 14 |
+
self.weight,
|
| 15 |
+
bias=None,
|
| 16 |
+
residual=None,
|
| 17 |
+
eps=self.variance_epsilon,
|
| 18 |
+
dropout_p=0.0,
|
| 19 |
+
prenorm=False,
|
| 20 |
+
residual_in_fp32=False,
|
| 21 |
+
)
|
build/torch27-cxx11-cu126-x86_64-linux/rmsnorm_kernel/__pycache__/__init__.cpython-313.pyc
ADDED
|
Binary file (1.13 kB). View file
|
|
|
build/torch27-cxx11-cu126-x86_64-linux/rmsnorm_kernel/__pycache__/_ops.cpython-313.pyc
ADDED
|
Binary file (550 Bytes). View file
|
|
|
build/torch27-cxx11-cu126-x86_64-linux/rmsnorm_kernel/_ops.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
from . import _rmsnorm_kernel_538355f_dirty
|
| 3 |
+
ops = torch.ops._rmsnorm_kernel_538355f_dirty
|
| 4 |
+
|
| 5 |
+
def add_op_namespace_prefix(op_name: str):
|
| 6 |
+
"""
|
| 7 |
+
Prefix op by namespace.
|
| 8 |
+
"""
|
| 9 |
+
return f"_rmsnorm_kernel_538355f_dirty::{op_name}"
|
build/torch27-cxx11-cu126-x86_64-linux/rmsnorm_kernel/_rmsnorm_kernel_538355f_dirty.abi3.so
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bced7bf4cd62415785bb30e3808e907246341d4e63572e70f22397258a022a41
|
| 3 |
+
size 2250040
|
build/torch27-cxx11-cu128-x86_64-linux/rmsnorm_kernel/__init__.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
|
| 4 |
+
from ._ops import ops
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
class LlamaRMSNorm(nn.Module):
|
| 8 |
+
weight: torch.Tensor
|
| 9 |
+
variance_epsilon: float
|
| 10 |
+
|
| 11 |
+
def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
|
| 12 |
+
return ops.rmsnorm_forward(
|
| 13 |
+
hidden_states,
|
| 14 |
+
self.weight,
|
| 15 |
+
bias=None,
|
| 16 |
+
residual=None,
|
| 17 |
+
eps=self.variance_epsilon,
|
| 18 |
+
dropout_p=0.0,
|
| 19 |
+
prenorm=False,
|
| 20 |
+
residual_in_fp32=False,
|
| 21 |
+
)
|
build/torch27-cxx11-cu128-x86_64-linux/rmsnorm_kernel/__pycache__/__init__.cpython-313.pyc
ADDED
|
Binary file (1.13 kB). View file
|
|
|
build/torch27-cxx11-cu128-x86_64-linux/rmsnorm_kernel/__pycache__/_ops.cpython-313.pyc
ADDED
|
Binary file (550 Bytes). View file
|
|
|
build/torch27-cxx11-cu128-x86_64-linux/rmsnorm_kernel/_ops.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
from . import _rmsnorm_kernel_538355f_dirty
|
| 3 |
+
ops = torch.ops._rmsnorm_kernel_538355f_dirty
|
| 4 |
+
|
| 5 |
+
def add_op_namespace_prefix(op_name: str):
|
| 6 |
+
"""
|
| 7 |
+
Prefix op by namespace.
|
| 8 |
+
"""
|
| 9 |
+
return f"_rmsnorm_kernel_538355f_dirty::{op_name}"
|
build/torch27-cxx11-cu128-x86_64-linux/rmsnorm_kernel/_rmsnorm_kernel_538355f_dirty.abi3.so
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2b1eae77e817b754c22ccf4828f0f022866b97c4815059edc0f2a7ca13f5a82f
|
| 3 |
+
size 2506984
|
build/torch28-cxx11-cu126-x86_64-linux/rmsnorm_kernel/__init__.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
|
| 4 |
+
from ._ops import ops
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
class LlamaRMSNorm(nn.Module):
|
| 8 |
+
weight: torch.Tensor
|
| 9 |
+
variance_epsilon: float
|
| 10 |
+
|
| 11 |
+
def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
|
| 12 |
+
return ops.rmsnorm_forward(
|
| 13 |
+
hidden_states,
|
| 14 |
+
self.weight,
|
| 15 |
+
bias=None,
|
| 16 |
+
residual=None,
|
| 17 |
+
eps=self.variance_epsilon,
|
| 18 |
+
dropout_p=0.0,
|
| 19 |
+
prenorm=False,
|
| 20 |
+
residual_in_fp32=False,
|
| 21 |
+
)
|
build/torch28-cxx11-cu126-x86_64-linux/rmsnorm_kernel/__pycache__/__init__.cpython-313.pyc
ADDED
|
Binary file (1.13 kB). View file
|
|
|
build/torch28-cxx11-cu126-x86_64-linux/rmsnorm_kernel/__pycache__/_ops.cpython-313.pyc
ADDED
|
Binary file (550 Bytes). View file
|
|
|
build/torch28-cxx11-cu126-x86_64-linux/rmsnorm_kernel/_ops.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
from . import _rmsnorm_kernel_538355f_dirty
|
| 3 |
+
ops = torch.ops._rmsnorm_kernel_538355f_dirty
|
| 4 |
+
|
| 5 |
+
def add_op_namespace_prefix(op_name: str):
|
| 6 |
+
"""
|
| 7 |
+
Prefix op by namespace.
|
| 8 |
+
"""
|
| 9 |
+
return f"_rmsnorm_kernel_538355f_dirty::{op_name}"
|
build/torch28-cxx11-cu126-x86_64-linux/rmsnorm_kernel/_rmsnorm_kernel_538355f_dirty.abi3.so
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d17e952ae2d5df0c7f4357b8bfbdc485c06750274dbadeda9e143272f3656332
|
| 3 |
+
size 2198136
|
build/torch28-cxx11-cu128-x86_64-linux/rmsnorm_kernel/__init__.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
|
| 4 |
+
from ._ops import ops
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
class LlamaRMSNorm(nn.Module):
|
| 8 |
+
weight: torch.Tensor
|
| 9 |
+
variance_epsilon: float
|
| 10 |
+
|
| 11 |
+
def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
|
| 12 |
+
return ops.rmsnorm_forward(
|
| 13 |
+
hidden_states,
|
| 14 |
+
self.weight,
|
| 15 |
+
bias=None,
|
| 16 |
+
residual=None,
|
| 17 |
+
eps=self.variance_epsilon,
|
| 18 |
+
dropout_p=0.0,
|
| 19 |
+
prenorm=False,
|
| 20 |
+
residual_in_fp32=False,
|
| 21 |
+
)
|
build/torch28-cxx11-cu128-x86_64-linux/rmsnorm_kernel/__pycache__/__init__.cpython-313.pyc
ADDED
|
Binary file (1.13 kB). View file
|
|
|
build/torch28-cxx11-cu128-x86_64-linux/rmsnorm_kernel/__pycache__/_ops.cpython-313.pyc
ADDED
|
Binary file (550 Bytes). View file
|
|
|
build/torch28-cxx11-cu128-x86_64-linux/rmsnorm_kernel/_ops.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
from . import _rmsnorm_kernel_538355f_dirty
|
| 3 |
+
ops = torch.ops._rmsnorm_kernel_538355f_dirty
|
| 4 |
+
|
| 5 |
+
def add_op_namespace_prefix(op_name: str):
|
| 6 |
+
"""
|
| 7 |
+
Prefix op by namespace.
|
| 8 |
+
"""
|
| 9 |
+
return f"_rmsnorm_kernel_538355f_dirty::{op_name}"
|
build/torch28-cxx11-cu128-x86_64-linux/rmsnorm_kernel/_rmsnorm_kernel_538355f_dirty.abi3.so
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ff6d1ec9ceead35e32eca260b38a3828167e32f5d911f50cc9752533f81b5d40
|
| 3 |
+
size 2446200
|
build/torch28-cxx11-cu129-x86_64-linux/rmsnorm_kernel/__init__.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
|
| 4 |
+
from ._ops import ops
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
class LlamaRMSNorm(nn.Module):
|
| 8 |
+
weight: torch.Tensor
|
| 9 |
+
variance_epsilon: float
|
| 10 |
+
|
| 11 |
+
def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
|
| 12 |
+
return ops.rmsnorm_forward(
|
| 13 |
+
hidden_states,
|
| 14 |
+
self.weight,
|
| 15 |
+
bias=None,
|
| 16 |
+
residual=None,
|
| 17 |
+
eps=self.variance_epsilon,
|
| 18 |
+
dropout_p=0.0,
|
| 19 |
+
prenorm=False,
|
| 20 |
+
residual_in_fp32=False,
|
| 21 |
+
)
|
build/torch28-cxx11-cu129-x86_64-linux/rmsnorm_kernel/__pycache__/__init__.cpython-313.pyc
ADDED
|
Binary file (1.13 kB). View file
|
|
|
build/torch28-cxx11-cu129-x86_64-linux/rmsnorm_kernel/__pycache__/_ops.cpython-313.pyc
ADDED
|
Binary file (550 Bytes). View file
|
|
|
build/torch28-cxx11-cu129-x86_64-linux/rmsnorm_kernel/_ops.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
from . import _rmsnorm_kernel_538355f_dirty
|
| 3 |
+
ops = torch.ops._rmsnorm_kernel_538355f_dirty
|
| 4 |
+
|
| 5 |
+
def add_op_namespace_prefix(op_name: str):
|
| 6 |
+
"""
|
| 7 |
+
Prefix op by namespace.
|
| 8 |
+
"""
|
| 9 |
+
return f"_rmsnorm_kernel_538355f_dirty::{op_name}"
|
build/torch28-cxx11-cu129-x86_64-linux/rmsnorm_kernel/_rmsnorm_kernel_538355f_dirty.abi3.so
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:92b07efad367bee6cbd2371081bbfdf9a53d4fe41b9ebef76250c6a36dba8ad3
|
| 3 |
+
size 2458896
|
rmsnorm_kernel/rmsnorm.cu
CHANGED
|
@@ -65,7 +65,7 @@ struct RmsnormFunctor {
|
|
| 65 |
}
|
| 66 |
};
|
| 67 |
|
| 68 |
-
torch::Tensor rmsnorm_forward(torch::Tensor x, torch::Tensor gamma) {
|
| 69 |
int B = x.size(0), S = x.size(1), H = x.size(2);
|
| 70 |
int rows = B * S;
|
| 71 |
|
|
@@ -160,4 +160,4 @@ torch::Tensor rmsnorm_forward(torch::Tensor x, torch::Tensor gamma) {
|
|
| 160 |
// std::cout << (max_diff < 1e-5 ? "PASSED: Results match!" : "FAILED: Results don't match!") << std::endl;
|
| 161 |
|
| 162 |
// return 0;
|
| 163 |
-
// }
|
|
|
|
| 65 |
}
|
| 66 |
};
|
| 67 |
|
| 68 |
+
torch::Tensor rmsnorm_forward(torch::Tensor &x, torch::Tensor &gamma) {
|
| 69 |
int B = x.size(0), S = x.size(1), H = x.size(2);
|
| 70 |
int rows = B * S;
|
| 71 |
|
|
|
|
| 160 |
// std::cout << (max_diff < 1e-5 ? "PASSED: Results match!" : "FAILED: Results don't match!") << std::endl;
|
| 161 |
|
| 162 |
// return 0;
|
| 163 |
+
// }
|
torch-ext/torch_binding.h
CHANGED
|
@@ -2,4 +2,4 @@
|
|
| 2 |
|
| 3 |
#include <torch/torch.h>
|
| 4 |
|
| 5 |
-
torch::Tensor rmsnorm_forward(torch::Tensor
|
|
|
|
| 2 |
|
| 3 |
#include <torch/torch.h>
|
| 4 |
|
| 5 |
+
torch::Tensor rmsnorm_forward(torch::Tensor &input, torch::Tensor &gamma);
|