Instructions to use odyssey-systems/layer-norm with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Kernels
How to use odyssey-systems/layer-norm with Kernels:
# !pip install kernels
from kernels import get_kernel
kernel = get_kernel("odyssey-systems/layer-norm")
- Notebooks
- Google Colab
- Kaggle
graft torch211-cu130 builds (legacy flash-attn signature, sm80/90/100/120)
Browse files
- .gitattributes +2 -0
- build/torch211-cxx11-cu130-aarch64-linux/__init__.py +26 -14
- build/torch211-cxx11-cu130-aarch64-linux/dropout_layer_norm.cpython-312-aarch64-linux-gnu.so +3 -0
- build/torch211-cxx11-cu130-aarch64-linux/metadata.json +3 -6
- build/torch211-cxx11-cu130-x86_64-linux/__init__.py +26 -14
- build/torch211-cxx11-cu130-x86_64-linux/__pycache__/__init__.cpython-312.pyc +0 -0
- build/torch211-cxx11-cu130-x86_64-linux/dropout_layer_norm.cpython-312-x86_64-linux-gnu.so +3 -0
- build/torch211-cxx11-cu130-x86_64-linux/layer_norm/__pycache__/__init__.cpython-312.pyc +0 -0
- build/torch211-cxx11-cu130-x86_64-linux/metadata.json +3 -6
.gitattributes
CHANGED
|
@@ -107,3 +107,5 @@ build/torch212-cxx11-cu130-x86_64-linux/_layer_norm_cuda_73ccd0c.abi3.so filter=
|
|
| 107 |
build/torch212-cxx11-cu132-x86_64-linux/_layer_norm_cuda_73ccd0c.abi3.so filter=lfs diff=lfs merge=lfs -text
|
| 108 |
build/torch29-cxx11-cu128-aarch64-linux/dropout_layer_norm.cpython-312-aarch64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 109 |
build/torch29-cxx11-cu128-x86_64-linux/dropout_layer_norm.cpython-312-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
| 107 |
build/torch212-cxx11-cu132-x86_64-linux/_layer_norm_cuda_73ccd0c.abi3.so filter=lfs diff=lfs merge=lfs -text
|
| 108 |
build/torch29-cxx11-cu128-aarch64-linux/dropout_layer_norm.cpython-312-aarch64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 109 |
build/torch29-cxx11-cu128-x86_64-linux/dropout_layer_norm.cpython-312-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 110 |
+
build/torch211-cxx11-cu130-aarch64-linux/dropout_layer_norm.cpython-312-aarch64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 111 |
+
build/torch211-cxx11-cu130-x86_64-linux/dropout_layer_norm.cpython-312-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
build/torch211-cxx11-cu130-aarch64-linux/__init__.py
CHANGED
|
@@ -1,26 +1,38 @@
|
|
| 1 |
-
|
| 2 |
-
import torch.nn as nn
|
| 3 |
|
| 4 |
-
|
|
|
|
|
|
|
|
|
|
| 5 |
|
| 6 |
-
|
|
|
|
|
|
|
|
|
|
| 7 |
|
| 8 |
-
|
| 9 |
-
|
| 10 |
|
| 11 |
-
|
| 12 |
-
|
|
|
|
| 13 |
|
| 14 |
-
|
| 15 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
|
| 17 |
-
|
| 18 |
-
|
|
|
|
|
|
|
| 19 |
|
| 20 |
__all__ = [
|
| 21 |
-
"
|
| 22 |
"dropout_add_ln_fwd",
|
| 23 |
"dropout_add_ln_bwd",
|
| 24 |
"dropout_add_ln_parallel_residual_fwd",
|
| 25 |
"dropout_add_ln_parallel_residual_bwd",
|
| 26 |
-
]
|
|
|
|
| 1 |
+
"""torch 2.9 / cu12.8 build variant grafted from the odysseyml flash-attention fork.
|
|
|
|
| 2 |
|
| 3 |
+
Wraps the pre-built ``dropout_layer_norm`` extension from the
|
| 4 |
+
``odyssey-fused-kernels`` wheel (tag ``odyssey-v2.8.3-fused-1``, built for
|
| 5 |
+
sm_80/90/100/120) because upstream kernels-community/layer-norm only provides
|
| 6 |
+
a cu129 build for torch 2.9.
|
| 7 |
|
| 8 |
+
Unlike the kernels-community builds (which drop ``residual`` from the fused
|
| 9 |
+
op signatures), this build keeps flash-attention's original signature with
|
| 10 |
+
``residual`` as the second argument. Consumers can check ``SUPPORTS_RESIDUAL``.
|
| 11 |
+
"""
|
| 12 |
|
| 13 |
+
import importlib.util
|
| 14 |
+
from pathlib import Path
|
| 15 |
|
| 16 |
+
# True: kernels with flash-attention's original signature: fwd takes
|
| 17 |
+
# (x0, residual, gamma, beta, ...) and bwd is supported.
|
| 18 |
+
SUPPORTS_RESIDUAL = True
|
| 19 |
|
| 20 |
+
# The extension's PyInit_* symbol is derived from the module name passed to
|
| 21 |
+
# the loader, so the spec name must exactly equal the .so module name.
|
| 22 |
+
_so_path = next(Path(__file__).parent.glob("dropout_layer_norm*.so"))
|
| 23 |
+
_spec = importlib.util.spec_from_file_location("dropout_layer_norm", _so_path)
|
| 24 |
+
_ext = importlib.util.module_from_spec(_spec)
|
| 25 |
+
_spec.loader.exec_module(_ext)
|
| 26 |
|
| 27 |
+
dropout_add_ln_fwd = _ext.dropout_add_ln_fwd
|
| 28 |
+
dropout_add_ln_bwd = _ext.dropout_add_ln_bwd
|
| 29 |
+
dropout_add_ln_parallel_residual_fwd = _ext.dropout_add_ln_parallel_residual_fwd
|
| 30 |
+
dropout_add_ln_parallel_residual_bwd = _ext.dropout_add_ln_parallel_residual_bwd
|
| 31 |
|
| 32 |
__all__ = [
|
| 33 |
+
"SUPPORTS_RESIDUAL",
|
| 34 |
"dropout_add_ln_fwd",
|
| 35 |
"dropout_add_ln_bwd",
|
| 36 |
"dropout_add_ln_parallel_residual_fwd",
|
| 37 |
"dropout_add_ln_parallel_residual_bwd",
|
| 38 |
+
]
|
build/torch211-cxx11-cu130-aarch64-linux/dropout_layer_norm.cpython-312-aarch64-linux-gnu.so
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d074846723309a009d9624dda9c55847a05299e0c9492d6e15add2c6c3eb1bb1
|
| 3 |
+
size 1017902136
|
build/torch211-cxx11-cu130-aarch64-linux/metadata.json
CHANGED
|
@@ -1,17 +1,14 @@
|
|
| 1 |
{
|
| 2 |
-
"name": "layer-norm",
|
| 3 |
-
"id": "_layer_norm_cuda_73ccd0c",
|
| 4 |
"version": 1,
|
| 5 |
"license": "BSD-3-Clause",
|
| 6 |
"python-depends": [],
|
| 7 |
"backend": {
|
| 8 |
"type": "cuda",
|
| 9 |
"archs": [
|
| 10 |
-
"10.0",
|
| 11 |
-
"12.0",
|
| 12 |
"8.0",
|
| 13 |
-
"
|
| 14 |
-
"
|
|
|
|
| 15 |
]
|
| 16 |
}
|
| 17 |
}
|
|
|
|
| 1 |
{
|
|
|
|
|
|
|
| 2 |
"version": 1,
|
| 3 |
"license": "BSD-3-Clause",
|
| 4 |
"python-depends": [],
|
| 5 |
"backend": {
|
| 6 |
"type": "cuda",
|
| 7 |
"archs": [
|
|
|
|
|
|
|
| 8 |
"8.0",
|
| 9 |
+
"9.0",
|
| 10 |
+
"10.0",
|
| 11 |
+
"12.0"
|
| 12 |
]
|
| 13 |
}
|
| 14 |
}
|
build/torch211-cxx11-cu130-x86_64-linux/__init__.py
CHANGED
|
@@ -1,26 +1,38 @@
|
|
| 1 |
-
|
| 2 |
-
import torch.nn as nn
|
| 3 |
|
| 4 |
-
|
|
|
|
|
|
|
|
|
|
| 5 |
|
| 6 |
-
|
|
|
|
|
|
|
|
|
|
| 7 |
|
| 8 |
-
|
| 9 |
-
|
| 10 |
|
| 11 |
-
|
| 12 |
-
|
|
|
|
| 13 |
|
| 14 |
-
|
| 15 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
|
| 17 |
-
|
| 18 |
-
|
|
|
|
|
|
|
| 19 |
|
| 20 |
__all__ = [
|
| 21 |
-
"
|
| 22 |
"dropout_add_ln_fwd",
|
| 23 |
"dropout_add_ln_bwd",
|
| 24 |
"dropout_add_ln_parallel_residual_fwd",
|
| 25 |
"dropout_add_ln_parallel_residual_bwd",
|
| 26 |
-
]
|
|
|
|
| 1 |
+
"""torch 2.9 / cu12.8 build variant grafted from the odysseyml flash-attention fork.
|
|
|
|
| 2 |
|
| 3 |
+
Wraps the pre-built ``dropout_layer_norm`` extension from the
|
| 4 |
+
``odyssey-fused-kernels`` wheel (tag ``odyssey-v2.8.3-fused-1``, built for
|
| 5 |
+
sm_80/90/100/120) because upstream kernels-community/layer-norm only provides
|
| 6 |
+
a cu129 build for torch 2.9.
|
| 7 |
|
| 8 |
+
Unlike the kernels-community builds (which drop ``residual`` from the fused
|
| 9 |
+
op signatures), this build keeps flash-attention's original signature with
|
| 10 |
+
``residual`` as the second argument. Consumers can check ``SUPPORTS_RESIDUAL``.
|
| 11 |
+
"""
|
| 12 |
|
| 13 |
+
import importlib.util
|
| 14 |
+
from pathlib import Path
|
| 15 |
|
| 16 |
+
# True: kernels with flash-attention's original signature: fwd takes
|
| 17 |
+
# (x0, residual, gamma, beta, ...) and bwd is supported.
|
| 18 |
+
SUPPORTS_RESIDUAL = True
|
| 19 |
|
| 20 |
+
# The extension's PyInit_* symbol is derived from the module name passed to
|
| 21 |
+
# the loader, so the spec name must exactly equal the .so module name.
|
| 22 |
+
_so_path = next(Path(__file__).parent.glob("dropout_layer_norm*.so"))
|
| 23 |
+
_spec = importlib.util.spec_from_file_location("dropout_layer_norm", _so_path)
|
| 24 |
+
_ext = importlib.util.module_from_spec(_spec)
|
| 25 |
+
_spec.loader.exec_module(_ext)
|
| 26 |
|
| 27 |
+
dropout_add_ln_fwd = _ext.dropout_add_ln_fwd
|
| 28 |
+
dropout_add_ln_bwd = _ext.dropout_add_ln_bwd
|
| 29 |
+
dropout_add_ln_parallel_residual_fwd = _ext.dropout_add_ln_parallel_residual_fwd
|
| 30 |
+
dropout_add_ln_parallel_residual_bwd = _ext.dropout_add_ln_parallel_residual_bwd
|
| 31 |
|
| 32 |
__all__ = [
|
| 33 |
+
"SUPPORTS_RESIDUAL",
|
| 34 |
"dropout_add_ln_fwd",
|
| 35 |
"dropout_add_ln_bwd",
|
| 36 |
"dropout_add_ln_parallel_residual_fwd",
|
| 37 |
"dropout_add_ln_parallel_residual_bwd",
|
| 38 |
+
]
|
build/torch211-cxx11-cu130-x86_64-linux/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (1.69 kB). View file
|
|
|
build/torch211-cxx11-cu130-x86_64-linux/dropout_layer_norm.cpython-312-x86_64-linux-gnu.so
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fe763019c15a09480c5f154293f80e3d2db79f0039ac041a8d58ccf75e80eb60
|
| 3 |
+
size 1021207608
|
build/torch211-cxx11-cu130-x86_64-linux/layer_norm/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (1.61 kB). View file
|
|
|
build/torch211-cxx11-cu130-x86_64-linux/metadata.json
CHANGED
|
@@ -1,17 +1,14 @@
|
|
| 1 |
{
|
| 2 |
-
"name": "layer-norm",
|
| 3 |
-
"id": "_layer_norm_cuda_73ccd0c",
|
| 4 |
"version": 1,
|
| 5 |
"license": "BSD-3-Clause",
|
| 6 |
"python-depends": [],
|
| 7 |
"backend": {
|
| 8 |
"type": "cuda",
|
| 9 |
"archs": [
|
| 10 |
-
"10.0",
|
| 11 |
-
"12.0",
|
| 12 |
"8.0",
|
| 13 |
-
"
|
| 14 |
-
"
|
|
|
|
| 15 |
]
|
| 16 |
}
|
| 17 |
}
|
|
|
|
| 1 |
{
|
|
|
|
|
|
|
| 2 |
"version": 1,
|
| 3 |
"license": "BSD-3-Clause",
|
| 4 |
"python-depends": [],
|
| 5 |
"backend": {
|
| 6 |
"type": "cuda",
|
| 7 |
"archs": [
|
|
|
|
|
|
|
| 8 |
"8.0",
|
| 9 |
+
"9.0",
|
| 10 |
+
"10.0",
|
| 11 |
+
"12.0"
|
| 12 |
]
|
| 13 |
}
|
| 14 |
}
|