medmekk committed on Oct 13, 2025

Commit

55ce07b

verified ·

1 Parent(s): 67c9ab5

Upload folder using huggingface_hub

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.gitattributes +9 -0
README.md +1 -0
build.toml +20 -0
build/torch27-cxx11-cu118-x86_64-linux/mra/__init__.py +25 -0
build/torch27-cxx11-cu118-x86_64-linux/mra/__pycache__/__init__.cpython-313.pyc +0 -0
build/torch27-cxx11-cu118-x86_64-linux/mra/__pycache__/_ops.cpython-313.pyc +0 -0
build/torch27-cxx11-cu118-x86_64-linux/mra/_mra_e8307c7_dirty.abi3.so +3 -0
build/torch27-cxx11-cu118-x86_64-linux/mra/_ops.py +9 -0
build/torch27-cxx11-cu126-x86_64-linux/mra/__init__.py +25 -0
build/torch27-cxx11-cu126-x86_64-linux/mra/__pycache__/__init__.cpython-313.pyc +0 -0
build/torch27-cxx11-cu126-x86_64-linux/mra/__pycache__/_ops.cpython-313.pyc +0 -0
build/torch27-cxx11-cu126-x86_64-linux/mra/_mra_e8307c7_dirty.abi3.so +3 -0
build/torch27-cxx11-cu126-x86_64-linux/mra/_ops.py +9 -0
build/torch27-cxx11-cu128-x86_64-linux/mra/__init__.py +25 -0
build/torch27-cxx11-cu128-x86_64-linux/mra/__pycache__/__init__.cpython-313.pyc +0 -0
build/torch27-cxx11-cu128-x86_64-linux/mra/__pycache__/_ops.cpython-313.pyc +0 -0
build/torch27-cxx11-cu128-x86_64-linux/mra/_mra_e8307c7_dirty.abi3.so +3 -0
build/torch27-cxx11-cu128-x86_64-linux/mra/_ops.py +9 -0
build/torch28-cxx11-cu126-x86_64-linux/mra/__init__.py +25 -0
build/torch28-cxx11-cu126-x86_64-linux/mra/__pycache__/__init__.cpython-313.pyc +0 -0
build/torch28-cxx11-cu126-x86_64-linux/mra/__pycache__/_ops.cpython-313.pyc +0 -0
build/torch28-cxx11-cu126-x86_64-linux/mra/_mra_e8307c7_dirty.abi3.so +3 -0
build/torch28-cxx11-cu126-x86_64-linux/mra/_ops.py +9 -0
build/torch28-cxx11-cu128-x86_64-linux/mra/__init__.py +25 -0
build/torch28-cxx11-cu128-x86_64-linux/mra/__pycache__/__init__.cpython-313.pyc +0 -0
build/torch28-cxx11-cu128-x86_64-linux/mra/__pycache__/_ops.cpython-313.pyc +0 -0
build/torch28-cxx11-cu128-x86_64-linux/mra/_mra_e8307c7_dirty.abi3.so +3 -0
build/torch28-cxx11-cu128-x86_64-linux/mra/_ops.py +9 -0
build/torch28-cxx11-cu129-x86_64-linux/mra/__init__.py +25 -0
build/torch28-cxx11-cu129-x86_64-linux/mra/__pycache__/__init__.cpython-313.pyc +0 -0
build/torch28-cxx11-cu129-x86_64-linux/mra/__pycache__/_ops.cpython-313.pyc +0 -0
build/torch28-cxx11-cu129-x86_64-linux/mra/_mra_e8307c7_dirty.abi3.so +3 -0
build/torch28-cxx11-cu129-x86_64-linux/mra/_ops.py +9 -0
build/torch29-cxx11-cu126-x86_64-linux/mra/__init__.py +25 -0
build/torch29-cxx11-cu126-x86_64-linux/mra/__pycache__/__init__.cpython-313.pyc +0 -0
build/torch29-cxx11-cu126-x86_64-linux/mra/__pycache__/_ops.cpython-313.pyc +0 -0
build/torch29-cxx11-cu126-x86_64-linux/mra/_mra_e8307c7_dirty.abi3.so +3 -0
build/torch29-cxx11-cu126-x86_64-linux/mra/_ops.py +9 -0
build/torch29-cxx11-cu128-x86_64-linux/mra/__init__.py +25 -0
build/torch29-cxx11-cu128-x86_64-linux/mra/__pycache__/__init__.cpython-313.pyc +0 -0
build/torch29-cxx11-cu128-x86_64-linux/mra/__pycache__/_ops.cpython-313.pyc +0 -0
build/torch29-cxx11-cu128-x86_64-linux/mra/_mra_e8307c7_dirty.abi3.so +3 -0
build/torch29-cxx11-cu128-x86_64-linux/mra/_ops.py +9 -0
build/torch29-cxx11-cu130-x86_64-linux/mra/__init__.py +25 -0
build/torch29-cxx11-cu130-x86_64-linux/mra/__pycache__/__init__.cpython-313.pyc +0 -0
build/torch29-cxx11-cu130-x86_64-linux/mra/__pycache__/_ops.cpython-313.pyc +0 -0
build/torch29-cxx11-cu130-x86_64-linux/mra/_mra_e8307c7_dirty.abi3.so +3 -0
build/torch29-cxx11-cu130-x86_64-linux/mra/_ops.py +9 -0
flake.lock +168 -0
flake.nix +17 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,12 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+build/torch27-cxx11-cu118-x86_64-linux/mra/_mra_e8307c7_dirty.abi3.so filter=lfs diff=lfs merge=lfs -text
+build/torch27-cxx11-cu126-x86_64-linux/mra/_mra_e8307c7_dirty.abi3.so filter=lfs diff=lfs merge=lfs -text
+build/torch27-cxx11-cu128-x86_64-linux/mra/_mra_e8307c7_dirty.abi3.so filter=lfs diff=lfs merge=lfs -text
+build/torch28-cxx11-cu126-x86_64-linux/mra/_mra_e8307c7_dirty.abi3.so filter=lfs diff=lfs merge=lfs -text
+build/torch28-cxx11-cu128-x86_64-linux/mra/_mra_e8307c7_dirty.abi3.so filter=lfs diff=lfs merge=lfs -text
+build/torch28-cxx11-cu129-x86_64-linux/mra/_mra_e8307c7_dirty.abi3.so filter=lfs diff=lfs merge=lfs -text
+build/torch29-cxx11-cu126-x86_64-linux/mra/_mra_e8307c7_dirty.abi3.so filter=lfs diff=lfs merge=lfs -text
+build/torch29-cxx11-cu128-x86_64-linux/mra/_mra_e8307c7_dirty.abi3.so filter=lfs diff=lfs merge=lfs -text
+build/torch29-cxx11-cu130-x86_64-linux/mra/_mra_e8307c7_dirty.abi3.so filter=lfs diff=lfs merge=lfs -text

README.md ADDED Viewed

	@@ -0,0 +1 @@


1	+ MRA kernels for transformers

build.toml ADDED Viewed

	@@ -0,0 +1,20 @@

+[general]
+name = "mra"
+universal = false
+[torch]
+src = [
+    "torch-ext/torch_binding.cpp",
+    "torch-ext/cuda_launch.h",
+]
+[kernel.mra]
+backend = "cuda"
+depends = ["torch"]
+src = [
+    "mra/cuda_kernel.cu",
+    "mra/cuda_kernel.h",
+    "mra/cuda_launch.cu",
+    "mra/cuda_launch.h",
+]

build/torch27-cxx11-cu118-x86_64-linux/mra/__init__.py ADDED Viewed

	@@ -0,0 +1,25 @@

+from ._ops import ops
+import torch
+def index_max(index_vals: torch.Tensor, indices: torch.Tensor, A_num_block: int, B_num_block: int):
+    return ops.index_max(index_vals, indices, A_num_block, B_num_block)
+def mm_to_sparse(dense_A: torch.Tensor, dense_B: torch.Tensor, indices: torch.Tensor):
+    return ops.mm_to_sparse(dense_A, dense_B, indices)
+def sparse_dense_mm(sparse_A: torch.Tensor, indices: torch.Tensor, dense_B: torch.Tensor, A_num_block: int):
+    return ops.sparse_dense_mm(sparse_A, indices, dense_B, A_num_block)
+def reduce_sum(sparse_A: torch.Tensor, indices: torch.Tensor, A_num_block: int, B_num_block: int):
+    return ops.reduce_sum(sparse_A, indices, A_num_block, B_num_block)
+def scatter(dense_A: torch.Tensor, indices: torch.Tensor, B_num_block: int):
+    return ops.scatter(dense_A, indices, B_num_block)
+__all__ = [
+    "index_max",
+    "mm_to_sparse",
+    "sparse_dense_mm",
+    "reduce_sum",
+    "scatter",
+]

build/torch27-cxx11-cu118-x86_64-linux/mra/__pycache__/__init__.cpython-313.pyc ADDED Viewed

Binary file (1.85 kB). View file

build/torch27-cxx11-cu118-x86_64-linux/mra/__pycache__/_ops.cpython-313.pyc ADDED Viewed

Binary file (517 Bytes). View file

build/torch27-cxx11-cu118-x86_64-linux/mra/_mra_e8307c7_dirty.abi3.so ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e07be154aab143447264cbd25ba8987760af84f50304cc0940419cae754d8fc2
+size 2289096

build/torch27-cxx11-cu118-x86_64-linux/mra/_ops.py ADDED Viewed

	@@ -0,0 +1,9 @@

+import torch
+from . import _mra_e8307c7_dirty
+ops = torch.ops._mra_e8307c7_dirty
+def add_op_namespace_prefix(op_name: str):
+    """
+    Prefix op by namespace.
+    """
+    return f"_mra_e8307c7_dirty::{op_name}"

build/torch27-cxx11-cu126-x86_64-linux/mra/__init__.py ADDED Viewed

	@@ -0,0 +1,25 @@

+from ._ops import ops
+import torch
+def index_max(index_vals: torch.Tensor, indices: torch.Tensor, A_num_block: int, B_num_block: int):
+    return ops.index_max(index_vals, indices, A_num_block, B_num_block)
+def mm_to_sparse(dense_A: torch.Tensor, dense_B: torch.Tensor, indices: torch.Tensor):
+    return ops.mm_to_sparse(dense_A, dense_B, indices)
+def sparse_dense_mm(sparse_A: torch.Tensor, indices: torch.Tensor, dense_B: torch.Tensor, A_num_block: int):
+    return ops.sparse_dense_mm(sparse_A, indices, dense_B, A_num_block)
+def reduce_sum(sparse_A: torch.Tensor, indices: torch.Tensor, A_num_block: int, B_num_block: int):
+    return ops.reduce_sum(sparse_A, indices, A_num_block, B_num_block)
+def scatter(dense_A: torch.Tensor, indices: torch.Tensor, B_num_block: int):
+    return ops.scatter(dense_A, indices, B_num_block)
+__all__ = [
+    "index_max",
+    "mm_to_sparse",
+    "sparse_dense_mm",
+    "reduce_sum",
+    "scatter",
+]

build/torch27-cxx11-cu126-x86_64-linux/mra/__pycache__/__init__.cpython-313.pyc ADDED Viewed

Binary file (1.85 kB). View file

build/torch27-cxx11-cu126-x86_64-linux/mra/__pycache__/_ops.cpython-313.pyc ADDED Viewed

Binary file (517 Bytes). View file

build/torch27-cxx11-cu126-x86_64-linux/mra/_mra_e8307c7_dirty.abi3.so ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:579d1b3e91773c7802fc4c5b58b5fac62235b4555c7a836af0306e34f7bb0719
+size 2334496

build/torch27-cxx11-cu126-x86_64-linux/mra/_ops.py ADDED Viewed

	@@ -0,0 +1,9 @@

+import torch
+from . import _mra_e8307c7_dirty
+ops = torch.ops._mra_e8307c7_dirty
+def add_op_namespace_prefix(op_name: str):
+    """
+    Prefix op by namespace.
+    """
+    return f"_mra_e8307c7_dirty::{op_name}"

build/torch27-cxx11-cu128-x86_64-linux/mra/__init__.py ADDED Viewed

	@@ -0,0 +1,25 @@

+from ._ops import ops
+import torch
+def index_max(index_vals: torch.Tensor, indices: torch.Tensor, A_num_block: int, B_num_block: int):
+    return ops.index_max(index_vals, indices, A_num_block, B_num_block)
+def mm_to_sparse(dense_A: torch.Tensor, dense_B: torch.Tensor, indices: torch.Tensor):
+    return ops.mm_to_sparse(dense_A, dense_B, indices)
+def sparse_dense_mm(sparse_A: torch.Tensor, indices: torch.Tensor, dense_B: torch.Tensor, A_num_block: int):
+    return ops.sparse_dense_mm(sparse_A, indices, dense_B, A_num_block)
+def reduce_sum(sparse_A: torch.Tensor, indices: torch.Tensor, A_num_block: int, B_num_block: int):
+    return ops.reduce_sum(sparse_A, indices, A_num_block, B_num_block)
+def scatter(dense_A: torch.Tensor, indices: torch.Tensor, B_num_block: int):
+    return ops.scatter(dense_A, indices, B_num_block)
+__all__ = [
+    "index_max",
+    "mm_to_sparse",
+    "sparse_dense_mm",
+    "reduce_sum",
+    "scatter",
+]

build/torch27-cxx11-cu128-x86_64-linux/mra/__pycache__/__init__.cpython-313.pyc ADDED Viewed

Binary file (1.85 kB). View file

build/torch27-cxx11-cu128-x86_64-linux/mra/__pycache__/_ops.cpython-313.pyc ADDED Viewed

Binary file (517 Bytes). View file

build/torch27-cxx11-cu128-x86_64-linux/mra/_mra_e8307c7_dirty.abi3.so ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:40e6e3cc4433f6333afc56fdce4dd0e5aaaf007d701b7a0582d46234d93d57ec
+size 2602656

build/torch27-cxx11-cu128-x86_64-linux/mra/_ops.py ADDED Viewed

	@@ -0,0 +1,9 @@

+import torch
+from . import _mra_e8307c7_dirty
+ops = torch.ops._mra_e8307c7_dirty
+def add_op_namespace_prefix(op_name: str):
+    """
+    Prefix op by namespace.
+    """
+    return f"_mra_e8307c7_dirty::{op_name}"

build/torch28-cxx11-cu126-x86_64-linux/mra/__init__.py ADDED Viewed

	@@ -0,0 +1,25 @@

+from ._ops import ops
+import torch
+def index_max(index_vals: torch.Tensor, indices: torch.Tensor, A_num_block: int, B_num_block: int):
+    return ops.index_max(index_vals, indices, A_num_block, B_num_block)
+def mm_to_sparse(dense_A: torch.Tensor, dense_B: torch.Tensor, indices: torch.Tensor):
+    return ops.mm_to_sparse(dense_A, dense_B, indices)
+def sparse_dense_mm(sparse_A: torch.Tensor, indices: torch.Tensor, dense_B: torch.Tensor, A_num_block: int):
+    return ops.sparse_dense_mm(sparse_A, indices, dense_B, A_num_block)
+def reduce_sum(sparse_A: torch.Tensor, indices: torch.Tensor, A_num_block: int, B_num_block: int):
+    return ops.reduce_sum(sparse_A, indices, A_num_block, B_num_block)
+def scatter(dense_A: torch.Tensor, indices: torch.Tensor, B_num_block: int):
+    return ops.scatter(dense_A, indices, B_num_block)
+__all__ = [
+    "index_max",
+    "mm_to_sparse",
+    "sparse_dense_mm",
+    "reduce_sum",
+    "scatter",
+]

build/torch28-cxx11-cu126-x86_64-linux/mra/__pycache__/__init__.cpython-313.pyc ADDED Viewed

Binary file (1.85 kB). View file

build/torch28-cxx11-cu126-x86_64-linux/mra/__pycache__/_ops.cpython-313.pyc ADDED Viewed

Binary file (517 Bytes). View file

build/torch28-cxx11-cu126-x86_64-linux/mra/_mra_e8307c7_dirty.abi3.so ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c4b972c43e1a8b2a6941a3ab44b99a638d253f9ef1e67cb973fff0abd2664926
+size 2334520

build/torch28-cxx11-cu126-x86_64-linux/mra/_ops.py ADDED Viewed

	@@ -0,0 +1,9 @@

+import torch
+from . import _mra_e8307c7_dirty
+ops = torch.ops._mra_e8307c7_dirty
+def add_op_namespace_prefix(op_name: str):
+    """
+    Prefix op by namespace.
+    """
+    return f"_mra_e8307c7_dirty::{op_name}"

build/torch28-cxx11-cu128-x86_64-linux/mra/__init__.py ADDED Viewed

	@@ -0,0 +1,25 @@

+from ._ops import ops
+import torch
+def index_max(index_vals: torch.Tensor, indices: torch.Tensor, A_num_block: int, B_num_block: int):
+    return ops.index_max(index_vals, indices, A_num_block, B_num_block)
+def mm_to_sparse(dense_A: torch.Tensor, dense_B: torch.Tensor, indices: torch.Tensor):
+    return ops.mm_to_sparse(dense_A, dense_B, indices)
+def sparse_dense_mm(sparse_A: torch.Tensor, indices: torch.Tensor, dense_B: torch.Tensor, A_num_block: int):
+    return ops.sparse_dense_mm(sparse_A, indices, dense_B, A_num_block)
+def reduce_sum(sparse_A: torch.Tensor, indices: torch.Tensor, A_num_block: int, B_num_block: int):
+    return ops.reduce_sum(sparse_A, indices, A_num_block, B_num_block)
+def scatter(dense_A: torch.Tensor, indices: torch.Tensor, B_num_block: int):
+    return ops.scatter(dense_A, indices, B_num_block)
+__all__ = [
+    "index_max",
+    "mm_to_sparse",
+    "sparse_dense_mm",
+    "reduce_sum",
+    "scatter",
+]

build/torch28-cxx11-cu128-x86_64-linux/mra/__pycache__/__init__.cpython-313.pyc ADDED Viewed

Binary file (1.85 kB). View file

build/torch28-cxx11-cu128-x86_64-linux/mra/__pycache__/_ops.cpython-313.pyc ADDED Viewed

Binary file (517 Bytes). View file

build/torch28-cxx11-cu128-x86_64-linux/mra/_mra_e8307c7_dirty.abi3.so ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c1ab899e2e33ce5edf4b75e5b4138d4ca30e1f62a91ecef4111b4121b408dd5a
+size 2602880

build/torch28-cxx11-cu128-x86_64-linux/mra/_ops.py ADDED Viewed

	@@ -0,0 +1,9 @@

+import torch
+from . import _mra_e8307c7_dirty
+ops = torch.ops._mra_e8307c7_dirty
+def add_op_namespace_prefix(op_name: str):
+    """
+    Prefix op by namespace.
+    """
+    return f"_mra_e8307c7_dirty::{op_name}"

build/torch28-cxx11-cu129-x86_64-linux/mra/__init__.py ADDED Viewed

	@@ -0,0 +1,25 @@

+from ._ops import ops
+import torch
+def index_max(index_vals: torch.Tensor, indices: torch.Tensor, A_num_block: int, B_num_block: int):
+    return ops.index_max(index_vals, indices, A_num_block, B_num_block)
+def mm_to_sparse(dense_A: torch.Tensor, dense_B: torch.Tensor, indices: torch.Tensor):
+    return ops.mm_to_sparse(dense_A, dense_B, indices)
+def sparse_dense_mm(sparse_A: torch.Tensor, indices: torch.Tensor, dense_B: torch.Tensor, A_num_block: int):
+    return ops.sparse_dense_mm(sparse_A, indices, dense_B, A_num_block)
+def reduce_sum(sparse_A: torch.Tensor, indices: torch.Tensor, A_num_block: int, B_num_block: int):
+    return ops.reduce_sum(sparse_A, indices, A_num_block, B_num_block)
+def scatter(dense_A: torch.Tensor, indices: torch.Tensor, B_num_block: int):
+    return ops.scatter(dense_A, indices, B_num_block)
+__all__ = [
+    "index_max",
+    "mm_to_sparse",
+    "sparse_dense_mm",
+    "reduce_sum",
+    "scatter",
+]

build/torch28-cxx11-cu129-x86_64-linux/mra/__pycache__/__init__.cpython-313.pyc ADDED Viewed

Binary file (1.85 kB). View file

build/torch28-cxx11-cu129-x86_64-linux/mra/__pycache__/_ops.cpython-313.pyc ADDED Viewed

Binary file (517 Bytes). View file

build/torch28-cxx11-cu129-x86_64-linux/mra/_mra_e8307c7_dirty.abi3.so ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c10f2bd0af7f0564de136062d345541ed2bb493e21de5fc5cfc30942342abf22
+size 2632568

build/torch28-cxx11-cu129-x86_64-linux/mra/_ops.py ADDED Viewed

	@@ -0,0 +1,9 @@

+import torch
+from . import _mra_e8307c7_dirty
+ops = torch.ops._mra_e8307c7_dirty
+def add_op_namespace_prefix(op_name: str):
+    """
+    Prefix op by namespace.
+    """
+    return f"_mra_e8307c7_dirty::{op_name}"

build/torch29-cxx11-cu126-x86_64-linux/mra/__init__.py ADDED Viewed

	@@ -0,0 +1,25 @@

+from ._ops import ops
+import torch
+def index_max(index_vals: torch.Tensor, indices: torch.Tensor, A_num_block: int, B_num_block: int):
+    return ops.index_max(index_vals, indices, A_num_block, B_num_block)
+def mm_to_sparse(dense_A: torch.Tensor, dense_B: torch.Tensor, indices: torch.Tensor):
+    return ops.mm_to_sparse(dense_A, dense_B, indices)
+def sparse_dense_mm(sparse_A: torch.Tensor, indices: torch.Tensor, dense_B: torch.Tensor, A_num_block: int):
+    return ops.sparse_dense_mm(sparse_A, indices, dense_B, A_num_block)
+def reduce_sum(sparse_A: torch.Tensor, indices: torch.Tensor, A_num_block: int, B_num_block: int):
+    return ops.reduce_sum(sparse_A, indices, A_num_block, B_num_block)
+def scatter(dense_A: torch.Tensor, indices: torch.Tensor, B_num_block: int):
+    return ops.scatter(dense_A, indices, B_num_block)
+__all__ = [
+    "index_max",
+    "mm_to_sparse",
+    "sparse_dense_mm",
+    "reduce_sum",
+    "scatter",
+]

build/torch29-cxx11-cu126-x86_64-linux/mra/__pycache__/__init__.cpython-313.pyc ADDED Viewed

Binary file (1.85 kB). View file

build/torch29-cxx11-cu126-x86_64-linux/mra/__pycache__/_ops.cpython-313.pyc ADDED Viewed

Binary file (517 Bytes). View file

build/torch29-cxx11-cu126-x86_64-linux/mra/_mra_e8307c7_dirty.abi3.so ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ac28155160d68c77778beccc3e0fa7041e5e7d2b96822322a75e0d09eaf452f5
+size 2334496

build/torch29-cxx11-cu126-x86_64-linux/mra/_ops.py ADDED Viewed

	@@ -0,0 +1,9 @@

+import torch
+from . import _mra_e8307c7_dirty
+ops = torch.ops._mra_e8307c7_dirty
+def add_op_namespace_prefix(op_name: str):
+    """
+    Prefix op by namespace.
+    """
+    return f"_mra_e8307c7_dirty::{op_name}"

build/torch29-cxx11-cu128-x86_64-linux/mra/__init__.py ADDED Viewed

	@@ -0,0 +1,25 @@

+from ._ops import ops
+import torch
+def index_max(index_vals: torch.Tensor, indices: torch.Tensor, A_num_block: int, B_num_block: int):
+    return ops.index_max(index_vals, indices, A_num_block, B_num_block)
+def mm_to_sparse(dense_A: torch.Tensor, dense_B: torch.Tensor, indices: torch.Tensor):
+    return ops.mm_to_sparse(dense_A, dense_B, indices)
+def sparse_dense_mm(sparse_A: torch.Tensor, indices: torch.Tensor, dense_B: torch.Tensor, A_num_block: int):
+    return ops.sparse_dense_mm(sparse_A, indices, dense_B, A_num_block)
+def reduce_sum(sparse_A: torch.Tensor, indices: torch.Tensor, A_num_block: int, B_num_block: int):
+    return ops.reduce_sum(sparse_A, indices, A_num_block, B_num_block)
+def scatter(dense_A: torch.Tensor, indices: torch.Tensor, B_num_block: int):
+    return ops.scatter(dense_A, indices, B_num_block)
+__all__ = [
+    "index_max",
+    "mm_to_sparse",
+    "sparse_dense_mm",
+    "reduce_sum",
+    "scatter",
+]

build/torch29-cxx11-cu128-x86_64-linux/mra/__pycache__/__init__.cpython-313.pyc ADDED Viewed

Binary file (1.85 kB). View file

build/torch29-cxx11-cu128-x86_64-linux/mra/__pycache__/_ops.cpython-313.pyc ADDED Viewed

Binary file (517 Bytes). View file

build/torch29-cxx11-cu128-x86_64-linux/mra/_mra_e8307c7_dirty.abi3.so ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6f668c4d60ff23112e2b6c62a2271bb2dd8812a8ed5c51a845ce1248a9e13cbf
+size 2606944

build/torch29-cxx11-cu128-x86_64-linux/mra/_ops.py ADDED Viewed

	@@ -0,0 +1,9 @@

+import torch
+from . import _mra_e8307c7_dirty
+ops = torch.ops._mra_e8307c7_dirty
+def add_op_namespace_prefix(op_name: str):
+    """
+    Prefix op by namespace.
+    """
+    return f"_mra_e8307c7_dirty::{op_name}"

build/torch29-cxx11-cu130-x86_64-linux/mra/__init__.py ADDED Viewed

	@@ -0,0 +1,25 @@

+from ._ops import ops
+import torch
+def index_max(index_vals: torch.Tensor, indices: torch.Tensor, A_num_block: int, B_num_block: int):
+    return ops.index_max(index_vals, indices, A_num_block, B_num_block)
+def mm_to_sparse(dense_A: torch.Tensor, dense_B: torch.Tensor, indices: torch.Tensor):
+    return ops.mm_to_sparse(dense_A, dense_B, indices)
+def sparse_dense_mm(sparse_A: torch.Tensor, indices: torch.Tensor, dense_B: torch.Tensor, A_num_block: int):
+    return ops.sparse_dense_mm(sparse_A, indices, dense_B, A_num_block)
+def reduce_sum(sparse_A: torch.Tensor, indices: torch.Tensor, A_num_block: int, B_num_block: int):
+    return ops.reduce_sum(sparse_A, indices, A_num_block, B_num_block)
+def scatter(dense_A: torch.Tensor, indices: torch.Tensor, B_num_block: int):
+    return ops.scatter(dense_A, indices, B_num_block)
+__all__ = [
+    "index_max",
+    "mm_to_sparse",
+    "sparse_dense_mm",
+    "reduce_sum",
+    "scatter",
+]

build/torch29-cxx11-cu130-x86_64-linux/mra/__pycache__/__init__.cpython-313.pyc ADDED Viewed

Binary file (1.85 kB). View file

build/torch29-cxx11-cu130-x86_64-linux/mra/__pycache__/_ops.cpython-313.pyc ADDED Viewed

Binary file (517 Bytes). View file

build/torch29-cxx11-cu130-x86_64-linux/mra/_mra_e8307c7_dirty.abi3.so ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9a37b346ec5d1df0c97fdf3c9cfdf9eaead44ba9a8e162cd5f00d2a73ecf3e4b
+size 2569704

build/torch29-cxx11-cu130-x86_64-linux/mra/_ops.py ADDED Viewed

	@@ -0,0 +1,9 @@

+import torch
+from . import _mra_e8307c7_dirty
+ops = torch.ops._mra_e8307c7_dirty
+def add_op_namespace_prefix(op_name: str):
+    """
+    Prefix op by namespace.
+    """
+    return f"_mra_e8307c7_dirty::{op_name}"

flake.lock ADDED Viewed

	@@ -0,0 +1,168 @@

+{
+  "nodes": {
+    "flake-compat": {
+      "locked": {
+        "lastModified": 1747046372,
+        "narHash": "sha256-CIVLLkVgvHYbgI2UpXvIIBJ12HWgX+fjA8Xf8PUmqCY=",
+        "owner": "edolstra",
+        "repo": "flake-compat",
+        "rev": "9100a0f413b0c601e0533d1d94ffd501ce2e7885",
+        "type": "github"
+      },
+      "original": {
+        "owner": "edolstra",
+        "repo": "flake-compat",
+        "type": "github"
+      }
+    },
+    "flake-compat_2": {
+      "locked": {
+        "lastModified": 1747046372,
+        "narHash": "sha256-CIVLLkVgvHYbgI2UpXvIIBJ12HWgX+fjA8Xf8PUmqCY=",
+        "owner": "edolstra",
+        "repo": "flake-compat",
+        "rev": "9100a0f413b0c601e0533d1d94ffd501ce2e7885",
+        "type": "github"
+      },
+      "original": {
+        "owner": "edolstra",
+        "repo": "flake-compat",
+        "type": "github"
+      }
+    },
+    "flake-utils": {
+      "inputs": {
+        "systems": "systems"
+      },
+      "locked": {
+        "lastModified": 1731533236,
+        "narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=",
+        "owner": "numtide",
+        "repo": "flake-utils",
+        "rev": "11707dc2f618dd54ca8739b309ec4fc024de578b",
+        "type": "github"
+      },
+      "original": {
+        "owner": "numtide",
+        "repo": "flake-utils",
+        "type": "github"
+      }
+    },
+    "flake-utils_2": {
+      "inputs": {
+        "systems": "systems_2"
+      },
+      "locked": {
+        "lastModified": 1731533236,
+        "narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=",
+        "owner": "numtide",
+        "repo": "flake-utils",
+        "rev": "11707dc2f618dd54ca8739b309ec4fc024de578b",
+        "type": "github"
+      },
+      "original": {
+        "owner": "numtide",
+        "repo": "flake-utils",
+        "type": "github"
+      }
+    },
+    "hf-nix": {
+      "inputs": {
+        "flake-compat": "flake-compat_2",
+        "flake-utils": "flake-utils_2",
+        "nixpkgs": "nixpkgs"
+      },
+      "locked": {
+        "lastModified": 1759851564,
+        "narHash": "sha256-Xybkhm0FM/VzlZ5WndTYq/X/9MAeddd4EQ2Vz8GdkOA=",
+        "owner": "huggingface",
+        "repo": "hf-nix",
+        "rev": "351655d9f124805ed7c1193aa61550ce245f4570",
+        "type": "github"
+      },
+      "original": {
+        "owner": "huggingface",
+        "repo": "hf-nix",
+        "type": "github"
+      }
+    },
+    "kernel-builder": {
+      "inputs": {
+        "flake-compat": "flake-compat",
+        "flake-utils": "flake-utils",
+        "hf-nix": "hf-nix",
+        "nixpkgs": [
+          "kernel-builder",
+          "hf-nix",
+          "nixpkgs"
+        ]
+      },
+      "locked": {
+        "lastModified": 1760035358,
+        "narHash": "sha256-N5vmCrgwcIluPclf/hmnofLK77EJJYh5PR8SRvw++es=",
+        "owner": "huggingface",
+        "repo": "kernel-builder",
+        "rev": "a48cbd19ae7e425dfc1865188ef06dac43ab9244",
+        "type": "github"
+      },
+      "original": {
+        "owner": "huggingface",
+        "repo": "kernel-builder",
+        "type": "github"
+      }
+    },
+    "nixpkgs": {
+      "locked": {
+        "lastModified": 1755963616,
+        "narHash": "sha256-6yD0ww/S8n+U2uPYcJZ3DRURP8Kx036GRpR2uPNZroE=",
+        "owner": "nixos",
+        "repo": "nixpkgs",
+        "rev": "73e96df7cff5783f45e21342a75a1540c4eddce4",
+        "type": "github"
+      },
+      "original": {
+        "owner": "nixos",
+        "ref": "nixos-unstable-small",
+        "repo": "nixpkgs",
+        "type": "github"
+      }
+    },
+    "root": {
+      "inputs": {
+        "kernel-builder": "kernel-builder"
+      }
+    },
+    "systems": {
+      "locked": {
+        "lastModified": 1681028828,
+        "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
+        "owner": "nix-systems",
+        "repo": "default",
+        "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
+        "type": "github"
+      },
+      "original": {
+        "owner": "nix-systems",
+        "repo": "default",
+        "type": "github"
+      }
+    },
+    "systems_2": {
+      "locked": {
+        "lastModified": 1681028828,
+        "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
+        "owner": "nix-systems",
+        "repo": "default",
+        "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
+        "type": "github"
+      },
+      "original": {
+        "owner": "nix-systems",
+        "repo": "default",
+        "type": "github"
+      }
+    }
+  },
+  "root": "root",
+  "version": 7
+}

flake.nix ADDED Viewed

	@@ -0,0 +1,17 @@

+{
+  description = "Flake for mra kernels";
+  inputs = {
+    kernel-builder.url = "github:huggingface/kernel-builder";
+  };
+  outputs =
+    {
+      self,
+      kernel-builder,
+    }:
+    kernel-builder.lib.genFlakeOutputs {
+      path = ./.;
+      rev = self.shortRev or self.dirtyShortRev or self.lastModifiedDate;
+    };
+}