medmekk HF Staff committed on
Commit
55ce07b
·
verified ·
1 Parent(s): 67c9ab5

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +9 -0
  2. README.md +1 -0
  3. build.toml +20 -0
  4. build/torch27-cxx11-cu118-x86_64-linux/mra/__init__.py +25 -0
  5. build/torch27-cxx11-cu118-x86_64-linux/mra/__pycache__/__init__.cpython-313.pyc +0 -0
  6. build/torch27-cxx11-cu118-x86_64-linux/mra/__pycache__/_ops.cpython-313.pyc +0 -0
  7. build/torch27-cxx11-cu118-x86_64-linux/mra/_mra_e8307c7_dirty.abi3.so +3 -0
  8. build/torch27-cxx11-cu118-x86_64-linux/mra/_ops.py +9 -0
  9. build/torch27-cxx11-cu126-x86_64-linux/mra/__init__.py +25 -0
  10. build/torch27-cxx11-cu126-x86_64-linux/mra/__pycache__/__init__.cpython-313.pyc +0 -0
  11. build/torch27-cxx11-cu126-x86_64-linux/mra/__pycache__/_ops.cpython-313.pyc +0 -0
  12. build/torch27-cxx11-cu126-x86_64-linux/mra/_mra_e8307c7_dirty.abi3.so +3 -0
  13. build/torch27-cxx11-cu126-x86_64-linux/mra/_ops.py +9 -0
  14. build/torch27-cxx11-cu128-x86_64-linux/mra/__init__.py +25 -0
  15. build/torch27-cxx11-cu128-x86_64-linux/mra/__pycache__/__init__.cpython-313.pyc +0 -0
  16. build/torch27-cxx11-cu128-x86_64-linux/mra/__pycache__/_ops.cpython-313.pyc +0 -0
  17. build/torch27-cxx11-cu128-x86_64-linux/mra/_mra_e8307c7_dirty.abi3.so +3 -0
  18. build/torch27-cxx11-cu128-x86_64-linux/mra/_ops.py +9 -0
  19. build/torch28-cxx11-cu126-x86_64-linux/mra/__init__.py +25 -0
  20. build/torch28-cxx11-cu126-x86_64-linux/mra/__pycache__/__init__.cpython-313.pyc +0 -0
  21. build/torch28-cxx11-cu126-x86_64-linux/mra/__pycache__/_ops.cpython-313.pyc +0 -0
  22. build/torch28-cxx11-cu126-x86_64-linux/mra/_mra_e8307c7_dirty.abi3.so +3 -0
  23. build/torch28-cxx11-cu126-x86_64-linux/mra/_ops.py +9 -0
  24. build/torch28-cxx11-cu128-x86_64-linux/mra/__init__.py +25 -0
  25. build/torch28-cxx11-cu128-x86_64-linux/mra/__pycache__/__init__.cpython-313.pyc +0 -0
  26. build/torch28-cxx11-cu128-x86_64-linux/mra/__pycache__/_ops.cpython-313.pyc +0 -0
  27. build/torch28-cxx11-cu128-x86_64-linux/mra/_mra_e8307c7_dirty.abi3.so +3 -0
  28. build/torch28-cxx11-cu128-x86_64-linux/mra/_ops.py +9 -0
  29. build/torch28-cxx11-cu129-x86_64-linux/mra/__init__.py +25 -0
  30. build/torch28-cxx11-cu129-x86_64-linux/mra/__pycache__/__init__.cpython-313.pyc +0 -0
  31. build/torch28-cxx11-cu129-x86_64-linux/mra/__pycache__/_ops.cpython-313.pyc +0 -0
  32. build/torch28-cxx11-cu129-x86_64-linux/mra/_mra_e8307c7_dirty.abi3.so +3 -0
  33. build/torch28-cxx11-cu129-x86_64-linux/mra/_ops.py +9 -0
  34. build/torch29-cxx11-cu126-x86_64-linux/mra/__init__.py +25 -0
  35. build/torch29-cxx11-cu126-x86_64-linux/mra/__pycache__/__init__.cpython-313.pyc +0 -0
  36. build/torch29-cxx11-cu126-x86_64-linux/mra/__pycache__/_ops.cpython-313.pyc +0 -0
  37. build/torch29-cxx11-cu126-x86_64-linux/mra/_mra_e8307c7_dirty.abi3.so +3 -0
  38. build/torch29-cxx11-cu126-x86_64-linux/mra/_ops.py +9 -0
  39. build/torch29-cxx11-cu128-x86_64-linux/mra/__init__.py +25 -0
  40. build/torch29-cxx11-cu128-x86_64-linux/mra/__pycache__/__init__.cpython-313.pyc +0 -0
  41. build/torch29-cxx11-cu128-x86_64-linux/mra/__pycache__/_ops.cpython-313.pyc +0 -0
  42. build/torch29-cxx11-cu128-x86_64-linux/mra/_mra_e8307c7_dirty.abi3.so +3 -0
  43. build/torch29-cxx11-cu128-x86_64-linux/mra/_ops.py +9 -0
  44. build/torch29-cxx11-cu130-x86_64-linux/mra/__init__.py +25 -0
  45. build/torch29-cxx11-cu130-x86_64-linux/mra/__pycache__/__init__.cpython-313.pyc +0 -0
  46. build/torch29-cxx11-cu130-x86_64-linux/mra/__pycache__/_ops.cpython-313.pyc +0 -0
  47. build/torch29-cxx11-cu130-x86_64-linux/mra/_mra_e8307c7_dirty.abi3.so +3 -0
  48. build/torch29-cxx11-cu130-x86_64-linux/mra/_ops.py +9 -0
  49. flake.lock +168 -0
  50. flake.nix +17 -0
.gitattributes CHANGED
@@ -33,3 +33,12 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ build/torch27-cxx11-cu118-x86_64-linux/mra/_mra_e8307c7_dirty.abi3.so filter=lfs diff=lfs merge=lfs -text
37
+ build/torch27-cxx11-cu126-x86_64-linux/mra/_mra_e8307c7_dirty.abi3.so filter=lfs diff=lfs merge=lfs -text
38
+ build/torch27-cxx11-cu128-x86_64-linux/mra/_mra_e8307c7_dirty.abi3.so filter=lfs diff=lfs merge=lfs -text
39
+ build/torch28-cxx11-cu126-x86_64-linux/mra/_mra_e8307c7_dirty.abi3.so filter=lfs diff=lfs merge=lfs -text
40
+ build/torch28-cxx11-cu128-x86_64-linux/mra/_mra_e8307c7_dirty.abi3.so filter=lfs diff=lfs merge=lfs -text
41
+ build/torch28-cxx11-cu129-x86_64-linux/mra/_mra_e8307c7_dirty.abi3.so filter=lfs diff=lfs merge=lfs -text
42
+ build/torch29-cxx11-cu126-x86_64-linux/mra/_mra_e8307c7_dirty.abi3.so filter=lfs diff=lfs merge=lfs -text
43
+ build/torch29-cxx11-cu128-x86_64-linux/mra/_mra_e8307c7_dirty.abi3.so filter=lfs diff=lfs merge=lfs -text
44
+ build/torch29-cxx11-cu130-x86_64-linux/mra/_mra_e8307c7_dirty.abi3.so filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1 @@
 
 
1
+ MRA kernels for transformers
build.toml ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [general]
2
+ name = "mra"
3
+ universal = false
4
+
5
+ [torch]
6
+ src = [
7
+ "torch-ext/torch_binding.cpp",
8
+ "torch-ext/cuda_launch.h",
9
+ ]
10
+
11
+
12
+ [kernel.mra]
13
+ backend = "cuda"
14
+ depends = ["torch"]
15
+ src = [
16
+ "mra/cuda_kernel.cu",
17
+ "mra/cuda_kernel.h",
18
+ "mra/cuda_launch.cu",
19
+ "mra/cuda_launch.h",
20
+ ]
build/torch27-cxx11-cu118-x86_64-linux/mra/__init__.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from ._ops import ops
2
+ import torch
3
+
4
+ def index_max(index_vals: torch.Tensor, indices: torch.Tensor, A_num_block: int, B_num_block: int):
5
+ return ops.index_max(index_vals, indices, A_num_block, B_num_block)
6
+
7
+ def mm_to_sparse(dense_A: torch.Tensor, dense_B: torch.Tensor, indices: torch.Tensor):
8
+ return ops.mm_to_sparse(dense_A, dense_B, indices)
9
+
10
+ def sparse_dense_mm(sparse_A: torch.Tensor, indices: torch.Tensor, dense_B: torch.Tensor, A_num_block: int):
11
+ return ops.sparse_dense_mm(sparse_A, indices, dense_B, A_num_block)
12
+
13
+ def reduce_sum(sparse_A: torch.Tensor, indices: torch.Tensor, A_num_block: int, B_num_block: int):
14
+ return ops.reduce_sum(sparse_A, indices, A_num_block, B_num_block)
15
+
16
+ def scatter(dense_A: torch.Tensor, indices: torch.Tensor, B_num_block: int):
17
+ return ops.scatter(dense_A, indices, B_num_block)
18
+
19
+ __all__ = [
20
+ "index_max",
21
+ "mm_to_sparse",
22
+ "sparse_dense_mm",
23
+ "reduce_sum",
24
+ "scatter",
25
+ ]
build/torch27-cxx11-cu118-x86_64-linux/mra/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (1.85 kB). View file
 
build/torch27-cxx11-cu118-x86_64-linux/mra/__pycache__/_ops.cpython-313.pyc ADDED
Binary file (517 Bytes). View file
 
build/torch27-cxx11-cu118-x86_64-linux/mra/_mra_e8307c7_dirty.abi3.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e07be154aab143447264cbd25ba8987760af84f50304cc0940419cae754d8fc2
3
+ size 2289096
build/torch27-cxx11-cu118-x86_64-linux/mra/_ops.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from . import _mra_e8307c7_dirty
3
+ ops = torch.ops._mra_e8307c7_dirty
4
+
5
+ def add_op_namespace_prefix(op_name: str):
6
+ """
7
+ Prefix op by namespace.
8
+ """
9
+ return f"_mra_e8307c7_dirty::{op_name}"
build/torch27-cxx11-cu126-x86_64-linux/mra/__init__.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from ._ops import ops
2
+ import torch
3
+
4
+ def index_max(index_vals: torch.Tensor, indices: torch.Tensor, A_num_block: int, B_num_block: int):
5
+ return ops.index_max(index_vals, indices, A_num_block, B_num_block)
6
+
7
+ def mm_to_sparse(dense_A: torch.Tensor, dense_B: torch.Tensor, indices: torch.Tensor):
8
+ return ops.mm_to_sparse(dense_A, dense_B, indices)
9
+
10
+ def sparse_dense_mm(sparse_A: torch.Tensor, indices: torch.Tensor, dense_B: torch.Tensor, A_num_block: int):
11
+ return ops.sparse_dense_mm(sparse_A, indices, dense_B, A_num_block)
12
+
13
+ def reduce_sum(sparse_A: torch.Tensor, indices: torch.Tensor, A_num_block: int, B_num_block: int):
14
+ return ops.reduce_sum(sparse_A, indices, A_num_block, B_num_block)
15
+
16
+ def scatter(dense_A: torch.Tensor, indices: torch.Tensor, B_num_block: int):
17
+ return ops.scatter(dense_A, indices, B_num_block)
18
+
19
+ __all__ = [
20
+ "index_max",
21
+ "mm_to_sparse",
22
+ "sparse_dense_mm",
23
+ "reduce_sum",
24
+ "scatter",
25
+ ]
build/torch27-cxx11-cu126-x86_64-linux/mra/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (1.85 kB). View file
 
build/torch27-cxx11-cu126-x86_64-linux/mra/__pycache__/_ops.cpython-313.pyc ADDED
Binary file (517 Bytes). View file
 
build/torch27-cxx11-cu126-x86_64-linux/mra/_mra_e8307c7_dirty.abi3.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:579d1b3e91773c7802fc4c5b58b5fac62235b4555c7a836af0306e34f7bb0719
3
+ size 2334496
build/torch27-cxx11-cu126-x86_64-linux/mra/_ops.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from . import _mra_e8307c7_dirty
3
+ ops = torch.ops._mra_e8307c7_dirty
4
+
5
+ def add_op_namespace_prefix(op_name: str):
6
+ """
7
+ Prefix op by namespace.
8
+ """
9
+ return f"_mra_e8307c7_dirty::{op_name}"
build/torch27-cxx11-cu128-x86_64-linux/mra/__init__.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from ._ops import ops
2
+ import torch
3
+
4
+ def index_max(index_vals: torch.Tensor, indices: torch.Tensor, A_num_block: int, B_num_block: int):
5
+ return ops.index_max(index_vals, indices, A_num_block, B_num_block)
6
+
7
+ def mm_to_sparse(dense_A: torch.Tensor, dense_B: torch.Tensor, indices: torch.Tensor):
8
+ return ops.mm_to_sparse(dense_A, dense_B, indices)
9
+
10
+ def sparse_dense_mm(sparse_A: torch.Tensor, indices: torch.Tensor, dense_B: torch.Tensor, A_num_block: int):
11
+ return ops.sparse_dense_mm(sparse_A, indices, dense_B, A_num_block)
12
+
13
+ def reduce_sum(sparse_A: torch.Tensor, indices: torch.Tensor, A_num_block: int, B_num_block: int):
14
+ return ops.reduce_sum(sparse_A, indices, A_num_block, B_num_block)
15
+
16
+ def scatter(dense_A: torch.Tensor, indices: torch.Tensor, B_num_block: int):
17
+ return ops.scatter(dense_A, indices, B_num_block)
18
+
19
+ __all__ = [
20
+ "index_max",
21
+ "mm_to_sparse",
22
+ "sparse_dense_mm",
23
+ "reduce_sum",
24
+ "scatter",
25
+ ]
build/torch27-cxx11-cu128-x86_64-linux/mra/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (1.85 kB). View file
 
build/torch27-cxx11-cu128-x86_64-linux/mra/__pycache__/_ops.cpython-313.pyc ADDED
Binary file (517 Bytes). View file
 
build/torch27-cxx11-cu128-x86_64-linux/mra/_mra_e8307c7_dirty.abi3.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40e6e3cc4433f6333afc56fdce4dd0e5aaaf007d701b7a0582d46234d93d57ec
3
+ size 2602656
build/torch27-cxx11-cu128-x86_64-linux/mra/_ops.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from . import _mra_e8307c7_dirty
3
+ ops = torch.ops._mra_e8307c7_dirty
4
+
5
+ def add_op_namespace_prefix(op_name: str):
6
+ """
7
+ Prefix op by namespace.
8
+ """
9
+ return f"_mra_e8307c7_dirty::{op_name}"
build/torch28-cxx11-cu126-x86_64-linux/mra/__init__.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from ._ops import ops
2
+ import torch
3
+
4
+ def index_max(index_vals: torch.Tensor, indices: torch.Tensor, A_num_block: int, B_num_block: int):
5
+ return ops.index_max(index_vals, indices, A_num_block, B_num_block)
6
+
7
+ def mm_to_sparse(dense_A: torch.Tensor, dense_B: torch.Tensor, indices: torch.Tensor):
8
+ return ops.mm_to_sparse(dense_A, dense_B, indices)
9
+
10
+ def sparse_dense_mm(sparse_A: torch.Tensor, indices: torch.Tensor, dense_B: torch.Tensor, A_num_block: int):
11
+ return ops.sparse_dense_mm(sparse_A, indices, dense_B, A_num_block)
12
+
13
+ def reduce_sum(sparse_A: torch.Tensor, indices: torch.Tensor, A_num_block: int, B_num_block: int):
14
+ return ops.reduce_sum(sparse_A, indices, A_num_block, B_num_block)
15
+
16
+ def scatter(dense_A: torch.Tensor, indices: torch.Tensor, B_num_block: int):
17
+ return ops.scatter(dense_A, indices, B_num_block)
18
+
19
+ __all__ = [
20
+ "index_max",
21
+ "mm_to_sparse",
22
+ "sparse_dense_mm",
23
+ "reduce_sum",
24
+ "scatter",
25
+ ]
build/torch28-cxx11-cu126-x86_64-linux/mra/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (1.85 kB). View file
 
build/torch28-cxx11-cu126-x86_64-linux/mra/__pycache__/_ops.cpython-313.pyc ADDED
Binary file (517 Bytes). View file
 
build/torch28-cxx11-cu126-x86_64-linux/mra/_mra_e8307c7_dirty.abi3.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4b972c43e1a8b2a6941a3ab44b99a638d253f9ef1e67cb973fff0abd2664926
3
+ size 2334520
build/torch28-cxx11-cu126-x86_64-linux/mra/_ops.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from . import _mra_e8307c7_dirty
3
+ ops = torch.ops._mra_e8307c7_dirty
4
+
5
+ def add_op_namespace_prefix(op_name: str):
6
+ """
7
+ Prefix op by namespace.
8
+ """
9
+ return f"_mra_e8307c7_dirty::{op_name}"
build/torch28-cxx11-cu128-x86_64-linux/mra/__init__.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from ._ops import ops
2
+ import torch
3
+
4
+ def index_max(index_vals: torch.Tensor, indices: torch.Tensor, A_num_block: int, B_num_block: int):
5
+ return ops.index_max(index_vals, indices, A_num_block, B_num_block)
6
+
7
+ def mm_to_sparse(dense_A: torch.Tensor, dense_B: torch.Tensor, indices: torch.Tensor):
8
+ return ops.mm_to_sparse(dense_A, dense_B, indices)
9
+
10
+ def sparse_dense_mm(sparse_A: torch.Tensor, indices: torch.Tensor, dense_B: torch.Tensor, A_num_block: int):
11
+ return ops.sparse_dense_mm(sparse_A, indices, dense_B, A_num_block)
12
+
13
+ def reduce_sum(sparse_A: torch.Tensor, indices: torch.Tensor, A_num_block: int, B_num_block: int):
14
+ return ops.reduce_sum(sparse_A, indices, A_num_block, B_num_block)
15
+
16
+ def scatter(dense_A: torch.Tensor, indices: torch.Tensor, B_num_block: int):
17
+ return ops.scatter(dense_A, indices, B_num_block)
18
+
19
+ __all__ = [
20
+ "index_max",
21
+ "mm_to_sparse",
22
+ "sparse_dense_mm",
23
+ "reduce_sum",
24
+ "scatter",
25
+ ]
build/torch28-cxx11-cu128-x86_64-linux/mra/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (1.85 kB). View file
 
build/torch28-cxx11-cu128-x86_64-linux/mra/__pycache__/_ops.cpython-313.pyc ADDED
Binary file (517 Bytes). View file
 
build/torch28-cxx11-cu128-x86_64-linux/mra/_mra_e8307c7_dirty.abi3.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1ab899e2e33ce5edf4b75e5b4138d4ca30e1f62a91ecef4111b4121b408dd5a
3
+ size 2602880
build/torch28-cxx11-cu128-x86_64-linux/mra/_ops.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from . import _mra_e8307c7_dirty
3
+ ops = torch.ops._mra_e8307c7_dirty
4
+
5
+ def add_op_namespace_prefix(op_name: str):
6
+ """
7
+ Prefix op by namespace.
8
+ """
9
+ return f"_mra_e8307c7_dirty::{op_name}"
build/torch28-cxx11-cu129-x86_64-linux/mra/__init__.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from ._ops import ops
2
+ import torch
3
+
4
+ def index_max(index_vals: torch.Tensor, indices: torch.Tensor, A_num_block: int, B_num_block: int):
5
+ return ops.index_max(index_vals, indices, A_num_block, B_num_block)
6
+
7
+ def mm_to_sparse(dense_A: torch.Tensor, dense_B: torch.Tensor, indices: torch.Tensor):
8
+ return ops.mm_to_sparse(dense_A, dense_B, indices)
9
+
10
+ def sparse_dense_mm(sparse_A: torch.Tensor, indices: torch.Tensor, dense_B: torch.Tensor, A_num_block: int):
11
+ return ops.sparse_dense_mm(sparse_A, indices, dense_B, A_num_block)
12
+
13
+ def reduce_sum(sparse_A: torch.Tensor, indices: torch.Tensor, A_num_block: int, B_num_block: int):
14
+ return ops.reduce_sum(sparse_A, indices, A_num_block, B_num_block)
15
+
16
+ def scatter(dense_A: torch.Tensor, indices: torch.Tensor, B_num_block: int):
17
+ return ops.scatter(dense_A, indices, B_num_block)
18
+
19
+ __all__ = [
20
+ "index_max",
21
+ "mm_to_sparse",
22
+ "sparse_dense_mm",
23
+ "reduce_sum",
24
+ "scatter",
25
+ ]
build/torch28-cxx11-cu129-x86_64-linux/mra/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (1.85 kB). View file
 
build/torch28-cxx11-cu129-x86_64-linux/mra/__pycache__/_ops.cpython-313.pyc ADDED
Binary file (517 Bytes). View file
 
build/torch28-cxx11-cu129-x86_64-linux/mra/_mra_e8307c7_dirty.abi3.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c10f2bd0af7f0564de136062d345541ed2bb493e21de5fc5cfc30942342abf22
3
+ size 2632568
build/torch28-cxx11-cu129-x86_64-linux/mra/_ops.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from . import _mra_e8307c7_dirty
3
+ ops = torch.ops._mra_e8307c7_dirty
4
+
5
+ def add_op_namespace_prefix(op_name: str):
6
+ """
7
+ Prefix op by namespace.
8
+ """
9
+ return f"_mra_e8307c7_dirty::{op_name}"
build/torch29-cxx11-cu126-x86_64-linux/mra/__init__.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from ._ops import ops
2
+ import torch
3
+
4
+ def index_max(index_vals: torch.Tensor, indices: torch.Tensor, A_num_block: int, B_num_block: int):
5
+ return ops.index_max(index_vals, indices, A_num_block, B_num_block)
6
+
7
+ def mm_to_sparse(dense_A: torch.Tensor, dense_B: torch.Tensor, indices: torch.Tensor):
8
+ return ops.mm_to_sparse(dense_A, dense_B, indices)
9
+
10
+ def sparse_dense_mm(sparse_A: torch.Tensor, indices: torch.Tensor, dense_B: torch.Tensor, A_num_block: int):
11
+ return ops.sparse_dense_mm(sparse_A, indices, dense_B, A_num_block)
12
+
13
+ def reduce_sum(sparse_A: torch.Tensor, indices: torch.Tensor, A_num_block: int, B_num_block: int):
14
+ return ops.reduce_sum(sparse_A, indices, A_num_block, B_num_block)
15
+
16
+ def scatter(dense_A: torch.Tensor, indices: torch.Tensor, B_num_block: int):
17
+ return ops.scatter(dense_A, indices, B_num_block)
18
+
19
+ __all__ = [
20
+ "index_max",
21
+ "mm_to_sparse",
22
+ "sparse_dense_mm",
23
+ "reduce_sum",
24
+ "scatter",
25
+ ]
build/torch29-cxx11-cu126-x86_64-linux/mra/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (1.85 kB). View file
 
build/torch29-cxx11-cu126-x86_64-linux/mra/__pycache__/_ops.cpython-313.pyc ADDED
Binary file (517 Bytes). View file
 
build/torch29-cxx11-cu126-x86_64-linux/mra/_mra_e8307c7_dirty.abi3.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac28155160d68c77778beccc3e0fa7041e5e7d2b96822322a75e0d09eaf452f5
3
+ size 2334496
build/torch29-cxx11-cu126-x86_64-linux/mra/_ops.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from . import _mra_e8307c7_dirty
3
+ ops = torch.ops._mra_e8307c7_dirty
4
+
5
+ def add_op_namespace_prefix(op_name: str):
6
+ """
7
+ Prefix op by namespace.
8
+ """
9
+ return f"_mra_e8307c7_dirty::{op_name}"
build/torch29-cxx11-cu128-x86_64-linux/mra/__init__.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from ._ops import ops
2
+ import torch
3
+
4
+ def index_max(index_vals: torch.Tensor, indices: torch.Tensor, A_num_block: int, B_num_block: int):
5
+ return ops.index_max(index_vals, indices, A_num_block, B_num_block)
6
+
7
+ def mm_to_sparse(dense_A: torch.Tensor, dense_B: torch.Tensor, indices: torch.Tensor):
8
+ return ops.mm_to_sparse(dense_A, dense_B, indices)
9
+
10
+ def sparse_dense_mm(sparse_A: torch.Tensor, indices: torch.Tensor, dense_B: torch.Tensor, A_num_block: int):
11
+ return ops.sparse_dense_mm(sparse_A, indices, dense_B, A_num_block)
12
+
13
+ def reduce_sum(sparse_A: torch.Tensor, indices: torch.Tensor, A_num_block: int, B_num_block: int):
14
+ return ops.reduce_sum(sparse_A, indices, A_num_block, B_num_block)
15
+
16
+ def scatter(dense_A: torch.Tensor, indices: torch.Tensor, B_num_block: int):
17
+ return ops.scatter(dense_A, indices, B_num_block)
18
+
19
+ __all__ = [
20
+ "index_max",
21
+ "mm_to_sparse",
22
+ "sparse_dense_mm",
23
+ "reduce_sum",
24
+ "scatter",
25
+ ]
build/torch29-cxx11-cu128-x86_64-linux/mra/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (1.85 kB). View file
 
build/torch29-cxx11-cu128-x86_64-linux/mra/__pycache__/_ops.cpython-313.pyc ADDED
Binary file (517 Bytes). View file
 
build/torch29-cxx11-cu128-x86_64-linux/mra/_mra_e8307c7_dirty.abi3.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f668c4d60ff23112e2b6c62a2271bb2dd8812a8ed5c51a845ce1248a9e13cbf
3
+ size 2606944
build/torch29-cxx11-cu128-x86_64-linux/mra/_ops.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from . import _mra_e8307c7_dirty
3
+ ops = torch.ops._mra_e8307c7_dirty
4
+
5
+ def add_op_namespace_prefix(op_name: str):
6
+ """
7
+ Prefix op by namespace.
8
+ """
9
+ return f"_mra_e8307c7_dirty::{op_name}"
build/torch29-cxx11-cu130-x86_64-linux/mra/__init__.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from ._ops import ops
2
+ import torch
3
+
4
+ def index_max(index_vals: torch.Tensor, indices: torch.Tensor, A_num_block: int, B_num_block: int):
5
+ return ops.index_max(index_vals, indices, A_num_block, B_num_block)
6
+
7
+ def mm_to_sparse(dense_A: torch.Tensor, dense_B: torch.Tensor, indices: torch.Tensor):
8
+ return ops.mm_to_sparse(dense_A, dense_B, indices)
9
+
10
+ def sparse_dense_mm(sparse_A: torch.Tensor, indices: torch.Tensor, dense_B: torch.Tensor, A_num_block: int):
11
+ return ops.sparse_dense_mm(sparse_A, indices, dense_B, A_num_block)
12
+
13
+ def reduce_sum(sparse_A: torch.Tensor, indices: torch.Tensor, A_num_block: int, B_num_block: int):
14
+ return ops.reduce_sum(sparse_A, indices, A_num_block, B_num_block)
15
+
16
+ def scatter(dense_A: torch.Tensor, indices: torch.Tensor, B_num_block: int):
17
+ return ops.scatter(dense_A, indices, B_num_block)
18
+
19
+ __all__ = [
20
+ "index_max",
21
+ "mm_to_sparse",
22
+ "sparse_dense_mm",
23
+ "reduce_sum",
24
+ "scatter",
25
+ ]
build/torch29-cxx11-cu130-x86_64-linux/mra/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (1.85 kB). View file
 
build/torch29-cxx11-cu130-x86_64-linux/mra/__pycache__/_ops.cpython-313.pyc ADDED
Binary file (517 Bytes). View file
 
build/torch29-cxx11-cu130-x86_64-linux/mra/_mra_e8307c7_dirty.abi3.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a37b346ec5d1df0c97fdf3c9cfdf9eaead44ba9a8e162cd5f00d2a73ecf3e4b
3
+ size 2569704
build/torch29-cxx11-cu130-x86_64-linux/mra/_ops.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from . import _mra_e8307c7_dirty
3
+ ops = torch.ops._mra_e8307c7_dirty
4
+
5
+ def add_op_namespace_prefix(op_name: str):
6
+ """
7
+ Prefix op by namespace.
8
+ """
9
+ return f"_mra_e8307c7_dirty::{op_name}"
flake.lock ADDED
@@ -0,0 +1,168 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "nodes": {
3
+ "flake-compat": {
4
+ "locked": {
5
+ "lastModified": 1747046372,
6
+ "narHash": "sha256-CIVLLkVgvHYbgI2UpXvIIBJ12HWgX+fjA8Xf8PUmqCY=",
7
+ "owner": "edolstra",
8
+ "repo": "flake-compat",
9
+ "rev": "9100a0f413b0c601e0533d1d94ffd501ce2e7885",
10
+ "type": "github"
11
+ },
12
+ "original": {
13
+ "owner": "edolstra",
14
+ "repo": "flake-compat",
15
+ "type": "github"
16
+ }
17
+ },
18
+ "flake-compat_2": {
19
+ "locked": {
20
+ "lastModified": 1747046372,
21
+ "narHash": "sha256-CIVLLkVgvHYbgI2UpXvIIBJ12HWgX+fjA8Xf8PUmqCY=",
22
+ "owner": "edolstra",
23
+ "repo": "flake-compat",
24
+ "rev": "9100a0f413b0c601e0533d1d94ffd501ce2e7885",
25
+ "type": "github"
26
+ },
27
+ "original": {
28
+ "owner": "edolstra",
29
+ "repo": "flake-compat",
30
+ "type": "github"
31
+ }
32
+ },
33
+ "flake-utils": {
34
+ "inputs": {
35
+ "systems": "systems"
36
+ },
37
+ "locked": {
38
+ "lastModified": 1731533236,
39
+ "narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=",
40
+ "owner": "numtide",
41
+ "repo": "flake-utils",
42
+ "rev": "11707dc2f618dd54ca8739b309ec4fc024de578b",
43
+ "type": "github"
44
+ },
45
+ "original": {
46
+ "owner": "numtide",
47
+ "repo": "flake-utils",
48
+ "type": "github"
49
+ }
50
+ },
51
+ "flake-utils_2": {
52
+ "inputs": {
53
+ "systems": "systems_2"
54
+ },
55
+ "locked": {
56
+ "lastModified": 1731533236,
57
+ "narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=",
58
+ "owner": "numtide",
59
+ "repo": "flake-utils",
60
+ "rev": "11707dc2f618dd54ca8739b309ec4fc024de578b",
61
+ "type": "github"
62
+ },
63
+ "original": {
64
+ "owner": "numtide",
65
+ "repo": "flake-utils",
66
+ "type": "github"
67
+ }
68
+ },
69
+ "hf-nix": {
70
+ "inputs": {
71
+ "flake-compat": "flake-compat_2",
72
+ "flake-utils": "flake-utils_2",
73
+ "nixpkgs": "nixpkgs"
74
+ },
75
+ "locked": {
76
+ "lastModified": 1759851564,
77
+ "narHash": "sha256-Xybkhm0FM/VzlZ5WndTYq/X/9MAeddd4EQ2Vz8GdkOA=",
78
+ "owner": "huggingface",
79
+ "repo": "hf-nix",
80
+ "rev": "351655d9f124805ed7c1193aa61550ce245f4570",
81
+ "type": "github"
82
+ },
83
+ "original": {
84
+ "owner": "huggingface",
85
+ "repo": "hf-nix",
86
+ "type": "github"
87
+ }
88
+ },
89
+ "kernel-builder": {
90
+ "inputs": {
91
+ "flake-compat": "flake-compat",
92
+ "flake-utils": "flake-utils",
93
+ "hf-nix": "hf-nix",
94
+ "nixpkgs": [
95
+ "kernel-builder",
96
+ "hf-nix",
97
+ "nixpkgs"
98
+ ]
99
+ },
100
+ "locked": {
101
+ "lastModified": 1760035358,
102
+ "narHash": "sha256-N5vmCrgwcIluPclf/hmnofLK77EJJYh5PR8SRvw++es=",
103
+ "owner": "huggingface",
104
+ "repo": "kernel-builder",
105
+ "rev": "a48cbd19ae7e425dfc1865188ef06dac43ab9244",
106
+ "type": "github"
107
+ },
108
+ "original": {
109
+ "owner": "huggingface",
110
+ "repo": "kernel-builder",
111
+ "type": "github"
112
+ }
113
+ },
114
+ "nixpkgs": {
115
+ "locked": {
116
+ "lastModified": 1755963616,
117
+ "narHash": "sha256-6yD0ww/S8n+U2uPYcJZ3DRURP8Kx036GRpR2uPNZroE=",
118
+ "owner": "nixos",
119
+ "repo": "nixpkgs",
120
+ "rev": "73e96df7cff5783f45e21342a75a1540c4eddce4",
121
+ "type": "github"
122
+ },
123
+ "original": {
124
+ "owner": "nixos",
125
+ "ref": "nixos-unstable-small",
126
+ "repo": "nixpkgs",
127
+ "type": "github"
128
+ }
129
+ },
130
+ "root": {
131
+ "inputs": {
132
+ "kernel-builder": "kernel-builder"
133
+ }
134
+ },
135
+ "systems": {
136
+ "locked": {
137
+ "lastModified": 1681028828,
138
+ "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
139
+ "owner": "nix-systems",
140
+ "repo": "default",
141
+ "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
142
+ "type": "github"
143
+ },
144
+ "original": {
145
+ "owner": "nix-systems",
146
+ "repo": "default",
147
+ "type": "github"
148
+ }
149
+ },
150
+ "systems_2": {
151
+ "locked": {
152
+ "lastModified": 1681028828,
153
+ "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
154
+ "owner": "nix-systems",
155
+ "repo": "default",
156
+ "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
157
+ "type": "github"
158
+ },
159
+ "original": {
160
+ "owner": "nix-systems",
161
+ "repo": "default",
162
+ "type": "github"
163
+ }
164
+ }
165
+ },
166
+ "root": "root",
167
+ "version": 7
168
+ }
flake.nix ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ description = "Flake for mra kernels";
3
+
4
+ inputs = {
5
+ kernel-builder.url = "github:huggingface/kernel-builder";
6
+ };
7
+
8
+ outputs =
9
+ {
10
+ self,
11
+ kernel-builder,
12
+ }:
13
+ kernel-builder.lib.genFlakeOutputs {
14
+ path = ./.;
15
+ rev = self.shortRev or self.dirtyShortRev or self.lastModifiedDate;
16
+ };
17
+ }